# Source code for chartify._core.plot

# -*- coding: utf-8 -*-
#
# Copyright (c) 2017-2018 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Module for chart plots.

"""

import bokeh
import pandas as pd
import numpy as np
from chartify._core.colors import Color, color_palettes
from chartify._core.axes import NumericalYMixin, NumericalXMixin

from scipy.stats import gaussian_kde


class BasePlot:
    """Base for all plot classes.

    Stores the parent chart and the name of the y range that glyphs are
    drawn against, and provides helpers shared by the concrete plot classes.
    """

    def __init__(self, chart, y_range_name="default"):
        self._chart = chart
        self._y_range_name = y_range_name

    @staticmethod
    def _axis_format_precision(max_value, min_value):
        """Return a tick format string sized to the span of the data.

        Args:
            max_value: Largest value shown on the axis.
            min_value: Smallest value shown on the axis.

        Returns:
            str: Format of the form ``"0,0.[000]"`` where the number of
            optional decimal places is derived from the order of magnitude
            of ``max_value - min_value``.
        """
        difference = abs(max_value - min_value)
        # A zero span has no order of magnitude, and a NaN/inf span (e.g. the
        # min/max of an empty column) would make `int()` raise below.
        # Fall back to a span of 1 in both cases.
        if not np.isfinite(difference) or difference == 0:
            difference = 1
        precision = abs(int(np.floor(np.log10(difference)))) + 1
        return "0,0.[{}]".format("0" * precision)

    @classmethod
    def _get_plot_class(cls, x_axis_type, y_axis_type):
        """Select the plot class that matches the chart's axis types.

        Raises:
            NotImplementedError: For a datetime x axis combined with a
                density y axis.
        """
        if x_axis_type == "categorical" and y_axis_type == "categorical":
            return PlotCategoricalXY
        elif x_axis_type not in ("categorical", "density") and y_axis_type not in (
            "categorical",
            "density",
        ):
            return PlotNumericXY
        elif x_axis_type == "density" and y_axis_type == "density":
            return PlotDensityXY
        elif x_axis_type == "datetime" and y_axis_type == "density":
            raise NotImplementedError("Plot for this axis type combination not yet implemented.")
        elif x_axis_type == "density" or y_axis_type == "density":
            return PlotNumericDensityXY
        else:
            return PlotMixedTypeXY

    def _get_color_and_order(self, data_frame, color_column, color_order, categorical_columns=None):
        """Resolve the colors and the factor order for a plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            color_column (str or None): Column to group by on the color
                dimension. When None a single color is taken from the
                chart's palette.
            color_order (list or None): Explicit ordering of the values in
                `color_column`. When None the sorted unique values are used.
            categorical_columns (list or None): When not None, the colors
                are returned as a bokeh `factor_cmap` instead of a list.

        Returns:
            colors: List of hex colors or factor_cmap.
            color_order: List of values for each color.

        Raises:
            ValueError: If `color_order` is given but does not contain every
                unique value of `color_column`.
        """
        if color_column is None:
            colors = [self._chart.style.color_palette.next_color()]
            color_order = [None]
        else:
            # Determine color order or verify integrity of specified order.
            if color_order is None:
                color_order = sorted(data_frame[color_column].unique())
            else:
                # Check that all color factors are present in the color order.
                if not set(data_frame[color_column].unique()).issubset(set(color_order)):
                    raise ValueError(
                        """Color order must include
                                     all unique factors of variable `%s`."""
                        % color_column
                    )

            next_colors = self._chart.style.color_palette.next_colors(color_order)
            if categorical_columns is None:  # Numeric data
                colors = next_colors
            else:
                # Categorical data: map the string factors to the palette
                # through the `color_column` field of the data source.
                color_label = "color_column"
                color_index = 0
                color_order = [str(factor) for factor in color_order]
                colors = bokeh.transform.factor_cmap(
                    color_label,
                    palette=next_colors,
                    factors=color_order,
                    start=color_index,
                    end=color_index + 1,
                )
        return colors, color_order

    @staticmethod
    def _plot_with_legend(method, **kwargs):
        """Call plotting method with the associated kwargs.

        Removes the legend parameter if it is set to None because
        Bokeh breaks if None is passed to a legend parameter.
        `legend_label` takes precedence over `legend_group` when both are set.
        """
        legend_label = kwargs.pop("legend_label", None)
        legend_group = kwargs.pop("legend_group", None)

        if legend_label is not None:
            return method(**kwargs, legend_label=legend_label)
        elif legend_group is not None:
            return method(**kwargs, legend_group=legend_group)
        else:
            return method(**kwargs)

    @staticmethod
    def _cannonical_series_name(series_name):
        """Return the data source name for a series; None maps to ``"Series:"``."""
        if series_name is None:
            series_name = ""
        return "Series:{}".format(series_name)

    @staticmethod
    def _named_column_data_source(data_frame, series_name):
        """Ensure consistent naming of column data sources.
        Naming ensures that Chart.data property will populate correctly.
        """
        cannonical_series_name = BasePlot._cannonical_series_name(series_name)
        return bokeh.models.ColumnDataSource(data_frame, name=cannonical_series_name)

    def _cast_datetime_axis(self, data_frame, column):
        """Cast `column` to ``datetime64[ns]`` when the chart has a datetime x axis.

        Returns the input frame untouched when the x axis is not a datetime
        axis or the column already has the ``datetime64[ns]`` dtype.
        """
        if self._chart._x_axis_type == "datetime":
            if data_frame[column].dtype != "datetime64[ns]":
                return data_frame.astype({column: "datetime64[ns]"})
        return data_frame

    def __getattr__(self, item):
        """Override attribute error"""
        # Only reached when normal attribute lookup fails, i.e. the requested
        # plot method does not exist on the class chosen for this chart.
        raise AttributeError(
            """Plot `{}` not available for the given Chart.
            Try changing the Chart parameters x_axis_type and y_axis_type.
            """.format(
                item
            )
        )

    def _set_numeric_axis_default_format(self, data_frame, x_column=None, y_column=None):
        """Set the default tick format of the numeric axes based on the data.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column plotted on the x axis.
            y_column (str): Column plotted on the y axis.

        Raises:
            ValueError: If the x axis is numeric but `x_column` holds
                datetime or object (e.g. string) data.
        """
        axes = self._chart.axes

        if isinstance(axes, NumericalXMixin):
            x_dtype = data_frame[x_column].dtype
            # Reject date data on a non-datetime axis. Any datetime64 dtype
            # counts (tz-aware or non-nanosecond included), not only
            # `datetime64[ns]`.
            if pd.api.types.is_datetime64_any_dtype(x_dtype):
                raise ValueError(
                    """Set chartify.Chart(x_axis_type='datetime')
                when plotting datetime data."""
                )
            # Reject data that hasn't been cast to a numeric dtype.
            elif x_dtype == "O":
                raise ValueError(
                    """Attempting to plot `{}` on a numeric
                    axis. Ensure that chartify.Chart x_axis_type and y_axis_type
                    are set properly, or cast your input data appropriately.
                    """.format(
                        x_column
                    )
                )

            # The span always includes zero so the format reflects the
            # distance of the data from the origin.
            max_x_value = max(data_frame[x_column].max(), 0)
            min_x_value = min(data_frame[x_column].min(), 0)
            axes.set_xaxis_tick_format(self._axis_format_precision(max_x_value, min_x_value))

        if isinstance(axes, NumericalYMixin):
            max_y_value = max(data_frame[y_column].max(), 0)
            min_y_value = min(data_frame[y_column].min(), 0)
            axes.set_yaxis_tick_format(self._axis_format_precision(max_y_value, min_y_value))


class PlotCategoricalXY(BasePlot):
    """Plot functions for categorical x & y axes:

    Methods:
        - heatmap
    """

    def heatmap(
        self,
        data_frame,
        x_column,
        y_column,
        color_column,
        text_column=None,
        color_palette="RdBu",
        reverse_color_order=False,
        text_color="white",
        text_format="{:,.2f}",
        color_value_min=None,
        color_value_max=None,
        color_value_range=100,
    ):
        """Heatmap.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column name to plot on the x axis.
            y_column (str): Column name to plot on the y axis.
            color_column (str): Column name of numerical type to plot on
                the color dimension.
            text_column (str or None): Column name of the text labels.
            color_palette (str, chartify.ColorPalette): Color palette to
                apply to the heatmap.
                See chartify.color_palettes.show() for available color palettes.
            reverse_color_order (bool): Reverse order of the color palette.
            text_color (str): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
            text_format: Python string formatting to apply to the text labels.
            color_value_min (float): Minimum value for the color palette.
                If None, will default to the min value of the
                color_column dimension.
            color_value_max (float): Maximum value for the color palette.
                If None, will default to the max value of the
                color_column dimension.
            color_value_range (int): The size of the range of colors in
                the color palette.
                A larger color range will result in greater variation
                among the cell colors.

        Returns:
            The chart this plot belongs to.
        """
        # Cast all categorical columns to strings
        # Plotting functions will break with non-str types.
        type_map = {column: str for column in [x_column, y_column]}
        self._chart.figure.x_range.factors = data_frame[x_column].astype(str).unique()
        self._chart.figure.y_range.factors = data_frame[y_column].astype(str).unique()

        cast_data = data_frame[[x_column, y_column, color_column]].astype(type_map)

        source = self._named_column_data_source(cast_data, series_name=None)
        if text_color:
            text_color = Color(text_color).get_hex_l()
        if isinstance(color_palette, str):
            color_palette = color_palettes[color_palette]
        if reverse_color_order:
            color_palette = color_palette[::-1]
        color_palette = color_palette.expand_palette(color_value_range)
        color_palette = [c.get_hex_l() for c in color_palette.colors]

        # If not specified set the min and max value based on the data.
        # Compare against None explicitly: 0 is a valid bound and must not be
        # replaced by the data min/max.
        if color_value_min is None:
            color_value_min = data_frame[color_column].min()
        if color_value_max is None:
            color_value_max = data_frame[color_column].max()
        mapper = bokeh.models.LinearColorMapper(palette=color_palette, low=color_value_min, high=color_value_max)
        self._chart.figure.rect(
            source=source,
            x=x_column,
            y=y_column,
            fill_color={"field": color_column, "transform": mapper},
            width=1,
            height=1,
            dilate=True,
            line_alpha=0,
        )

        if text_column:
            text_font = self._chart.style._get_settings("text_callout_and_plot")["font"]
            # Labels are formatted up front and attached to the same data
            # source as the cells so they share x/y positions.
            formatted_text = data_frame[text_column].map(text_format.format)
            source.add(formatted_text, "formatted_text")
            self._chart.figure.text(
                text="formatted_text",
                x=x_column,
                y=y_column,
                source=source,
                text_align="center",
                text_baseline="middle",
                text_color=text_color,
                text_font=text_font,
            )
        return self._chart


class PlotNumericXY(BasePlot):
    """Plot functions for numeric x & y axes:

    Methods:
        - line
        - scatter
        - text
        - area
    """

    def line(
        self,
        data_frame,
        x_column,
        y_column,
        color_column=None,
        color_order=None,
        line_dash="solid",
        line_width=4,
        alpha=1.0,
    ):
        """Line Chart.

        Note:
            This method will not automatically sort the x-axis.
            Try sorting the axis if the line graph looks strange.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column name to plot on the x axis.
            y_column (str): Column name to plot on the y axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
            line_dash (str, optional): Dash style for the line. One of:
                - 'solid'
                - 'dashed'
                - 'dotted'
                - 'dotdash'
                - 'dashdot'
            line_width (int, optional): Width of the line
            alpha (float): Alpha value.

        Returns:
            The chart this plot belongs to.
        """
        settings = self._chart.style._get_settings("line_plot")
        line_cap = settings["line_cap"]
        line_join = settings["line_join"]

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        self._set_numeric_axis_default_format(data_frame, x_column, y_column)

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single line
                sliced_data = data_frame
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value]
            # Filter to only relevant columns.
            sliced_data = sliced_data[[col for col in sliced_data.columns if col in (x_column, y_column, color_column)]]

            cast_data = self._cast_datetime_axis(sliced_data, x_column)

            source = self._named_column_data_source(cast_data, series_name=color_value)

            # Legend labels must be strings; None means "no legend entry".
            color_value = str(color_value) if color_value is not None else color_value

            self._plot_with_legend(
                self._chart.figure.line,
                legend_label=color_value,
                x=x_column,
                y=y_column,
                source=source,
                line_width=line_width,
                color=color,
                line_join=line_join,
                line_cap=line_cap,
                line_dash=line_dash,
                alpha=alpha,
                y_range_name=self._y_range_name,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart

    def scatter(
        self,
        data_frame,
        x_column,
        y_column,
        size_column=None,
        color_column=None,
        color_order=None,
        alpha=1.0,
        marker="circle",
    ):
        """Scatter plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column name to plot on the x axis.
            y_column (str): Column name to plot on the y axis.
            size_column (str, optional): Column name of numerical values
                to plot on the size dimension.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
            alpha (float): Alpha value.
            marker (str): marker type. Valid types:
                'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross',
                'diamond', 'diamond_cross', 'hex', 'inverted_triangle',
                'square', 'square_x', 'square_cross', 'triangle',
                'x', '*', '+', 'o', 'ox', 'o+'

        Returns:
            The chart this plot belongs to.
        """
        # Without a size column every marker gets the same fixed size of 6.
        if size_column is None:
            size_column = 6

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        self._set_numeric_axis_default_format(data_frame, x_column, y_column)

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single series
                sliced_data = data_frame
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value]
            # Filter to only relevant columns.
            sliced_data = sliced_data[
                [col for col in sliced_data.columns if col in (x_column, y_column, size_column, color_column)]
            ]
            cast_data = self._cast_datetime_axis(sliced_data, x_column)

            source = self._named_column_data_source(cast_data, series_name=color_value)

            # Legend labels must be strings; None means "no legend entry".
            color_value = str(color_value) if color_value is not None else color_value

            self._plot_with_legend(
                self._chart.figure.scatter,
                legend_label=color_value,
                x=x_column,
                y=y_column,
                size=size_column,
                source=source,
                fill_color=color,
                marker=marker,
                line_color=color,
                alpha=alpha,
                y_range_name=self._y_range_name,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart

    def text(
        self,
        data_frame,
        x_column,
        y_column,
        text_column,
        color_column=None,
        color_order=None,
        font_size="1em",
        x_offset=0,
        y_offset=0,
        angle=0,
        text_color=None,
    ):
        """Text plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column name to plot on the x axis.
            y_column (str): Column name to plot on the y axis.
            text_column (str): Column name to plot as text labels.
            color_column (str, optional): Column name to group by on the
                color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
            font_size (str, optional): Size of text.
            x_offset (int, optional): # of pixels for horizontal text offset.
                Can be negative. Default: 0.
            y_offset (int, optional): # of pixels for vertical text offset.
                Can be negative. Default: 0.
            angle (int): Degrees from horizontal for text rotation.
            text_color (str): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
                If omitted, will default to the next color in the
                current color palette.
                When given, it overrides `color_column`: all labels are
                drawn as a single series in this color.

        Returns:
            The chart this plot belongs to.
        """
        text_font = self._chart.style._get_settings("text_callout_and_plot")["font"]
        if text_color:
            text_color = Color(text_color).get_hex_l()
            colors, color_values = [text_color], [None]
            # An explicit text color replaces the color grouping, so every
            # row belongs to the one series. Filtering on `color_column`
            # here would compare against None and drop all rows.
            single_series = True
        else:
            colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)
            single_series = color_column is None

        self._set_numeric_axis_default_format(data_frame, x_column, y_column)

        for color_value, color in zip(color_values, colors):
            if single_series:
                sliced_data = data_frame
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value]
            # Filter to only relevant columns.
            sliced_data = sliced_data[
                [col for col in sliced_data.columns if col in (x_column, y_column, text_column, color_column)]
            ]
            cast_data = self._cast_datetime_axis(sliced_data, x_column)

            source = self._named_column_data_source(cast_data, series_name=color_value)

            self._chart.figure.text(
                text=text_column,
                x=x_column,
                y=y_column,
                text_font_size=font_size,
                source=source,
                text_color=color,
                y_offset=y_offset,
                x_offset=x_offset,
                angle=angle,
                angle_units="deg",
                text_font=text_font,
                y_range_name=self._y_range_name,
            )
        return self._chart

    def area(
        self,
        data_frame,
        x_column,
        y_column,
        second_y_column=None,
        color_column=None,
        color_order=None,
        stacked=False,
    ):
        """Area plot.

        Note:
            - When a single y_column is passed: Shade area between the
                y_values and zero.
            - Use `stacked` argument for stacked areas.
            - When both y_column and second_y_column are passed:
                Shade area between the two y_columns.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_column (str): Column name to plot on the x axis.
            y_column (str): Column name to plot on the y axis.
            second_y_column (str, optional): Column name to plot on
                the y axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
            stacked (bool, optional): Stacked the areas.
                Only applicable with a single y_column.
                Default: False.

        Returns:
            The chart this plot belongs to.
        """
        # NOTE: this method is also invoked with a non-PlotNumericXY `self`
        # (see PlotNumericDensityXY.kde), so it must only rely on attributes
        # and helpers defined on BasePlot.

        # Vertical option only applies to density plots
        vertical = self._chart.axes._vertical

        alpha = 0.2
        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        self._set_numeric_axis_default_format(data_frame, x_column, y_column)

        if color_column is not None:
            # Give every color group a row for every x value (missing
            # combinations are filled with 0) so the groups have equal length.
            data_frame = (
                data_frame.set_index([x_column, color_column])
                .reindex(
                    index=pd.MultiIndex.from_product(
                        [
                            data_frame[x_column].unique(),
                            data_frame[color_column].unique(),
                        ],
                        names=[x_column, color_column],
                    )
                )
                .reset_index(drop=False)
                .fillna(0)
            )

        if second_y_column is None and color_column is not None:
            # Baseline of the first group: zeros, one per x value.
            last_y = np.zeros(data_frame.groupby(color_column).size().iloc[0])

        # When the chart is not vertical the patch coordinates are swapped.
        if vertical:
            patch_x, patch_y = x_column, y_column
        else:
            patch_x, patch_y = y_column, x_column

        for color_value, color in zip(color_values, colors):
            # Each patch is traced forward along one edge and back along the
            # other, hence the reversed second halves below.
            if color_column is None:
                data = data_frame

                if second_y_column is None:
                    alpha = 0.8
                    y_data = np.hstack((data[y_column], np.zeros(len(data[y_column]))))
                else:
                    y_data = pd.concat([data[y_column], data[second_y_column][::-1]])

            else:
                data = data_frame[data_frame[color_column] == color_value]

                if second_y_column is None:
                    y_data = np.hstack((data[y_column].reset_index(drop=True), last_y[::-1]))

                    if stacked:
                        alpha = 0.8
                        next_y = last_y + data[y_column].reset_index(drop=True)
                        y_data = np.hstack((next_y, last_y[::-1]))
                        last_y = next_y
                        # Reverse order of vertical legends to ensure
                        # that the order is consistent with the stack order.
                        self._chart._reverse_vertical_legend = True
                else:
                    y_data = pd.concat([data[y_column], data[second_y_column][::-1]])

            x_data = pd.concat([data[x_column], data[x_column][::-1]])

            sliced_data = pd.DataFrame({x_column: x_data, y_column: y_data})
            cast_data = self._cast_datetime_axis(sliced_data, x_column)
            source = self._named_column_data_source(cast_data, series_name=color_value)

            # Legend labels must be strings; None means "no legend entry".
            color_value = str(color_value) if color_value is not None else color_value

            self._plot_with_legend(
                self._chart.figure.patch,
                legend_label=color_value,
                x=patch_x,
                y=patch_y,
                alpha=alpha,
                source=source,
                color=color,
                y_range_name=self._y_range_name,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart


class PlotNumericDensityXY(BasePlot):
    """Plot functions for single density:

    Methods:
        - histogram
        - kde
    """

    def histogram(
        self,
        data_frame,
        values_column,
        color_column=None,
        color_order=None,
        method="count",
        bins="auto",
    ):
        """Histogram.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            values_column (str): Column of numeric values.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
            method (str, optional):
            - 'count': Result will contain the number of samples at each bin.
            - 'density': Result is the value of the probability density
                function at each bin.
                The PDF is normalized so that the integral over the range is 1.
            - 'mass': Result is the value of the probability mass
                function at each bin.
                The PMF is normalized so that the value is equivalent to
                the sample count at each bin divided by the total count.
            bins (int or sequence of scalars or str, optional):
                If bins is an int, it defines the number of equal-width
                bins in the given range.
                If bins is a sequence, it defines the bin edges,
                including the rightmost edge, allowing for non-uniform
                bin widths. See numpy.histogram documentation for more details.
            - 'auto':
                Maximum of the 'sturges' and 'fd' estimators.
                Provides good all around performance.
            - 'fd' (Freedman Diaconis Estimator)
                Robust (resilient to outliers) estimator that takes into
                account data variability and data size.
            - 'doane'
                An improved version of Sturges' estimator that works
                better with non-normal datasets.
            - 'scott'
                Less robust estimator that takes into account data
                variability and data size.
            - 'rice'
                Estimator does not take variability into account, only
                data size. Commonly overestimates number of bins required.
            - 'sturges'
                R's default method, only accounts for data size.
                Only optimal for gaussian data and underestimates number
                of bins for large non-gaussian datasets.
            - 'sqrt'
                Square root (of data size) estimator, used by Excel and
                other programs for its speed and simplicity.

        Returns:
            The chart this plot belongs to.
        """
        vertical = self._chart.axes._vertical

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single series
                sliced_data = data_frame[[values_column]]
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value][[values_column]]

            # Any method other than 'density' starts from raw counts.
            hist, edges = np.histogram(sliced_data, density=(method == "density"), bins=bins)

            if method == "mass":
                hist = hist * 1.0 / hist.sum()

            histogram_data = pd.DataFrame({"values": hist, "min_edge": edges[:-1], "max_edge": edges[1:]})

            source = self._named_column_data_source(histogram_data, series_name=color_value)

            # Legend labels must be strings; None means "no legend entry".
            color_value = str(color_value) if color_value is not None else color_value

            if vertical:
                # Bars grow upwards from zero; bins run along the x axis.
                self._plot_with_legend(
                    self._chart.figure.quad,
                    legend_label=color_value,
                    top="values",
                    bottom=0,
                    left="min_edge",
                    right="max_edge",
                    source=source,
                    fill_color=color,
                    line_color=color,
                    alpha=0.3,
                )

            else:
                # Bars grow rightwards from zero; bins run along the y axis.
                self._plot_with_legend(
                    self._chart.figure.quad,
                    legend_label=color_value,
                    top="max_edge",
                    bottom="min_edge",
                    left=0,
                    right="values",
                    source=source,
                    fill_color=color,
                    line_color=color,
                    alpha=0.3,
                )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart

    def kde(self, data_frame, values_column, color_column=None, color_order=None):
        """Kernel Density Estimate Plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            values_column (str): Column of numeric values.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific sorting of the colors.
                Values that do not occur in the data are skipped because
                no density can be estimated for them.

        Returns:
            The chart this plot belongs to.

        Raises:
            ValueError: If `color_order` is given but does not contain every
                unique value of `color_column`.
        """
        # The vertical/horizontal orientation is handled by the area plot
        # that draws the estimated densities.

        if color_column is None:
            color_values = [None]
        else:
            present_values = set(data_frame[color_column].unique())
            if color_order is None:
                color_values = sorted(present_values)
            else:
                # Same integrity rule as the other plots: the order must
                # cover every factor that occurs in the data.
                if not present_values.issubset(set(color_order)):
                    raise ValueError(
                        "Color order must include all unique factors of variable `%s`." % color_column
                    )
                # Keep the caller's ordering, restricted to observed values.
                color_values = [value for value in color_order if value in present_values]

        # Estimate one density per series on a 300 point grid spanning the
        # observed values, and collect them in long format for the area plot.
        frames = []
        for color_value in color_values:
            if color_column is None:  # Single series
                sliced_data = data_frame
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value]
            values = sliced_data[values_column]

            kde = gaussian_kde(values)
            index = np.linspace(values.min(), values.max(), 300)
            kde_pdf = kde.evaluate(index)
            frames.append(pd.DataFrame({"x": index, "y": kde_pdf, "color": color_value}))

        # Concatenate once instead of growing a frame inside the loop.
        data = pd.concat(frames, axis=0) if frames else pd.DataFrame()

        color_column = "color" if color_column is not None else None

        # Reuse the numeric area plot; it only relies on BasePlot helpers.
        PlotNumericXY.area(
            self,
            data,
            "x",
            "y",
            color_column=color_column,
            color_order=color_values,
            stacked=False,
        )

        return self._chart


class PlotDensityXY(BasePlot):
    """Plot functions for density X & Y:

    Methods:
        - hexbin
    """

    def hexbin(
        self,
        data_frame,
        x_values_column,
        y_values_column,
        size,
        color_palette="Blues",
        reverse_color_order=False,
        orientation="pointytop",
        color_value_range=10,
    ):
        """Hexbin.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            x_values_column (str): Column of numeric values to bin into tiles.
            y_values_column (str): Column of numeric values to bin into tiles.
            size (float): Bin size for the tiles.
            color_palette (str, chartify.ColorPalette): Color palette to
                apply to the tiles. A string is looked up in
                `color_palettes`.
                See chartify.color_palettes.show() for available color palettes.
            reverse_color_order (bool): Reverse order of the color palette.
            orientation (str): "pointytop" or "flattop". Whether the hexagonal
                tiles should be oriented with a pointed corner on top, or a
                flat side on top.
            color_value_range (int): The size of the range of colors in
                the color palette.
                A larger color range will result in greater variation
                among the cell colors.

        Returns:
            The chart this plot object is attached to.
        """
        # Resolve the palette, then turn it into a list of hex color strings.
        palette = color_palettes[color_palette] if isinstance(color_palette, str) else color_palette
        if reverse_color_order:
            palette = palette[::-1]
        expanded_palette = palette.expand_palette(color_value_range)
        hex_colors = [shade.get_hex_l() for shade in expanded_palette.colors]

        # The tiles are only symmetric when the figure's aspect scale matches
        # the plot's width-to-height ratio.
        style = self._chart.style
        width_to_height = style.plot_width / style.plot_height
        figure = self._chart.figure
        figure.match_aspect = True
        figure.aspect_scale = width_to_height
        figure.hexbin(
            data_frame[x_values_column],
            data_frame[y_values_column],
            size=size,
            orientation=orientation,
            aspect_scale=width_to_height,
            palette=hex_colors,
            line_color="white",
        )

        return self._chart


[docs]class PlotMixedTypeXY(BasePlot):
    """Plot functions for mixed type x & y axes:

    Methods:
        - bar
        - bar_stacked
        - lollipop
        - parallel
    """

    def _set_categorical_axis_default_factors(self, vertical, factors):
        """Reassign the categorical axis with the given factors."""
        if vertical:
            self._chart.figure.x_range.factors = factors
        else:
            self._chart.figure.y_range.factors = factors

    def _set_categorical_axis_default_range(self, vertical, data_frame, numeric_column):
        """Set numeric axis range based on the input data."""
        max_value = data_frame[numeric_column].max()
        min_value = data_frame[numeric_column].min()

        max_ge_zero = max_value >= 0
        min_ge_zero = min_value >= 0

        range_start, range_end = None, None
        if max_ge_zero and min_ge_zero:
            range_start = 0
        elif not max_ge_zero and not min_ge_zero:
            range_end = 0

        max_value = max(max_value, 0)
        min_value = min(min_value, 0)

        if vertical:
            self._chart.axes.set_yaxis_range(start=range_start, end=range_end)
            self._chart.axes.set_yaxis_tick_format(self._axis_format_precision(max_value, min_value))
        else:
            self._chart.axes.set_xaxis_range(start=range_start, end=range_end)
            self._chart.axes.set_xaxis_tick_format(self._axis_format_precision(max_value, min_value))

    @staticmethod
    def _get_bar_width(factors):
        """Get the bar width based on the number of factors"""
        n_factors = len(factors)
        if n_factors == 1:
            return 0.3
        elif n_factors == 2:
            return 0.5
        elif n_factors == 3:
            return 0.7
        else:
            return 0.9

    @staticmethod
    def _sort_categories_by_value(source, categorical_columns, categorical_order_ascending):
        # Recursively sort values within each level of the index.
        row_totals = source.sum(axis=1, numeric_only=True)
        row_totals.name = "sum"
        old_index = row_totals.index
        row_totals = row_totals.reset_index()
        row_totals.columns = ["_%s" % col for col in row_totals.columns]
        row_totals.index = old_index

        hierarchical_sort_cols = categorical_columns[:]
        for i, _ in enumerate(hierarchical_sort_cols):
            row_totals["level_%s" % i] = row_totals.groupby(hierarchical_sort_cols[: i + 1])["_sum"].transform(
                "sum"
            )
        row_totals = row_totals.sort_values(
            by=["level_%s" % i for i, _ in enumerate(hierarchical_sort_cols)],
            ascending=categorical_order_ascending,
        )
        return source.reindex(row_totals.index)

    @staticmethod
    def _sort_categories(
            source,
            categorical_columns,
            categorical_order_by,
            categorical_order_ascending
    ):

        is_string = isinstance(categorical_order_by, str)
        order_length = getattr(categorical_order_by, "__len__", None)
        # Sort the categories
        if is_string and categorical_order_by == "values":
            return PlotMixedTypeXY._sort_categories_by_value(
                source, categorical_columns, categorical_order_ascending)
        elif is_string and categorical_order_by == "labels":
            return source.sort_index(axis=0, ascending=categorical_order_ascending)
        # Manual sort
        elif not is_string and order_length is not None:
            return source.reindex(categorical_order_by, axis="index")

        raise ValueError("""Must be 'values', 'labels', or a list of values.""")

    def _construct_source(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        stack_column=None,
        normalize=False,
        categorical_order_by=None,
        categorical_order_ascending=False,
        color_column=None,
    ):
        """Build the data source used by the categorical plots.

        The data frame is pivoted so that each categorical grouping becomes a
        row and each stack value (cast to str) becomes a column holding the
        summed `numeric_column`. Missing cells are filled with 0, rows are
        optionally normalized to their row total, ordered through
        `_sort_categories`, and the index labels are cast to strings.

        Returns:
            source: object returned by `_named_column_data_source`
                (documented as a ColumnDataSource), with the factors added
                under the name "factors"
            factors: index of categorical factors, as strings
            stack_values: column labels of the pivoted frame

        Raises:
            ValueError: If any categorical (and stack) grouping has more than
                one row.
        """
        # Work on a fresh list of the categorical columns.
        if isinstance(categorical_columns, str):
            categorical_columns = [categorical_columns]
        else:
            categorical_columns = list(categorical_columns)

        # Each grouping may be observed at most once.
        group_keys = categorical_columns + ([stack_column] if stack_column is not None else [])
        group_sizes = data_frame.groupby(group_keys).size()
        if not all(group_sizes <= 1):
            raise ValueError(
                """Each categorical grouping should have at most 1 observation.
                Group the dataframe and aggregate before passing to
                the plot function.
                """
            )

        # The stack column is cast to str because the plotting functions
        # break with non-str types. The cast is applied to a copy, so the
        # caller's data frame is left unmodified.
        stack_casts = {} if stack_column is None else {stack_column: str}
        source = pd.pivot_table(
            data_frame.astype(stack_casts),
            columns=stack_column,
            index=categorical_columns,
            values=numeric_column,
            aggfunc="sum",
        )
        # NA columns break the stacks
        # Might want to make this conditional in the future for parallel plots.
        source = source.fillna(0)

        if color_column:
            # Attach the color value of each category as a "color_column".
            color_lookup = data_frame.astype(stack_casts)
            color_lookup["color_column"] = color_lookup[color_column].astype(str)
            color_lookup = color_lookup.set_index(categorical_columns)["color_column"]
            source = source.join(color_lookup)

        # Normalize each row to its total; only relevant for stacked objects.
        if normalize:
            source = source.div(source.sum(axis=1), axis=0)

        source = self._sort_categories(source, categorical_columns, categorical_order_by, categorical_order_ascending)

        # Index labels are cast to str for the same reason as the stack values.
        if isinstance(source.index, pd.MultiIndex):
            for position in range(len(source.index.levels)):
                string_level = source.index.levels[position].astype(str)
                source.index = source.index.set_levels(string_level, level=position)
        else:
            source.index = source.index.astype(str)

        factors = source.index
        source = source.reset_index(drop=True)
        stack_values = source.columns
        source = self._named_column_data_source(source, series_name=None)
        source.add(factors, "factors")

        return source, factors, stack_values

    @staticmethod
    def _compute_boxplot_df(data_frame, categorical_columns, numeric_column):
        """Computes the data frames for a boxplot.

        Returns:
            quantlies_and_bounds: data frame for the boxes and whiskers of a
                boxplot
            outliers: data frame with outliers
        """
        # compute quantiles
        q_frame = data_frame.groupby(categorical_columns)[numeric_column].quantile([0.25, 0.5, 0.75])
        q_frame = q_frame.unstack().reset_index()
        q_frame.columns = categorical_columns + ["q1", "q2", "q3"]
        df_with_quantiles = pd.merge(data_frame, q_frame, on=categorical_columns, how="left")

        # compute IQR outlier bounds
        iqr = df_with_quantiles.q3 - df_with_quantiles.q1
        df_with_quantiles["upper"] = df_with_quantiles.q3 + 1.5 * iqr
        df_with_quantiles["lower"] = df_with_quantiles.q1 - 1.5 * iqr

        # adjust outlier bounds to closest observations still within bounds
        # for upper bound
        le_upper = df_with_quantiles[df_with_quantiles[numeric_column].le(df_with_quantiles.upper)]
        group_max_le_upper = le_upper.groupby(categorical_columns, as_index=False)[numeric_column].max()
        group_max_le_upper.columns = categorical_columns + ["upper"]

        df_with_quantiles.drop("upper", axis=1, inplace=True)
        df_with_quantiles = pd.merge(df_with_quantiles, group_max_le_upper, on=categorical_columns, how="left")

        # for lower bound
        ge_lower = df_with_quantiles[df_with_quantiles[numeric_column].ge(df_with_quantiles.lower)]
        group_min_ge_lower = ge_lower.groupby(categorical_columns, as_index=False)[numeric_column].min()
        group_min_ge_lower.columns = categorical_columns + ["lower"]
        df_with_quantiles.drop("lower", axis=1, inplace=True)
        df_with_quantiles = pd.merge(df_with_quantiles, group_min_ge_lower, on=categorical_columns, how="left")

        quantiles_and_bounds = (
            df_with_quantiles.groupby(categorical_columns)[["q1", "q2", "q3", "lower", "upper"]].first().reset_index()
        )

        outliers = df_with_quantiles[
            ~df_with_quantiles[numeric_column].between(df_with_quantiles.lower, df_with_quantiles.upper)
        ]

        return quantiles_and_bounds, outliers

[docs]    def text(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        text_column,
        color_column=None,
        color_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
        font_size="1em",
        x_offset=0,
        y_offset=0,
        angle=0,
        text_color=None,
    ):
        """Text plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            text_column (str): Column name to plot as text labels.
            color_column (str, optional): Column name to group by on the
                color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical axis
                    values. Default.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional): Sort order of the
                categorical axis. Default False.
            font_size (str, optional): Size of text.
            x_offset (int, optional): # of pixels for horizontal text offset.
                Can be negative. Default: 0.
            y_offset (int, optional): # of pixels for vertical text offset.
                Can be negative. Default: 0.
            angle (int): Degrees from horizontal for text rotation.
            text_color (str): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
                If omitted, will default to the next color in
                the current color palette.
        """
        vertical = self._chart.axes._vertical
        text_font = self._chart.style._get_settings("text_callout_and_plot")["font"]

        source, factors, _ = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )

        if text_color:
            text_color = Color(text_color).get_hex_l()
            colors, color_values = [text_color], [None]
        else:
            colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        self._set_categorical_axis_default_factors(vertical, factors)

        if vertical:
            text_align = "center"
            text_baseline = "bottom"
            x_value, y_value = "factors", numeric_column
            y_offset = y_offset - 4
        else:
            y_value, x_value = "factors", numeric_column
            text_align = "left"
            text_baseline = "middle"
            x_offset = x_offset + 10

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single series
                sliced_data = data_frame
            else:
                sliced_data = data_frame[data_frame[color_column] == color_value]

            # Construct a new source based on the sliced data.
            source, _, _ = self._construct_source(
                sliced_data,
                categorical_columns,
                numeric_column,
                categorical_order_by=categorical_order_by,
                categorical_order_ascending=categorical_order_ascending,
            )
            sliced_data = (
                sliced_data.astype(str).set_index(categorical_columns).reindex(source.data["factors"]).reset_index()
            )
            # Text column isn't in the source so it needs to be added.
            sliced_data["text_column"] = sliced_data[text_column]
            source.add(sliced_data["text_column"], name="text_column")

            self._chart.figure.text(
                text="text_column",
                x=x_value,
                y=y_value,
                text_font_size=font_size,
                source=source,
                text_color=color,
                y_offset=y_offset,
                x_offset=x_offset,
                angle=angle,
                angle_units="deg",
                text_align=text_align,
                text_baseline=text_baseline,
                text_font=text_font,
            )

        return self._chart

[docs]    def text_stacked(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        stack_column,
        text_column,
        normalize=False,
        stack_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
        font_size="1em",
        x_offset=0,
        y_offset=0,
        angle=0,
        text_color=None,
    ):
        """Text plot for use with stacked plots.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            text_column (str): Column name to plot as text labels.
                Note: Null text values will be omitted from the plot.
            stack_column (str): Column name to group by on the stack dimension.
            normalize (bool, optional): Normalize numeric dimension for
                100% stacked bars. Default False.
            stack_order (list, optional): List of values within the
                'stack_column' dimension for specific stack sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical
                    axis values. Default.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional): Sort order of the
                categorical axis. Default False.
            font_size (str, optional): Size of text.
            x_offset (int, optional): # of pixels for horizontal text offset.
                Can be negative. Default: 0.
            y_offset (int, optional): # of pixels for vertical text offset.
                Can be negative. Default: 0.
            angle (int): Degrees from horizontal for text rotation.
            text_color (str): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
                If omitted, will default to the next color in
                the current color palette.
        """
        vertical = self._chart.axes._vertical
        text_font = self._chart.style._get_settings("text_callout_and_plot")["font"]

        source, factors, stack_values = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            stack_column,
            normalize=normalize,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )

        if text_color:
            text_color = Color(text_color).get_hex_l()
            if stack_order is None:
                stack_order = sorted(data_frame[stack_column].unique())
            else:
                # If stack order is set then
                # make sure it includes all the levels.
                if not set(data_frame[stack_column].unique()).issubset(set(stack_order)):
                    raise ValueError(
                        """Color order must include
                                    all unique factors of variable `%s`."""
                        % stack_order
                    )
            colors, color_values = [text_color] * len(data_frame[stack_column].unique()), stack_order
        else:
            colors, color_values = self._get_color_and_order(data_frame, stack_column, stack_order)

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_categorical_axis_default_range(vertical, data_frame, numeric_column)

        # Set numeric axis format to percentages.
        if normalize:
            if vertical:
                self._chart.axes.set_yaxis_tick_format("0%")
            else:
                self._chart.axes.set_xaxis_tick_format("0%")

        text_baseline = "middle"
        if vertical:
            text_align = "center"
        else:
            text_align = "left"
            x_offset = x_offset + 10

        cumulative_numeric_value = None

        for color_value, color in zip(color_values, colors):
            sliced_data = data_frame[(data_frame[stack_column] == color_value)]
            # Reindex to be consistent with the factors.
            type_map = {column: str for column in categorical_columns}
            sliced_data = (
                sliced_data.astype(type_map).set_index(categorical_columns).reindex(index=factors).reset_index()
            )

            text_values = np.where(
                sliced_data[text_column].isna(),
                "",
                sliced_data[text_column].astype(str),
            )

            if cumulative_numeric_value is not None:
                cumulative_numeric_value = cumulative_numeric_value + source.data[color_value] * 0.5
            else:
                cumulative_numeric_value = source.data[color_value] * 0.5

            if vertical:
                x_value, y_value = factors, cumulative_numeric_value
            else:
                y_value, x_value = factors, cumulative_numeric_value

            self._chart.figure.text(
                text=text_values,
                x=x_value,
                y=y_value,
                text_font_size=font_size,
                text_color=color,
                y_offset=y_offset,
                x_offset=x_offset,
                angle=angle,
                angle_units="deg",
                text_align=text_align,
                text_baseline=text_baseline,
                text_font=text_font,
            )

            cumulative_numeric_value = cumulative_numeric_value + source.data[color_value] * 0.5

        return self._chart

[docs]    def bar(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        color_column=None,
        color_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
    ):
        """Bar chart.

        Note:
            To change the orientation set x_axis_type or y_axis_type
            argument of the Chart object.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical
                    axis values. Default.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional): Sort order of the
                categorical axis. Default False.
        """
        vertical = self._chart.axes._vertical

        source, factors, _ = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
            color_column=color_column,
        )

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order, categorical_columns)

        if color_column is None:
            colors = colors[0]

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_categorical_axis_default_range(vertical, data_frame, numeric_column)
        bar_width = self._get_bar_width(factors)

        if color_column:
            legend = bokeh.core.properties.field("color_column")
            legend = "color_column"
        else:
            legend = None

        if vertical:
            self._plot_with_legend(
                self._chart.figure.vbar,
                legend_group=legend,
                x="factors",
                width=bar_width,
                top=numeric_column,
                bottom=0,
                line_color="white",
                source=source,
                fill_color=colors,
            )

        else:
            self._plot_with_legend(
                self._chart.figure.hbar,
                legend_group=legend,
                y="factors",
                height=bar_width,
                right=numeric_column,
                left=0,
                line_color="white",
                source=source,
                fill_color=colors,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")
        return self._chart

[docs]    def interval(
        self,
        data_frame,
        categorical_columns,
        lower_bound_column,
        upper_bound_column,
        middle_column=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
        color="black",
    ):
        """Interval.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            lower_bound_column (str): Column name to plot on the
                numerical axis for the lower bound.
            upper_bound_column (str): Column name to plot on the
                numerical axis for the upper bound.
            middle_column (str, optional): Column name to plot on the
                numerical axis for the middle tick.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical
                    axis values. Default.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional): Sort order of the
                categorical axis. Default False.
            color (str): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
        """
        interval_color = Color(color).get_hex_l()

        vertical = self._chart.axes._vertical

        _, factors, _ = self._construct_source(
            data_frame,
            categorical_columns,
            lower_bound_column,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )
        self._set_categorical_axis_default_factors(vertical, factors)

        # Set the axis precision
        max_value = max(data_frame[lower_bound_column].max(), data_frame[upper_bound_column].max())
        min_value = min(data_frame[lower_bound_column].min(), data_frame[upper_bound_column].min())
        max_value, min_value = max(max_value, 0), min(min_value, 0)
        if vertical:
            self._chart.axes.set_yaxis_tick_format(self._axis_format_precision(max_value, min_value))
        else:
            self._chart.axes.set_xaxis_tick_format(self._axis_format_precision(max_value, min_value))

        interval_settings = self._chart.style._get_settings("interval_plot")
        SPACE_BETWEEN_BARS = interval_settings["space_between_bars"]
        MARGIN = interval_settings["margin"]
        BAR_WIDTH = interval_settings["bar_width"]
        SPACE_BETWEEN_CATEGORIES = interval_settings["space_between_categories"]
        INTERVAL_END_STEM_SIZE = interval_settings["interval_end_stem_size"]
        INTERVAL_MIDPOINT_STEM_SIZE = interval_settings["interval_midpoint_stem_size"]

        def bar_edges(index, category_number):
            """Return start, midpoint, end edge coordinates"""
            bar_num = index + 1
            start = (
                bar_num * MARGIN
                + (bar_num - 1) * MARGIN
                + (bar_num - 1) * (BAR_WIDTH)
                + SPACE_BETWEEN_BARS * (bar_num - 1)
                + SPACE_BETWEEN_CATEGORIES * (category_number - 1)
            )
            midpoint = start + BAR_WIDTH / 2.0
            end = start + BAR_WIDTH
            return (start, midpoint, end)

        aggregate_columns = [lower_bound_column, upper_bound_column]
        if middle_column is not None:
            aggregate_columns.append(middle_column)
        # Categorical_columns to List
        if not isinstance(categorical_columns, str):
            categorical_columns = [c for c in categorical_columns]
        else:
            categorical_columns = [categorical_columns]
        # Cast categorical columns to str to prevent dates from breaking
        type_map = {column: str for column in categorical_columns}
        values = (
            data_frame.astype(type_map)
            .groupby(categorical_columns)[aggregate_columns]
            .sum()
            .reindex(factors)
            .reset_index()
        )
        # Need to keep track of changes to categorical columns
        # To calculate spacing between values
        values["new_heirarchy"] = False
        if len(categorical_columns) > 1:
            for col in categorical_columns[:-1]:
                values["new_column"] = values[col] != values[col].shift(1)
                values["new_heirarchy"] = values[["new_heirarchy", "new_column"]].max(axis=1)
            values["category_number"] = values["new_heirarchy"].cumsum()
        else:
            values["category_number"] = 1
        for index, row in values.iterrows():
            bar_midpoint = bar_edges(index, row["category_number"])[1]
            if vertical:
                # Vertical line
                self._chart.figure.segment(
                    bar_midpoint,
                    row[lower_bound_column],
                    bar_midpoint,
                    row[upper_bound_column],
                    color=interval_color,
                )
                # Top
                self._chart.figure.segment(
                    bar_midpoint - INTERVAL_END_STEM_SIZE,
                    row[upper_bound_column],
                    bar_midpoint + INTERVAL_END_STEM_SIZE,
                    row[upper_bound_column],
                    color=interval_color,
                )
                # Bottom
                self._chart.figure.segment(
                    bar_midpoint - INTERVAL_END_STEM_SIZE,
                    row[lower_bound_column],
                    bar_midpoint + INTERVAL_END_STEM_SIZE,
                    row[lower_bound_column],
                    color=interval_color,
                )
                # Middle
                if middle_column is not None:
                    self._chart.figure.segment(
                        bar_midpoint - INTERVAL_MIDPOINT_STEM_SIZE,
                        row[middle_column],
                        bar_midpoint + INTERVAL_MIDPOINT_STEM_SIZE,
                        row[middle_column],
                        color=interval_color,
                    )
            else:
                # Horizontal line
                self._chart.figure.segment(
                    row[lower_bound_column],
                    bar_midpoint,
                    row[upper_bound_column],
                    bar_midpoint,
                    color=interval_color,
                )
                # Left
                self._chart.figure.segment(
                    row[lower_bound_column],
                    bar_midpoint - INTERVAL_END_STEM_SIZE,
                    row[lower_bound_column],
                    bar_midpoint + INTERVAL_END_STEM_SIZE,
                    color=interval_color,
                )
                # Right
                self._chart.figure.segment(
                    row[upper_bound_column],
                    bar_midpoint - INTERVAL_END_STEM_SIZE,
                    row[upper_bound_column],
                    bar_midpoint + INTERVAL_END_STEM_SIZE,
                    color=interval_color,
                )
                # Middle
                if middle_column is not None:
                    self._chart.figure.segment(
                        row[middle_column],
                        bar_midpoint - INTERVAL_MIDPOINT_STEM_SIZE,
                        row[middle_column],
                        bar_midpoint + INTERVAL_MIDPOINT_STEM_SIZE,
                        color=interval_color,
                    )
        return self._chart

[docs]    def bar_stacked(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        stack_column,
        normalize=False,
        stack_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
    ):
        """Plot stacked bar chart.

        Note:
            - To change the orientation set x_axis_type or y_axis_type
            argument of the Chart object.
            - Stacked numeric values must be all positive or all negative.
            To plot both positive and negative values on the same chart
            call this method twice. Once for the positive values and
            once for the negative values.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            stack_column (str): Column name to group by on the stack dimension.
            normalize (bool, optional): Normalize numeric dimension for
                100% stacked bars. Default False.
            stack_order (list, optional): List of values within the
                'stack_column' dimension for specific stack sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical
                    axis values. Default.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional): Sort order
                of the categorical axis. Default False.
        """

        vertical = self._chart.axes._vertical

        source, factors, stack_values = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            stack_column,
            normalize=normalize,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )

        colors, _ = self._get_color_and_order(data_frame, stack_column, stack_order)
        if stack_column is None:
            colors = colors[0]

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_categorical_axis_default_range(vertical, data_frame, numeric_column)
        bar_width = self._get_bar_width(factors)
        # Set numeric axis format to percentages.
        if normalize:
            if vertical:
                self._chart.axes.set_yaxis_tick_format("0%")
            else:
                self._chart.axes.set_xaxis_tick_format("0%")

        if stack_order is not None:
            if not set(stack_values).issubset(set(stack_order)):
                raise ValueError(
                    """Stack order must include all distinct
                                    values of the stack column `%s`
                                 """
                    % (stack_column)
                )
            stack_values = stack_order

        legend = [str(value) for value in stack_values]

        if vertical:
            self._plot_with_legend(
                self._chart.figure.vbar_stack,
                legend_label=legend,
                stackers=stack_values,
                x="factors",
                width=bar_width,
                line_color="white",
                source=source,
                fill_color=colors,
            )

        else:
            self._plot_with_legend(
                self._chart.figure.hbar_stack,
                legend_label=legend,
                stackers=stack_values,
                y="factors",
                height=bar_width,
                line_color="white",
                source=source,
                fill_color=colors,
            )

        self._chart.style._apply_settings("legend")
        # Reverse order of vertical legends to ensure that the order
        # is consistent with the stack order.
        self._chart._reverse_vertical_legend = True

        return self._chart

[docs]    def lollipop(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        color_column=None,
        color_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
    ):
        """Lollipop chart.

        Note:
            To change the orientation set x_axis_type or y_axis_type
            argument of the Chart object.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional):
                List of values within the 'color_column' for
                    specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical axis values.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional):
                Sort order of the categorical axis. Default False.
        """

        vertical = self._chart.axes._vertical

        source, factors, _ = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
            color_column=color_column,
        )

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order, categorical_columns)
        if color_column is None:
            colors = colors[0]

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_categorical_axis_default_range(vertical, data_frame, numeric_column)

        if color_column:
            legend = "color_column"
        else:
            legend = None

        if vertical:
            self._chart.figure.segment(
                "factors",
                0,
                "factors",
                numeric_column,
                line_width=2,
                line_color=colors,
                source=source,
            )

            self._plot_with_legend(
                self._chart.figure.circle,
                legend_group=legend,
                x="factors",
                y=numeric_column,
                size=10,
                fill_color=colors,
                line_color=colors,
                line_width=3,
                source=source,
            )

        else:
            self._chart.figure.segment(
                0,
                "factors",
                numeric_column,
                "factors",
                line_width=2,
                line_color=colors,
                source=source,
            )

            self._plot_with_legend(
                self._chart.figure.circle,
                legend_group=legend,
                x=numeric_column,
                y="factors",
                size=10,
                fill_color=colors,
                line_color=colors,
                line_width=3,
                source=source,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart

[docs]    def parallel(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        color_column=None,
        color_order=None,
        categorical_order_by="values",
        categorical_order_ascending=False,
        line_dash="solid",
        line_width=4,
        alpha=1.0,
    ):
        """Parallel coordinate plot.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional): List of values within the
                'color_column' for specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'values'.
                - 'values': Order categorical axis by the numerical axis values.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional):
                Sort order of the categorical axis. Default False.
            line_dash (str, optional): Dash style for the line. One of:
                - 'solid'
                - 'dashed'
                - 'dotted'
                - 'dotdash'
                - 'dashdot'
            line_width (int, optional): Width of the line
            alpha (float): Alpha value
        """
        settings = self._chart.style._get_settings("line_plot")
        line_cap = settings["line_cap"]
        line_join = settings["line_join"]

        vertical = self._chart.axes._vertical

        source, factors, _ = self._construct_source(
            data_frame,
            categorical_columns,
            numeric_column,
            # Each color has its own stack for parallel plots.
            # This causes each color to appear as its own column.
            stack_column=color_column,
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_numeric_axis_default_format(data_frame, numeric_column, numeric_column)

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single series
                color_value = numeric_column
                legend = None
            else:
                legend = str(color_value)

            if vertical:
                x_value, y_value = "factors", str(color_value)
            else:
                y_value, x_value = "factors", str(color_value)

            self._plot_with_legend(
                self._chart.figure.line,
                legend_label=legend,
                x=x_value,
                y=y_value,
                source=source,
                line_width=line_width,
                color=color,
                line_join=line_join,
                line_cap=line_cap,
                line_dash=line_dash,
                alpha=alpha,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

[docs]    def scatter(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        size_column=None,
        color_column=None,
        color_order=None,
        categorical_order_by="count",
        categorical_order_ascending=False,
        alpha=1.0,
        marker="circle",
    ):
        """Scatter chart.

        Note:
            To change the orientation set x_axis_type or y_axis_type
            argument of the Chart object.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            size_column (str, optional): Column name of numerical values
                to plot on the size dimension.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional):
                List of values within the 'color_column' for
                    specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'count'.
                - 'count': Order categorical axis by the count of values.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional):
                Sort order of the categorical axis. Default False.
            alpha (float): Alpha value.
            marker (str): marker type. Valid types:
                'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross',
                'diamond', 'diamond_cross', 'hex', 'inverted_triangle',
                'square', 'square_x', 'square_cross', 'triangle',
                'x', '*', '+', 'o', 'ox', 'o+'
        """
        vertical = self._chart.axes._vertical

        if size_column is None:
            size_column = 15

        axis_factors = data_frame.groupby(categorical_columns).size()

        is_string = isinstance(categorical_order_by, str)
        order_length = getattr(categorical_order_by, "__len__", None)
        if is_string and categorical_order_by == "labels":
            axis_factors = axis_factors.sort_index(ascending=categorical_order_ascending).index
        elif is_string and categorical_order_by == "count":
            axis_factors = axis_factors.sort_values(ascending=categorical_order_ascending).index
        # User-specified order.
        elif not is_string and order_length is not None:
            axis_factors = categorical_order_by
        else:
            raise ValueError("""Must be 'count', 'labels', or a list of values.""")

        colors, color_values = self._get_color_and_order(data_frame, color_column, color_order)
        # Apply factors to the axis.
        self._set_categorical_axis_default_factors(vertical, axis_factors)

        for color_value, color in zip(color_values, colors):
            if color_column is None:  # Single series
                color_value = numeric_column
                legend = None
                sliced_data = data_frame
            else:
                legend = str(color_value)
                sliced_data = data_frame[data_frame[color_column] == color_value]
            # Filter to only relevant columns.
            data_factors = sliced_data.set_index(categorical_columns).index
            sliced_data = sliced_data[[col for col in sliced_data.columns if col in (numeric_column, size_column)]]
            source = self._named_column_data_source(sliced_data, series_name=color_value)
            source.add(data_factors, "factors")

            if vertical:
                x_value, y_value = "factors", numeric_column
            else:
                y_value, x_value = "factors", numeric_column

            self._plot_with_legend(
                self._chart.figure.scatter,
                legend_label=legend,
                x=x_value,
                y=y_value,
                size=size_column,
                fill_color=color,
                line_color=color,
                source=source,
                marker=marker,
                alpha=alpha,
            )

        # Set legend defaults if there are multiple series.
        if color_column is not None:
            self._chart.style._apply_settings("legend")

        return self._chart

[docs]    def boxplot(
        self,
        data_frame,
        categorical_columns,
        numeric_column,
        color_column=None,
        color_order=None,
        categorical_order_by="labels",
        categorical_order_ascending=True,
        outlier_marker="circle",
        outlier_color="black",
        outlier_alpha=0.3,
        outlier_size=15,
    ):
        """Box-and-whisker plot.

        Note:
            To change the orientation set x_axis_type or y_axis_type
            argument of the Chart object.

        Args:
            data_frame (pandas.DataFrame): Data source for the plot.
            categorical_columns (str or list): Column name to plot on
                the categorical axis.
            numeric_column (str): Column name to plot on the numerical axis.
            color_column (str, optional): Column name to group by on
                the color dimension.
            color_order (list, optional):
                List of values within the 'color_column' for
                    specific color sort.
            categorical_order_by (str or array-like, optional):
                Dimension for ordering the categorical axis. Default 'labels'.
                - 'labels': Order categorical axis by the categorical labels.
                - array-like object (list, tuple, np.array): New labels
                    to conform the categorical axis to.
            categorical_order_ascending (bool, optional):
                Sort order of the categorical axis. Default True.
            outlier_marker (str, optional): Outlier marker type. Valid types:
                'asterisk', 'circle', 'circle_cross', 'circle_x', 'cross',
                'diamond', 'diamond_cross', 'hex', 'inverted_triangle',
                'square', 'square_x', 'square_cross', 'triangle',
                'x', '*', '+', 'o', 'ox', 'o+' Default 'circle'
            outlier_color (str, optional): Color name or hex value.
                See chartify.color_palettes.show() for available color names.
                Default 'black'
            outlier_alpha (float, optional): Alpha value. Default 0.3
            outlier_size (float, optional): Size of outlier markers.
                Default 15
        """

        # check categorical_order_by value
        order_length = getattr(categorical_order_by, "__len__", None)
        is_string = isinstance(categorical_order_by, str)
        if (not is_string and order_length is None) or (is_string and categorical_order_by != "labels"):
            raise ValueError(
                """Argument categorical_order_by must be 'labels',
                             or a list of values."""
            )

        df_intervals_and_floating_bars, outliers = self._compute_boxplot_df(
            data_frame, categorical_columns, numeric_column
        )

        # upper and lower bound
        self.interval(
            df_intervals_and_floating_bars,
            categorical_columns,
            "lower",
            "upper",
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
        )

        # boxes for q1 to q2 and q2 to q3
        vertical = self._chart.axes._vertical

        source_low, _, _ = self._construct_source(
            df_intervals_and_floating_bars,
            categorical_columns,
            ["q1", "q2"],
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
            color_column=color_column,
        )

        source_high, factors, _ = self._construct_source(
            df_intervals_and_floating_bars,
            categorical_columns,
            ["q2", "q3"],
            categorical_order_by=categorical_order_by,
            categorical_order_ascending=categorical_order_ascending,
            color_column=color_column,
        )

        colors, _ = self._get_color_and_order(
            df_intervals_and_floating_bars,
            color_column,
            color_order,
            categorical_columns,
        )

        if color_column is None:
            colors = colors[0]

        self._set_categorical_axis_default_factors(vertical, factors)
        self._set_categorical_axis_default_range(vertical, data_frame, numeric_column)

        bar_width = self._get_bar_width(factors)

        if color_column:
            legend = bokeh.core.properties.field("color_column")
            legend = "color_column"
        else:
            legend = None

        if vertical:
            self._plot_with_legend(
                self._chart.figure.vbar,
                legend_group=None,
                x="factors",
                width=bar_width,
                top="q2",
                bottom="q1",
                line_color="white",
                source=source_low,
                fill_color=colors,
            )
            self._plot_with_legend(
                self._chart.figure.vbar,
                legend_group=legend,
                x="factors",
                width=bar_width,
                top="q3",
                bottom="q2",
                line_color="white",
                source=source_high,
                fill_color=colors,
            )

        else:
            self._plot_with_legend(
                self._chart.figure.hbar,
                legend_group=None,
                y="factors",
                height=bar_width,
                right="q2",
                left="q1",
                line_color="white",
                source=source_low,
                fill_color=colors,
            )
            self._plot_with_legend(
                self._chart.figure.hbar,
                legend_group=legend,
                y="factors",
                height=bar_width,
                right="q3",
                left="q2",
                line_color="white",
                source=source_high,
                fill_color=colors,
            )

        # outliers
        factors = outliers.set_index(categorical_columns).index
        outliers = outliers[[col for col in outliers.columns if col == numeric_column]]

        source_outliers = self._named_column_data_source(outliers, series_name=None)
        source_outliers.add(factors, "factors")

        if vertical:
            x_value, y_value = "factors", numeric_column
        else:
            y_value, x_value = "factors", numeric_column

        self._plot_with_legend(
            self._chart.figure.scatter,
            legend_label=None,
            x=x_value,
            y=y_value,
            size=outlier_size,
            fill_color=outlier_color,
            line_color=outlier_color,
            source=source_outliers,
            marker=outlier_marker,
            alpha=outlier_alpha,
        )

        return self._chart