# Source code for plotting

"""
This module creates plots for visualizing sensitivity analysis dataframes.

`make_plot()` creates a radial plot of the first and total order indices.

`make_second_order_heatmap()` creates a square heat map showing the second
order interactions between model parameters.

"""
from collections import OrderedDict

import numpy as np
import pandas as pd

from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.charts import Bar


def _to_radius(values, top_label, n_labels, inner_radius, outer_radius,
               lgaxis):
    """
    Convert sensitivity values into radial plot coordinates.

    A value equal to `top_label` lands on `outer_radius`.  On the log axis
    every decade below `top_label` takes up 1/`n_labels` of the distance
    between the two radii; on the linear axis the value 0 lands on
    `inner_radius`.

    Parameters
    -----------
    values       : float, numpy array or pandas series
                   Sensitivity value(s) to convert.
    top_label    : float
                   Value drawn at the outermost circular axis.
    n_labels     : int
                   Number of circular axis labels (log axis only).
    inner_radius : float
                   Radius at which the bars start.
    outer_radius : float
                   Radius of the outermost circular axis.
    lgaxis       : bool
                   True for log scaling, False for linear scaling.

    Returns
    --------
    Same type as `values`, holding the radial coordinates.  Non-positive
    inputs give NaN or -inf on the log axis.
    """
    span = outer_radius - inner_radius
    if lgaxis:
        return ((np.log10(values / top_label) + n_labels) * span /
                n_labels + inner_radius)
    return span * values / top_label + inner_radius


def make_plot(dataframe=None, highlight=None,
              top=100, minvalues=0.01, stacked=True, lgaxis=True,
              errorbar=True, showS1=True, showST=True):
    """
    Basic method to plot first and total order sensitivity indices.

    This is the method to generate a Bokeh plot similar to the burtin example
    template at the Bokeh website. For clarification, parameters refer to an
    input being measured (Tmax, C, k2, etc.) and stats refer to the 1st or
    total order sensitivity index.

    When five or fewer parameters survive the filtering a `bokeh.charts.Bar`
    chart is returned instead of the radial plot.

    Parameters
    -----------
    dataframe  : pandas dataframe
                 Dataframe containing sensitivity analysis results to be
                 plotted.  Must contain the columns 'Parameter', 'S1',
                 'S1_conf', 'ST' and 'ST_conf'.  It is not modified.
    highlight  : lst, optional
                 List of strings indicating which parameter wedges will be
                 highlighted.
    top        : int, optional
                 Integer indicating the number of parameters to display
                 (highest sensitivity values) (after minimum cutoff is
                 applied).
    minvalues  : float, optional
                 Cutoff minimum for which parameters should be plotted.
                 Applies to total order only.
    stacked    : bool, optional
                 Boolean indicating if bars should be stacked for each
                 parameter (True) or unstacked (False).
    lgaxis     : bool, optional
                 Boolean indicating if log axis should be used (True) or if a
                 linear axis should be used (False).
    errorbar   : bool, optional
                 Boolean indicating if error bars are shown (True) or are
                 omitted (False).
    showS1     : bool, optional
                 Boolean indicating whether 1st order sensitivity indices
                 will be plotted (True) or omitted (False).
    showST     : bool, optional
                 Boolean indicating whether total order sensitivity indices
                 will be plotted (True) or omitted (False).

                 **Note if showS1 and showST are both false, the plot will
                 default to showing ST data only instead of a blank plot**

    Returns
    --------
    p : bokeh figure
        A Bokeh figure of the data to be plotted

    Raises
    -------
    Exception
        If any of the required columns is missing from `dataframe`.
    """

    df = pd.DataFrame() if dataframe is None else dataframe
    highlight = [] if highlight is None else highlight
    top = int(top)

    # Check dataframe structure
    required = ('S1', 'ST', 'Parameter', 'ST_conf', 'S1_conf')
    if any(column not in df for column in required):
        raise Exception('Dataframe not formatted correctly')

    # Remove rows below the total order cutoff and rows with missing values,
    # then keep only the `top` largest total order indices.  Every step
    # returns a new frame, so the caller's dataframe is left untouched.
    df = df[df['ST'] > minvalues]
    df = df.dropna()
    df = df.sort_values('ST', ascending=False)
    df = df.head(top)
    df = df.reset_index(drop=True)
    n_params = len(df)

    # Add column indicating which parameters should be highlighted
    df['highlighted'] = df.Parameter.isin(highlight)

    # Switch to a bar chart when five or fewer parameters remain
    if n_params <= 5:
        if not stacked:
            data = {
                    'Sensitivity': pd.concat([df.ST, df.S1]),
                    'Parameter': pd.concat([df.Parameter, df.Parameter]),
                    'Order': np.array(['ST']*n_params + ['S1']*n_params),
                    'Confidence': pd.concat([df.ST_conf, df.S1_conf])
                    }
            return Bar(data, values='Sensitivity', label='Parameter',
                       group='Order', legend='top_right',
                       color=["#31a354", "#a1d99b"],
                       ylabel='Sensitivity Indices')
        # Stacked bars: S1 at the bottom, the remainder (ST - S1) on top
        data = {
                'Sensitivity': pd.concat([df.S1, df.ST - df.S1]),
                'Parameter': pd.concat([df.Parameter, df.Parameter]),
                'Order': np.array(['S1']*n_params + ['ST']*n_params),
                'Confidence': pd.concat([df.S1_conf, df.ST_conf])
                }
        return Bar(data, values='Sensitivity', label='Parameter',
                   color='Order', legend='top_right',
                   stack='Order', palette=["#31a354", "#a1d99b"],
                   ylabel='Sensitivity Indices')

    # Colors, in order: ST bar, S1 bar, ST error bar, S1 error bar
    palette = ["#a1d99b", "#31a354", "#546775", "#225ea8"]
    back_color = {
                  True: "#aeaeb8",
                  False: "#e6e6e6",
                 }

    # Sizing parameters
    width = 800
    height = 800
    inner_radius = 90
    outer_radius = 300 - 10

    # Determine wedge size based off number of parameters (one extra wedge
    # is left empty for the circular axis labels)
    big_angle = 2.0 * np.pi / (n_params + 1)
    # Determine division of wedges for plotting bars based on # stats plotted
    # for stacked or unstacked bars
    if stacked:
        small_angle = big_angle / 3
    else:
        small_angle = big_angle / 5

    # tools enabled for bokeh figure
    plottools = "hover, wheel_zoom, save, reset, resize"  # , tap"
    # Initialize figure with tools, coloring, etc.
    p = figure(plot_width=width, plot_height=height, title="",
               x_axis_type=None, y_axis_type=None,
               x_range=(-350, 350), y_range=(-350, 350),
               min_border=0, outline_line_color="#e6e6e6",
               background_fill_color="#e6e6e6", border_fill_color="#e6e6e6",
               tools=plottools)
    # Specify labels for hover tool
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = [("Order", "@Order"), ("Parameter", "@Param"),
                      ("Sensitivity", "@Sens"), ("Confidence", "@Conf")]
    hover.point_policy = "follow_mouse"

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # Angles for axial line placement
    num_lines = np.arange(0, n_params + 1, 1)
    line_angles = np.pi/2 - big_angle/2 - num_lines*big_angle

    # Angles for data placement
    angles = np.pi/2 - big_angle/2 - df.index.to_series()*big_angle

    # Circular axes and labels: one label per decade from 1 down to the
    # decade of the smallest plotted value.  Only positive values are
    # considered because S1 is not filtered by `minvalues` and log10 of a
    # zero or negative index is not a usable exponent.
    plotted = pd.concat([df.ST, df.S1])
    positive = plotted[plotted > 0]
    minlabel = round(np.log10(positive.min())) if len(positive) else 0
    labels = np.power(10.0, np.arange(0, minlabel-1, -1))

    # Set max radial line to correspond to 1.1 * maximum value + error
    maxval = max((df.ST + df.ST_conf).max(), (df.S1 + df.S1_conf).max())
    labels = np.append(labels, 0.0)
    # NOTE(review): this rounds to 0.0 when maxval is below roughly 0.045,
    # which makes the scaling below divide by zero.
    labels[0] = round(1.1*maxval, 1)

    # Determine if radial axis are log or linearly scaled
    if lgaxis:
        radii = _to_radius(labels, labels[0], labels.size,
                           inner_radius, outer_radius, True)
        # The trailing 0.0 label maps to -inf on a log axis; pin it to the
        # inner circle instead
        radii[-1] = inner_radius
    else:
        labels = np.delete(labels, -2)
        radii = _to_radius(labels, labels[0], labels.size,
                           inner_radius, outer_radius, False)

    # Convert sensitivity values to the plotted values (same conversion as
    # for the labels above), calculate the angles between which the bars are
    # placed, and store everything in the dataframe for future reference
    top_label = labels[0]
    n_labels = labels.size
    for position, stat in enumerate(('S1', 'ST')):
        values = df[stat]
        conf = df[stat + '_conf']

        if not stacked:
            # Each statistic gets its own slot inside the wedge
            start_angle = (-big_angle + angles +
                           (2*position + 1)*small_angle)
            stop_angle = (-big_angle + angles +
                          (2*position + 2)*small_angle)
            err_angle = (start_angle + stop_angle)/2
        else:
            # Both statistics share one slot; offset the error bars so the
            # S1 and ST bars do not overlap
            start_angle = -big_angle + angles + small_angle
            stop_angle = -big_angle + angles + 2*small_angle
            if stat == 'S1':
                err_angle = (start_angle*2 + stop_angle)/3
            else:
                err_angle = (start_angle + stop_angle*2)/3

        df[stat + '_err_angle'] = err_angle
        df[stat + 'radial'] = _to_radius(values, top_label, n_labels,
                                         inner_radius, outer_radius, lgaxis)
        df[stat + 'upper'] = _to_radius(values + conf, top_label, n_labels,
                                        inner_radius, outer_radius, lgaxis)
        # A lower bound that cannot be drawn (NaN, e.g. the log of a
        # negative number) is clipped to the inner circle
        lower = _to_radius(values - conf, top_label, n_labels,
                           inner_radius, outer_radius, lgaxis)
        df[stat + 'lower'] = lower.fillna(inner_radius)
        df[stat + '_start_angle'] = start_angle
        df[stat + '_stop_angle'] = stop_angle

    def st_then_s1(st_column, s1_column):
        # Stack the ST rows on top of the S1 rows with a fresh 0..2n-1 index
        return pd.concat([df[st_column], df[s1_column]], ignore_index=True)

    # Store plotted values (ST rows first, then S1 rows) for the glyphs
    pdata = pd.DataFrame({
        'x': np.zeros(2*n_params),
        'y': np.zeros(2*n_params),
        'ymin': np.full(2*n_params, float(inner_radius)),
        'ymax': st_then_s1('STradial', 'S1radial'),
        'starts': st_then_s1('ST_start_angle', 'S1_start_angle'),
        'stops': st_then_s1('ST_stop_angle', 'S1_stop_angle'),
        'Param': st_then_s1('Parameter', 'Parameter'),
        'Colors': np.array([palette[0]]*n_params + [palette[1]]*n_params),
        'Error Colors': np.array([palette[2]]*n_params +
                                 [palette[3]]*n_params),
        'Conf': st_then_s1('ST_conf', 'S1_conf'),
        'Order': np.array(["Total (ST)"]*n_params + ["1st (S1)"]*n_params),
        'Sens': st_then_s1('ST', 'S1'),
        'Lower': st_then_s1('STlower', 'S1lower'),
        'Upper': st_then_s1('STupper', 'S1upper'),
        'Err_Angle': st_then_s1('ST_err_angle', 'S1_err_angle'),
        })
    # Remove S1 or ST values if indicated by input.  If both are disabled
    # the head() below leaves only ST rows and tail() then keeps them all.
    if not showS1:
        pdata = pdata.head(n_params)
    if not showST:
        pdata = pdata.tail(n_params)
    # convert dataframe to ColumnDataSource for glyphs
    pdata_s = ColumnDataSource(pdata)

    # Background wedges, shaded darker for highlighted parameters
    wedge_colors = [back_color[highl] for highl in df.highlighted]
    p.annular_wedge(
                    0, 0, inner_radius, outer_radius, -big_angle+angles,
                    angles, color=wedge_colors,
                    )
    # Adding axis lines and labels
    p.circle(0, 0, radius=radii, fill_color=None, line_color="white")
    p.text(0, radii[:], [str(r) for r in labels[:]],
           text_font_size="8pt", text_align="center", text_baseline="middle")

    # Specify that the plotted bars are the only thing to activate hovertool
    hoverable = p.annular_wedge(x='x', y='y', inner_radius='ymin',
                                outer_radius='ymax',
                                start_angle='starts',
                                end_angle='stops',
                                color='Colors',
                                source=pdata_s
                                )
    hover.renderers = [hoverable]

    # Add error bars: a radial stem plus a cap at each end
    if errorbar:
        p.annular_wedge(0, 0, pdata['Lower'], pdata['Upper'],
                        pdata['Err_Angle'],
                        pdata['Err_Angle'],
                        color=pdata['Error Colors'], line_width=1.0)

        p.annular_wedge(0, 0, pdata['Lower'], pdata['Lower'],
                        pdata['starts'],
                        pdata['stops'],
                        color=pdata['Error Colors'], line_width=2.0)

        p.annular_wedge(0, 0, pdata['Upper'], pdata['Upper'],
                        pdata['starts'],
                        pdata['stops'],
                        color=pdata['Error Colors'], line_width=2.0)

    # Placement of parameter labels just outside the outermost axis
    label_angle = np.array(-big_angle/2 + angles)
    xr = (radii[0]*1.1)*np.cos(label_angle)
    yr = (radii[0]*1.1)*np.sin(label_angle)
    # Flip labels on the left half of the plot so the text is not upside
    # down (label_angle is a fresh array, so xr and yr are unaffected)
    label_angle[label_angle < -np.pi/2] += np.pi

    # Placing Labels and Legend
    legend_text = ['ST', 'ST Conf', 'S1', 'S1 Conf']
    p.text(xr, yr, df.Parameter, angle=label_angle,
           text_font_size="9pt", text_align="center", text_baseline="middle")

    p.rect([-40, -40], [30, -10], width=30, height=13,
           color=palette[:2])
    p.rect([-40, -40], [10, -30], width=30, height=1,
           color=palette[2:])
    p.text([-15, -15, -15, -15], [30, 10, -10, -30], text=legend_text,
           text_font_size="9pt", text_align="left", text_baseline="middle")
    # Axial lines separating the parameter wedges
    p.annular_wedge(0, 0, inner_radius-10, outer_radius+10,
                    -big_angle+line_angles, -big_angle+line_angles,
                    color="#999999")

    return p


def _s2_color(value, maxval, colors, missing_color):
    """
    Pick the heat map color for one second order index.

    Values are scaled so that `maxval` maps to the last (darkest) entry of
    `colors` and zero maps to the second entry.  The index is clamped to the
    valid range so that negative indices cannot wrap around to the dark end
    of the colormap.

    Parameters
    -----------
    value         : float
                    Second order sensitivity index of the cell.
    maxval        : float
                    Largest second order index in the source dataframe.
    colors        : list
                    Colormap, ordered from lightest to darkest.
    missing_color : str
                    Color returned when `value` is missing (NaN).

    Returns
    --------
    str : the color for the cell.
    """
    if pd.isnull(value):
        return missing_color
    # Without a positive maximum there is nothing to scale against; treat
    # every cell as "no interaction" instead of dividing by zero
    ratio = value / maxval if maxval > 0 else 0.0
    index = int(round(ratio * (len(colors) - 2))) + 1
    return colors[min(max(index, 0), len(colors) - 1)]


def make_second_order_heatmap(df, top=10, name='', mirror=True, include=None):
    """
    Plot a heat map of the second order sensitivity indices from a given
    dataframe.  If you are choosing a high value of `top` then making
    this plot gets expensive and it is recommended to set mirror to False.

    Parameters
    -----------
    df     : pandas dataframe
             dataframe with second order sensitivity indices. This
             dataframe should be formatted in the standard output format
             from a Sobol sensitivity analysis in SALib.  Its columns must
             be exactly 'Parameter_1', 'Parameter_2', 'S2', 'S2_conf', in
             that order.
    top    : int, optional
             integer specifying the number of parameter interactions to
             plot (those with the 'top' greatest values are displayed).
             Values <= 0 are replaced by 1 and a message is printed.
    name   : str, optional
             string indicating the name of the output measure
             you are plotting.
    mirror : bool, optional
             boolean indicating whether you would like to plot the mirror
             image (reflection across the diagonal).  This mirror image
             contains the same information as plotted already, but will
             increase the computation time for large dataframes.
    include: list, optional
             a list of parameters that you would like to make sure are shown
             on the heat map (even if they are not in the `top` subset)

    Returns
    --------
    p : bokeh figure
        A Bokeh figure to be plotted

    Raises
    -------
    TypeError
        If the columns of `df` are not the expected second order columns.
    """

    # Confirm that df contains second order sensitivity indices
    if (list(df.columns.values) !=
            ['Parameter_1', 'Parameter_2', 'S2', 'S2_conf']):
        raise TypeError('df must contain second order sensitivity data')

    include = [] if include is None else include

    # Make sure `top` != 0 (it must be at least 1, even if a list is
    # specified for `include`).
    if top <= 0:
        top = 1
        print('`top` cannot be <= 0; it has been set to 1')

    # Colormap to use for plot, and the color of cells without data
    colors = ["#f7fbff", "#deebf7", "#c6dbef", "#9ecae1", "#6baed6",
              "#4292c6", "#2171b5", "#08519c", "#08306b"]
    missing_color = "#b3b3b3"

    # Slice the dataframe to include only the top parameters
    df_top = df.sort_values('S2', ascending=False).head(top)

    # Make a list of all the parameters that interact with each other.
    # Duplicates are dropped in order of first appearance so that the axis
    # order is the same on every run.
    labels = list(OrderedDict.fromkeys(
        pd.concat([df_top.Parameter_1, df_top.Parameter_2])))
    for item in include:
        if item not in labels:
            labels.append(item)

    # Use this to scale the heat map so the max sensitivity index is darkest
    maxval = df.S2.max()

    # Index every (Parameter_1, Parameter_2) pair once instead of filtering
    # the dataframe for each cell; the first row wins if a pair is repeated
    pair_stats = {}
    for first, second, s2_value, s2_confidence in zip(
            df.Parameter_1, df.Parameter_2, df.S2, df.S2_conf):
        pair_stats.setdefault((first, second), (s2_value, s2_confidence))

    xlabel = []
    ylabel = []
    color = []
    s2 = []
    s2_conf = []
    for px in labels:
        for py in labels:
            xlabel.append(px)
            ylabel.append(py)
            stats = pair_stats.get((px, py))
            # This heat map is symmetric across the diagonal, so fall back
            # to the mirrored pair if you've chosen to
            if stats is None and mirror:
                stats = pair_stats.get((py, px))
            # There may be no corresponding pair in the source dataframe
            # (for example a parameter interacting with itself)
            if stats is None:
                s2.append(float('NaN'))
                s2_conf.append(float('NaN'))
                color.append(missing_color)
            else:
                s2.append(stats[0])
                s2_conf.append(stats[1])
                color.append(_s2_color(stats[0], maxval, colors,
                                       missing_color))

    source = ColumnDataSource(data=dict(xlabel=xlabel, ylabel=ylabel, s2=s2,
                              s2_conf=s2_conf, color=color))

    # Initialize the plot
    plottools = "resize, hover, save, pan, box_zoom, wheel_zoom, reset"
    p = figure(title="%s second order sensitivities" % name,
               x_range=list(reversed(labels)), y_range=labels,
               x_axis_location="above", plot_width=700, plot_height=700,
               toolbar_location="right", tools=plottools)
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "8pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 1.1

    # Plot the second order data
    p.rect("xlabel", "ylabel", 1, 1, source=source,
           color="color", line_color=None)

    p.select_one(HoverTool).tooltips = [
        ('Interaction', '@xlabel-@ylabel'),
        ('S2', '@s2'),
        ('S2_conf', '@s2_conf'),
        ]

    return p