"""
This module creates plots for visualizing sensitivity analysis dataframes.
`make_plot()` creates a radial plot of the first and total order indices.
`make_second_order_heatmap()` creates a square heat map showing the second
order interactions between model parameters.
"""
from collections import OrderedDict
import numpy as np
import pandas as pd
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import HoverTool
from bokeh.charts import Bar
def _scale_to_radius(values, top_label, n_labels, lgaxis,
                     inner_radius, outer_radius):
    """
    Map sensitivity values onto radial plot coordinates.

    Parameters
    -----------
    values : array-like or pandas Series
        Sensitivity values (or axis labels) to convert.
    top_label : float
        Value that is drawn at `outer_radius`.
    n_labels : int
        Number of circular axis labels; sets the number of decades that
        span the ring when a log axis is used.
    lgaxis : bool
        Use the log10 mapping (True) or the linear mapping (False).
    inner_radius, outer_radius : number
        Radii bounding the ring in which the bars are drawn.

    Returns
    --------
    Same type as `values`, holding the radial coordinate of each entry.
    Non-positive inputs give NaN/-inf on the log axis; callers decide how
    to treat those.
    """
    span = outer_radius - inner_radius
    if lgaxis:
        # log10 of zero/negative values is expected here (the 0.0 axis
        # label, lower error bounds below zero), so silence the warnings.
        with np.errstate(divide='ignore', invalid='ignore'):
            return ((np.log10(values / top_label) + n_labels) * span /
                    n_labels + inner_radius)
    return span * values / top_label + inner_radius


def make_plot(dataframe=pd.DataFrame(), highlight=[],
              top=100, minvalues=0.01, stacked=True, lgaxis=True,
              errorbar=True, showS1=True, showST=True):
    """
    Basic method to plot first and total order sensitivity indices.

    This is the method to generate a Bokeh plot similar to the burtin example
    template at the Bokeh website. For clarification, parameters refer to an
    input being measured (Tmax, C, k2, etc.) and stats refer to the 1st or
    total order sensitivity index.

    When five or fewer parameters remain after filtering, a bar chart is
    returned instead of the radial plot.

    Parameters
    -----------
    dataframe : pandas dataframe
                Dataframe containing sensitivity analysis results to be
                plotted. Must contain the columns 'Parameter', 'S1',
                'S1_conf', 'ST' and 'ST_conf'.
    highlight : list, optional
                List of strings indicating which parameter wedges will be
                highlighted.
    top       : int, optional
                Integer indicating the number of parameters to display
                (highest sensitivity values) (after minimum cutoff is
                applied).
    minvalues : float, optional
                Cutoff minimum for which parameters should be plotted.
                Applies to total order only.
    stacked   : bool, optional
                Boolean indicating if bars should be stacked for each
                parameter (True) or unstacked (False).
    lgaxis    : bool, optional
                Boolean indicating if log axis should be used (True) or if a
                linear axis should be used (False).
    errorbar  : bool, optional
                Boolean indicating if error bars are shown (True) or are
                omitted (False).
    showS1    : bool, optional
                Boolean indicating whether 1st order sensitivity indices
                will be plotted (True) or omitted (False).
    showST    : bool, optional
                Boolean indicating whether total order sensitivity indices
                will be plotted (True) or omitted (False).

                **Note if showS1 and showST are both false, the plot will
                default to showing ST data only instead of a blank plot**

    Returns
    --------
    p : bokeh figure
        A Bokeh figure of the data to be plotted

    Raises
    -------
    ValueError
        If any of the required columns is missing from `dataframe`.
    """
    df = dataframe
    top = int(top)

    # Check dataframe structure before doing any work
    required = ('S1', 'ST', 'Parameter', 'ST_conf', 'S1_conf')
    if any(column not in df for column in required):
        raise ValueError('Dataframe not formatted correctly')

    # Drop rows below the cutoff or with missing data, then keep only the
    # `top` parameters ranked by total order index.
    df = (df[df['ST'] > minvalues]
          .dropna()
          .sort_values('ST', ascending=False)
          .head(top)
          .reset_index(drop=True))
    n_params = len(df)

    # Colors in the order: ST bar, S1 bar, ST error bar, S1 error bar
    colors = ["#a1d99b", "#31a354", "#546775", "#225ea8"]

    # Add column indicating which parameters should be highlighted
    df['highlighted'] = df.Parameter.isin(highlight)
    back_color = {
        True: "#aeaeb8",
        False: "#e6e6e6",
    }

    # Switch to bar chart if dataframe shrinks to 5 parameters or fewer
    if n_params <= 5:
        if not stacked:
            data = {
                'Sensitivity': pd.concat([df.ST, df.S1]),
                'Parameter': pd.concat([df.Parameter, df.Parameter]),
                'Order': np.append(np.array(['ST'] * n_params),
                                   np.array(['S1'] * n_params)),
                'Confidence': pd.concat([df.ST_conf, df.S1_conf]),
            }
            p = Bar(data, values='Sensitivity', label='Parameter',
                    group='Order', legend='top_right',
                    color=["#31a354", "#a1d99b"],
                    ylabel='Sensitivity Indices')
        else:
            # Stacked bars show S1 and the remainder (ST - S1) on top of it
            data = {
                'Sensitivity': pd.concat([df.S1, df.ST - df.S1]),
                'Parameter': pd.concat([df.Parameter, df.Parameter]),
                'Order': np.append(np.array(['S1'] * n_params),
                                   np.array(['ST'] * n_params)),
                'Confidence': pd.concat([df.S1_conf, df.ST_conf]),
            }
            p = Bar(data, values='Sensitivity', label='Parameter',
                    color='Order', legend='top_right',
                    stack='Order', palette=["#31a354", "#a1d99b"],
                    ylabel='Sensitivity Indices')
        return p

    # Sizing parameters
    width = 800
    height = 800
    inner_radius = 90
    outer_radius = 300 - 10

    # Determine wedge size based off number of parameters (one extra wedge
    # is left empty for the radial axis labels)
    big_angle = 2.0 * np.pi / (n_params + 1)
    # Division of each wedge into slots for the bars: unstacked bars need
    # room for two bars side by side, stacked bars share one slot.
    if not stacked:
        small_angle = big_angle / 5
    else:
        small_angle = big_angle / 3

    # tools enabled for bokeh figure
    plottools = "hover, wheel_zoom, save, reset, resize"  # , tap"
    # Initialize figure with tools, coloring, etc.
    p = figure(plot_width=width, plot_height=height, title="",
               x_axis_type=None, y_axis_type=None,
               x_range=(-350, 350), y_range=(-350, 350),
               min_border=0, outline_line_color="#e6e6e6",
               background_fill_color="#e6e6e6", border_fill_color="#e6e6e6",
               tools=plottools)
    # Specify labels for hover tool
    hover = p.select(dict(type=HoverTool))
    hover.tooltips = [("Order", "@Order"), ("Parameter", "@Param"),
                      ("Sensitivity", "@Sens"), ("Confidence", "@Conf")]
    hover.point_policy = "follow_mouse"

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # Angles for axial line placement
    num_lines = np.arange(0, n_params + 1, 1)
    line_angles = np.pi/2 - big_angle/2 - num_lines*big_angle
    # Angles for data placement
    angles = np.pi/2 - big_angle/2 - df.index.to_series()*big_angle

    # Circular axes and labels: one label per decade from 1 down to the
    # decade of the smallest positive index. Non-positive values are
    # skipped because their log10 is undefined.
    plotted = pd.concat([df.ST, df.S1])
    minlabel = round(np.log10(plotted[plotted > 0].min()))
    labels = np.power(10.0, np.arange(0, minlabel - 1, -1))

    # Set max radial line to correspond to 1.1 * maximum value + error
    maxval = max(max(df.ST + df.ST_conf), max(df.S1 + df.S1_conf))
    labels = np.append(labels, 0.0)
    top_label = round(1.1 * maxval, 1)
    if top_label <= 0:
        # Rounding to one decimal collapses small indices to 0, which
        # would divide by zero below; fall back to the unrounded value.
        top_label = 1.1 * maxval
    labels[0] = top_label

    # Determine if radial axis are log or linearly scaled
    if lgaxis:
        radii = _scale_to_radius(labels, labels[0], labels.size, True,
                                 inner_radius, outer_radius)
        # The trailing 0.0 label has no finite log; pin it to the inner ring
        radii[-1] = inner_radius
    else:
        labels = np.delete(labels, -2)
        radii = _scale_to_radius(labels, labels[0], labels.size, False,
                                 inner_radius, outer_radius)

    # Convert sensitivity values to the plotted values (same conversion as
    # for the labels above), calculate the angles at which the bars are
    # placed, and add everything to the dataframe for future reference.
    for position, stat in enumerate(('S1', 'ST')):
        values = df[stat]
        conf = df[stat + '_conf']
        scale_args = (labels[0], labels.size, lgaxis,
                      inner_radius, outer_radius)
        radius_of_stat = _scale_to_radius(values, *scale_args)
        lower_of_stat = _scale_to_radius(values - conf, *scale_args)
        higher_of_stat = _scale_to_radius(values + conf, *scale_args)

        if not stacked:
            # S1 and ST each get their own slot inside the wedge
            startA = -big_angle + angles + (2*position + 1)*small_angle
            stopA = -big_angle + angles + (2*position + 2)*small_angle
            err_angle = (startA + stopA) / 2
        else:
            # Both bars share the middle slot; offset the error bars so
            # the S1 and ST whiskers do not overlap.
            startA = -big_angle + angles + small_angle
            stopA = -big_angle + angles + 2*small_angle
            if position == 0:
                err_angle = (startA*2 + stopA) / 3
            else:
                err_angle = (startA + stopA*2) / 3

        df[stat + '_err_angle'] = err_angle
        df[stat + 'radial'] = radius_of_stat
        df[stat + 'upper'] = higher_of_stat
        # Lower bounds at or below zero have no finite log radius; draw
        # them at the inner ring instead.
        df[stat + 'lower'] = (lower_of_stat.replace(-np.inf, np.nan)
                              .fillna(inner_radius))
        df[stat + '_start_angle'] = startA
        df[stat + '_stop_angle'] = stopA

    def st_then_s1(suffix):
        # Stack the ST column on top of the matching S1 column
        return pd.concat([df['ST' + suffix],
                          df['S1' + suffix]]).reset_index(drop=True)

    # Store plotted values for the glyphs: ST rows first, then S1 rows
    pdata = pd.DataFrame({
        'x': np.zeros(2 * n_params),
        'y': np.zeros(2 * n_params),
        'ymin': np.ones(2 * n_params) * inner_radius,
        'ymax': st_then_s1('radial'),
        'starts': st_then_s1('_start_angle'),
        'stops': st_then_s1('_stop_angle'),
        'Param': pd.concat([df.Parameter,
                            df.Parameter]).reset_index(drop=True),
        'Colors': np.repeat(colors[:2], n_params),
        'Error Colors': np.repeat(colors[2:], n_params),
        'Conf': st_then_s1('_conf'),
        'Order': np.repeat(["Total (ST)", "1st (S1)"], n_params),
        'Sens': st_then_s1(''),
        'Lower': st_then_s1('lower'),
        'Upper': st_then_s1('upper'),
        'Err_Angle': st_then_s1('_err_angle'),
    })

    # Remove S1 or ST values if indicated by input. If both are switched
    # off, the second filter is a no-op and the ST rows remain.
    if not showS1:
        pdata = pdata.head(n_params)
    if not showST:
        pdata = pdata.tail(n_params)

    # convert dataframe to ColumnDataSource for glyphs
    pdata_s = ColumnDataSource(pdata)

    # Background wedges, shaded darker for highlighted parameters
    wedge_colors = [back_color[highl] for highl in df.highlighted]
    p.annular_wedge(
        0, 0, inner_radius, outer_radius, -big_angle+angles,
        angles, color=wedge_colors,
    )

    # Adding axis lines and labels
    p.circle(0, 0, radius=radii, fill_color=None, line_color="white")
    p.text(0, radii[:], [str(r) for r in labels[:]],
           text_font_size="8pt", text_align="center", text_baseline="middle")

    # Specify that the plotted bars are the only thing to activate hovertool
    hoverable = p.annular_wedge(x='x', y='y', inner_radius='ymin',
                                outer_radius='ymax',
                                start_angle='starts',
                                end_angle='stops',
                                color='Colors',
                                source=pdata_s)
    hover.renderers = [hoverable]

    # Add error bars: a radial whisker plus a cap at each end
    if errorbar:
        p.annular_wedge(0, 0, pdata['Lower'], pdata['Upper'],
                        pdata['Err_Angle'],
                        pdata['Err_Angle'],
                        color=pdata['Error Colors'], line_width=1.0)
        p.annular_wedge(0, 0, pdata['Lower'], pdata['Lower'],
                        pdata['starts'],
                        pdata['stops'],
                        color=pdata['Error Colors'], line_width=2.0)
        p.annular_wedge(0, 0, pdata['Upper'], pdata['Upper'],
                        pdata['starts'],
                        pdata['stops'],
                        color=pdata['Error Colors'], line_width=2.0)

    # Placement of parameter labels
    xr = (radii[0]*1.1)*np.cos(np.array(-big_angle/2 + angles))
    yr = (radii[0]*1.1)*np.sin(np.array(-big_angle/2 + angles))
    label_angle = np.array(-big_angle/2 + angles)
    # Flip labels on the left half so they are not drawn upside down
    label_angle[label_angle < -np.pi/2] += np.pi

    # Placing Labels and Legend
    legend_text = ['ST', 'ST Conf', 'S1', 'S1 Conf']
    p.text(xr, yr, df.Parameter, angle=label_angle,
           text_font_size="9pt", text_align="center", text_baseline="middle")
    p.rect([-40, -40], [30, -10], width=30, height=13,
           color=colors[:2])
    p.rect([-40, -40], [10, -30], width=30, height=1,
           color=colors[2:])
    p.text([-15, -15, -15, -15], [30, 10, -10, -30], text=legend_text,
           text_font_size="9pt", text_align="left", text_baseline="middle")

    # Radial divider lines between the parameter wedges
    p.annular_wedge(0, 0, inner_radius-10, outer_radius+10,
                    -big_angle+line_angles, -big_angle+line_angles,
                    color="#999999")
    return p
def _second_order_color(value, maxval, palette, missing_color):
    """
    Pick the heat map color for one second order sensitivity index.

    `value` is scaled against `maxval` so that the largest index maps to
    the last (darkest) palette entry. Non-finite values get
    `missing_color`, and the palette index is clamped so that negative
    indices cannot wrap around to the dark end of the palette.
    """
    if not np.isfinite(value):
        return missing_color
    # Without a positive maximum there is nothing to scale against
    ratio = value / maxval if maxval > 0 else 0.0
    steps = len(palette) - 2
    index = int(round(ratio * steps) + 1)
    return palette[min(max(index, 0), len(palette) - 1)]


def make_second_order_heatmap(df, top=10, name='', mirror=True, include=[]):
    """
    Plot a heat map of the second order sensitivity indices from a given
    dataframe.

    Parameters
    -----------
    df     : pandas dataframe
             dataframe with second order sensitivity indices. Its columns
             must be exactly 'Parameter_1', 'Parameter_2', 'S2' and
             'S2_conf', in that order (the standard output format from a
             Sobol sensitivity analysis in SALib).
    top    : int, optional
             integer specifying the number of parameter interactions to
             plot (those with the 'top' greatest values are displayed).
             Values <= 0 are replaced by 1.
    name   : str, optional
             string indicating the name of the output measure
             you are plotting.
    mirror : bool, optional
             boolean indicating whether you would like to plot the mirror
             image (reflection across the diagonal). This mirror image
             contains the same information as plotted already.
    include: list, optional
             a list of parameters that you would like to make sure are shown
             on the heat map (even if they are not in the `top` subset)

    Returns
    --------
    p : bokeh figure
        A Bokeh figure to be plotted

    Raises
    -------
    TypeError
        If `df` does not have the expected second order columns.
    """
    # Confirm that df contains second order sensitivity indices
    if (list(df.columns.values) !=
            ['Parameter_1', 'Parameter_2', 'S2', 'S2_conf']):
        raise TypeError('df must contain second order sensitivity data')

    # Make sure `top` != 0 (it must be at least 1, even if a list is
    # specified for `include`).
    top = int(top)
    if top <= 0:
        top = 1
        print('`top` cannot be <= 0; it has been set to 1')

    # Colormap to use for plot; grey marks boxes without data
    colors = ["#f7fbff", "#deebf7", "#c6dbef", "#9ecae1", "#6baed6",
              "#4292c6", "#2171b5", "#08519c", "#08306b"]
    missing_color = "#b3b3b3"

    # Slice the dataframe to include only the top parameters
    df_top = df.sort_values('S2', ascending=False).head(top)

    # Make a list of all the parameters that interact with each other,
    # de-duplicated in order of first appearance so the axis order is the
    # same on every run.
    labels = list(OrderedDict.fromkeys(
        list(df_top.Parameter_1) + list(df_top.Parameter_2)))
    for item in include:
        if item not in labels:
            labels.append(item)

    # Use this to scale the heat map so the max sensitivity index is darkest
    maxval = np.max(df.S2)

    # Index the dataframe once by (Parameter_1, Parameter_2) instead of
    # scanning it for every box. setdefault keeps the first row of any
    # duplicated pair.
    pair_stats = {}
    for first, second, value, conf in zip(df.Parameter_1, df.Parameter_2,
                                          df.S2, df.S2_conf):
        pair_stats.setdefault((first, second), (value, conf))

    xlabel = []
    ylabel = []
    color = []
    s2 = []
    s2_conf = []
    for px in labels:
        for py in labels:
            xlabel.append(px)
            ylabel.append(py)
            stats = pair_stats.get((px, py))
            # This heat map is symmetric across the diagonal, so fall back
            # to the swapped pair if the mirror image was requested.
            if stats is None and mirror:
                stats = pair_stats.get((py, px))
            if stats is None:
                # No corresponding pair in the source dataframe (for
                # example a parameter interacting with itself).
                s2.append(float('NaN'))
                s2_conf.append(float('NaN'))
                color.append(missing_color)
            else:
                value, conf = stats
                s2.append(value)
                s2_conf.append(conf)
                color.append(_second_order_color(value, maxval, colors,
                                                 missing_color))

    source = ColumnDataSource(data=dict(xlabel=xlabel, ylabel=ylabel, s2=s2,
                                        s2_conf=s2_conf, color=color))

    # Initialize the plot
    plottools = "resize, hover, save, pan, box_zoom, wheel_zoom, reset"
    p = figure(title="%s second order sensitivities" % name,
               x_range=list(reversed(labels)), y_range=labels,
               x_axis_location="above", plot_width=700, plot_height=700,
               toolbar_location="right", tools=plottools)
    p.grid.grid_line_color = None
    p.axis.axis_line_color = None
    p.axis.major_tick_line_color = None
    p.axis.major_label_text_font_size = "8pt"
    p.axis.major_label_standoff = 0
    p.xaxis.major_label_orientation = 1.1

    # Plot the second order data
    p.rect("xlabel", "ylabel", 1, 1, source=source,
           color="color", line_color=None)
    p.select_one(HoverTool).tooltips = [
        ('Interaction', '@xlabel-@ylabel'),
        ('S2', '@s2'),
        ('S2_conf', '@s2_conf'),
    ]
    return p