# -*- coding: utf-8 -*-
"""
In this file all the matplotlib wrappers are located.
"""
from .helpers import get_optimal_bin_size, manager
from .colors import b2cm
import pandas as pd
import numpy as np
from matplotlib.colors import hex2color
import matplotlib.pyplot as plt
def _hist_init(data, bins=None, xrange=None):
    """Return the bin edges to use for a histogram of ``data``.

    The edges held by the module-level ``manager`` are reused when they exist
    and neither ``bins`` nor ``xrange`` is given. In every other case the
    edges are computed with ``np.histogram``.

    Args:
        data: Values that are going to be histogrammed.
        bins: Passed to ``np.histogram`` as its ``bins`` argument. When it is
            ``None`` the value of ``get_optimal_bin_size(len(data))`` is used.
        xrange: Passed to ``np.histogram`` as its ``range`` argument.

    Returns:
        The bin edges, either the ones kept by ``manager`` or new ones.
    """
    cached_edges = manager.get_x_axis()

    # Guard clause: nothing was requested explicitly and a binning is known.
    if cached_edges is not None and bins is None and xrange is None:
        return cached_edges

    binning = get_optimal_bin_size(len(data)) if bins is None else bins
    edges = np.histogram(data, binning, xrange)[1]
    return edges
def text(t, x=0.8, y=0.9, fontsize=22, *args, **kwargs):
    """Write the string ``t`` onto the current axes.

    The position is interpreted with the ``transAxes`` transform of the
    current axes instead of in data coordinates.

    Args:
        t: The text to draw.
        x: Horizontal position handed to ``plt.text``.
        y: Vertical position handed to ``plt.text``.
        fontsize: Font size handed to ``plt.text``.
        *args: Extra positional arguments forwarded to ``plt.text``.
        **kwargs: Extra keyword arguments forwarded to ``plt.text``.

    Returns:
        None
    """
    axes_transform = plt.gca().transAxes
    plt.text(x, y, t, *args, transform=axes_transform, fontsize=fontsize, **kwargs)
# Face colour lookup table with one leading ``None`` followed by five 'none'
# entries. NOTE(review): not referenced in the visible part of this file --
# confirm whether it is still needed.
STYLES_facecolor = [None] + ['none'] * 5

# Hatch patterns; ``hist`` picks one entry by its ``style`` argument.
# Entry 0 (``None``) means no hatching.
STYLES_hatches = [
    None,
    '///',
    r"\\\ ",
    ".+",
    'xxx',
    '--',
    '++',
    'xx',
    '//',
    '*',
    'o',
    'O',
    '.',
]
def hist(data, bins=None, fill=False, range=None, lw=1., ax=None, style=None, color=None, scale=None, weights=None,
         label=None, edgecolor=None, *args, **kwargs):
    """Draw a step (or filled/hatched) histogram and remember its binning.

    The bin edges come from ``_hist_init``; after drawing, the edges returned
    by matplotlib are stored with ``manager.set_x_axis``.

    Args:
        data: Values to histogram. A ``pandas.Series`` is reduced to its
            underlying array.
        bins: Binning request forwarded to ``_hist_init``.
        fill: Draw a filled ('stepfilled') histogram instead of an outline.
            Forced to ``True`` whenever ``style`` is given.
        range: Range request forwarded to ``_hist_init`` and ``ax.hist``.
        lw: Line width.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        style: Index into ``STYLES_hatches`` (wrapped around its length).
            Style 0 gives a half-transparent solid fill, every other style an
            unfilled, hatched face.
        color: Integer index into ``b2cm`` (wrapped around its length), a
            colour understood by ``hex2color``, or an RGB(A) list/tuple that
            is used as it is. ``None`` takes the next colour of the axes'
            property cycle.
        scale: Number every weight is multiplied with. Booleans are ignored;
            any other non-numeric value only prints a hint.
        weights: Per-entry weights; defaults to 1 for every entry. The array
            passed in is never modified.
        label: Legend label.
        edgecolor: Edge colour of a filled histogram; defaults to ``color``.
        *args: Forwarded to ``ax.hist``.
        **kwargs: Forwarded to ``ax.hist``.

    Returns:
        Tuple ``(y, xaxis, patches)`` exactly as returned by ``ax.hist``.
    """
    if ax is None:
        ax = plt.gca()

    xaxis = _hist_init(data, bins, xrange=range)

    if isinstance(data, pd.Series):
        data = data.values

    if isinstance(color, int):
        color = b2cm[color % len(b2cm)]
    if color is None:
        # NOTE(review): relies on a private matplotlib attribute -- confirm it
        # exists in the matplotlib version this package targets.
        color = next(ax._get_lines.prop_cycler)["color"]

    # Only colour specifications that are not already an RGB(A) sequence are
    # converted. The original condition lacked parentheses and therefore sent
    # tuples through ``hex2color`` as well.
    if not isinstance(color, (list, tuple)):
        color = hex2color(color)

    if style is not None:
        fill = True
    else:
        style = 0

    if weights is None:
        weights = np.ones(len(data))

    if scale is not None:
        if isinstance(scale, (int, float, np.integer, np.floating)):
            if not isinstance(scale, bool):
                # Build a new array: an in-place ``*=`` would change the
                # caller's weights and fails for integer weights scaled by a
                # float.
                weights = np.asarray(weights) * scale
        else:
            print("Please provide int or float with scale")

    edgecolor = color if edgecolor is None else edgecolor

    if fill:
        # Use the RGB part only so that an RGBA input still yields a valid
        # four-component face colour.
        fc = (*color[:3], 0.5) if style == 0 else 'none'
        # Wrap the index the same way the colour index is wrapped above.
        hatch = STYLES_hatches[style % len(STYLES_hatches)]
        y, xaxis, patches = ax.hist(data, xaxis, range=range, lw=lw, histtype='stepfilled', hatch=hatch,
                                    edgecolor=edgecolor, facecolor=fc, linewidth=lw, weights=weights, label=label,
                                    color=color, *args, **kwargs)
    else:
        y, xaxis, patches = ax.hist(data, xaxis, range=range, histtype='step', lw=lw, color=color, weights=weights,
                                    label=label, *args, **kwargs)

    # Keep the binning so that later plots can reuse it.
    manager.set_x_axis(xaxis)
    return y, xaxis, patches
def to_stack(df, col, by):
    """Split one column of a DataFrame into one array per group.

    Args:
        df: DataFrame holding the data.
        col: Name of the column whose values are returned.
        by: Grouping key handed to ``df.groupby``.

    Returns:
        List with the values of ``col`` for every group, in the iteration
        order of ``groupby(...).groups``.
    """
    grouped = df.groupby(by)
    return [grouped.get_group(key)[col].values for key in grouped.groups]
def stacked(df, col=None, by=None, bins=None, color=None, range=None, lw=.5, ax=None, edgecolor='black',
            *args, **kwargs):
    """Create a stacked histogram.

    Args:
        df (DataFrame or list of arrays): Either a DataFrame that is split
            with ``to_stack(df, col, by)`` or a ready-made list of datasets.
        col: Column to histogram (required for a DataFrame).
        by: Column to group by (required for a DataFrame).
        bins: Binning request forwarded to ``_hist_init``.
        color: Colours for the stack. When ``None`` and fewer than 20
            datasets are stacked, ``b2helix(n_stacks)`` is used.
        range: Range request forwarded to ``_hist_init``.
        lw: Line width.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        edgecolor: Edge colour of the stacked areas.
        *args: Forwarded to ``ax.hist``.
        **kwargs: Forwarded to ``ax.hist``.

    Returns:
        Tuple ``(y, xaxis, patches)`` where ``y`` holds the bin contents of
        the topmost histogram of the stack, ``xaxis`` the bin edges and
        ``patches`` whatever ``ax.hist`` returned as its third value.

    Raises:
        AssertionError: If ``col``/``by`` are missing for a DataFrame, if
            ``df`` is neither a DataFrame nor a list, or if there is no
            dataset to plot.
    """
    if isinstance(df, pd.DataFrame):
        assert col is not None, "Please provide column"
        assert by is not None, "Please provide by"
        data = to_stack(df, col, by)
    else:
        assert isinstance(df, list), "Please provide DataFrame or List"
        data = df
    # Fail with a readable message instead of an IndexError on ``data[0]``.
    assert len(data) > 0, "Please provide at least one dataset"

    if ax is None:
        ax = plt.gca()

    if color is None:
        from sciplot.colors import b2helix
        n_stacks = len(data)
        if n_stacks < 20:
            color = b2helix(n_stacks)

    # NOTE(review): the binning is derived from the first dataset only;
    # confirm that this is intended when the datasets cover different ranges.
    xaxis = _hist_init(data[0], bins, xrange=range)

    y, xaxis, stuff = ax.hist(data, xaxis, histtype='stepfilled',
                              lw=lw, color=color, edgecolor=edgecolor, stacked=True, *args, **kwargs)

    manager.set_x_axis(xaxis)

    # With several datasets ``y`` has one row per dataset and the last row is
    # the top of the stack. With a single dataset matplotlib hands back a
    # one-dimensional array, where ``y[-1]`` would be the content of the last
    # bin only.
    y = np.asarray(y)
    top = y[-1] if y.ndim > 1 else y
    return top, xaxis, stuff
def errorhist(data, bins=None, color=None, normed=False, fmt='.', range=None, scale=None,
              x_err=False, box=False, ax=None, weights=None, plot_zero=True, label=None, *args, **kwargs):
    """Histogram ``data`` and draw the bin contents as points with error bars.

    The bin edges come from ``_hist_init`` and are stored again with
    ``manager.set_x_axis`` after plotting. The drawing itself is delegated to
    ``errorbar``.

    Args:
        data: Values to histogram. A ``pandas.Series`` is reduced to its
            underlying array.
        bins: Binning request forwarded to ``_hist_init``.
        color: Integer index into ``b2cm`` (wrapped around its length) or a
            matplotlib colour. ``None`` takes the next colour of the axes'
            property cycle.
        normed: Plot a density (``np.histogram(..., density=True)``) instead
            of bin contents.
        fmt: Marker format forwarded to ``errorbar``.
        range: Range request forwarded to ``_hist_init``.
        scale: Number every weight is multiplied with. Booleans are ignored;
            any other non-numeric value only prints a hint.
        x_err: Horizontal error bars of half a bin width are drawn unless
            this is exactly ``False``.
        box: Draw the uncertainty as boxes (implies horizontal errors).
        ax: Axes to draw on; defaults to ``plt.gca()``.
        weights: Per-entry weights; defaults to 1 for every entry. The array
            passed in is never modified.
        plot_zero: Forwarded to ``errorbar``; ``False`` hides empty bins.
        label: Legend label.
        *args: Forwarded to ``errorbar``.
        **kwargs: Forwarded to ``errorbar``.

    Returns:
        Tuple ``(y, bin_centers, err)`` with the bin contents, the bin
        centres and the ``(lower, upper)`` errors.
    """
    xaxis = _hist_init(data, bins, xrange=range)

    if ax is None:
        ax = plt.gca()

    if isinstance(data, pd.Series):
        data = data.values

    if weights is None:
        weights = np.ones(len(data))

    if scale is not None:
        if isinstance(scale, (int, float, np.integer, np.floating)):
            if not isinstance(scale, bool):
                # Build a new array instead of ``*=`` so the caller's weights
                # stay untouched and integer weights can take a float scale.
                weights = np.asarray(weights) * scale
        else:
            print("Please provide int or float with scale")

    # ``density`` replaces the ``normed`` keyword that current NumPy releases
    # no longer accept.
    y, x = np.histogram(data, xaxis, density=normed, weights=weights)
    bin_centers = (x[1:] + x[:-1]) / 2.0

    if isinstance(color, int):
        color = b2cm[color % len(b2cm)]
    if color is None:
        # NOTE(review): relies on a private matplotlib attribute -- confirm it
        # exists in the matplotlib version this package targets.
        color = next(ax._get_lines.prop_cycler)["color"]

    # https://www-cdf.fnal.gov/physics/statistics
    if normed:
        # Scale sqrt(N) of the unnormalised contents by the normalisation
        # factor y / N, i.e. y / sqrt(N). Empty bins get a zero error instead
        # of a division by zero.
        yom, _ = np.histogram(data, xaxis, weights=weights)
        counts = np.asarray(yom, dtype=float)
        filled = counts > 0
        symmetric = np.zeros_like(counts)
        symmetric[filled] = y[filled] / np.sqrt(counts[filled])
        err = (symmetric, symmetric)
    else:
        root = np.sqrt(np.asarray(y) + 0.25)
        err = (root - 0.5, root + 0.5)

    if x_err is not False or box:
        # One half-width per bin: stays correct for non-uniform bins and can
        # be filtered together with the bins when empty ones are hidden.
        x_err = np.diff(x) / 2.0
    else:
        x_err = None

    errorbar(bin_centers, y, err, x_err, box, plot_zero, fmt, color, ax, label, *args, **kwargs)

    manager.set_x_axis(xaxis)
    return y, bin_centers, err
def errorbar(bin_centers, y, y_err, x_err=None, box=False, plot_zero=True, fmt='.',
             color=None, ax=None, label=None, *args, **kwargs):
    """Draw points with error bars, or error boxes, on an axes.

    Args:
        bin_centers: x positions of the points.
        y: y values of the points.
        y_err: Either a pair ``(lower, upper)`` of errors or a single scalar
            or array that is used for both directions. Anything of length 2
            is treated as such a pair.
        x_err: Horizontal half-widths, scalar or one value per point.
        box: Draw a bar reaching from ``y - lower`` with height
            ``lower + upper`` instead of vertical error bars. Requires
            ``x_err``.
        plot_zero: Pass ``False`` to leave out points whose ``y`` is 0.
        fmt: Marker format for ``ax.errorbar`` (not used in box mode).
        color: Colour; ``None`` takes the next colour of the axes' property
            cycle.
        ax: Axes to draw on; defaults to ``plt.gca()``.
        label: Legend label.
        *args: Forwarded to ``ax.bar`` (box mode) or ``ax.errorbar``.
        **kwargs: Forwarded to ``ax.bar`` (box mode) or ``ax.errorbar``.

    Returns:
        None

    Raises:
        AssertionError: If ``box`` is requested without ``x_err``.
    """
    if ax is None:
        ax = plt.gca()

    bin_centers = np.asarray(bin_centers)
    y = np.asarray(y)

    # A scalar has no ``len``; it is a symmetric error like a single array.
    if np.ndim(y_err) == 0 or len(y_err) != 2:
        y_err = y_err, y_err
    lower, upper = y_err

    if color is None:
        # NOTE(review): relies on a private matplotlib attribute -- confirm it
        # exists in the matplotlib version this package targets.
        color = next(ax._get_lines.prop_cycler)["color"]

    if plot_zero is False:
        keep = y != 0
        # Broadcasting lets scalar errors be filtered like per-point ones.
        lower = np.broadcast_to(np.asarray(lower), y.shape)[keep]
        upper = np.broadcast_to(np.asarray(upper), y.shape)[keep]
        # A scalar half-width applies to every remaining point and cannot be
        # indexed with a mask.
        if x_err is not None and np.ndim(x_err) > 0:
            x_err = np.asarray(x_err)[keep]
        bin_centers = bin_centers[keep]
        y = y[keep]

    if box:
        assert x_err is not None, "Please provide x-err"
        hi = lower + upper
        lo = y - lower
        ax.errorbar(bin_centers, y, color=color, xerr=x_err, fmt=' ')
        # ``bin_centers`` is already filtered here, so it must not be indexed
        # a second time. ``np.asarray`` keeps ``2 *`` numeric for list input.
        ax.bar(bin_centers, hi, bottom=lo, align='center', color=color, alpha=.7,
               width=2 * np.asarray(x_err), label=label,
               edgecolor=color, *args, **kwargs)
    else:
        ax.errorbar(bin_centers, y, yerr=(lower, upper), xerr=x_err, fmt=fmt, color=color, label=label,
                    *args, **kwargs)
def xlim(low=None, high=None, ax=None):
    """Set the x limits of an axes from the stored binning and/or explicit values.

    The limits are first set to the minimum and maximum of the bin edges held
    by ``manager`` (if any). Explicit ``low``/``high`` values are applied
    afterwards and therefore win.

    Args:
        low: Lower limit; ``None`` leaves it to matplotlib.
        high: Upper limit; ``None`` leaves it to matplotlib.
        ax: Axes to modify; defaults to ``plt.gca()``.

    Returns:
        None
    """
    xaxis = manager.get_x_axis()
    explicit = low is not None or high is not None

    # Nothing to apply: return before ``plt.gca()`` is touched.
    if xaxis is None and not explicit:
        return

    # Resolve the axes once, so explicit limits also work when no binning has
    # been stored yet.
    if ax is None:
        ax = plt.gca()

    if xaxis is not None:
        ax.set_xlim(np.min(xaxis), np.max(xaxis))
    if explicit:
        ax.set_xlim(low, high)
def save(filename, bottom=0.15, left=0.13, right=0.96, top=0.95, *args, **kwargs):
    """Adjust the subplot margins and write the current figure to a file.

    The margins are applied with ``plt.subplots_adjust`` before the figure is
    saved, to fit the page with larger labels.

    Args:
        filename: Target handed to ``plt.savefig``.
        bottom: Bottom margin for ``plt.subplots_adjust``.
        left: Left margin for ``plt.subplots_adjust``.
        right: Right margin for ``plt.subplots_adjust``.
        top: Top margin for ``plt.subplots_adjust``.
        *args: Forwarded to ``plt.savefig``.
        **kwargs: Forwarded to ``plt.savefig``.

    Returns:
        None
    """
    margins = {"bottom": bottom, "left": left, "right": right, "top": top}
    plt.subplots_adjust(**margins)
    plt.savefig(filename, *args, **kwargs)
def sig_bkg_plot(df, col, by=None, ax=None, bins=None, range=None, labels=None):
    """Overlay a background and a signal histogram with a common binning.

    Two calling conventions are supported:

    * ``sig_bkg_plot(df, col, by)`` with a DataFrame. If ``by`` is a string
      the column ``col`` is split with ``to_stack``; the first group is taken
      as background and the second as signal. Otherwise ``by`` is used as a
      boolean mask selecting the signal rows.
    * ``sig_bkg_plot(signal, background)`` with two arrays.

    Args:
        df: DataFrame, or the signal values in the two-array form.
        col: Column name, or the background values in the two-array form.
        by: Name of the category column or boolean mask (DataFrame form only).
        ax: Axes to draw on; defaults to ``plt.gca()``.
        bins: Binning request forwarded to ``_hist_init``.
        range: Range request forwarded to ``_hist_init``.
        labels: Pair ``[background_label, signal_label]``; defaults to
            ``["Background", "Signal"]``.

    Returns:
        None

    Raises:
        AssertionError: If ``by`` is missing for a DataFrame or names a
            column with fewer than two categories.
    """
    # foreseen usage
    if isinstance(df, pd.DataFrame):
        # by is not a boolean index
        if isinstance(by, str):
            x = to_stack(df, col, by)
            if len(x) > 2:
                print("Waring, more than two categories in %s!" % by)
            assert len(x) > 1, "Did not found any categories in %s!" % by
            x_sig = x[1]
            x_bkg = x[0]
        # by is a boolean index
        else:
            assert by is not None, "Please provide by"
            # A Series keeps its index alignment; anything else (for example a
            # plain list, which does not support ``~``) becomes a bool array.
            mask = by if isinstance(by, pd.Series) else np.asarray(by, dtype=bool)
            x_sig = df[col][mask].values
            x_bkg = df[col][~mask].values
    # Alternative usage, passing two arrays
    else:
        x_sig = df
        x_bkg = col

    # Resolve the axes once so that histograms, legend and limits all end up
    # on the same axes, also when the caller passed one explicitly.
    if ax is None:
        ax = plt.gca()

    # Common binning derived from both samples together.
    xaxis = _hist_init(np.append(x_sig, x_bkg), bins, xrange=range)

    if labels is None:
        labels = ["Background", "Signal"]

    hist(x_bkg, xaxis, style=0, label=labels[0], ax=ax)
    hist(x_sig, xaxis, lw=2, color=0, label=labels[1], ax=ax)
    ax.legend()
    xlim(ax=ax)