2D Histograms in physt¶
[1]:
# Necessary import evil
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

import physt
from physt import h1, h2, histogramdd
%matplotlib inline
# Seed NumPy's global random generator so the sampled data below is the same on every run
np.random.seed(42)
[2]:
# Two independent normally distributed samples (1000 values each)
# that serve as the x and y coordinates in the examples below
x = np.random.normal(loc=100, scale=1, size=1000)
y = np.random.normal(loc=10, scale=10, size=1000)
[3]:
# Build a 2D histogram with 8 bins along x and 4 bins along y
histogram = h2(
    x,
    y,
    [8, 4],
    name="Some histogram",
    axis_names=["x", "y"],
)
histogram
[3]:
Histogram2D('Some histogram', bins=(8, 4), total=1000, dtype=int64)
[4]:
# Frequencies are a 2D array: one row per x bin, one column per y bin
histogram.frequencies
[4]:
array([[ 0, 2, 4, 0],
[ 3, 26, 20, 5],
[ 17, 78, 104, 10],
[ 26, 163, 147, 17],
[ 17, 136, 96, 17],
[ 6, 41, 38, 6],
[ 1, 11, 7, 0],
[ 0, 1, 0, 1]], dtype=int64)
Multidimensional binning¶
In most cases, the binning methods that apply to 1D histograms can also be used in higher dimensions. In such cases, each parameter can be either a scalar (applied to all dimensions) or a list/tuple with an independent value for each dimension. This also applies to range, which has to be a list/tuple of tuples.
[6]:
# Fixed-width binning with a separate width per axis: 2 along x, 10 along y
histogram = h2(
    x,
    y,
    "fixed_width",
    bin_width=[2, 10],
    name="Fixed-width bins",
    axis_names=["x", "y"],
)
histogram.plot();
# Show the resulting bin edges, one array per axis
histogram.numpy_bins
[6]:
[array([ 96., 98., 100., 102., 104.]),
array([-20., -10., 0., 10., 20., 30., 40., 50.])]
[7]:
# Quantile binning with a separate bin count per axis: 3 along x, 4 along y
histogram = h2(
    x,
    y,
    "quantile",
    bin_count=[3, 4],
    name="Quantile bins",
    axis_names=["x", "y"],
)
histogram.plot(cmap_min=0);
# Show the resulting (irregular) bin edges, one array per axis
histogram.numpy_bins
[7]:
[array([ 96.75873266, 99.54993453, 100.40825276, 103.85273149]),
array([-19.40388635, 3.93758311, 10.63077132, 17.28882177,
41.93107568])]
[8]:
# "human" binning with a single (scalar) bin_count used for both axes
histogram = h2(
    x,
    y,
    "human",
    bin_count=5,
    name="Human-friendly bins",
    axis_names=["x", "y"],
)
histogram.plot();
# Show the resulting bin edges, one array per axis
histogram.numpy_bins
[8]:
[array([ 96., 98., 100., 102., 104.]),
array([-20., -10., 0., 10., 20., 30., 40., 50.])]
Plotting¶
2D¶
[ ]:
# Default is workable: plot without any arguments and keep the returned object in `ax`
ax = histogram.plot()
[9]:
# Custom colormap, no colorbar
fig, ax = plt.subplots()
ax = histogram.plot(
    ax=ax,
    cmap=cm.copper,
    show_colorbar=False,
    grid_color=cm.copper(0.5),  # grid drawn in the colour at the middle of the same colormap
)
ax.set_title("Custom colormap");
[10]:
# Use a named colormap (passed as a string) + limit it to a range of values
fig, ax = plt.subplots()
ax = histogram.plot(
    ax=ax,
    cmap="Oranges",
    show_colorbar=True,
    cmap_min=20,
    cmap_max=100,
    show_values=True,
)
ax.set_title("Clipped colormap");
[11]:
# Print the bin values as labels (skipping empty bins) and draw no grid (lw=0)
ax = histogram.plot(
    show_values=True,
    show_zero=False,
    cmap=cm.RdBu,
    format_value=float,
    lw=0,
)
Large histograms as images¶
Plotting histograms in this way becomes problematic with more than roughly 50x50 bins. There is an alternative, though, partially inspired by the datashader
project: plot the histogram as a bitmap, which is very fast even for very large histograms.
Note: This method does not work for histograms with irregular bins.
[12]:
# One million points per axis, same distributions as the small sample above
x = np.random.normal(loc=100, scale=1, size=1_000_000)
y = np.random.normal(loc=10, scale=10, size=1_000_000)
[13]:
# Same data drawn three ways, side by side: coarse "map", coarse "image", fine "image"
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# (histogram name, number of bins, plot kind, extra plot options) for each panel
panels = [
    ("20 bins - map", 20, "map", dict(lw=0, show_colorbar=False)),
    ("20 bins - image", 20, "image", {}),
    ("500 bins - image", 500, "image", {}),
]
for panel_ax, (title, bin_count, kind, extra) in zip(axes, panels):
    h2(x, y, bin_count, name=title).plot(kind, cmap="rainbow", alpha=1, ax=panel_ax, **extra)
Note that the image output is equivalent to the map plot drawn without grid lines.
Transformation¶
Sometimes, the value range is too big to show details. Therefore, it may be of some use to transform the values by a function, e.g. logarithm.
[14]:
# The same three panels as before, now with a logarithmic colour normalization
fig, axes = plt.subplots(1, 3, figsize=(12, 4))

# Options shared by all three panels
log_style = dict(alpha=1, cmap="rainbow", cmap_normalize="log")

h2(x, y, 20, name="20 bins - map").plot(
    "map", lw=0, show_zero=False, show_colorbar=False, ax=axes[0], **log_style
)
h2(x, y, 20, name="20 bins - image").plot("image", ax=axes[1], **log_style)
h2(x, y, 500, name="500 bins - image").plot("image", ax=axes[2], **log_style);
[15]:
# Composition - show a histogram "map" that is meant to be overlaid with "points"
fig, ax = plt.subplots(figsize=(8, 7))
h_2 = h2(x, y, 30)
h_2.plot("map", lw=0, alpha=0.9, cmap="Blues", ax=ax, cmap_normalize="log", show_zero=False)
# TODO: the "points" overlay below is disabled because it is not working currently:
# h2(x, y, 300).plot("image", alpha=1, cmap="Greys", ax=ax, transform=lambda x: x > 0);
[15]:
<AxesSubplot:xlabel='axis0', ylabel='axis1'>
3D¶
By this, we mean 3D bar plots of 2D histograms (not a visual representation of 3D histograms).
[16]:
# 3D bar plot coloured through the "rainbow" colormap
histogram.plot("bar3d", cmap="rainbow");
[17]:
# 3D bar plot with a single fixed colour instead of a colormap
histogram.plot("bar3d", color="red");
Projections¶
[18]:
# Project the 2D histogram onto its "x" axis; the result is a 1D histogram
proj1 = histogram.projection("x", name="Projection to X")
proj1.plot(errors=True)
# Last expression of the cell: display the repr of the projected histogram
proj1
[18]:
Histogram1D('Projection to X', bins=(4,), total=1000, dtype=int64)
[19]:
# Project the 2D histogram onto its "y" axis; the result is a 1D histogram
proj2 = histogram.projection("y", name="Projection to Y")
proj2.plot(errors=True)
# Last expression of the cell: display the repr of the projected histogram
proj2
[19]:
Histogram1D('Projection to Y', bins=(7,), total=1000, dtype=int64)
Adaptive 2D histograms¶
[20]:
# Create and add two histograms with adaptive binning.
# Step 1: two simulated (height, weight) samples with different centres
height1 = np.random.normal(loc=180, scale=5, size=1000)
weight1 = np.random.normal(loc=80, scale=2, size=1000)
height2 = np.random.normal(loc=160, scale=5, size=1000)
weight2 = np.random.normal(loc=70, scale=2, size=1000)

# Step 2: one adaptive histogram with unit-wide bins per sample
adaptive_options = dict(bin_width=1, adaptive=True)
ad1 = h2(height1, weight1, "fixed_width", **adaptive_options)
ad2 = h2(height2, weight2, "fixed_width", **adaptive_options)

# Step 3: plot each histogram and then their sum
ad1.plot(show_zero=False)
ad2.plot(show_zero=False)
(ad1 + ad2).plot(show_zero=False);
N-dimensional histograms¶
Although it is not easy to visualize them, it is possible to create histograms of any dimension that behave similarly to 2D ones. Warning: be aware that the memory consumption can be significant.
[21]:
# Create a 4D histogram
# Input: 1000 points in 4 dimensions; each column is an independent uniform sample
data = np.column_stack([np.random.rand(1000) for _ in range(4)])
# 3, 2, 2 and 3 bins along the four axes; the axes are later referred to as "a", "b", "c", "d"
h4 = histogramdd(data, [3, 2, 2, 3], axis_names="abcd")
h4
[21]:
HistogramND(bins=(3, 2, 2, 3), total=1000, dtype=int64)
[22]:
# The bin contents of the 4D histogram form a 4D array
h4.frequencies
[22]:
array([[[[31, 28, 33],
[21, 22, 22]],
[[25, 29, 28],
[29, 35, 28]]],
[[[20, 25, 20],
[28, 32, 31]],
[[30, 28, 24],
[29, 21, 27]]],
[[[27, 26, 33],
[21, 35, 30]],
[[38, 30, 32],
[25, 30, 27]]]], dtype=int64)
[23]:
# Keep only axes "a" and "d" of the 4D histogram and plot the resulting 2D histogram
h4_ad = h4.projection("a", "d", name="4D -> 2D")
h4_ad.plot(show_values=True, format_value=int, cmap_min="min");
[24]:
# Keep only axis "d" of the 4D histogram and draw it as a scatter plot with error bars
h4_d = h4.projection("d", name="4D -> 1D")
h4_d.plot("scatter", errors=True);
Support for pandas DataFrames (without pandas dependency ;-))¶
[25]:
# Load notorious example data set (fetched through seaborn's dataset loader)
iris = sns.load_dataset('iris')
[28]:
# 2D histogram built directly from two columns of the `iris` data set loaded in the previous cell
iris_hist = h2(
    iris["sepal_length"],
    iris["sepal_width"],
    "human",
    bin_count=[12, 7],
    name="Iris",
)
iris_hist.plot(show_zero=False, cmap=cm.gray_r, show_values=True, format_value=int);
[29]:
# No axis_names were passed when building iris_hist, yet the projection can be selected
# by column name - apparently the axis names are taken over from the input columns
iris_hist.projection("sepal_length").plot();