There are currently 5 examples provided for the EnergyFlow package. They currently focus on demonstrating the various architectures included as part of EnergyFlow (see Architectures). For examples involving the computation of EFPs or EMDs, see the demos.
To install the examples to the default directory, ~/.energyflow/examples/
, simply run
python -c "import energyflow; energyflow.utils.get_examples()"
See the get_examples
function for more detailed information.
efp_example.py
An example involving Energy Flow Polynomials (EFPs) and a linear
classifier (Fisher's Linear Discriminant by default). First, the
EFPSet
class is used to compute the EFPs
up to the specified dmax
, the default being dmax=5
. Then linear
classifiers are trained for different numbers of EFPs as input,
determined by taking all EFPs up to degree d
with d
from 1
to dmax
. The output of the example is a plot of the ROC curves
for the classifiers with different numbers of EFP inputs.
# standard library imports
from __future__ import absolute_import, division, print_function
# standard numerical library imports
import numpy as np
# energyflow imports
import energyflow as ef
from energyflow.archs import LinearClassifier
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, standardize, to_categorical
# attempt to import sklearn
try:
from sklearn.metrics import roc_auc_score, roc_curve
except:
print('please install scikit-learn in order to make ROC curves')
roc_curve = False
# attempt to import matplotlib
try:
import matplotlib.pyplot as plt
except:
print('please install matploltib in order to make plots')
plt = False
################################### SETTINGS ###################################
# data controls
num_data = 20000
test_frac = 0.2
# efp parameters
dmax = 5
measure = 'hadr'
beta = 0.5
# plotting
colors = ['tab:red', 'tab:orange', 'tab:olive', 'tab:green', 'tab:blue']
################################################################################
# load data
X, y = qg_jets.load(num_data)
print('Loaded quark and gluon jets')
# calculate EFPs
print('Calculating d <= {} EFPs for {} jets... '.format(dmax, num_data), end='')
efpset = ef.EFPSet(('d<=', dmax), measure='hadr', beta=beta)
masked_X = [x[x[:,0] > 0] for x in X]
X = efpset.batch_compute(masked_X)
print('Done')
# train models with different numbers of EFPs as input
rocs = []
for d in range(1, dmax+1):
# build architecture
model = LinearClassifier(linclass_type='lda')
# select EFPs with degree <= d
X_d = X[:,efpset.sel(('d<=', d))]
# do train/val/test split
(X_train, X_test, y_train, y_test) = data_split(X_d, y, val=0, test=test_frac)
print('Done train/val/test split')
# train model
model.fit(X_train, y_train)
# get predictions on test data
preds = model.predict(X_test)
# get ROC curve if we have sklearn
if roc_curve:
rocs.append(roc_curve(y_test, preds[:,1]))
# get area under the ROC curve
auc = roc_auc_score(y_test, preds[:,1])
print()
print('EFPs d <= {} AUC:'.format(d), auc)
print()
# make ROC curve plot if we have matplotlib
if plt:
# some nicer plot settings
plt.rcParams['figure.figsize'] = (4,4)
plt.rcParams['font.family'] = 'serif'
plt.rcParams['figure.autolayout'] = True
# iterate over the ROC curves and plot them
for i,d in enumerate(range(1, dmax+1)):
plt.plot(rocs[i][1], 1-rocs[i][0], '-', color=colors[i],
label='LDA: d <= {} EFPs'.format(d))
# axes labels
plt.xlabel('Quark Jet Efficiency')
plt.ylabel('Gluon Jet Rejection')
# axes limits
plt.xlim(0, 1)
plt.ylim(0, 1)
# make legend and show plot
plt.legend(loc='lower left', frameon=False)
plt.show()
efn_example.py
An example involving Energy Flow Networks (EFNs), which were introduced
in 1810.05165. The EFN
class is used to construct the network architecture. The output
of the example is a plot of the ROC curves obtained by the EFN as well as
the jet mass and constituent multiplicity observables.
# standard library imports
from __future__ import absolute_import, division, print_function
# standard numerical library imports
import numpy as np
# energyflow imports
import energyflow as ef
from energyflow.archs import EFN
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, to_categorical
# attempt to import sklearn
try:
from sklearn.metrics import roc_auc_score, roc_curve
except:
print('please install scikit-learn in order to make ROC curves')
roc_curve = False
# attempt to import matplotlib
try:
import matplotlib.pyplot as plt
except:
print('please install matploltib in order to make plots')
plt = False
################################### SETTINGS ##################################
# the commented values correspond to those in 1810.05165
###############################################################################
# data controls, can go up to 2000000 total for full dataset
train, val, test = 75000, 10000, 15000
# train, val, test = 1000000, 200000, 200000
# network architecture parameters
Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
# Phi_sizes, F_sizes = (100, 100, 256), (100, 100, 100)
# network training parameters
num_epoch = 5
batch_size = 500
###############################################################################
# load data
X, y = qg_jets.load(train + val + test)
# ignore pid information
X = X[:,:,:3]
# convert labels to categorical
Y = to_categorical(y, num_classes=2)
print('Loaded quark and gluon jets')
# preprocess by centering jets and normalizing pts
for x in X:
mask = x[:,0] > 0
yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
x[mask,1:3] -= yphi_avg
x[mask,0] /= x[:,0].sum()
print('Finished preprocessing')
# do train/val/test split
(z_train, z_val, z_test,
p_train, p_val, p_test,
Y_train, Y_val, Y_test) = data_split(X[:,:,0], X[:,:,1:], Y, val=val, test=test)
print('Done train/val/test split')
print('Model summary:')
# build architecture
efn = EFN(input_dim=2, Phi_sizes=Phi_sizes, F_sizes=F_sizes)
# train model
efn.fit([z_train, p_train], Y_train,
epochs=num_epoch,
batch_size=batch_size,
validation_data=([z_val, p_val], Y_val),
verbose=1)
# get predictions on test data
preds = efn.predict([z_test, p_test], batch_size=1000)
# get ROC curve if we have sklearn
if roc_curve:
efn_fp, efn_tp, threshs = roc_curve(Y_test[:,1], preds[:,1])
# get area under the ROC curve
auc = roc_auc_score(Y_test[:,1], preds[:,1])
print()
print('EFN AUC:', auc)
print()
# make ROC curve and filter plot if we have matplotlib
if plt:
# some nicer plot settings
plt.rcParams['font.family'] = 'serif'
plt.rcParams['figure.autolayout'] = True
fig, axes = plt.subplots(1, 2, figsize=(8,4))
######################### ROC Curve Plot #########################
# get multiplicity and mass for comparison
masses = np.asarray([ef.ms_from_p4s(ef.p4s_from_ptyphims(x).sum(axis=0)) for x in X])
mults = np.asarray([np.count_nonzero(x[:,0]) for x in X])
mass_fp, mass_tp, threshs = roc_curve(Y[:,1], -masses)
mult_fp, mult_tp, threshs = roc_curve(Y[:,1], -mults)
# plot the ROC curves
axes[0].plot(efn_tp, 1-efn_fp, '-', color='black', label='EFN')
axes[0].plot(mass_tp, 1-mass_fp, '-', color='blue', label='Jet Mass')
axes[0].plot(mult_tp, 1-mult_fp, '-', color='red', label='Multiplicity')
# axes labels
axes[0].set_xlabel('Quark Jet Efficiency')
axes[0].set_ylabel('Gluon Jet Rejection')
# axes limits
axes[0].set_xlim(0, 1)
axes[0].set_ylim(0, 1)
# make legend and show plot
axes[0].legend(loc='lower left', frameon=False)
######################### Filter Plot #########################
# plot settings
R, n = 0.4, 100
colors = ['Reds', 'Oranges', 'Greens', 'Blues', 'Purples', 'Greys']
grads = np.linspace(0.45, 0.55, 4)
# evaluate filters
X, Y, Z = efn.eval_filters(R, n=n)
# plot filters
for i,z in enumerate(Z):
axes[1].contourf(X, Y, z/np.max(z), grads, cmap=colors[i%len(colors)])
axes[1].set_xticks(np.linspace(-R, R, 5))
axes[1].set_yticks(np.linspace(-R, R, 5))
axes[1].set_xticklabels(['-R', '-R/2', '0', 'R/2', 'R'])
axes[1].set_yticklabels(['-R', '-R/2', '0', 'R/2', 'R'])
axes[1].set_xlabel('Translated Rapidity y')
axes[1].set_ylabel('Translated Azimuthal Angle phi')
axes[1].set_title('Energy Flow Network Latent Space', fontdict={'fontsize': 10})
plt.show()
pfn_example.py
An example involving Particle Flow Networks (PFNs), which were
introduced in 1810.05165. The
PFN
class is used to construct the
network architecture. The output of the example is a plot of the
ROC curves obtained by the PFN as well as the jet mass and
constituent multiplicity observables.
# standard library imports
from __future__ import absolute_import, division, print_function
# standard numerical library imports
import numpy as np
# energyflow imports
import energyflow as ef
from energyflow.archs import PFN
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, remap_pids, to_categorical
# attempt to import sklearn
try:
from sklearn.metrics import roc_auc_score, roc_curve
except:
print('please install scikit-learn in order to make ROC curves')
roc_curve = False
# attempt to import matplotlib
try:
import matplotlib.pyplot as plt
except:
print('please install matploltib in order to make plots')
plt = False
################################### SETTINGS ###################################
# the commented values correspond to those in 1810.05165
###############################################################################
# data controls, can go up to 2000000 for full dataset
train, val, test = 75000, 10000, 15000
# train, val, test = 1000000, 200000, 200000
use_pids = True
# network architecture parameters
Phi_sizes, F_sizes = (100, 100, 128), (100, 100, 100)
# Phi_sizes, F_sizes = (100, 100, 256), (100, 100, 100)
# network training parameters
num_epoch = 5
batch_size = 500
################################################################################
# load data
X, y = qg_jets.load(train + val + test)
# convert labels to categorical
Y = to_categorical(y, num_classes=2)
print('Loaded quark and gluon jets')
# preprocess by centering jets and normalizing pts
for x in X:
mask = x[:,0] > 0
yphi_avg = np.average(x[mask,1:3], weights=x[mask,0], axis=0)
x[mask,1:3] -= yphi_avg
x[mask,0] /= x[:,0].sum()
# handle particle id channel
if use_pids:
remap_pids(X, pid_i=3)
else:
X = X[:,:,:3]
print('Finished preprocessing')
# do train/val/test split
(X_train, X_val, X_test,
Y_train, Y_val, Y_test) = data_split(X, Y, val=val, test=test)
print('Done train/val/test split')
print('Model summary:')
# build architecture
pfn = PFN(input_dim=X.shape[-1], Phi_sizes=Phi_sizes, F_sizes=F_sizes)
# train model
pfn.fit(X_train, Y_train,
epochs=num_epoch,
batch_size=batch_size,
validation_data=(X_val, Y_val),
verbose=1)
# get predictions on test data
preds = pfn.predict(X_test, batch_size=1000)
# get ROC curve if we have sklearn
if roc_curve:
pfn_fp, pfn_tp, threshs = roc_curve(Y_test[:,1], preds[:,1])
# get area under the ROC curve
auc = roc_auc_score(Y_test[:,1], preds[:,1])
print()
print('PFN AUC:', auc)
print()
# make ROC curve plot if we have matplotlib
if plt:
# get multiplicity and mass for comparison
masses = np.asarray([ef.ms_from_p4s(ef.p4s_from_ptyphims(x).sum(axis=0)) for x in X])
mults = np.asarray([np.count_nonzero(x[:,0]) for x in X])
mass_fp, mass_tp, threshs = roc_curve(Y[:,1], -masses)
mult_fp, mult_tp, threshs = roc_curve(Y[:,1], -mults)
# some nicer plot settings
plt.rcParams['figure.figsize'] = (4,4)
plt.rcParams['font.family'] = 'serif'
plt.rcParams['figure.autolayout'] = True
# plot the ROC curves
plt.plot(pfn_tp, 1-pfn_fp, '-', color='black', label='PFN')
plt.plot(mass_tp, 1-mass_fp, '-', color='blue', label='Jet Mass')
plt.plot(mult_tp, 1-mult_fp, '-', color='red', label='Multiplicity')
# axes labels
plt.xlabel('Quark Jet Efficiency')
plt.ylabel('Gluon Jet Rejection')
# axes limits
plt.xlim(0, 1)
plt.ylim(0, 1)
# make legend and show plot
plt.legend(loc='lower left', frameon=False)
plt.show()
cnn_example.py
An example involving jet images and convolutional neural networks (CNNs).
The CNN
class is used to provide a network architecture
based on that described in 1612.01551.
Jet images are constructed using the pixelate
function and can be either one-channel (grayscale), meaning that only
$p_T$ information is used, or two-channel (color), meaning that $p_T$
information and local charged particle counts are used. The images are
preprocessed by subtracting the average image in the training set and
dividing by the per-pixel standard deviations, using the
zero_center
and
standardize
functions, respectively.
The output of the example is a plot of the ROC curves of the CNN
as well as the jet mass and constituent multiplicity observables.
Note that the number of epochs is quite small because it is quite time consuming to train a CNN without a GPU (which will speed up this example immensely).
# standard library imports
from __future__ import absolute_import, division, print_function
# standard numerical library imports
import numpy as np
# energyflow imports
import energyflow as ef
from energyflow.archs import CNN
from energyflow.datasets import qg_jets
from energyflow.utils import data_split, pixelate, standardize, to_categorical, zero_center
# attempt to import sklearn
try:
from sklearn.metrics import roc_auc_score, roc_curve
except:
print('please install scikit-learn in order to make ROC curves')
roc_curve = False
# attempt to import matplotlib
try:
import matplotlib.pyplot as plt
except:
print('please install matploltib in order to make plots')
plt = False
################################### SETTINGS ###################################
# data controls
num_data = 100000
val_frac, test_frac = 0.1, 0.15
# image parameters
R = 0.4
img_width = 2*R
npix = 33
nb_chan = 2
norm = True
# required network architecture parameters
input_shape = (nb_chan, npix, npix)
filter_sizes = [8, 4, 4]
num_filters = [8, 8, 8] # very small so can run on non-GPUs in reasonable time
# optional network architecture parameters
dense_sizes = [50]
pool_sizes = 2
# network training parameters
num_epoch = 2
batch_size = 100
################################################################################
# load data
X, y = qg_jets.load(num_data=num_data)
# convert labels to categorical
Y = to_categorical(y, num_classes=2)
print('Loaded quark and gluon jets')
# make jet images
images = np.asarray([pixelate(x, npix=npix, img_width=img_width, nb_chan=nb_chan,
charged_counts_only=True, norm=norm) for x in X])
print('Done making jet images')
# do train/val/test split
(X_train, X_val, X_test,
Y_train, Y_val, Y_test) = data_split(images, Y, val=val_frac, test=test_frac)
print('Done train/val/test split')
# preprocess by zero centering images and standardizing each pixel
X_train, X_val, X_test = standardize(*zero_center(X_train, X_val, X_test))
print('Finished preprocessing')
print('Model summary:')
# build architecture
hps = {'input_shape': input_shape,
'filter_sizes': filter_sizes,
'num_filters': num_filters,
'dense_sizes': dense_sizes,
'pool_sizes': pool_sizes}
cnn = CNN(hps)
# train model
cnn.fit(X_train, Y_train,
epochs=num_epoch,
batch_size=batch_size,
validation_data=(X_val, Y_val),
verbose=1)
# get predictions on test data
preds = cnn.predict(X_test, batch_size=1000)
# get ROC curve if we have sklearn
if roc_curve:
cnn_fp, cnn_tp, threshs = roc_curve(Y_test[:,1], preds[:,1])
# get area under the ROC curve
auc = roc_auc_score(Y_test[:,1], preds[:,1])
print()
print('CNN AUC:', auc)
print()
# make ROC curve plot if we have matplotlib
if plt:
# get multiplicity and mass for comparison
masses = np.asarray([ef.ms_from_p4s(ef.p4s_from_ptyphims(x).sum(axis=0)) for x in X])
mults = np.asarray([np.count_nonzero(x[:,0]) for x in X])
mass_fp, mass_tp, threshs = roc_curve(Y[:,1], -masses)
mult_fp, mult_tp, threshs = roc_curve(Y[:,1], -mults)
# some nicer plot settings
plt.rcParams['figure.figsize'] = (4,4)
plt.rcParams['font.family'] = 'serif'
plt.rcParams['figure.autolayout'] = True
# plot the ROC curves
plt.plot(cnn_tp, 1-cnn_fp, '-', color='black', label='CNN')
plt.plot(mass_tp, 1-mass_fp, '-', color='blue', label='Jet Mass')
plt.plot(mult_tp, 1-mult_fp, '-', color='red', label='Multiplicity')
# axes labels
plt.xlabel('Quark Jet Efficiency')
plt.ylabel('Gluon Jet Rejection')
# axes limits
plt.xlim(0, 1)
plt.ylim(0, 1)
# make legend and show plot
plt.legend(loc='lower left', frameon=False)
plt.show()
dnn_example.py
An example involving deep, fully-connected neural networks (DNNs).
The DNN
class is used to construct the network
architecture.
The inputs are taken to be the $N$-subjettiness observables as specified as part of the phase space basis from 1704.08249, cut off at some total number of observables. The output of the example is a plot showing the ROC curves obtained from training the DNN on different numbers of $N$-subjettiness observables.
# standard library imports
from __future__ import absolute_import, division, print_function
# standard numerical library imports
import numpy as np
# energyflow imports
import energyflow as ef
from energyflow.archs import DNN
from energyflow.datasets import qg_nsubs
from energyflow.utils import data_split, to_categorical
# attempt to import sklearn
try:
from sklearn.metrics import roc_auc_score, roc_curve
except:
print('please install scikit-learn in order to make ROC curves')
roc_curve = False
# attempt to import matplotlib
try:
import matplotlib.pyplot as plt
except:
print('please install matploltib in order to make plots')
plt = False
################################### SETTINGS ###################################
# data controls
num_data = 100000
val_frac, test_frac = 0.1, 0.15
# network architecture parameters
dense_sizes = (100, 100)
# network training parameters
num_epoch = 10
batch_size = 100
# sweep parameters
num_nsubs = [1, 2, 4, 8, 16, 32]
colors = ['tab:red', 'tab:orange', 'tab:olive', 'tab:green', 'tab:blue', 'tab:purple']
################################################################################
# load data
X, y = qg_nsubs.load(num_data=num_data)
# convert labels to categorical
Y = to_categorical(y, num_classes=2)
print('Loaded quark and gluon jets')
print('Model summary:')
# train models with different numbers of nsubs as input
rocs = []
for i,num_nsub in enumerate(num_nsubs):
# build architecture
dnn = DNN(input_dim=num_nsub, dense_sizes=dense_sizes, summary=(i==0))
# do train/val/test split
(X_train, X_val, X_test,
Y_train, Y_val, Y_test) = data_split(X[:,:num_nsub], Y, val=val_frac, test=test_frac)
print('Done train/val/test split')
# train model
dnn.fit(X_train, Y_train,
epochs=num_epoch,
batch_size=batch_size,
validation_data=(X_val, Y_val),
verbose=1)
# get predictions on test data
preds = dnn.predict(X_test, batch_size=1000)
# get ROC curve if we have sklearn
if roc_curve:
rocs.append(roc_curve(Y_test[:,1], preds[:,1]))
# get area under the ROC curve
auc = roc_auc_score(Y_test[:,1], preds[:,1])
print()
print('{} nsubs DNN AUC:'.format(num_nsub), auc)
print()
# make ROC curve plot if we have matplotlib
if plt:
# some nicer plot settings
plt.rcParams['figure.figsize'] = (4,4)
plt.rcParams['font.family'] = 'serif'
plt.rcParams['figure.autolayout'] = True
# iterate over the ROC curves and plot them
for i in range(len(rocs)):
plt.plot(rocs[i][1], 1-rocs[i][0], '-', color=colors[i],
label='DNN: {} N-subs'.format(num_nsubs[i]))
# axes labels
plt.xlabel('Quark Jet Efficiency')
plt.ylabel('Gluon Jet Rejection')
# axes limits
plt.xlim(0, 1)
plt.ylim(0, 1)
# make legend and show plot
plt.legend(loc='lower left', frameon=False)
plt.show()