# Detecting the Higgs Boson With TPUs
"""
Searching for the Higgs Boson
The Standard Model is a theory in particle physics that describes some of the most basic forces of nature. One fundamental particle, the Higgs boson, is what accounts for the mass of matter. First theorized in 1964, the Higgs boson eluded observation for almost fifty years. In 2012 it was finally observed experimentally at the Large Hadron Collider. These experiments produced millions of gigabytes of data.
Large and complicated datasets like these are where deep learning excels. In this notebook, we'll build a Wide and Deep neural network to determine whether an observed particle collision produced a Higgs boson or not.
The Collision Data
The collision of protons at high energy can produce new particles like the Higgs boson. These particles can't be directly observed, however, since they decay almost instantly. So to detect the presence of a new particle, we instead observe the behavior of the particles it decays into, its "decay products".
The Higgs dataset contains 21 "low-level" features of the decay products and 7 "high-level" features derived from these.
Wide and Deep Neural Networks
A Wide and Deep network trains a linear layer side-by-side with a deep stack of dense layers. Wide and Deep networks are often effective on tabular datasets.[^1]
Both the dataset and the model are much larger than what we used in the course. To speed up training, we'll use Kaggle's Tensor Processing Units (TPUs), an accelerator ideal for large workloads.
We've collected some hyperparameters here to make experimentation easier. Fork this notebook to try it yourself!
"""
# Model Configuration
UNITS = 2 ** 11 # 2048
ACTIVATION = 'relu'
DROPOUT = 0.1
# Training Configuration
BATCH_SIZE_PER_REPLICA = 2 ** 11  # multiples of 128 work best on TPUs
# The next few sections set up the TPU computation, data pipeline, and neural network model.
# If you'd just like to see the results, feel free to skip to the end!
# Setup
# In addition to our imports, this section contains some code that will connect our notebook to the TPU
# and create a distribution strategy. Each TPU has eight computational cores acting independently.
# With a distribution strategy, we define how we want to divide up the work between them.
# TensorFlow
import tensorflow as tf
print("Tensorflow version " + tf.__version__)
# Detect and init the TPU
try:  # detect TPUs
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()  # TPU detection
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError:  # no TPU found
    strategy = tf.distribute.get_strategy()  # default strategy that works on CPU and single GPU
print("Number of accelerators: ", strategy.num_replicas_in_sync)
# Plotting
import pandas as pd
import matplotlib.pyplot as plt
# Matplotlib defaults
plt.style.use('seaborn-whitegrid')
plt.rc('figure', autolayout=True)
plt.rc('axes', labelweight='bold', labelsize='large',
       titleweight='bold', titlesize=18, titlepad=10)
# Data
from kaggle_datasets import KaggleDatasets
from tensorflow.io import FixedLenFeature
AUTO = tf.data.experimental.AUTOTUNE
# Model
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
# Expected output:
#   Tensorflow version 2.4.1
#   Number of accelerators:  8
# Notice that TensorFlow now detects eight accelerators. Using a TPU is a bit like using eight GPUs at once.
# Load Data
# The dataset has been encoded in a binary file format called TFRecords.
# These two functions will parse the TFRecords and build a TensorFlow tf.data.Dataset object that we can use
# for training.
def make_decoder(feature_description):
    def decoder(example):
        example = tf.io.parse_single_example(example, feature_description)
        features = tf.io.parse_tensor(example['features'], tf.float32)
        features = tf.reshape(features, [28])  # 21 low-level + 7 high-level features
        label = example['label']
        return features, label
    return decoder
def load_dataset(filenames, decoder, ordered=False):
    AUTO = tf.data.experimental.AUTOTUNE
    ignore_order = tf.data.Options()
    if not ordered:
        # Allow order-changing optimizations for faster parallel reads
        ignore_order.experimental_deterministic = False
    dataset = (
        tf.data
        .TFRecordDataset(filenames, num_parallel_reads=AUTO)
        .with_options(ignore_order)
        .map(decoder, AUTO)
    )
    return dataset
dataset_size = int(11e6)
validation_size = int(5e5)
training_size = dataset_size - validation_size
# For model.fit
batch_size = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync
steps_per_epoch = training_size // batch_size
validation_steps = validation_size // batch_size
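# With the 8 TPU replicas detected above, the global batch size works out to
# 2048 * 8 = 16,384, giving roughly 640 training steps and 30 validation steps per epoch.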
# For model.compile
steps_per_execution = 256
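# steps_per_execution batches this many training steps into a single call to the TPU,
# reducing host-to-device communication overhead between steps.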
feature_description = {
    'features': FixedLenFeature([], tf.string),
    'label': FixedLenFeature([], tf.float32),
}
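# Illustration only (not part of the original pipeline): a sketch of how a record
# matching this schema could have been written. The 'features' field stores a
# serialized float32 tensor, which is why make_decoder above uses tf.io.parse_tensor.
# The function name and file name are hypothetical.
def write_example_sketch(features_28, label, path='sketch.tfrecord'):
    serialized = tf.io.serialize_tensor(tf.constant(features_28, tf.float32))
    example = tf.train.Example(features=tf.train.Features(feature={
        'features': tf.train.Feature(
            bytes_list=tf.train.BytesList(value=[serialized.numpy()])),
        'label': tf.train.Feature(
            float_list=tf.train.FloatList(value=[float(label)])),
    }))
    with tf.io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())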
decoder = make_decoder(feature_description)
data_dir = KaggleDatasets().get_gcs_path('higgs-boson')
train_files = tf.io.gfile.glob(data_dir + '/training' + '/*.tfrecord')
valid_files = tf.io.gfile.glob(data_dir + '/validation' + '/*.tfrecord')
ds_train = load_dataset(train_files, decoder, ordered=False)
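# Optional sanity check (not in the original notebook): decode a single example
# and confirm the expected shapes -- 28 features and a scalar label.
# for features, label in ds_train.take(1):
#     print(features.shape, float(label))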
ds_train = (
    ds_train
    .cache()
    .repeat()
    .shuffle(2 ** 19)
    .batch(batch_size)
    .prefetch(AUTO)
)
ds_valid = load_dataset(valid_files, decoder, ordered=False)
ds_valid = (
    ds_valid
    .batch(batch_size)
    .cache()
    .prefetch(AUTO)
)
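# Caching after batching keeps the already-batched validation set in memory;
# no shuffle or repeat is needed since the validation data is only evaluated.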
# Model
# Now that the data is ready, let's define the network.
# We're defining the deep branch of the network using Keras's Functional API,
# which is a bit more flexible than the Sequential method we used in the course.
def dense_block(units, activation, dropout_rate, l1=None, l2=None):
    # (l1 and l2 are accepted for experimentation but unused in this version)
    def make(inputs):
        x = layers.Dense(units)(inputs)
        x = layers.BatchNormalization()(x)
        x = layers.Activation(activation)(x)
        x = layers.Dropout(dropout_rate)(x)
        return x
    return make
with strategy.scope():
    # Wide Network
    wide = keras.experimental.LinearModel()

    # Deep Network
    inputs = keras.Input(shape=[28])
    x = dense_block(UNITS, ACTIVATION, DROPOUT)(inputs)
    x = dense_block(UNITS, ACTIVATION, DROPOUT)(x)
    x = dense_block(UNITS, ACTIVATION, DROPOUT)(x)
    x = dense_block(UNITS, ACTIVATION, DROPOUT)(x)
    x = dense_block(UNITS, ACTIVATION, DROPOUT)(x)
    outputs = layers.Dense(1)(x)
    deep = keras.Model(inputs=inputs, outputs=outputs)

    # Wide and Deep Network
    wide_and_deep = keras.experimental.WideDeepModel(
        linear_model=wide,
        dnn_model=deep,
        activation='sigmoid',
    )

    wide_and_deep.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['AUC', 'binary_accuracy'],
        experimental_steps_per_execution=steps_per_execution,
    )
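# Note (not from the original notebook): WideDeepModel.compile can also take a pair of
# optimizers, e.g. optimizer=['sgd', 'adam'], with the first applied to the wide (linear)
# branch and the second to the deep branch; here a single 'adam' optimizer is shared by both.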
# Training
# During training, we'll use the EarlyStopping callback as usual.
# Notice that we've also defined a learning rate schedule.
# It's been found that gradually decreasing the learning rate over the course of training can improve performance
# (the weights "settle in" to a minimum). This schedule will multiply the learning rate by 0.2 if the validation loss
# hasn't decreased after an epoch.
early_stopping = callbacks.EarlyStopping(
    patience=2,
    min_delta=0.001,
    restore_best_weights=True,
)

lr_schedule = callbacks.ReduceLROnPlateau(
    patience=0,
    factor=0.2,
    min_lr=0.001,
)

history = wide_and_deep.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=50,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=[early_stopping, lr_schedule],
)
history_frame = pd.DataFrame(history.history)
history_frame.loc[:, ['loss', 'val_loss']].plot(title='Cross-entropy Loss')
history_frame.loc[:, ['auc', 'val_auc']].plot(title='AUC');
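# Optional (not part of the original notebook): report the best validation AUC,
# using the lowercase 'val_auc' key produced by the 'AUC' metric above.
best_epoch = history_frame['val_auc'].idxmax()
print(f"Best validation AUC: {history_frame['val_auc'].max():.4f} (epoch {best_epoch})")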