-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulate_mf_trajectory_inf_from_token.py
249 lines (198 loc) · 12.7 KB
/
simulate_mf_trajectory_inf_from_token.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
import numpy as np
from models.HopfieldSelfAttentionNNMFInfNPE import HopfieldSelfAttentionNNMFInfNPE
from models.Embedding import Embedding
from plotting.plotting import (plot_save_statistics, plot_save_plane, plot_save_fft, plot_save_autocorrelation,
plot_lyapunov_graphs)
import os
import yaml
import time
def create_dir(filepath):
    """
    Creates the folder that will contain `filepath` if it does not exist yet.

    Only the directory component of `filepath` (as returned by `os.path.dirname`) is created,
    so a path ending in a separator creates that folder itself. A bare file name has no
    directory component and nothing is created in that case.

    :param filepath: path of the file (or folder path ending in a separator) about to be written
    """
    plot_save_folder_path = os.path.dirname(filepath)

    # A bare file name yields an empty dirname, and os.makedirs("") raises FileNotFoundError
    if plot_save_folder_path:
        # exist_ok=True removes the race between checking for the folder and creating it
        os.makedirs(plot_save_folder_path, exist_ok=True)
def save_context(context_window, folder_path_chpt, seed, num_transient_steps, max_sim_steps):
    """
    Saves the mean-field values associated to the context window as a compressed .npz file.

    The file is written inside `folder_path_chpt` and is named after the seed, the number of
    transient steps and the maximum number of simulated steps. The folder is not created here.

    :param context_window: sequence of 6 elements, in this order: att, mo, mv, mq, mk and pe windows
    :param folder_path_chpt: folder in which the checkpoint file is stored
    :param seed: seed of the run, used in the file name
    :param num_transient_steps: number of transient steps, used in the file name
    :param max_sim_steps: maximum number of simulated steps, used in the file name
    """
    att_window, mo_window, mv_window, mq_window, mk_window, pe_window = context_window

    chpt_name = f"seed-{seed}-transient_steps-{num_transient_steps}-max_sim_steps-{max_sim_steps}.npz"
    # os.path.join avoids a doubled separator when the folder already ends with one and keeps
    # the path relative (instead of pointing at the filesystem root) when the folder is empty
    chpt_path = os.path.join(folder_path_chpt, chpt_name)

    np.savez_compressed(chpt_path,
                        att_window=att_window,
                        mo_window=mo_window,
                        mv_window=mv_window,
                        mq_window=mq_window,
                        mk_window=mk_window,
                        pe_window=pe_window)
if __name__ == "__main__":
    # Script entry point: load the experiment configuration, simulate the mean-field dynamics of the
    # Hopfield Transformer from an initial token for every beta in beta_list, and then plot (or save)
    # trajectories, FFTs, autocorrelations, planes and Lyapunov-related graphs.

    # Set the variables for the experiments
    cfg_path = 'cfgs/bif_diagram_inf_0_zoom-in.yaml'

    # Load cfg
    with open(cfg_path, 'r') as file:
        cfg = yaml.safe_load(file)

    # Instantiate vocabulary
    tentative_semantic_embedding_size = cfg["semantic_embedding_size"]  # Variable to set the size of the matrices from which to compute the corrs
    positional_embedding_size = 2  # Number of bits dedicated to the positional embedding
    context_size = 2 ** positional_embedding_size  # Context size
    vocab = Embedding(tentative_semantic_embedding_size, positional_embedding_size)  # Vocabulary helper class
    vocab.initialize_pos_encoder()  # Initiate some functionalities

    # Create variables for the Hopfield Transformer (HT)
    seed = 1  # Seed for the correlations
    num_feat_patterns = 3  # Number of patterns
    # Full sweep of beta values: [1.255, 1.26405, 1.266, 1.27, 1.28, 1.4]
    beta_list = [1.255]  # Values of beta simulated in this run
    scaling_o = cfg["scaling_o"]  # Not scaled
    beta_att = cfg["beta_att"]
    scaling_att = cfg["scaling_att"]  # Beta_att * scaling_att make gamma from the paper
    num_transient_steps = cfg["num_transient_steps"]  # Num. of transient steps
    max_sim_steps = cfg["max_sim_steps"]  # Max. simulated steps
    saved_steps = max_sim_steps - num_transient_steps  # Steps left after discarding the transient

    correlations_from_weights = cfg["correlations_from_weights"]  # Variable for choosing how to set the correlations
    # If = 3, we compute them from segments as in the paper
    pe_mode = cfg["pe_mode"]  # Choose how to initialize the PE. Set it randomly.
    epsilon_pe = cfg["epsilon_pe"]  # epsilon in the paper
    normalize_weights_str_att = cfg["normalize_weights_str_att"]  # U in the paper
    normalize_weights_str_o = cfg["normalize_weights_str_o"]  # Normalization in the output
    compute_inf_normalization = cfg["compute_inf_normalization"]  # Deal with normalization constraint in infinity

    compute_lyapunov = True  # True if you want to compute the Lyapunov exponents
    save_not_plot = False  # True -> Save; False -> Plot
    save_context_cond = False  # If true, save context so it can be later loaded to start another execution
    show_title = True  # Whether to show the title on top

    # Suffix shared by all the file names produced by this run
    run_tag = f"-transient_steps-{num_transient_steps}-max_sim_steps-{max_sim_steps}"

    for beta in beta_list:
        # Create seed for reproducibility
        np.random.seed(seed)

        # Create Hopfield Transformer Class
        HT = HopfieldSelfAttentionNNMFInfNPE(beta, beta_att, num_feat_patterns=num_feat_patterns,
                                             positional_embedding_bitsize=positional_embedding_size, vocab=vocab,
                                             context_size=context_size, max_sim_steps=max_sim_steps,
                                             min_saved_step=num_transient_steps,
                                             normalize_weights_str_att=normalize_weights_str_att,
                                             normalize_weights_str_o=normalize_weights_str_o,
                                             correlations_from_weights=correlations_from_weights,
                                             semantic_embedding_bitsize=tentative_semantic_embedding_size,
                                             epsilon_pe=epsilon_pe, pe_mode=pe_mode,
                                             compute_inf_normalization=compute_inf_normalization,
                                             scaling_o=scaling_o,
                                             scaling_att=scaling_att)

        # Reset/initialize the structures for saving data
        HT.reset_data()

        print(f"Simulating MF Self-Attention NN for beta {beta}...")

        # Choose as initial token one of the encoded features
        ini_token = HT.Wo_SE[0]

        start = time.time()
        HT.simulate_from_token(ini_token, max_steps=max_sim_steps, compute_lyapunov=compute_lyapunov)
        end = time.time()
        print("Done.")
        print("Execution time = ", (end - start)/60, " minutes")

        # Plotting
        print("Plotting statistics...")

        folder_path = f"results_betas_ft/beta{beta}/"

        # Create dir if it does not exist
        if save_not_plot or save_context_cond:
            create_dir(folder_path)

        if save_context_cond:
            cw = HT.get_context_window()
            save_context(cw, folder_path, seed, num_transient_steps, max_sim_steps)

        title = fr"$\beta$ = {round(beta, 5)}" if show_title else None

        # Select what statistics to show, as keys of HT.mf_statistics
        stats_to_show = ["mo_se", "att"]

        # Select format for image saving (e.g. ".jpeg" or ".pdf")
        image_format = ".pdf"

        # Loop over the different stats if required
        for stat_name in stats_to_show:
            show_1_feat = 0  # Defines that it's only going to show 1 feature and what's its index
            plot_windows = [250, 350, 5000]  # Different plotting windows for the trajectories

            for plot_window in plot_windows:
                offset = 0  # Offset the trajectory to visit different points

                # Define the steps to show
                plot_range = [offset, offset + plot_window]  # Define the steps to plot
                # The end of the range is exclusive, so a window finishing exactly at saved_steps only
                # reads indices up to saved_steps - 1 and must be accepted
                # (assumes HT.mf_statistics holds saved_steps rows -- TODO confirm)
                if plot_range[1] > saved_steps:
                    raise ValueError("The rightmost index is greater than the number of steps.")
                rg = range(plot_range[0], plot_range[1])

                # Define path to save
                plot_save_path_traj = os.path.join(
                    folder_path,
                    f"traj-seed-{seed}-{stat_name}{run_tag}-plot_window-{plot_window}{image_format}")
                create_dir(plot_save_path_traj)

                # Plot the trajectory
                plot_save_statistics(HT.mf_statistics[stat_name][rg, :], stat_name, num_feat_patterns,
                                     len(rg), min_num_step=0,
                                     show_max_num_patterns=num_feat_patterns,
                                     save_not_plot=save_not_plot, save_path=plot_save_path_traj, title=title,
                                     plot_hilbert=False, show_1_feat=show_1_feat)

            # FFT Path
            plot_save_path_fft = os.path.join(folder_path, f"fft-seed-{seed}-{stat_name}{run_tag}{image_format}")

            # Adjust axis for the FFT if required
            adjust_y_axis = 0.3 if beta == 1.266 else 1.0

            # Plot FFT
            plot_save_fft(HT.mf_statistics[stat_name], stat_name, num_feat_patterns, saved_steps,
                          show_max_num_patterns=num_feat_patterns,
                          save_not_plot=save_not_plot, save_path=plot_save_path_fft, title=title,
                          show_1_feat=show_1_feat, adjust_y_axis=adjust_y_axis)

            # Same for the AutoCorrelation Function
            plot_save_path_ACF = os.path.join(folder_path, f"acf-seed-{seed}-{stat_name}{run_tag}{image_format}")

            plot_save_autocorrelation(HT.mf_statistics[stat_name], stat_name, num_feat_patterns, saved_steps,
                                      show_max_num_patterns=num_feat_patterns, save_not_plot=save_not_plot,
                                      save_path=plot_save_path_ACF, title=title, show_1_feat=show_1_feat)

        print("Done.")

        # Define the statistics you want to plot against each other
        # Each entry is [[stat for the first axis], [stat for the second axis]]
        stats_to_plot_list = [[["mo_se"], ["mo_se"]], [["att"], ["att"]]]

        # Define the index of the features you want to compare against each other
        feat_idx = [[0], [1]]

        # Set larger dots for the periodic trajectory
        larger_dots = beta == 1.27

        for stats_to_plot in stats_to_plot_list:
            # Name of the statistic on each axis. The axis position (0 or 1) selects the entry;
            # the feature indices in feat_idx must not be used for this lookup
            stat_name_0 = stats_to_plot[0][0]
            stat_name_1 = stats_to_plot[1][0]

            # Define path for saving the plane. The statistic name is part of the file name so
            # that the planes of different statistics do not overwrite each other
            if stat_name_0 == stat_name_1:
                plane_tag = stat_name_0
            else:
                plane_tag = f"{stat_name_0}-vs-{stat_name_1}"
            plot_save_path_plane = os.path.join(
                folder_path, f"plane-seed-{seed}-{plane_tag}{run_tag}{image_format}")

            # Load statistics
            stat_results_beta_list_0 = [HT.mf_statistics[stat_name_0]]
            stat_results_beta_list_1 = [HT.mf_statistics[stat_name_1]]

            # Plot plane
            plot_save_plane(stat_results_beta_list_0,
                            stat_results_beta_list_1, saved_steps, feat_idx,
                            tag_names=stats_to_plot, save_path=plot_save_path_plane, save_not_plot=save_not_plot,
                            title=title, larger_dots=larger_dots)

        # Optionally store the Lyapunov graphs in a different image format
        lowres_lya = False
        image_format_lya = ".jpg" if lowres_lya else image_format

        if compute_lyapunov:
            # Reorder in descending order, filter out components associated to Positional Encoding rotations (last components)
            sorted_S = np.sort(HT.S[:HT.num_feat_patterns * HT.context_size])[::-1]
            print("Sorted Lyapunov exponents in descencing order", sorted_S)

            plot_save_path_lya = os.path.join(folder_path, f"lyapunov-{seed}{run_tag}{image_format_lya}")

            # Plot lyapunov related statistics
            plot_lyapunov_graphs(HT.S_i_sum, HT.num_feat_patterns, HT.pe_bit_size, context_size, beta,
                                 save_not_plot=save_not_plot, save_path=plot_save_path_lya)

            # NOTE(review): the original nesting of these prints is not recoverable from the file;
            # presumably the flag is only meaningful when the exponents are computed -- confirm
            print("Inf flag")
            print(HT.S_inf_flag)