-
Notifications
You must be signed in to change notification settings - Fork 0
/
simulation.py
405 lines (312 loc) · 12.5 KB
/
simulation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
import random
import networkx as nx
from tabulate import tabulate
import statistics
from multiprocessing import Pool
import sys
# Sets random seed so repeated runs are reproducible
random.seed(16)
# Tracker to maintain unique ids across every Exchange in this process
id_iterator = 0
# How many periods to run simulation
# Must be greater than max(frequencies)
# All elements of frequencies should be
# (approximately) factors in order to avoid
# having leftovers.
#
# Has to be +1 so modular arithmetic works
# (periods run 1..RUN_LEN-1, so 350 divides evenly)
RUN_LEN = 351
# How many times to run each parameterization (samples per data point)
SAMPLE_SIZE = 10
# How large should the pool be at the start
START_SIZE = 0
class Exchange:
    """A patient pool with periodic maximum-weight matching.

    Patients arrive over time with a personal compatibility probability,
    may become critical each period, and are matched in rounds; critical
    patients that fail to match in a round expire. Tracks per-round
    matches/expiries/sizes and per-matched-patient ages/probs.
    """

    def __init__(self, start_size, inflow, expiry_rate, frequency):
        # For tracking economy status
        self.patients = nx.Graph()      # compatibility graph; nodes carry 'prob' and 'age'
        self.critical_patients = set()  # ids of currently-critical patients
        # Record parameters of simulation
        self.start_size = start_size
        self.inflow = inflow
        self.expiry_rate = expiry_rate
        self.frequency = frequency
        # For tracking economy's performance
        self.matches = []   # matched-patient count per run_match call
        self.expiries = []  # expiry count per run_match call
        self.sizes = []     # pool size after each run_match call
        self.ages = []      # age at match, one entry per matched critical patient
        self.probs = []     # personal prob, one entry per matched critical patient

    def add_patients(self, num_patients):
        """Add num_patients new patients, testing each against the pool.

        Each new patient gets a fresh unique id and a personal probability;
        an edge to an existing patient is created with chance equal to the
        product of the two personal probabilities.
        """
        # Ensures all nodes have unique id across every Exchange instance
        global id_iterator
        for _ in range(num_patients):
            # Immediately increment the iterator so the id is never reused
            new_id = id_iterator
            id_iterator = id_iterator + 1
            new_prob = random.random()
            # Snapshot the existing nodes BEFORE inserting the new one, so
            # no skip-self check is needed in the loop below. Iteration
            # order (insertion order) and the random-call sequence are
            # identical to testing against all nodes and skipping new_id.
            existing = list(self.patients.nodes(data=True))
            self.patients.add_node(new_id, prob=new_prob, age=0)
            for old_id, attrs in existing:
                # Test whether there's a match, and if so create an edge
                match_chance = attrs['prob'] * new_prob
                if random.random() < match_chance:
                    # Starts with weight of zero since not currently
                    # connected to a critical node
                    self.patients.add_edge(old_id, new_id, weight=0)
        return self.patients.nodes()

    def activate_critical(self):
        """Each pooled patient independently turns critical at expiry_rate."""
        for patient in list(self.get_patients()):
            if random.random() < self.expiry_rate:
                # Node has become critical
                self.critical_patients.add(patient)

    def run_match(self):
        """Run one matching round, then expire unmatched critical patients."""
        critical_edges = set()
        # Weight every edge incident to a critical patient. An edge whose
        # BOTH endpoints are critical is boosted twice (+2), so the
        # matching prefers saving two critical patients over one.
        for patient in self.critical_patients:
            for edge in self.patients.edges(patient):
                critical_edges.add((edge[0], edge[1]))
                self.patients[edge[0]][edge[1]]['weight'] = \
                    self.patients[edge[0]][edge[1]]['weight'] + 1
        # Restrict the matching to edges touching a critical node; without
        # this, zero-value edges could enter the maximal matching.
        critical_subgraph = self.patients.edge_subgraph(critical_edges)
        max_match = nx.algorithms.max_weight_matching(critical_subgraph)
        # Each matched edge removes two patients from the pool
        matches = 2 * len(max_match)
        # Snapshot node attributes before the nodes are removed
        node_ages = nx.get_node_attributes(self.patients, "age")
        node_probs = nx.get_node_attributes(self.patients, "prob")
        # Remove all matched patients; age/prob stats are recorded only
        # for the critical ones (as in the original design).
        for edge in max_match:
            for patient in edge:
                if patient in self.critical_patients:
                    self.critical_patients.remove(patient)
                    self.add_age(node_ages[patient])
                    self.add_prob(node_probs[patient])
                self.patients.remove_node(patient)
        # Any unmatched critical patients expire
        expiries = len(self.critical_patients)
        for patient in list(self.critical_patients):
            self.patients.remove_node(patient)
            self.critical_patients.remove(patient)
        # Save data to tracker
        self.add_expiries(expiries)
        self.add_matches(matches)
        self.add_size(self.patients.order())

    def age_patients(self):
        """Increment every remaining patient's age by one period.

        Fetches the attribute dict ONCE — the original rebuilt the full
        dict via get_node_attributes for every patient, which was O(n^2).
        """
        ages = nx.get_node_attributes(self.patients, "age")
        nx.set_node_attributes(
            self.patients, {patient: age + 1 for patient, age in ages.items()}, "age")

    def dump_garbage(self):
        """Drop the bulky graph data before shipping results back.

        When the exchange is returned across the process boundary we only
        need the trackers; 0 is kept (from the original) as the sentinel
        for "dumped".
        """
        self.patients = 0
        self.critical_patients = 0

    # Myriad getters/setters for tracking
    def get_patients(self):
        return self.patients.nodes()

    def add_matches(self, matches):
        self.matches.append(matches)

    def add_expiries(self, expiries):
        self.expiries.append(expiries)

    def add_size(self, sizes):
        self.sizes.append(sizes)

    def add_age(self, age):
        self.ages.append(age)

    def add_prob(self, difficulty):
        self.probs.append(difficulty)

    def get_start_size(self):
        return self.start_size

    def get_inflow(self):
        return self.inflow

    def get_expiry_rate(self):
        return self.expiry_rate

    def get_frequency(self):
        return self.frequency

    def get_matches(self):
        return self.matches

    def get_expiries(self):
        return self.expiries

    def get_sizes(self):
        return self.sizes

    def get_ages(self):
        return self.ages

    def get_probs(self):
        return self.probs
class Simulation:
    """Aggregate statistics over several runs of one parameterization."""

    def __init__(self, parameterization, results):
        # Unpack the first four parameterization fields (any trailing
        # sample_size element is ignored here).
        self.start_size = parameterization[0]
        self.inflow = parameterization[1]
        self.expiry_rate = parameterization[2]
        self.frequency = parameterization[3]
        self.results = results
        # Flatten the per-run matched ages and probs into single lists.
        self.ages = [age for result in results for age in result.get_ages()]
        self.probs = [prob for result in results for prob in result.get_probs()]

    def get_inflow(self):
        return self.inflow

    def get_expiry_rate(self):
        return self.expiry_rate

    def get_frequency(self):
        return self.frequency

    def get_avg_matches(self):
        # Mean of the total matches made in each run
        totals = [sum(result.get_matches()) for result in self.results]
        return statistics.mean(totals)

    def get_sd_matches(self):
        # Standard deviation of the total matches made in each run
        totals = [sum(result.get_matches()) for result in self.results]
        return statistics.stdev(totals)

    def get_avg_age(self):
        return statistics.mean(self.ages)

    def get_sd_age(self):
        return statistics.stdev(self.ages)

    def get_avg_prob(self):
        return statistics.mean(self.probs)

    def get_sd_prob(self):
        return statistics.stdev(self.probs)
def take_sample(parameterization):
    """Run one complete simulation and return the (stripped) Exchange.

    The trailing sample_size element of the tuple is unused here; it is
    carried along only so the same tuple can be passed to run_sim.
    """
    start_size, inflow, expiry_rate, frequency, _sample_size = parameterization
    # Initialize exchange and seed the starting pool
    exchange = Exchange(start_size, inflow, expiry_rate, frequency)
    exchange.add_patients(start_size)
    # Run the simulation period by period (currently "a year")
    for period in range(1, RUN_LEN):
        # inflow new patients arrive every period
        exchange.add_patients(inflow)
        # Each pooled patient has a chance of becoming critical
        exchange.activate_critical()
        # Matching only runs every `frequency` periods; matched
        # patients are removed, unmatched critical patients expire
        if period % frequency == 0:
            exchange.run_match()
        # Everyone still in the pool grows one period older
        exchange.age_patients()
    # Strip the bulky graph data so only the trackers travel back
    exchange.dump_garbage()
    return exchange
def print_table(values, sds, inflows, exp_rates, frequencies):
    """Print one table per expiry rate: rows are inflows, columns frequencies.

    Each inflow row is followed by a parenthesized standard-deviation row.
    """
    for exp_rate in exp_rates:
        rows = []
        for inflow in inflows:
            value_row = [inflow]
            sd_row = ["(SD)"]
            for freq in frequencies:
                value_row.append(str(values[freq][inflow][exp_rate]))
                sd_row.append("(" + str(sds[freq][inflow][exp_rate]) + ")")
            rows.append(value_row)
            rows.append(sd_row)
        headers = ["Inflow\\Frequency"] + [str(freq) for freq in frequencies]
        print("Expiry rate==" + str(exp_rate))
        print(tabulate(rows, headers=headers))
def table_dict(frequencies, inflows):
    """Build an empty nested results dict keyed [frequency][inflow].

    Each leaf is a fresh dict, ready to be filled per expiry rate, in a
    shape that converts easily to tabular form.
    """
    return {freq: {inflow: {} for inflow in inflows} for freq in frequencies}
def run_sim(parameterization):
    """Fan one parameterization out across a process pool.

    Runs take_sample once per requested sample (the tuple's fifth
    element) and wraps the collected runs in a Simulation.
    """
    print("Running parameterization: " + str(parameterization), flush=True)
    *_, sample_size = parameterization
    with Pool() as pool:
        samples = pool.map(take_sample, [parameterization] * sample_size)
    return Simulation(parameterization, samples)
def _collect_tables(simulations, frequencies, inflows, value_of, sd_of):
    # Helper: pivot one metric of every simulation into the nested
    # [frequency][inflow][expiry_rate] dicts that print_table expects.
    # Returns (values, sds). Extracted because main previously repeated
    # this block verbatim for three different metrics.
    values = table_dict(frequencies, inflows)
    sds = table_dict(frequencies, inflows)
    for sim in simulations:
        freq = sim.get_frequency()
        inflow = sim.get_inflow()
        exp_rate = sim.get_expiry_rate()
        values[freq][inflow][exp_rate] = value_of(sim)
        sds[freq][inflow][exp_rate] = sd_of(sim)
    return values, sds


def main():
    """Run every (inflow, expiry_rate, frequency) combination and print
    match-count, matched-age, and matched-prob tables."""
    # How many samples of each parameterization do we want?
    sample_size = SAMPLE_SIZE
    # Boost the maximum recursion depth to prevent crashes
    sys.setrecursionlimit(10000)
    # How many times more slowly does the "slow" match run?
    frequencies = [
        1,    # Daily
        7,    # Weekly
        30,   # Monthly
        87,   # Quarterly
        350   # Yearly
    ]
    # What is your chance of criticality each period?
    exp_rates = [
        .1,  # Low
        .5,  # Med
        .9   # High
    ]
    # How many new patients arrive each period?
    inflows = [
        3,   # Slow
        10,  # Med
        25   # Fast
    ]
    # One tuple per combination, in the argument shape run_sim expects:
    # (start_size, inflow, exp_rate, freq, sample_size)
    parameterizations = [
        (START_SIZE, inflow, exp_rate, freq, sample_size)
        for freq in frequencies
        for exp_rate in exp_rates
        for inflow in inflows
    ]
    # Run the simulations
    simulations = [run_sim(parameterization) for parameterization in parameterizations]
    # Pull each metric from the simulations and print it in tabular form.
    metrics = [
        ("Matches:", Simulation.get_avg_matches, Simulation.get_sd_matches),
        ("Ages:", Simulation.get_avg_age, Simulation.get_sd_age),
        ("Probs:", Simulation.get_avg_prob, Simulation.get_sd_prob),
    ]
    for title, value_of, sd_of in metrics:
        values, sds = _collect_tables(simulations, frequencies, inflows, value_of, sd_of)
        print(title)
        print_table(values, sds, inflows, exp_rates, frequencies)
# Script entry point: only run the full sweep when executed directly,
# not when imported (also required for multiprocessing on some platforms).
if __name__ == "__main__":
    main()