-
Notifications
You must be signed in to change notification settings - Fork 115
/
run-all-experiments.py
154 lines (117 loc) · 6.1 KB
/
run-all-experiments.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
import link_prediction_scores as lp
import pickle
import os
# Number of full experiment sweeps (FB + NetworkX) to run back-to-back;
# each sweep writes its own numbered results file under ./results/.
NUM_REPEATS = 10
# Seed shared by all random-graph generators and train/test splits,
# so every repeat is reproducible.
RANDOM_SEED = 0
# Total fraction of edges hidden from training in each experiment
# (split below into val_frac = 0.1 and test_frac = frac_hidden - 0.1).
FRAC_EDGES_HIDDEN = [0.25, 0.5, 0.75]
### ---------- Load in FB Graphs ---------- ###
FB_EGO_USERS = [0, 107, 1684, 1912, 3437, 348, 3980, 414, 686, 698]

# All FB ego networks keyed by ego-user id; values are (adj, features) tuples.
fb_graphs = {}


def _read_adj_feat(path):
    """Unpickle and return one (adjacency, features) tuple from *path*."""
    # NOTE(review): pickles look Python-2 era; under Python 3 they may need
    # pickle.load(f, encoding='latin1') — confirm against the fb-processed files.
    with open(path, 'rb') as f:
        return pickle.load(f)


# Read in each FB ego graph and store it in the dict.
for user in FB_EGO_USERS:
    fb_graphs[user] = _read_adj_feat('./fb-processed/{0}-adj-feat.pkl'.format(user))

# Read in the combined FB graph spanning all ego networks.
fb_graphs['combined'] = _read_adj_feat('./fb-processed/combined-adj-sparsefeat.pkl')
### ---------- Create Random NetworkX Graphs ---------- ###
# Synthetic benchmark graphs keyed by '<model>-<size>'.
nx_graphs = {}

# Small graphs (~200 nodes)
N_SMALL = 200
nx_graphs['er-small'] = nx.erdos_renyi_graph(n=N_SMALL, p=.02, seed=RANDOM_SEED)  # Erdos-Renyi
nx_graphs['ws-small'] = nx.watts_strogatz_graph(n=N_SMALL, k=5, p=.1, seed=RANDOM_SEED)  # Watts-Strogatz
nx_graphs['ba-small'] = nx.barabasi_albert_graph(n=N_SMALL, m=2, seed=RANDOM_SEED)  # Barabasi-Albert
nx_graphs['pc-small'] = nx.powerlaw_cluster_graph(n=N_SMALL, m=2, p=.02, seed=RANDOM_SEED)  # Powerlaw Cluster
# BUGFIX: use floor division — N_SMALL / 10 is a float under Python 3, and
# random_partition_graph requires integer community sizes.
nx_graphs['sbm-small'] = nx.random_partition_graph(sizes=[N_SMALL // 10] * 10, p_in=.1, p_out=.01, seed=RANDOM_SEED)  # Stochastic Block Model

# Larger graphs (~1000 nodes)
N_LARGE = 1000
nx_graphs['er-large'] = nx.erdos_renyi_graph(n=N_LARGE, p=.01, seed=RANDOM_SEED)  # Erdos-Renyi
nx_graphs['ws-large'] = nx.watts_strogatz_graph(n=N_LARGE, k=3, p=.1, seed=RANDOM_SEED)  # Watts-Strogatz
nx_graphs['ba-large'] = nx.barabasi_albert_graph(n=N_LARGE, m=2, seed=RANDOM_SEED)  # Barabasi-Albert
nx_graphs['pc-large'] = nx.powerlaw_cluster_graph(n=N_LARGE, m=2, p=.02, seed=RANDOM_SEED)  # Powerlaw Cluster
# BUGFIX: floor division here as well, for the same reason as above.
nx_graphs['sbm-large'] = nx.random_partition_graph(sizes=[N_LARGE // 10] * 10, p_in=.05, p_out=.005, seed=RANDOM_SEED)  # Stochastic Block Model
# Remove isolated nodes from the random graphs so every node has at least
# one edge (the link-prediction train/test splits assume no isolates).
# BUGFIX: nx.isolates() returns an iterator in networkx >= 2.0, so the old
# `len(isolates)` check raised TypeError; materialize it to a list before
# mutating the graph it was derived from. Also use .items() (iteritems() is
# Python-2-only).
for g_name, nx_g in nx_graphs.items():
    isolates = list(nx.isolates(nx_g))
    if isolates:
        nx_g.remove_nodes_from(isolates)
### ---------- Run Link Prediction Tests ---------- ###

def _next_results_path(prefix):
    """Return an unused './results/<prefix>-experiment-<n>-results.pkl' path.

    Scans the existing files in ./results/ and increments n until the name
    is free, so repeated runs never overwrite earlier experiment output.
    """
    past_results = os.listdir('./results/')
    experiment_num = 0
    file_name = '{}-experiment-{}-results.pkl'.format(prefix, experiment_num)
    while file_name in past_results:
        experiment_num += 1
        file_name = '{}-experiment-{}-results.pkl'.format(prefix, experiment_num)
    return './results/' + file_name


for i in range(NUM_REPEATS):
    ### ---------- FACEBOOK ---------- ###
    fb_results = {}
    FB_RESULTS_DIR = _next_results_path('fb')
    TRAIN_TEST_SPLITS_FOLDER = './train-test-splits/'

    # Iterate over fractions of edges to hide
    for frac_hidden in FRAC_EDGES_HIDDEN:
        val_frac = 0.1
        test_frac = frac_hidden - val_frac

        # Iterate over each FB graph (ego networks + combined)
        for g_name, (adj, feat) in fb_graphs.items():
            experiment_name = 'fb-{}-{}-hidden'.format(g_name, frac_hidden)
            print('Current experiment: {}'.format(experiment_name))

            # Cached train/test split (created by lp if absent — TODO confirm)
            train_test_split_file = TRAIN_TEST_SPLITS_FOLDER + experiment_name + '.pkl'

            # Run all link prediction methods on the current graph
            fb_results[experiment_name] = lp.calculate_all_scores(
                adj, feat,
                test_frac=test_frac, val_frac=val_frac,
                random_state=RANDOM_SEED, verbose=2,
                train_test_split_file=train_test_split_file)

            # Checkpoint after every experiment so a crash mid-sweep does not
            # lose completed work (protocol=2 keeps pickles Py2-readable).
            with open(FB_RESULTS_DIR, 'wb') as f:
                pickle.dump(fb_results, f, protocol=2)

    # Final save (also covers the case where no experiments ran)
    with open(FB_RESULTS_DIR, 'wb') as f:
        pickle.dump(fb_results, f, protocol=2)

    ### ---------- NETWORKX ---------- ###
    nx_results = {}
    NX_RESULTS_DIR = _next_results_path('nx')

    # Iterate over fractions of edges to hide
    for frac_hidden in FRAC_EDGES_HIDDEN:
        val_frac = 0.1
        test_frac = frac_hidden - val_frac

        # Iterate over each synthetic graph (no node features here)
        for g_name, nx_g in nx_graphs.items():
            adj = nx.adjacency_matrix(nx_g)
            experiment_name = 'nx-{}-{}-hidden'.format(g_name, frac_hidden)
            print('Current experiment: {}'.format(experiment_name))

            # Run all link prediction methods on the current graph
            nx_results[experiment_name] = lp.calculate_all_scores(
                adj,
                test_frac=test_frac, val_frac=val_frac,
                random_state=RANDOM_SEED, verbose=0)

            # Checkpoint after every experiment (see FB loop above)
            with open(NX_RESULTS_DIR, 'wb') as f:
                pickle.dump(nx_results, f, protocol=2)

    # Final save (also covers the case where no experiments ran)
    with open(NX_RESULTS_DIR, 'wb') as f:
        pickle.dump(nx_results, f, protocol=2)