fl_devices.py
import random

import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.cluster import AgglomerativeClustering

device = "cuda" if torch.cuda.is_available() else "cpu"

def train_op(model, loader, optimizer, epochs=1):
    model.train()
    for ep in range(epochs):
        running_loss, samples = 0.0, 0
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            loss = torch.nn.CrossEntropyLoss()(model(x), y)
            running_loss += loss.item() * y.shape[0]
            samples += y.shape[0]
            loss.backward()
            optimizer.step()
    # Average training loss over the last epoch
    return running_loss / samples

def eval_op(model, loader):
    model.eval()
    samples, correct = 0, 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(device), y.to(device)
            y_ = model(x)
            _, predicted = torch.max(y_.data, 1)
            samples += y.shape[0]
            correct += (predicted == y).sum().item()
    # Top-1 accuracy on the given loader
    return correct / samples

def copy(target, source):
    # Overwrite every entry of `target` with a clone of the matching
    # entry in `source`
    for name in target:
        target[name].data = source[name].data.clone()


def subtract_(target, minuend, subtrahend):
    # Element-wise, in place: target = minuend - subtrahend
    for name in target:
        target[name].data = minuend[name].data.clone() - subtrahend[name].data.clone()


def reduce_add_average(targets, sources):
    # Add the element-wise average of all `sources` to each dict in `targets`
    for target in targets:
        for name in target:
            tmp = torch.mean(torch.stack([source[name].data for source in sources]), dim=0).clone()
            target[name].data += tmp

def flatten(source):
    return torch.cat([value.flatten() for value in source.values()])


def pairwise_angles(sources):
    # Cosine similarity between every pair of flattened update dicts
    angles = torch.zeros([len(sources), len(sources)])
    for i, source1 in enumerate(sources):
        for j, source2 in enumerate(sources):
            s1 = flatten(source1)
            s2 = flatten(source2)
            angles[i, j] = torch.sum(s1 * s2) / (torch.norm(s1) * torch.norm(s2) + 1e-12)
    return angles.numpy()
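
# A quick, hypothetical sanity check for pairwise_angles (the names
# `dW_a` and `dW_b` stand for update dicts such as Client.dW after a
# round of local training):
#
#     S = pairwise_angles([dW_a, dW_b])
#     # S is a symmetric 2x2 numpy array with entries in [-1, 1];
#     # the diagonal is ~1.0 whenever the updates are non-zero.
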
class FederatedTrainingDevice(object):
    def __init__(self, model_fn, data):
        self.model = model_fn().to(device)
        self.data = data
        self.W = {key: value for key, value in self.model.named_parameters()}

    def evaluate(self, loader=None):
        # Falls back to self.eval_loader, which subclasses are expected to provide
        return eval_op(self.model, self.eval_loader if not loader else loader)

class Client(FederatedTrainingDevice):
    def __init__(self, model_fn, optimizer_fn, data, idnum, batch_size=128, train_frac=0.8):
        super().__init__(model_fn, data)
        self.optimizer = optimizer_fn(self.model.parameters())

        # Split the local data into train and eval partitions
        n_train = int(len(data) * train_frac)
        n_eval = len(data) - n_train
        data_train, data_eval = torch.utils.data.random_split(self.data, [n_train, n_eval])
        self.train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
        self.eval_loader = DataLoader(data_eval, batch_size=batch_size, shuffle=False)

        self.id = idnum

        # dW caches the latest local weight update, W_old the weights
        # before local training
        self.dW = {key: torch.zeros_like(value) for key, value in self.model.named_parameters()}
        self.W_old = {key: torch.zeros_like(value) for key, value in self.model.named_parameters()}

    def synchronize_with_server(self, server):
        copy(target=self.W, source=server.W)

    def compute_weight_update(self, epochs=1, loader=None):
        copy(target=self.W_old, source=self.W)
        self.optimizer.param_groups[0]["lr"] *= 0.99  # simple learning-rate decay
        train_stats = train_op(self.model, self.train_loader if not loader else loader, self.optimizer, epochs)
        subtract_(target=self.dW, minuend=self.W, subtrahend=self.W_old)
        return train_stats

    def reset(self):
        copy(target=self.W, source=self.W_old)

class Server(FederatedTrainingDevice):
    def __init__(self, model_fn, data):
        super().__init__(model_fn, data)
        self.loader = DataLoader(self.data, batch_size=128, shuffle=False)
        self.model_cache = []

    def select_clients(self, clients, frac=1.0):
        return random.sample(clients, int(len(clients) * frac))

    def aggregate_weight_updates(self, clients):
        # `reduce_add_average` expects a list of target dicts
        reduce_add_average(targets=[self.W], sources=[client.dW for client in clients])

    def compute_pairwise_similarities(self, clients):
        return pairwise_angles([client.dW for client in clients])

    def cluster_clients(self, S):
        # Bi-partition the clients; negated similarities serve as distances.
        # Note: scikit-learn >= 1.4 renames `affinity` to `metric`.
        clustering = AgglomerativeClustering(affinity="precomputed", linkage="complete").fit(-S)

        c1 = np.argwhere(clustering.labels_ == 0).flatten()
        c2 = np.argwhere(clustering.labels_ == 1).flatten()
        return c1, c2

    def aggregate_clusterwise(self, client_clusters):
        for cluster in client_clusters:
            reduce_add_average(targets=[client.W for client in cluster],
                               sources=[client.dW for client in cluster])

    def compute_max_update_norm(self, cluster):
        return np.max([torch.norm(flatten(client.dW)).item() for client in cluster])

    def compute_mean_update_norm(self, cluster):
        return torch.norm(torch.mean(torch.stack([flatten(client.dW) for client in cluster]),
                                     dim=0)).item()

    def cache_model(self, idcs, params, accuracies):
        self.model_cache += [(idcs,
                              {name: params[name].data.clone() for name in params},
                              [accuracies[i] for i in idcs])]
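

# ---------------------------------------------------------------------------
# Minimal usage sketch: wires a toy linear model and synthetic data through
# one communication round of clustered federated training. The model,
# dataset sizes, and hyperparameters below are illustrative assumptions,
# not choices made by the original project.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from torch.utils.data import TensorDataset

    def model_fn():
        # Toy classifier: 10 features -> 2 classes (assumed stand-in)
        return torch.nn.Linear(10, 2)

    def optimizer_fn(params):
        return torch.optim.SGD(params, lr=0.1)

    def synthetic_dataset(n=50):
        # Random features and binary labels, only for demonstration
        return TensorDataset(torch.randn(n, 10), torch.randint(0, 2, (n,)))

    clients = [Client(model_fn, optimizer_fn, synthetic_dataset(), idnum=i)
               for i in range(4)]
    server = Server(model_fn, synthetic_dataset())

    # One communication round: broadcast, local training, weight updates
    for client in server.select_clients(clients, frac=1.0):
        client.synchronize_with_server(server)
        client.compute_weight_update(epochs=1)

    # Cluster clients by the cosine similarity of their updates, then
    # average updates within each cluster
    S = server.compute_pairwise_similarities(clients)
    c1, c2 = server.cluster_clients(S)
    server.aggregate_clusterwise([[clients[i] for i in c1],
                                  [clients[i] for i in c2]])

    print("pairwise similarities:\n", S)
    print("client accuracies:", [client.evaluate() for client in clients])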