forked from hclhkbu/gtopkssgd
-
Notifications
You must be signed in to change notification settings - Fork 0
/
compression.py
94 lines (78 loc) · 2.88 KB
/
compression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# -*- coding: utf-8 -*-
from __future__ import print_function
import torch
import numpy as np
import time
class NoneCompressor():
    """Identity compressor: passes gradients through untouched.

    Kept so code paths that expect a compressor object can run with
    compression disabled.
    """

    @staticmethod
    def compress(tensor, name=None):
        """Return ``tensor`` unchanged, with its dtype as the decompress context."""
        return tensor, tensor.dtype

    @staticmethod
    def decompress(tensor, ctc, name=None):
        """Return ``tensor`` unchanged; ``ctc`` (the dtype context) is ignored."""
        return tensor
class TopKCompressor():
    """
    Top-k gradient sparsification with error-feedback residuals.

    Sparse Communication for Distributed Gradient Descent, Alham Fikri Aji et al., 2017

    All state lives in class-level dicts keyed by a per-parameter ``name``,
    so one compressor class tracks residuals for many tensors at once.
    """
    residuals = {}        # name -> tensor of values dropped by the last compress (error feedback)
    c = 0
    sparsities = []
    t = 0.
    zero_conditions = {}  # name -> float mask: 1.0 where the element was dropped, 0.0 where kept
    values = {}           # name -> signed values kept by the last compress
    indexes = {}          # name -> indexes kept by the last compress
    name = 'topk'

    @staticmethod
    def compress(tensor, name=None, sigma_scale=2.5, ratio=0.05):
        """Sparsify ``tensor`` in place, keeping the top-``ratio`` fraction by magnitude.

        The previous round's residual is added back before selection; whatever
        is not selected this round becomes the new residual (error feedback).

        tensor      -- gradient tensor; mutated in place.
        name        -- key for the per-parameter residual/mask state.
        sigma_scale -- unused; kept for interface compatibility with callers.
        ratio       -- fraction of elements to keep (at least 1 element).
        Returns (tensor, indexes) where ``indexes`` are the kept positions.
        """
        with torch.no_grad():
            if name not in TopKCompressor.residuals:
                TopKCompressor.residuals[name] = torch.zeros_like(tensor.data)
            numel = tensor.numel()
            k = max(int(numel * ratio), 1)
            # Error feedback: fold the previously dropped values back in.
            tensor.data.add_(TopKCompressor.residuals[name].data)
            # NOTE(review): torch.topk without dim= selects over the LAST
            # dimension, and the flat mask below indexes a 1-D buffer — this
            # assumes ``tensor`` is a flattened (1-D) gradient; confirm at
            # the call site.
            _, indexes = torch.topk(torch.abs(tensor.data), k=k)
            values = tensor.data[indexes]  # re-gather to recover signs
            if name not in TopKCompressor.zero_conditions:
                TopKCompressor.zero_conditions[name] = torch.ones(numel, dtype=torch.float32, device=tensor.device)
            zero_condition = TopKCompressor.zero_conditions[name]
            zero_condition.fill_(1.0)
            zero_condition[indexes] = 0.0
            # New residual = everything zeroed out this round.
            TopKCompressor.residuals[name].data = tensor.data * zero_condition
            tensor.data.sub_(TopKCompressor.residuals[name].data)
            TopKCompressor.values[name] = values
            TopKCompressor.indexes[name] = indexes
            return tensor, indexes

    @staticmethod
    def get_residuals(name, like_tensor):
        """Return the residual for ``name``, creating a zeros-like one if absent."""
        if name not in TopKCompressor.residuals:
            TopKCompressor.residuals[name] = torch.zeros_like(like_tensor.data)
        return TopKCompressor.residuals[name]

    @staticmethod
    def add_residuals(included_indexes, name):
        """Return non-consumed top-k values to the residual for ``name``.

        included_indexes -- positions WITHIN the stored ``values`` tensor whose
        values were consumed elsewhere (e.g. selected globally) and therefore
        must NOT be re-added to the residual; accepts a numpy array or a
        torch index tensor.
        """
        with torch.no_grad():
            residuals = TopKCompressor.residuals[name]
            if type(included_indexes) is np.ndarray:
                # BUGFIX: was .cuda(residuals.device), which crashes when the
                # residual lives on CPU; .to() preserves the CUDA behavior and
                # also works on CPU.
                indexes_t = torch.from_numpy(included_indexes).to(device=residuals.device, dtype=torch.long)
            else:
                indexes_t = included_indexes
            values = TopKCompressor.values[name]
            # Zero the consumed entries, then scatter-add the rest back.
            values.data[indexes_t] = 0.0
            residuals.data[TopKCompressor.indexes[name]] += values.data

    @staticmethod
    def decompress(tensor, ctc, name=None):
        """Identity decompress: the sparsified tensor is already dense-shaped."""
        return tensor
class TopKCompressor2(TopKCompressor):
    """Top-k compressor variant registered under a distinct name.

    Shares all behavior and class-level state machinery with
    ``TopKCompressor``; only the registry name differs.
    """
    name = 'topk2'
class gTopKCompressor(TopKCompressor):
    """Global top-k compressor registered under 'gtopk'.

    Inherits the full top-k/error-feedback implementation from
    ``TopKCompressor``; only the registry name differs.
    """
    name = 'gtopk'
# Registry mapping a compressor's string name to its class.
compressors = dict(
    topk=TopKCompressor,
    topk2=TopKCompressor2,
    gtopk=gTopKCompressor,
    none=NoneCompressor,
)