-
Notifications
You must be signed in to change notification settings - Fork 0
/
pretrain.py
115 lines (87 loc) · 3.04 KB
/
pretrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import h5py
import pandas as pd
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.utils.data import DataLoader
from torch.optim import Adam, SGD
from torch.nn import Linear
from torch.utils.data import Dataset
from sklearn.cluster import KMeans
from census_process import *
# from evaluation import eva
#torch.cuda.set_device(3)
class AE(nn.Module):
    """Fully connected autoencoder with three hidden layers on each side.

    The encoder maps ``n_input`` features through linear layers of width
    ``n_enc_1``, ``n_enc_2`` and ``n_enc_3`` (each followed by ReLU) and then
    projects linearly to an ``n_z``-dimensional embedding.  The decoder goes
    from ``n_z`` through ``n_dec_1``, ``n_dec_2`` and ``n_dec_3`` (each
    followed by ReLU) and projects linearly back to ``n_input`` features.
    """

    def __init__(self, n_enc_1, n_enc_2, n_enc_3, n_dec_1, n_dec_2, n_dec_3,
                 n_input, n_z):
        super().__init__()
        # Attribute names are the state_dict keys of saved checkpoints, and
        # the creation order fixes the order in which weights are initialised.
        self.enc_1 = nn.Linear(n_input, n_enc_1)
        self.enc_2 = nn.Linear(n_enc_1, n_enc_2)
        self.enc_3 = nn.Linear(n_enc_2, n_enc_3)
        self.z_layer = nn.Linear(n_enc_3, n_z)

        self.dec_1 = nn.Linear(n_z, n_dec_1)
        self.dec_2 = nn.Linear(n_dec_1, n_dec_2)
        self.dec_3 = nn.Linear(n_dec_2, n_dec_3)
        self.x_bar_layer = nn.Linear(n_dec_3, n_input)

    def forward(self, x):
        """Return ``(x_bar, z)``: the reconstruction of ``x`` and its embedding."""
        hidden = x
        for encoder_layer in (self.enc_1, self.enc_2, self.enc_3):
            hidden = F.relu(encoder_layer(hidden))
        # The embedding is a plain linear projection (no activation).
        z = self.z_layer(hidden)

        hidden = z
        for decoder_layer in (self.dec_1, self.dec_2, self.dec_3):
            hidden = F.relu(decoder_layer(hidden))
        # The reconstruction is likewise left un-activated.
        return self.x_bar_layer(hidden), z
class LoadDataset(Dataset):
    """Dataset wrapper around an indexable feature matrix stored as ``self.x``.

    Indexing yields a ``(features, index)`` pair: the selected entry of the
    matrix converted to a float32 tensor, and the index itself as a tensor.
    """

    def __init__(self, data):
        self.x = data

    def __len__(self):
        # The number of samples is the size of the first axis.
        n_samples = self.x.shape[0]
        return n_samples

    def __getitem__(self, idx):
        row = np.array(self.x[idx])
        features = torch.from_numpy(row).float()
        position = torch.from_numpy(np.array(idx))
        return features, position
def adjust_learning_rate(optimizer, epoch, *, base_lr=0.001, decay=0.1, step=20):
    """Set a step-decayed learning rate on every parameter group.

    The rate applied is ``base_lr * decay ** (epoch // step)``.  With the
    default arguments this is 0.001 multiplied by 0.1 once per 20 completed
    epochs, which is what the function did before these values were
    made configurable.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts
            whose ``'lr'`` entry is overwritten.
        epoch: Zero-based epoch number.
        base_lr: Learning rate used before any decay.
        decay: Multiplicative factor applied once per ``step`` epochs.
        step: Number of epochs between two decays; must be positive.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError('step must be a positive number of epochs')
    lr = base_lr * (decay ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
def pretrain_ae(model, dataset, *, epochs=30, batch_size=256, lr=1e-3,
                save_path='pretrain/ae_pretrain_gcn.pkl'):
    """Pre-train an autoencoder with an MSE reconstruction loss.

    Mini-batches are drawn in shuffled order and optimised with Adam.  After
    each epoch the reconstruction loss over the whole of ``dataset.x`` is
    printed and the model's ``state_dict`` is written to ``save_path``
    (overwriting the previous epoch's file).

    All tensors are placed on the device the model's parameters already live
    on, so the function works on CPU-only hosts as well as on CUDA.

    The per-epoch KMeans fit that used to run here has been removed: its
    result was never read (the evaluation call that consumed it is disabled),
    so it only added run time.

    Args:
        model: Module whose ``forward`` returns ``(reconstruction, embedding)``.
        dataset: Dataset yielding ``(features, index)`` pairs and exposing the
            full feature matrix as ``dataset.x``.
        epochs: Number of passes over the data.
        batch_size: Mini-batch size for the training loader.
        lr: Adam learning rate.
        save_path: Destination of the saved ``state_dict``.
    """
    # Follow the model's device instead of assuming a GPU is present.
    device = next(model.parameters()).device

    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    print(model)
    optimizer = Adam(model.parameters(), lr=lr)

    # The full matrix is only read for the per-epoch loss report, so it is
    # converted and moved to the device once rather than on every epoch.
    full_x = torch.as_tensor(dataset.x, dtype=torch.float32, device=device)

    for epoch in range(epochs):
        # adjust_learning_rate is not applied; the rate stays at ``lr``.
        for x, _ in train_loader:
            x = x.to(device)
            x_bar, _ = model(x)
            loss = F.mse_loss(x_bar, x)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            x_bar, _ = model(full_x)
            loss = F.mse_loss(x_bar, full_x)
            print('{} loss: {}'.format(epoch, loss))

        torch.save(model.state_dict(), save_path)
# Alternative inputs kept for reference:
# x, n_input = concat_df('data/census21_lon_lsoa/')
# x = np.eye(4994)
# n_input = 4994
x = np.load('pretrain/att_data.npy')
# The second axis of the loaded matrix sets the autoencoder's input width.
n_input = x.shape[1]

# Use the GPU when one is available; fall back to CPU rather than crashing
# on hosts without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = AE(
    n_enc_1=500,
    n_enc_2=500,
    n_enc_3=2000,
    n_dec_1=2000,
    n_dec_2=500,
    n_dec_3=500,
    n_input=n_input,
    n_z=10,
).to(device)

dataset = LoadDataset(x)
pretrain_ae(model, dataset)