#!/usr/bin/env python
# coding: utf-8
# In[1]:
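# ULMFiT-style pipeline (fastai v1): build a language-model DataBunch from a 10%
# training sample, optionally fine-tune an AWD_LSTM LM, then train a classifier
# mapping 'title' text to 'category' labels.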
import pandas as pd
from fastai.text import *
from fastai.callbacks import *
from sklearn.model_selection import train_test_split
from pathlib import Path
import pickle as pkl
# Optional GPU memory debugging (commented out): queries device 0 via pynvml.
'''from pynvml import *
#learn.destroy()
torch.cuda.empty_cache()
nvmlInit()
handle = nvmlDeviceGetHandleByIndex(0)
info = nvmlDeviceGetMemoryInfo(handle)
print("Total memory:", info.total/1000000000)
print("Free memory:", info.free/1000000000)
print("Used memory:", info.used/1000000000)'''
PATH = Path('.')  # data and model files live in the working directory
## Training data
data_lm = TextLMDataBunch.from_csv(
    PATH, 'train10%.csv',
    text_cols='title',
    label_cols='category',
    valid_pct=0.05,
    max_vocab=100000, bs=256
)
#data_lm.save('data_lm_exportmin.pkl')
data_lm.save('data_lm10%.pkl')
# Reload at a smaller batch size for language-model fine-tuning
data_lm = load_data(PATH, 'data_lm10%.pkl', bs=64)
# Notebook-cell inspections, converted to prints for script use:
print(data_lm.vocab)
print(len(data_lm.vocab.itos))  # vocab size, capped by max_vocab
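# In fastai v1 the first itos entries are special tokens (xxunk, xxpad, xxbos, ...);
# data_lm.vocab.itos[:10] is a quick way to eyeball them.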
# LM Train
'''learn = language_model_learner(data_lm, AWD_LSTM, drop_mult=0.05, pretrained=True)
print('Finding learning rate...')
learn.lr_find()
learn.recorder.plot(skip_end=20, skip_start=40, suggestion=True)
# When fine-tuning an existing model, first train the head alone
learn.freeze()
learn.fit_one_cycle(1, 2e-2, moms=(0.8,0.7))
# ...then unfreeze the model and train it completely
learn.unfreeze()
learn.fit_one_cycle(
    2, 3e-3, callbacks=[SaveModelCallback(learn, every='epoch', monitor='accuracy', name='lm')]
)'''
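# NOTE: the classifier below loads an encoder named 'lm_enc'. If the LM block
# above is re-enabled, save the fine-tuned encoder under that name first:
#learn.save_encoder('lm_enc')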
# Classification
# Reuse the LM vocabulary so the classifier's embeddings line up with the encoder
data_clas = TextClasDataBunch.from_csv(
    PATH, 'train10%.csv',
    bs=1024,
    vocab=data_lm.vocab,
    text_cols='title',
    label_cols='category'
)
data_clas.save('data_clas10%.pkl')
# Reload at a larger batch size for classification
data_clas = load_data(PATH, 'data_clas10%.pkl', bs=1792)
print(data_clas.batch_size)
print(len(data_clas.vocab.itos))
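# This should equal len(data_lm.vocab.itos), since the classifier reuses the LM vocab.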
#data_clas.vocab = data_lm.vocab
#learn.destroy()
data_clas.show_batch(rows=10)
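# show_batch renders tokenized samples and is meant for notebook display.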
## Classifier Training
learn = text_classifier_learner(data_clas, AWD_LSTM, drop_mult=0.05, bptt=12, path=PATH)  # short bptt: titles are short
learn.load_encoder('lm_enc')  # encoder from the fine-tuned language model
learn.freeze()
learn.lr_find()
learn.recorder.plot(skip_end=10, skip_start=15, suggestion=True)
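# Train the classifier head only (encoder frozen), at the rate suggested above.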
learn.fit_one_cycle(1, 1e-1, moms=(0.8, 0.7))
learn.save('clas-head-10%')
learn = learn.load('clas-head-10%')
print(learn.data.batch_size)
learn.lr_find(start_lr=1e-6, end_lr=2e-1)
learn.recorder.plot(suggestion=True)
learn.freeze_to(-2)
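# Gradual unfreezing with discriminative learning rates: slice(lr/2.6**4, lr)
# spreads rates across layer groups, as in the ULMFiT recipe.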
learn.fit_one_cycle(2, slice(7e-3/(2.6**4), 7e-3), moms=(0.8, 0.7))
learn.save('clas-head2-10%')
learn = learn.load('clas-head2-10%')
learn.unfreeze()
print(learn.data.batch_size)
learn.lr_find(start_lr=1e-6, end_lr=2e-1)
learn.recorder.plot(suggestion=True)
learn.fit_one_cycle(
    2, slice(7e-4/(2.6**4), 7e-4), moms=(0.8, 0.7),
    callbacks=[SaveModelCallback(learn, every='epoch', monitor='accuracy', name='class_fit_full2')]
)
learn.save('clas-full2')
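# A minimal inference sketch (not in the original script; the export filename and
# sample title below are illustrative assumptions):
#learn.export('clas10%_export.pkl')                # serialize Learner + vocab
#infer = load_learner(PATH, 'clas10%_export.pkl')
#category, idx, probs = infer.predict("example title")
#print(category, probs.max().item())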