# fit_eraser_probing_tasks.py (forked from SilverSolver/RobustATD)
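# Fits LEACE concept erasers (concept_erasure.LeaceEraser) for a set of SentEval-style
# probing tasks on mean-pooled transformer embeddings, saves each fitted eraser to
# erasers/<task>_<model>.pkl, and reports logistic-regression probing accuracy on the
# train split before and after erasure.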
from pathlib import Path
import numpy as np
import torch
from tqdm.auto import tqdm
from transformers import RobertaTokenizer, RobertaModel, AutoTokenizer, AutoModel
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelBinarizer
from concept_erasure import LeaceEraser
import matplotlib as mpl
import pickle
from sklearn.model_selection import train_test_split

task_names = [
    "bigram_shift",
    "coordination_inversion",
    "obj_number",
    "odd_man_out",
    "past_present",
    "sentence_length",
    "subj_number",
    "top_constituents",
    "tree_depth",
    "word_content",
]

# Select the frozen encoder whose hidden states will be probed.
model_name = 'roberta'

if model_name == 'roberta':
    model_path = 'roberta-base'
    tokenizer = RobertaTokenizer.from_pretrained(model_path)
    model = RobertaModel.from_pretrained(model_path, output_attentions=False).cuda()
elif model_name == 'phi2':
    MODEL_NAME = "microsoft/phi-2"
    model = AutoModel.from_pretrained(MODEL_NAME,
                                      torch_dtype="auto",
                                      trust_remote_code=True).cuda()
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
elif model_name == 'bert':
    model_path = 'bert-base-uncased'
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModel.from_pretrained(model_path, output_attentions=False).cuda()


def tokenize(text):
    """Tokenize a single text, truncated to at most 512 tokens."""
    return tokenizer(text, max_length=512, truncation=True, return_tensors='pt')
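
# Note: loadFile below assumes SentEval-style probing files, one example per line
# (the example line here is illustrative, not taken from the data):
#   tr<TAB><label><TAB><sentence>
# Column 0 is the split token ('tr', 'va', or 'te'), column 1 the class label, and the
# last column the sentence itself.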
def loadFile(fpath):
    """Read a tab-separated probing file into train/dev/test splits with integer labels."""
    task_data = {'train': {'X': [], 'y': []},
                 'dev': {'X': [], 'y': []},
                 'test': {'X': [], 'y': []}}
    tok2split = {'tr': 'train', 'va': 'dev', 'te': 'test'}
    with open(fpath, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.rstrip().split('\t')
            # task_data[tok2split[line[0]]]['X'].append(line[-1].split())
            task_data[tok2split[line[0]]]['X'].append(line[-1])
            task_data[tok2split[line[0]]]['y'].append(line[1])
    # Map string labels to contiguous integer ids, in sorted order.
    labels = sorted(np.unique(task_data['train']['y']))
    tok2label = dict(zip(labels, range(len(labels))))
    nclasses = len(tok2label)
    for split in task_data:
        for i, y in enumerate(task_data[split]['y']):
            task_data[split]['y'][i] = tok2label[y]
    return task_data


def get_embeddings(model, data):
    """Return mean-pooled last-hidden-state embeddings, one row per input text."""
    embs = []
    device = 'cuda'
    for i in tqdm(range(len(data))):
        text = data[i]
        inputs = tokenize(text)['input_ids'].to(device)
        with torch.no_grad():
            out = model(inputs)
        embeddings = out.last_hidden_state.detach().cpu().numpy().squeeze()
        # Mean-pool over the sequence dimension to get a single vector per text.
        embs.append(embeddings.mean(axis=0))
    embs = np.vstack(embs)
    return embs


acc_dict = {}
for i, task_name in enumerate(task_names):
    print(f"{i}/{len(task_names)}: {task_name}")
    task_data = loadFile(f"data/probing/{task_name}.txt")
    texts = task_data['train']['X']
    Y = task_data['train']['y']
    # texts, Y, _, _ = train_test_split(texts, Y, stratify=Y, train_size=1000)
    X = get_embeddings(model, texts).astype(float)
    print(X.shape)

    # Fit a LEACE eraser on the train-split embeddings and one-hot labels, then save it.
    lb = LabelBinarizer().fit(Y)
    Y_t = torch.from_numpy(lb.transform(Y))
    X_t = torch.from_numpy(X)
    eraser = LeaceEraser.fit(X_t, Y_t)
    with open(f'erasers/{task_name}_{model_name}.pkl', 'wb') as f:
        pickle.dump(eraser, f)

    # Linear probing accuracy before and after erasure.
    clf = LogisticRegression(max_iter=10000)
    clf.fit(X, Y)
    print(f'Accuracy before erasure: {clf.score(X, Y)}')
    X_ = eraser(X_t)
    clf.fit(X_.numpy(), Y)
    print(f'Accuracy after erasure: {clf.score(X_.numpy(), Y)}')
    # Note: LabelBinarizer yields a single column for binary tasks, so this prints 1
    # rather than 2 for two-class problems.
    print(f"Number of classes: {Y_t.shape[1]}")