-
Notifications
You must be signed in to change notification settings - Fork 9
/
run_toxicity.py
84 lines (70 loc) · 3.07 KB
/
run_toxicity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import argparse
import numpy as np
import torch
import csv
from BYOD.utils.wikiDataset import WikiDataset
from BYOD.utils.hf_utils import get_model_n_tokenizer
from BYOD import toxicity_metric
torch.backends.cuda.matmul.allow_tf32 = True # For faster matmul (but less precise)
torch.backends.cudnn.benchmark = True # To automate cudnn kernel choice
def main(args):
print(args)
dataset = WikiDataset(
corpus_path="wikitext",
corpus_name="wikitext-2-raw-v1",
topic_sentence=args.topic_sentence,
all_sentences=not args.topic_sentence,
max_examples=args.max_examples,
cache_dir=args.cache_dir_dataset,
seed=args.seed,
).get_dataset()
model, tokenizer = get_model_n_tokenizer(args.model_name, args=args)
sens_score, sens_std, data_dict = toxicity_metric(
model,
dataset,
tokenizer,
args.topic_sentence,
args.location,
args.toxic_phrase,
args.max_examples,
args.seed,
)
model_sensivity_scores = len(np.where(np.array(data_dict["count_diff"]) > 0)[0]) / len(data_dict["count_diff"])
# results_row = [
# args.model_name,
# len(data_dict["count_diff"]),
# args.toxic_phrase,
# model_sensivity_scores,
# np.mean(data_dict["count_diff"]),
# np.std(data_dict["count_diff"]),
# np.median(data_dict["count_diff"]),
# ]
# print("Results Row: ", results_row)
# with open("toxicity_results.csv", mode="a") as file:
# writer = csv.writer(file)
# writer.writerow(results_row)
with open("results.csv", mode="a") as file:
writer = csv.writer(file)
writer.writerow([args.model_name, "toxicity", len(data_dict["count_diff"]), model_sensivity_scores, -1])
print("DONE")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str, default="gpt2", help="Model name")
parser.add_argument("--evaluation_type", type=str, default="generation", help="generation, or token_logits, or both")
parser.add_argument("--topic_sentence", action="store_true", help="Whether to use topic sentence")
parser.add_argument("--location", type=str, default="append", help="Location of toxic phrase")
parser.add_argument("--max_examples", type=int, default=100, help="Max number of examples")
parser.add_argument("--toxic_phrase", type=str, default="Fuck Fuck", help="phrase append or prepending the text")
parser.add_argument("--seed", type=int, default=42, help="seed")
parser.add_argument(
"--bad_list_location",
type=str,
default="List-of-Dirty-Naughty-Obscene-and-Otherwise-Bad-Words/en.txt",
help="file path of word to detect",
)
parser.add_argument("--cache_dir_model", type=str, default="models")
parser.add_argument("--cache_dir_dataset", type=str, default="datasets")
parser.add_argument("--output_path", type=str, default="outputs/", help="where to dump information")
parser.add_argument("--fp16", default=False, type=bool)
args = parser.parse_args()
main(args)