-
Notifications
You must be signed in to change notification settings - Fork 0
/
ner_train.py
62 lines (47 loc) · 2.02 KB
/
ner_train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
'''# -*- coding: utf-8 -*-
"""Copy of ner_train
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1-MGWmywMEVKzrGkfHk667cJWANyr4Ity
"""
already_changed_dir = False
!pip install transformers
!pip install tensorboardx
!pip install seqeval
!pip install simpletransformers
from google.colab import drive
drive.mount('/content/drive')
import os
if not already_changed_dir:
path = 'drive/My Drive/LINKS/'
os.chdir(path)
already_changed_dir = True
cwd = os.getcwd()
print(cwd)
#!cd simpletransformers; pip install . #install local simpletransformers lib
# lib dir 'root/usr/local/lib/python3.6/dist_packages/simpletransformers'
'''
from simpletransformers.ner.ner_model import NERModel
# Settings passed to NERModel through its `args` parameter for the training run.
# logging_steps and save_steps are set to the same step count.
TRAIN_ARGS = {
    "classification_report": True,
    "output_dir": "output_balanced/",
    "overwrite_output_dir": True,
    "do_lower_case": True,
    "num_train_epochs": 5,
    "fp16": False,
    "train_batch_size": 32,
    "eval_batch_size": 32,
    "logging_steps": 2741,
    "save_steps": 2741,
    "evaluate_during_training": True,
}

# CoNLL-formatted training and validation files for the balanced multilingual set.
TRAIN_FILE = "multilang_conll_balanced/mlang.conll.train"
VAL_FILE = "multilang_conll_balanced/mlang.conll.val"

# Build the NER model on top of the multilingual uncased BERT checkpoint.
# NOTE(review): cuda_device=1 presumably selects the second GPU -- confirm the
# target machine actually exposes a device with that index.
model = NERModel(
    "bert",
    "bert-base-multilingual-uncased",
    args=TRAIN_ARGS,
    cuda_device=1,
)

# Fit on the training file; the validation file is supplied as `eval_df`
# because 'evaluate_during_training' is switched on in TRAIN_ARGS.
model.train_model(TRAIN_FILE, eval_df=VAL_FILE)
# Settings passed to NERModel through its `args` parameter for evaluation.
# No 'output_dir' is given here, so the library's own default is used.
EVAL_ARGS = {
    'classification_report': True,
    'overwrite_output_dir': True,
    'do_lower_case': True,
    'num_train_epochs': 5,
    'fp16': False,
    'train_batch_size': 32,
    'eval_batch_size': 32,
    'logging_steps': 2741,
    'save_steps': 2741,
    'evaluate_during_training': True,
}

# Files to evaluate on, in order: multilingual, Italian, then English,
# each with its validation file followed by its test file.
EVAL_FILES = (
    'data/MLANG/val.txt',
    'data/MLANG/test.txt',
    'data/it/val.txt',
    'data/it/test.txt',
    'data/en/val.txt',
    'data/en/test.txt',
)

# Load the model stored under output_balanced/best_model/.
model = NERModel('bert', 'output_balanced/best_model/', args=EVAL_ARGS)

# Evaluate on every file and keep each return value keyed by its path, so no
# evaluation is lost when `result` is rebound on the next iteration.
# `result` still ends up holding the return value for the last file.
eval_results = {}
for eval_file in EVAL_FILES:
    result = model.eval_model(eval_file)
    eval_results[eval_file] = result