-
Notifications
You must be signed in to change notification settings - Fork 0
/
add-contextual-features.py
78 lines (67 loc) · 1.96 KB
/
add-contextual-features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python
"""
An example for part-of-speech tagging.
Copyright 2010,2011 Naoaki Okazaki.
"""
# Delimiter placed between field values; forwarded to crfutils.main() as
# ``sep`` when this file is run as a script.
separator = ' '

# Names of the fields of the input data, joined into the single
# space-delimited string that is forwarded to crfutils.main() as ``fields``.
# The feature templates in this file refer to every name here except 'y'.
# NOTE(review): 'y' is presumably the label column, and the order presumably
# mirrors the column order of the input -- confirm against crfutils.
fields = ' '.join((
    'w',
    'num', 'cap', 'sym',
    'p1', 'p2', 'p3', 'p4',
    's1', 's2', 's3', 's4',
    'y',
))
# Feature templates, expressed as a Python object.  ``templates`` is a tuple
# of templates; each template is a tuple of (field name, relative offset)
# pairs, where offset 0 denotes the current position.
# NOTE(review): the previous comment described this set as identical to the
# template bundled with the CRF++ distribution; that cannot be checked from
# this file -- confirm before relying on it.

# Fields used on their own, at the current position only.
_CURRENT_ONLY_FIELDS = (
    'num', 'cap', 'sym',
    'p1', 'p2', 'p3', 'p4',
    's1', 's2', 's3', 's4',
)


def _word_ngram(*offsets):
    """Return one template that combines the 'w' field at each given offset."""
    return tuple(('w', offset) for offset in offsets)


templates = (
    # One single-field template per non-word field.
    tuple(((name, 0),) for name in _CURRENT_ONLY_FIELDS)
    # Word unigrams in a window of two positions either side.
    + tuple(_word_ngram(offset) for offset in (0, -1, 1, -2, 2))
    # Contiguous word bigrams, trigrams and 4-grams inside that window.
    + tuple(_word_ngram(start, start + 1) for start in range(-2, 2))
    + tuple(_word_ngram(start, start + 1, start + 2) for start in range(-2, 1))
    + tuple(_word_ngram(*range(start, start + 4)) for start in range(-2, 0))
    # The full five-word window.
    + (_word_ngram(-2, -1, 0, 1, 2),)
    # Current word paired with each of the nine preceding words ...
    + tuple(_word_ngram(0, -distance) for distance in range(1, 10))
    # ... and with each of the nine following words.
    # NOTE(review): the (0, +1) pair is also produced by the bigram group
    # above, so that template is listed twice; it is kept so the emitted
    # attributes stay exactly as before.
    + tuple(_word_ngram(0, distance) for distance in range(1, 10))
)
import crfutils
def feature_extractor(X):
    """Generate the attributes for one item sequence, modifying it in place.

    The module-level ``templates`` are applied to ``X`` through
    ``crfutils.apply_templates``; afterwards the first and last items of a
    non-empty sequence are marked as sequence boundaries.

    Args:
        X: Sequence of items.  Each item must provide a list under the key
            'F', to which the boundary markers are appended.
            NOTE(review): presumably a list of dicts built by crfutils, with
            'F' also receiving the template output -- confirm.

    Returns:
        None.
    """
    # Templates first, so the boundary markers end up after whatever the
    # templates add to the first and last items.
    crfutils.apply_templates(X, templates)

    # An empty sequence has no first or last item to mark.
    if not X:
        return

    first_item, last_item = X[0], X[-1]
    first_item['F'].append('__BOS__')  # beginning-of-sequence marker
    last_item['F'].append('__EOS__')   # end-of-sequence marker
if __name__ == '__main__':
    # Script entry point: hand the extractor, the input field names and the
    # field delimiter over to the driver in crfutils.
    # NOTE(review): crfutils.main presumably handles reading the input and
    # writing the generated attributes -- confirm in crfutils.
    crfutils.main(
        feature_extractor,
        fields=fields,
        sep=separator
    )