-
Notifications
You must be signed in to change notification settings - Fork 8
/
phrasecombine.py
50 lines (44 loc) · 1.3 KB
/
phrasecombine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
class PhraseCombiner:
    """Merge runs of adjacent tokens into the longest matching dictionary phrase.

    The dictionary is held in ``pfdic``, a flat prefix table: every proper
    character-level prefix of an entry maps to 0 and every complete entry
    maps to 1.  A lookup therefore answers two questions at once -- "can
    this fragment still grow into an entry?" (key present) and "is this
    fragment itself an entry?" (value is truthy).
    """

    def __init__(self, dictfile, char='', encoding=None):
        """Load the phrase dictionary.

        Args:
            dictfile: Path to a text file with one phrase per line.
                Surrounding whitespace is stripped from each line.
            char: Separator inserted between tokens when they are joined
                into a candidate phrase in :meth:`combine`.
            encoding: Text encoding passed to ``open``.  ``None`` (the
                default) leaves the choice to ``open``, exactly as when
                no encoding argument is supplied.
        """
        self.pfdic = {}
        self.char = char
        with open(dictfile, 'r', encoding=encoding) as f:
            for ln in f:
                word = ln.strip()
                # A blank line would otherwise register the empty string
                # as a phrase.
                if not word:
                    continue
                # Proper prefixes only (lengths 1 .. len-1); setdefault
                # keeps an existing 1 when a shorter entry was seen first.
                for end in range(1, len(word)):
                    self.pfdic.setdefault(word[:end], 0)
                self.pfdic[word] = 1

    def combine(self, tokens):
        """Greedily merge tokens into the longest dictionary phrases.

        Starting at each position, the tokens from that position onward are
        joined with ``self.char`` one at a time for as long as the joined
        text is a key of ``pfdic``; the longest joined text whose value is
        truthy is emitted and scanning resumes after it.  When nothing
        matches, the single token is emitted unchanged.

        Args:
            tokens: Sequence of string tokens.

        Returns:
            A new list of tokens/phrases; empty when ``tokens`` is empty.
        """
        total = len(tokens)
        merged = []
        start = 0
        while start < total:
            best_phrase = None
            best_end = start
            end = start
            candidate = tokens[start]
            while candidate in self.pfdic:
                if self.pfdic[candidate]:
                    # Remember the longest complete entry seen so far.
                    best_phrase = candidate
                    best_end = end
                end += 1
                if end >= total:
                    # Ran out of tokens; nothing longer to try.
                    break
                candidate = self.char.join(tokens[start:end + 1])
            if best_phrase is None:
                merged.append(tokens[start])
                start += 1
            else:
                merged.append(best_phrase)
                start = best_end + 1
        return merged
def main(dictfile):
    """Filter standard input through a PhraseCombiner.

    Each input line is split on whitespace, its tokens are merged with an
    empty-string separator, and the result is printed as one
    space-separated line.

    Args:
        dictfile: Path to the phrase dictionary handed to PhraseCombiner.

    Returns:
        0, for use as the process exit status.
    """
    combiner = PhraseCombiner(dictfile, '')
    for raw_line in sys.stdin:
        # split() with no arguments already discards leading and trailing
        # whitespace, so no separate strip() is needed.
        merged = combiner.combine(raw_line.split())
        print(' '.join(merged))
    return 0
if __name__ == '__main__':
    # Exit with a usage message rather than an IndexError traceback when
    # the dictionary path is missing.  sys.exit() with a string prints it
    # to stderr and exits with status 1.
    if len(sys.argv) < 2:
        sys.exit('usage: {} DICTFILE < tokens.txt'.format(sys.argv[0]))
    sys.exit(main(sys.argv[1]))