-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlinesplit.py
executable file
·69 lines (55 loc) · 1.83 KB
/
linesplit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
This class takes a line in our format '...foo...bar...' and
splits it into all combinations of adjacent groupings.
examples:
foo.bar -> (foo., .bar, foo.bar)
..foo..bar..biz.. -> (..foo.., ..bar.., ..biz..,
..foo..bar.., ..bar..biz..,
..foo..bar..biz..)
"""
import itertools
import re
def _grouper(iter, n):
for offset in range(n):
for index, item in enumerate(iter):
if index+offset+1 <= len(iter):
yield iter[index:index+offset+1]
def _shave(lhs, mid, rhs):
"""
Shave off the leading and trailing .'s of words that are
adjacent to other words. This is so that the word finder
doesn't create words that run together.
examples:
(..raj, ..dar.., bar..) -> (..raj., .dar., .bar..)
"""
if lhs:
assert mid[0] == '.'
lhs = lhs + '.'
mid = mid[1:]
if rhs:
assert mid[-1] == '.'
mid = mid[:-1]
rhs = '.' + rhs
return (lhs, mid, rhs)
def split(text):
    """
    Split the text (format: ...bar..foo) into combinations
    of adjacent groupings.

    This is a generator.  For every run of adjacent words in text it
    yields a (lhs_text, grp_text, rhs_text) tuple: the text before the
    run, the run itself with its surrounding .'s, and the text after
    the run.  The three pieces always concatenate back to text.
    Single-word runs come first, then two-word runs, and so on.
    Text containing no non-'.' characters yields nothing.
    """
    # Each match is one word together with the .'s directly in front of it.
    groups = list(re.finditer(r'[.]*[^.]+', text))
    for grouping in _grouper(groups, len(groups)):
        first = grouping[0]
        last = grouping[-1]
        lhs_text = text[:first.start()]
        grp_text = text[first.start():last.end()]
        rhs_text = text[last.end():]
        # Now -- add the trailing ...'s to the grp.  They belong to the
        # next match (or to the end of the text), so they are moved over
        # here by hand.
        dots = re.match(r'[.]+', rhs_text)
        if dots:
            grp_text += dots.group()
            rhs_text = rhs_text[dots.end():]
        yield _shave(lhs_text, grp_text, rhs_text)
if __name__ == '__main__':
    # Demo run.  The function defined in this file is `split`; the
    # previous call to `linesplit` raised NameError.  The parenthesised
    # single-argument print works as both a statement and a function call.
    print(list(split('..hong..kong..phoey..')))