-
Notifications
You must be signed in to change notification settings - Fork 0
/
node_factory.py
153 lines (122 loc) · 4.65 KB
/
node_factory.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
from lxml import etree as et
from utils import is_empty_string
# ############ RELATED #########################
def get_ns(tag):
    '''Return tag qualified with the TEI namespace in Clark notation.

    The result has the form "{namespace-uri}tag", which is the spelling
    the element factories in this module pass to et.Element.
    '''
    tei_namespace = '{http://www.tei-c.org/ns/1.0}'
    return tei_namespace + tag
# ################# MAIN #######################
def create_entry_parent_node(lemma):
    '''Build the TEI <entry> element that represents one lemma.

    The element gets three attributes, set in this order:
    sortKey (the lemma itself), xml:id ("LBR." followed by the lemma)
    and xml:lang (always "la").
    '''
    xml_ns = '{http://www.w3.org/XML/1998/namespace}'
    attributes = (
        ('sortKey', f"{lemma}"),
        (xml_ns + 'id', f"LBR.{lemma}"),
        (xml_ns + 'lang', "la"),
    )
    entry = et.Element(get_ns("entry"))
    for attr_name, attr_value in attributes:
        entry.set(attr_name, attr_value)
    return entry
# ############ INPUT STYLE NODES ################
def create_bold_hi_node(text):
    '''Return a TEI <hi rend="bold"> element whose text is text.'''
    bold = et.Element(get_ns("hi"), {'rend': 'bold'})
    bold.text = text
    return bold
def create_normal_hi_node(text):
    '''Return a TEI <hi> element without a rend attribute holding text.'''
    tag = get_ns("hi")
    plain = et.Element(tag)
    plain.text = text
    return plain
def create_italic_hi_node(text):
    '''Return a TEI <hi rend="italic"> element whose text is text.'''
    italic = et.Element(get_ns("hi"), {'rend': 'italic'})
    italic.text = text
    return italic
# ############### MOSTLY MORPH ##################
def create_extra_morph(extra_morph):
    '''Return an <extraMorph> element (TEI namespace) holding extra_morph.'''
    tag = get_ns("extraMorph")
    node = et.Element(tag)
    node.text = extra_morph
    return node
def create_form_lemma_node(text):
    '''Return <form type="lemma"> wrapping a single <orth> child.

    The <orth> child is produced by create_orth_node and carries text.
    '''
    lemma_form = et.Element(get_ns("form"), {'type': 'lemma'})
    orth_child = create_orth_node(text)
    lemma_form.append(orth_child)
    return lemma_form
def create_orth_node(text):
    '''Return a TEI <orth> element whose text is text.'''
    tag = get_ns("orth")
    orth = et.Element(tag)
    orth.text = text
    return orth
def create_form_inflected_node(text):
    '''Return <form type="inflected"> with text placed directly inside it.

    Unlike create_form_lemma_node, no <orth> child is created here.
    '''
    inflected = et.Element(get_ns("form"), {'type': 'inflected'})
    inflected.text = text
    return inflected
def create_pc_node(text):
    '''Return a TEI <pc> element whose text is text.'''
    tag = get_ns("pc")
    punctuation = et.Element(tag)
    punctuation.text = text
    return punctuation
def create_gram_grp(text, gram_type="pos"):
    '''Return a <gramGrp> element containing one <gram> child.

    The child's type attribute is the string form of gram_type
    (default "pos") and its text is text.
    '''
    group = et.Element(get_ns("gramGrp"))
    gram = et.Element(get_ns("gram"), {'type': f'{gram_type}'})
    gram.text = text
    group.append(gram)
    return group
def create_praep_xml(content0, content1):
    '''Build the nodes describing a preposition entry.

    Returns a two-item list:
      1. the lemma <form> built from content0;
      2. a <gramGrp> holding <gram type="pos">praep.</gram> followed by
         <gram type="colloc"> whose text is content1 with every
         occurrence of "praep." removed.
    '''
    lemma_form = create_form_lemma_node(content0)
    grammar = create_gram_grp('praep.', "pos")

    collocation = et.Element(get_ns('gram'), {'type': 'colloc'})
    collocation.text = content1.replace('praep.', '')
    grammar.append(collocation)

    return [lemma_form, grammar]
# ############### MOSTLY SENSE ##################
def create_sense_container_non_numbered(title_lemma):
    '''Return a <sense> element for an entry without numbered senses.

    Its xml:id is "LBR.<title_lemma>.1".
    '''
    xml_id = '{http://www.w3.org/XML/1998/namespace}id'
    sense = et.Element(get_ns("sense"), {xml_id: f"LBR.{title_lemma}.1"})
    return sense
def create_sense_container(title_lemma, sense_number=('1',)):
    '''Return a <sense> element identified by lemma and sense number.

    Args:
        title_lemma: lemma used in the identifier.
        sense_number: iterable of strings that are concatenated without a
            separator to form the numeric part of the id. Defaults to
            ('1',); an immutable tuple is used so the default can never
            be mutated and shared between calls.

    Returns:
        The <sense> element with xml:id "LBR.<title_lemma>.<number>".
    '''
    xml_id = '{http://www.w3.org/XML/1998/namespace}id'
    number = ''.join(sense_number)
    sense_container = et.Element(get_ns("sense"))
    sense_container.set(xml_id, f"LBR.{title_lemma}.{number}")
    return sense_container
def create_label(label_content):
    '''Return a TEI <lbl> element whose text is label_content.'''
    tag = get_ns("lbl")
    lbl = et.Element(tag)
    lbl.text = label_content
    return lbl
def create_usg_node(node_content):
    '''Build a <usg> node, splitting off a trailing colon as punctuation.

    When node_content (ignoring surrounding whitespace) ends with ':',
    the text before that final colon becomes the <usg> text, and the
    colon plus anything after it (trailing whitespace) becomes a <pc>
    node.

    Args:
        node_content: the usage text.

    Returns:
        A list holding the <usg> element and, when a trailing colon was
        found, the <pc> element after it.
    '''
    pc_text = None
    if node_content.strip().endswith(':'):
        # Split at the LAST colon: the guard above is about the trailing
        # one, so an earlier colon inside the text must stay in <usg>.
        idx_pc = node_content.rindex(':')
        pc_text = node_content[idx_pc:]
        node_content = node_content[:idx_pc]

    usg_node = et.Element(get_ns("usg"))
    usg_node.set('type', '???')  # placeholder value, kept as in the data model
    usg_node.text = node_content

    res = [usg_node]
    if pc_text:
        res.append(create_pc_node(pc_text))
    return res
def create_def_node(node_content):
    '''Split a definition string into <def> and <pc> nodes.

    node_content is split on ", " and each piece is split again on "; ".
    Every piece that is_empty_string does not reject becomes a
    <def xml:lang="bg"> element; every separator between two pieces is
    emitted as a <pc> element ("; " or ", "), so the node sequence
    mirrors the original text order.

    Pieces rejected by is_empty_string (imported from utils; presumably
    blank strings - confirm there) produce no <def>, but the separators
    around them are kept.

    Args:
        node_content: the raw definition text.

    Returns:
        A flat list of <def> and <pc> elements in document order.
    '''
    xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'
    result = []

    # Split once up front instead of re-splitting on every iteration.
    comma_parts = node_content.split(', ')
    last_comma_idx = len(comma_parts) - 1

    for comma_idx, comma_part in enumerate(comma_parts):
        semicolon_parts = comma_part.split('; ')
        last_semicolon_idx = len(semicolon_parts) - 1

        for semicolon_idx, definition in enumerate(semicolon_parts):
            # Test the piece itself rather than indexing by a filtered
            # count, so an empty piece can never shift later ones out.
            if not is_empty_string(definition):
                def_node = et.Element(get_ns("def"))
                def_node.set(xml_lang, 'bg')
                def_node.text = definition
                result.append(def_node)
            if semicolon_idx < last_semicolon_idx:
                result.append(create_pc_node('; '))

        if comma_idx < last_comma_idx:
            result.append(create_pc_node(', '))

    return result
def assemble_cit_nodes(cit_type, quote_content):
    '''Return a <cit> element wrapping a single <quote> child.

    Args:
        cit_type: 'translation' or 'example'. It is written to the type
            attribute; a translation additionally gets xml:lang="bg".
        quote_content: text placed inside the <quote> child.

    Returns:
        The assembled <cit> element.

    Raises:
        ValueError: if cit_type is not one of the supported values.
    '''
    supported_types = ('translation', 'example')
    if cit_type not in supported_types:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError(
            f"Unsupported cit_type {cit_type!r}; expected one of {supported_types}"
        )

    cit_node = et.Element(get_ns("cit"))
    cit_node.set('type', cit_type)
    if cit_type == 'translation':
        cit_node.set('{http://www.w3.org/XML/1998/namespace}lang', 'bg')

    quote_node = et.Element(get_ns("quote"))
    quote_node.text = quote_content
    cit_node.append(quote_node)
    return cit_node