-
Notifications
You must be signed in to change notification settings - Fork 0
/
sentenceDependencies.py
74 lines (58 loc) · 2.48 KB
/
sentenceDependencies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Dec 12 13:49:31 2017
@author: diana
"""
import numpy as np
# Creates a SentenceDependencies object from the heads array produced by the MST step and the labels list produced by the model.
def createSentenceDependencies(sentenceInWords, sentenceInTags, headsArr, labels):
    """Assemble a SentenceDependencies object from parallel per-word sequences.

    The word at position ``i`` of ``sentenceInWords`` becomes a Token whose
    index is ``i + 1`` and whose tag, head and label are read from position
    ``i`` of ``sentenceInTags``, ``headsArr`` and ``labels`` respectively.

    ``headsArr`` must expose a ``size`` attribute and support integer
    indexing (the file imports numpy, so presumably a numpy array).

    Raises:
        AssertionError: if the number of words, heads and labels differ.
    """
    assert len(sentenceInWords) == headsArr.size == len(labels), \
        "Length mismatch: {0} words, {1} heads, {2} labels".format(
            len(sentenceInWords), headsArr.size, len(labels))

    dependencies = SentenceDependencies()
    for position, word in enumerate(sentenceInWords):
        # Token indices are 1-based; the input sequences are 0-based.
        token = Token(
            index=position + 1,
            word=word,
            POSTag=sentenceInTags[position],
            head=headsArr[position],
            label=labels[position],
        )
        dependencies.addToken(token)
    return dependencies
class Token:
    """One word of a sentence together with its dependency annotation.

    Attributes (all stored exactly as passed to the constructor):
        index:  identifier of the token within its sentence.
        word:   surface form of the token.
        POSTag: part-of-speech tag.
        head:   head assigned to this token.
        label:  dependency label assigned to this token.
    """

    def __init__(self, index, word, POSTag, head, label):
        self.index, self.word, self.POSTag = index, word, POSTag
        self.head, self.label = head, label

    def __str__(self):
        """Return a single-line, human-readable summary of all five fields."""
        template = "Index: {0}, Word: \"{1}\", POSTag: {2}, Head: {3}, Label: {4}"
        fields = (self.index, self.word, self.POSTag, self.head, self.label)
        return template.format(*fields)
class SentenceDependencies:
    """Dependency annotation of one sentence, stored as tokens keyed by index.

    A token is any object exposing ``index``, ``word``, ``POSTag``, ``head``
    and ``label`` attributes. All per-token outputs (string form, word/tag
    lists) are produced in ascending index order, independent of the order in
    which tokens were added.
    """

    def __init__(self):
        self.tokens = {}           # token.index -> token
        self.sentenceInWords = []  # filled lazily by getSentenceInWordsAndInTags
        self.sentenceInTags = []   # filled lazily, parallel to sentenceInWords

    def addToken(self, token):
        """Store ``token`` under ``token.index``, replacing any previous entry."""
        self.tokens[token.index] = token

    def __str__(self):
        """Return one line per token, ordered by token index."""
        return '\n'.join(str(self.tokens[k]) for k in sorted(self.tokens))

    def _slotCount(self):
        """Return the array length needed so every token index is a valid slot.

        With 1-based indices 1..n this is n + 1 (slot 0 is left for the root);
        with 0-based indices 0..n-1 it is n.
        """
        if not self.tokens:
            return 0
        return max(len(self.tokens), max(self.tokens) + 1)

    def getSentenceInWordsAndInTags(self):
        """Return ``(words, tags)`` as two parallel lists ordered by token index.

        The lists are built on the first call and the same list objects are
        returned afterwards; tokens added after the first call are not
        reflected in them.

        Raises:
            AssertionError: if no tokens have been added, or if the word list
                is populated while the tag list is empty.
        """
        if len(self.sentenceInWords) > 0:
            assert len(self.sentenceInTags) > 0
            return self.sentenceInWords, self.sentenceInTags

        assert len(self.tokens) > 0
        # Sort by index so the result does not depend on insertion order.
        for k in sorted(self.tokens):
            tok = self.tokens[k]
            self.sentenceInWords.append(tok.word)
            self.sentenceInTags.append(tok.POSTag)
        return self.sentenceInWords, self.sentenceInTags

    def getHeadsForWords(self):
        """Return an int numpy array where element ``i`` is the head of token ``i``.

        Slots without a token stay 0; with 1-based token indices that is
        slot 0, which accounts for the root.
        """
        arr = np.zeros(self._slotCount(), dtype=int)
        for k, v in self.tokens.items():
            arr[k] = v.head
        return arr

    def getLabelsForWords(self, l2i):
        """Return a numpy array where element ``i`` is ``l2i[label of token i]``.

        ``l2i`` maps a label to its numeric index. The array uses numpy's
        default float dtype, and slots without a token stay 0.

        Raises:
            KeyError: if a token's label is missing from ``l2i`` (when ``l2i``
                is a dict).
        """
        arr = np.zeros(self._slotCount())
        for k, v in self.tokens.items():
            arr[k] = l2i[v.label]
        return arr