Skip to content

Commit

Permalink
fix ner
Browse files (browse the repository at this point in the history)
  • Loading branch information
ablodge committed Jul 8, 2020
1 parent 3b12898 commit f3e8920
Showing 1 changed file with 6 additions and 12 deletions.
18 changes: 6 additions & 12 deletions parser/preprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,18 +21,12 @@ def main():
 ner = []
 pos = []
 for sent in doc.sentences:
-entity_types = [e.type for e in sent.entities]
-entity_ids = [[w.id for w in e.words] for e in sent.entities]
-for word in sent.words:
-tokens.append(word.text)
-lemmas.append(word.lemma)
-pos.append(word.pos)
-entity_type = 'O'
-for span, type in zip(entity_ids, entity_types):
-if word.id in span:
-entity_type = type
-break
-ner.append(entity_type)
+for token in sent.tokens:
+for word in token.words:
+tokens.append(word.text)
+lemmas.append(word.lemma)
+pos.append(word.pos)
+ner.append(token.ner)
 amr['lemmas'] = lemmas
 amr['pos'] = pos
 amr['tokens'] = tokens
Expand Down

0 comments on commit f3e8920

Please sign in to comment.