-
Notifications
You must be signed in to change notification settings - Fork 2
/
NERTagger.py
34 lines (31 loc) · 1.04 KB
/
NERTagger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import nltk
from nltk.corpus import wordnet
# Module-level accumulator of the full person names found so far.
# `person_names` is intentionally bound to the SAME list object (an alias,
# not a copy): anything appended to or removed from one name is visible
# through the other.
person_list = person_names = []
def get_human_names(text):
    """Collect multi-word PERSON entities from *text* into ``person_list``.

    The text is tokenized, part-of-speech tagged and passed through NLTK's
    named-entity chunker. For every subtree labelled ``'PERSON'`` that spans
    more than one token, the tokens are joined with single spaces and the
    resulting name is appended to the module-level ``person_list`` unless it
    is already present.

    Args:
        text: The raw text to scan.

    Returns:
        None. Results are accumulated in the module-level ``person_list``.
    """
    tagged_tokens = nltk.pos_tag(nltk.tokenize.word_tokenize(text))
    chunk_tree = nltk.ne_chunk(tagged_tokens, binary=False)

    for person_chunk in chunk_tree.subtrees(filter=lambda t: t.label() == 'PERSON'):
        # Each leaf is indexed at position 0 to get the token itself.
        words = [leaf[0] for leaf in person_chunk.leaves()]
        if len(words) <= 1:
            # Avoid grabbing lone surnames.
            continue
        full_name = ' '.join(words)
        if full_name not in person_list:
            person_list.append(full_name)
def get_names(text):
    """Return person names found in *text*, minus those containing dictionary words.

    Calls ``get_human_names(text)`` to add newly detected names to the
    module-level ``person_list``, then removes every name in which at least
    one space-separated part has a WordNet synset (i.e. the part is also a
    known dictionary word).

    Args:
        text: The raw text to scan.

    Returns:
        The module-level ``person_names`` list after filtering. Because the
        list lives at module level, names kept by earlier calls are still
        present in the result.
    """
    get_human_names(text)
    # Iterate over a snapshot: at module level `person_names` is the same
    # list object as `person_list`, so removing entries while iterating the
    # live list would make the loop skip the entry after each removal.
    for person in list(person_list):
        if any(wordnet.synsets(part) for part in person.split(" ")):
            person_names.remove(person)
    return person_names