-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathscrape-wordnet.py
90 lines (63 loc) · 2.11 KB
/
scrape-wordnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from nltk.corpus import wordnet as wn
from collections import defaultdict
def meronyms():
    """Print synsets that have several part-meronyms with same-length lemma names."""
    for synset in wn.all_synsets():
        parts = synset.part_meronyms()
        if not parts:
            continue
        # Bucket each part's lemma names first by name length, then by the
        # part synset they belong to: length -> {part synset -> [names]}.
        by_length = defaultdict(lambda: defaultdict(list))
        for part in parts:
            for name in part.lemma_names():
                by_length[len(name)][part].append(name)
        # Only interesting when at least two DISTINCT parts share a name length.
        if not any(len(group) > 1 for group in by_length.values()):
            continue
        print()
        print(f'> {synset} has {len(parts)} parts.')
        for length, group in by_length.items():
            if len(group) > 1:
                cells = ['/'.join(names) for names in group.values()]
                print(f'{length}: {", ".join(cells)}')
#meronyms()
def hypernyms():
    """Print every hypernym/hyponym lemma pair whose names are the same length."""
    pairs = set()
    for child in wn.all_synsets():
        for parent in child.hypernyms():
            for a in child.lemma_names():
                for b in parent.lemma_names():
                    if len(a) == len(b):
                        # Sort the pair so (a, b) and (b, a) dedupe together.
                        pairs.add(tuple(sorted((a, b))))
    for a, b in sorted(pairs, key=lambda p: (len(p[0]), p)):
        print(a, b)
    print()
    print(f'found {len(pairs)} pairs')
def antonyms():
    """Print every antonym lemma pair whose names are the same length."""
    pairs = set()
    for synset in wn.all_synsets():
        for lemma in synset.lemmas():
            a = lemma.name()
            for opposite in lemma.antonyms():
                b = opposite.name()
                if len(a) == len(b):
                    # Sort the pair so (a, b) and (b, a) dedupe together.
                    pairs.add(tuple(sorted((a, b))))
    for a, b in sorted(pairs, key=lambda p: (len(p[0]), p)):
        print(a, b)
    print()
    print(f'found {len(pairs)} pairs')
#hypernyms()
#antonyms()
def syns(x):
    """Yield every lemma name of every WordNet synset of *x* (duplicates possible)."""
    for synset in wn.synsets(x):
        for lemma in synset.lemmas():
            yield lemma.name()
def suggest(x, y):
    """Print all pairs (synonym of *x*, synonym of *y*) that have equal length.

    Handy for finding drop-in replacement words of identical width.
    Output is sorted by length, then alphabetically within each length.
    """
    # Bug fix: the original key was `lambda xx: (len(xx), x)` — the tiebreaker
    # was the constant argument `x`, so equal-length synonyms printed in
    # nondeterministic set order (varies with hash randomization). Tiebreak on
    # the candidate itself for a stable, fully deterministic ordering.
    for xx in sorted(set(syns(x)), key=lambda s: (len(s), s)):
        for yy in sorted(set(syns(y))):
            if len(xx) == len(yy):
                print(xx, yy)
#suggest('vertices', 'edges')
#suggest('start', 'stop')
# Demo invocation: prints same-length synonym pairs for "grow" vs "shrink".
# NOTE(review): runs at import time — consider an `if __name__ == "__main__"` guard.
suggest('grow', 'shrink')