-
Notifications
You must be signed in to change notification settings - Fork 0
/
comp-theme.py
126 lines (104 loc) · 7.13 KB
/
comp-theme.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pickle
import time
from gensim import models
import argparse
from constraints.ConstraintContainsSyllables import ConstraintContainsSyllables
from constraints.ConstraintPhraseRhymesWith import ConstraintPhraseRhymesWith
from constraints.ConstraintSimilarSemanticMeaning import ConstraintSimilarSemanticMeaning
from constraints.ConstraintMatchesPoetryScheme import ConstraintMatchesPoetryScheme
from utility.Utility import *
from utility.print import *
from utility.ChimpSentenceGenerator import *
from models.CHiMP import *
def load_model(file_to_load):
with open(file_to_load, "rb") as handle:
model = pickle.load(handle)
return model
def process_comp_limerick_themes(length, model, output_file, num_sentences_to_try: int, theme: str, word2vec, threshhold):
rhyme_a = "back"
rhyme_b = "beat"
# Constraints -------------------------------------------------------------
hidden_constraints = []
observed_constraints = []
for _ in range(length):
observed_constraints.append(None)
hidden_constraints.append(None)
theme_constraint = ConstraintSimilarSemanticMeaning(
theme=theme,
similarity_threshhold=threshhold,
model=word2vec,
verbose=False
)
# hidden_constraints[0] = [ConstraintIsPartOfSpeech("NNP", True)]
# TODO - Double check the question marks
observed_constraints[0] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1), theme_constraint]
observed_constraints[1] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("1.", None, 2, min_syllables=2)]
observed_constraints[2] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme(".1", None, 2, min_syllables=2)]
observed_constraints[3] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("..", None, 1, min_syllables=1)]
observed_constraints[4] = [ConstraintPhraseRhymesWith(word=rhyme_a, position_of_rhyme=-1, must_rhyme=True),
ConstraintContainsSyllables(1),
ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)
]
observed_constraints[5] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[6] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("1.", None, 2, min_syllables=2)]
observed_constraints[7] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme(".1", None, 2, min_syllables=2)]
observed_constraints[8] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("..", None, 1, min_syllables=1)]
observed_constraints[9] = [ConstraintPhraseRhymesWith(word=rhyme_a, position_of_rhyme=-1, must_rhyme=True),
ConstraintContainsSyllables(1),
ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)
]
observed_constraints[10] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[11] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)]
observed_constraints[12] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[13] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[14] = [ConstraintPhraseRhymesWith(word=rhyme_b, position_of_rhyme=-1, must_rhyme=True),
ConstraintContainsSyllables(1),
ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)
]
observed_constraints[15] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[16] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)]
observed_constraints[17] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[18] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[19] = [ConstraintMatchesString(rhyme_b),
ConstraintContainsSyllables(1),
ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)
]
observed_constraints[20] = [ConstraintContainsSyllables(1), ConstraintMatchesPoetryScheme(".", None, 1, min_syllables=1)]
observed_constraints[21] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("1.", None, 2, min_syllables=2)]
observed_constraints[22] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme(".1", None, 2, min_syllables=2)]
observed_constraints[23] = [ConstraintContainsSyllables(2), ConstraintMatchesPoetryScheme("..", None, 1, min_syllables=1)]
observed_constraints[24] = [ConstraintPhraseRhymesWith(word=rhyme_a, position_of_rhyme=-1, must_rhyme=True),
ConstraintContainsSyllables(1),
ConstraintMatchesPoetryScheme("1", None, 1, min_syllables=1)
]
# Start -------------------------------------------------------------
# print("CoMP - Limerick - Themes")
sentence_output_file = f"output/comp-theme-{theme}-{threshhold}-prod.txt"
# sentence_output_file = None
startTime = time.time()
NHHMM = ConstrainedHiddenMarkovProcess(length, model, hidden_constraints, observed_constraints)
NHHMM.process()
sentence_generator = ChimpSentenceGenerator(NHHMM, length)
sentences = sentence_generator.count_all_sentences(num_try=num_sentences_to_try, sentence_output_file=sentence_output_file)
executionTime = (time.time() - startTime)
# print(f"CoMP Finished in {str(executionTime)} seconds with theme: {theme} and threshhold: {threshhold}.")
print(f"CoMP Finished. Seconds: {str(executionTime)}. Theme: {theme}. Threshhold: {threshhold}. Number of sentences: {sentences}.\n", file=open(output_file, "a"))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Process some integers.')
parser.add_argument('--theme', help='the theme')
parser.add_argument('--threshhold', help='threshhold percentage')
args = parser.parse_args()
theme = args.theme
threshhold = float(args.threshhold)
if theme == "" or threshhold == "":
quit(1)
# This is only for the long run -
text_file_name = "2016_fic.txt"
pickle_file = f"pickle_files/{text_file_name}_hmm.pickle"
model = load_model(pickle_file)
output_file = f"logs/comp-themes-{theme}.txt"
# word2vec = models.KeyedVectors.load_word2vec_format('/home/biggbs/gensim-data/glove-twitter-25/glove-twitter-25')
word2vec = models.KeyedVectors.load_word2vec_format('/Users/biggbs/gensim-data/glove-twitter-25/glove-twitter-25')
# num_sentences_to_try = 10000
num_sentences_to_try = 5
process_comp_limerick_themes(25, model, output_file, num_sentences_to_try, theme, word2vec, threshhold)