From d9322f148fd9a0732991a787a813caeb7c542d8b Mon Sep 17 00:00:00 2001
From: Mansi Gupta <44631632+Mansi145@users.noreply.github.com>
Date: Sun, 29 Mar 2020 21:17:11 +0530
Subject: [PATCH] Setup Travis CI_resolved (#98)

* travis added
* added linting to travis
* updated unit testing
* added script for unit testing
* replaced unittest with pytest
* linting fixed
---
 .travis.yml                 |  9 +++++
 app.py                      |  8 +++--
 chatbot.py                  | 42 +++++++++++------------
 config.py                   |  4 +--
 constants.py                |  2 +-
 databaseconnect.py          | 34 +++++++++----------
 features.py                 | 60 ++++++++++++++++++---------------
 featuresDump.py             | 66 ++++++++++++++++++-------------------
 googleMapsApiModule.py      | 62 ++++++++++++++++++----------------
 init.py                     |  5 ++-
 test_googleMapsApiModule.py | 21 ++++++------
 utilities.py                | 16 ++++-----
 12 files changed, 176 insertions(+), 153 deletions(-)
 create mode 100644 .travis.yml

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..d359248
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,9 @@
+language: python
+python:
+  - "3.6"
+cache: pip
+install:
+  - pip install -r requirements.txt
+script:
+  - flake8 .
+  - pytest
diff --git a/app.py b/app.py
index 15223aa..fa3a84b 100644
--- a/app.py
+++ b/app.py
@@ -3,15 +3,17 @@
 
 app = Flask(__name__)
 
+
 @app.route('/chatbot/<string:user_input>', methods=['GET'])
 def chat(user_input):
     try:
         response = message_to_bot(user_input, clf, learn_response)
-    except:
-        return jsonify({'message': ('Unable to get response', learn_response)}, 500)
-
+    except:  # noqa: E722
+        return jsonify({'message': ('Unable to get response', learn_response)}, 500)  # noqa: E501
+
     return jsonify({'message': response}, 200)
 
+
 if __name__ == '__main__':
     clf, learn_response = setup()
     app.run(debug=False)
diff --git a/chatbot.py b/chatbot.py
index 286e319..ac03469 100644
--- a/chatbot.py
+++ b/chatbot.py
@@ -1,6 +1,6 @@
 import utilities
 import databaseconnect
-import googleMapsApiModule 
+import googleMapsApiModule
 import logging
 import logger_config
 location_dict = {"origin": "null", "destination": "null"}
@@ -29,23 +29,23 @@ def message_to_bot(H, clf, learn_response):
         learn_response = 3
         return B, learn_response
     if learn_response == 3:
-        location_dict["destination"]=H
-        origin, destination = location_dict["origin"], location_dict["destination"]
-        googleMapsApiModule.direction(origin,destination)
+        location_dict["destination"] = H
+        origin, destination = location_dict["origin"], location_dict["destination"]  # noqa: E501
+        googleMapsApiModule.direction(origin, destination)
         B = "I will certainly help you with that."
         learn_response = 0
         return B, learn_response
-    if "bye" in H.lower().split(" "): #check in words within H
+    if "bye" in H.lower().split(" "):  # check in words within H
        B = "Bye! I'll miss you!"
        return B, learn_response  # exit loop
     if not H:
-        B = "Please say something!"
-        return B, learn_response #empty input
-    # grammar parsing
+        B = "Please say something!"
+ return B, learn_response # empty input + # grammar parsing subj = set() obj = set() verb = set() - triples,root = utilities.parse_sentence(H) + triples, root = utilities.parse_sentence(H) triples = list(triples) for t in triples: if t[0][1][:2] == 'VB': @@ -55,7 +55,7 @@ def message_to_bot(H, clf, learn_response): subj.add(t[2][0]) if relation[-3:] == 'obj': obj.add(t[2][0]) - logging.debug("\t"+"Subject: "+str(subj)+"\n"+"\t"+"Object: "+str(obj)+"\n"+"\t"+"Topic: "+str(root)+"\n"+"\t"+"Verb: "+str(verb)) + logging.debug("\t"+"Subject: "+str(subj)+"\n"+"\t"+"Object: "+str(obj)+"\n"+"\t"+"Topic: "+str(root)+"\n"+"\t"+"Verb: "+str(verb)) # noqa: E501 subj = list(subj) obj = list(obj) verb = list(verb) @@ -67,33 +67,33 @@ def message_to_bot(H, clf, learn_response): proper_nouns.add(t[2][0]) proper_nouns == list(proper_nouns) logging.debug("\t"+"Proper Nouns: "+str(proper_nouns)) - #classification - classification = utilities.classify_sentence(clf,H) - #logging.debug(classification) + # classification + classification = utilities.classify_sentence(clf, H) + # logging.debug(classification) if learn_response == 0: databaseconnect.add_to_database(classification, subj, root, verb, H) if (classification == 'C'): B = databaseconnect.get_chat_response() elif (classification == 'Q'): - B, learn_response = databaseconnect.get_question_response(subj, root, verb) - if learn_response == 1 and (len(proper_nouns) == 0 or (len(proper_nouns) == 1 and H.split(" ", 1)[0] != "Where")): - databaseconnect.add_learnt_statement_to_database(subj, root, verb) - if learn_response == 1 and (len(proper_nouns) >= 2 or (len(proper_nouns) == 1 and H.split(" ", 1)[0] == "Where")): + B, learn_response = databaseconnect.get_question_response(subj, root, verb) # noqa: E501 + if learn_response == 1 and (len(proper_nouns) == 0 or (len(proper_nouns) == 1 and H.split(" ", 1)[0] != "Where")): # noqa: E501 + databaseconnect.add_learnt_statement_to_database(subj, root, verb) # noqa: E501 + if learn_response == 1 and (len(proper_nouns) >= 2 or (len(proper_nouns) == 1 and H.split(" ", 1)[0] == "Where")): # noqa: E501 learn_response = 0 B = "I will certainly help you with that." else: B = "Oops! I'm not trained for this yet." else: B, learn_response = databaseconnect.learn_question_response(H) - if (len(proper_nouns) >= 2 or (len(proper_nouns) >= 1 and H.split(" ", 1)[0] == "Where")) and len(subj) != 0: + if (len(proper_nouns) >= 2 or (len(proper_nouns) >= 1 and H.split(" ", 1)[0] == "Where")) and len(subj) != 0: # noqa: E501 if subj[0] == "distance": if len(proper_nouns) == 2: location_dict["origin"] = proper_nouns.pop() location_dict["destination"] = proper_nouns.pop() - origin, destination = location_dict["origin"], location_dict["destination"] - googleMapsApiModule.direction(origin,destination) + origin, destination = location_dict["origin"], location_dict["destination"] # noqa: E501 + googleMapsApiModule.direction(origin, destination) else: - B = "I didn't get that. Can you please give me the origin location?" + B = "I didn't get that. Can you please give me the origin location?" 
# noqa: E501 learn_response = 2 if len(proper_nouns) == 1: location = proper_nouns.pop() diff --git a/config.py b/config.py index c4b6629..87bf30a 100644 --- a/config.py +++ b/config.py @@ -6,7 +6,7 @@ else: load_dotenv('ENV/.env') -### MAKE SURE you have filled environment variables in `.env` files in `./ENV/` folder +''' MAKE SURE you have filled environment variables in `.env` files in `./ENV/` folder''' # noqa: E501 user = os.getenv("DB_USER") password = os.getenv("DB_PASSWORD") @@ -21,5 +21,5 @@ # your_path_to_stanford-corenlp-x.x.x-models.jar stanford_path_to_models_jar = os.getenv("STANFORD_PATH_TO_MODELS_JAR") -# for eg. 'C:\\Program\ Files\\Java\\jdk1.8.0_201\\bin\\java.exe' or '/usr/local/openjdk-11/bin/java' +# for eg. 'C:\\Program\ Files\\Java\\jdk1.8.0_201\\bin\\java.exe' or '/usr/local/openjdk-11/bin/java' # noqa: E501 javahome = os.getenv("JAVAHOME") diff --git a/constants.py b/constants.py index e8ed995..cf68d5e 100644 --- a/constants.py +++ b/constants.py @@ -1 +1 @@ -BASE_URL = {'direction': "https://www.google.com/maps/dir/?api=1", 'geocoding': "https://www.google.com/maps/search/?api=1&query"} +BASE_URL = {'direction': "https://www.google.com/maps/dir/?api=1", 'geocoding': "https://www.google.com/maps/search/?api=1&query"} # noqa: E501 diff --git a/databaseconnect.py b/databaseconnect.py index 78701ff..ff4de58 100644 --- a/databaseconnect.py +++ b/databaseconnect.py @@ -37,7 +37,7 @@ def connection_to_database(): # logging.debug("Connected") logging.debug('MySQL connected') return conn - except: + except: # noqa: E722 raise Exception("DATABASE NOT CONNECTED") @@ -46,10 +46,10 @@ def connection_to_database(): def setup_database(): db = connection_to_database() cur = db.cursor() - cur.execute("CREATE TABLE IF NOT EXISTS chat_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") - cur.execute("CREATE TABLE IF NOT EXISTS statement_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") - cur.execute("CREATE TABLE IF NOT EXISTS question_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") - cur.execute("CREATE TABLE IF NOT EXISTS directions_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, origin_location VARCHAR(100), destination_location VARCHAR(100))") + cur.execute("CREATE TABLE IF NOT EXISTS chat_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") # noqa: E501 + cur.execute("CREATE TABLE IF NOT EXISTS statement_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") # noqa: E501 + cur.execute("CREATE TABLE IF NOT EXISTS question_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, root_word VARCHAR(40), subject VARCHAR(40), verb VARCHAR(40), sentence VARCHAR(200))") # noqa: E501 + cur.execute("CREATE TABLE IF NOT EXISTS directions_table(id INTEGER PRIMARY KEY AUTO_INCREMENT, origin_location VARCHAR(100), destination_location VARCHAR(100))") # noqa: E501 @logger_config.logger @@ -59,7 +59,7 @@ def add_to_database(classification, subject, root, verb, H): cur = db.cursor() cur = db.cursor(buffered=True) if classification == 'C': - cur.execute(f"INSERT INTO chat_table(root_word,verb,sentence) VALUES ('{root}','{verb}','{H}')") + cur.execute(f"INSERT INTO chat_table(root_word,verb,sentence) VALUES 
('{root}','{verb}','{H}')") # noqa: E501 db.commit() elif classification == 'Q': cur.execute("SELECT sentence FROM question_table") @@ -71,7 +71,7 @@ def add_to_database(classification, subject, root, verb, H): break if exist == 0: # do not add if question already exists - cur.execute(f"INSERT INTO question_table(subject,root_word,verb,sentence) VALUES ('{subject}','{root}','{verb}','{H}')") + cur.execute(f"INSERT INTO question_table(subject,root_word,verb,sentence) VALUES ('{subject}','{root}','{verb}','{H}')") # noqa: E501 db.commit() else: cur.execute("SELECT sentence FROM statement_table") @@ -82,7 +82,7 @@ def add_to_database(classification, subject, root, verb, H): exist = 1 break if exist == 0: # do not add if question already exists - cur.execute(f"INSERT INTO statement_table(subject,root_word,verb,sentence) VALUES ('{subject}','{root}','{verb}','{H}')") + cur.execute(f"INSERT INTO statement_table(subject,root_word,verb,sentence) VALUES ('{subject}','{root}','{verb}','{H}')") # noqa: E501 db.commit() @@ -116,7 +116,7 @@ def get_question_response(subject, root, verb): found = 1 break if found == 1: - cur.execute(f"SELECT sentence FROM statement_table WHERE verb='{verb}'") + cur.execute(f"SELECT sentence FROM statement_table WHERE verb='{verb}'") # noqa: E501 res = cur.fetchone() B = res[0] return B, 0 @@ -132,22 +132,22 @@ def get_question_response(subject, root, verb): found = 1 break if found == 1: - cur.execute(f"SELECT verb FROM statement_table WHERE subject='{subject}'") + cur.execute(f"SELECT verb FROM statement_table WHERE subject='{subject}'") # noqa: E501 res = cur.fetchone() - checkVerb = res[0] # checkVerb is a string while verb is a list. checkVerb ['verb'] + checkVerb = res[0] # checkVerb is a string while verb is a list. checkVerb ['verb'] # noqa: E501 if checkVerb == '[]': - cur.execute(f"SELECT sentence FROM statement_table WHERE subject='{subject}'") + cur.execute(f"SELECT sentence FROM statement_table WHERE subject='{subject}'") # noqa: E501 res = cur.fetchone() B = res[0] return B, 0 else: if checkVerb[2:-2] == verb[0]: - cur.execute(f"SELECT sentence FROM statement_table WHERE subject='{subject}'") + cur.execute(f"SELECT sentence FROM statement_table WHERE subject='{subject}'") # noqa: E501 res = cur.fetchone() B = res[0] return B, 0 else: - B = "Sorry I don't know the response to this. Please train me." + B = "Sorry I don't know the response to this. Please train me." # noqa: E501 return B, 1 else: B = "Sorry I don't know the response to this. Please train me." @@ -158,7 +158,7 @@ def get_question_response(subject, root, verb): def add_learnt_statement_to_database(subject, root, verb): db = connection_to_database() cur = db.cursor() - cur.execute(f"INSERT INTO statement_table(subject,root_word,verb) VALUES ('{subject}','{root}','{verb}')") + cur.execute(f"INSERT INTO statement_table(subject,root_word,verb) VALUES ('{subject}','{root}','{verb}')") # noqa: E501 db.commit() @@ -169,7 +169,7 @@ def learn_question_response(H): cur.execute("SELECT id FROM statement_table ORDER BY id DESC") res = cur.fetchone() last_id = res[0] - cur.execute(f"UPDATE statement_table SET sentence='{H}' WHERE id={last_id}") + cur.execute(f"UPDATE statement_table SET sentence='{H}' WHERE id={last_id}") # noqa: E501 db.commit() B = "Thank you! I have learnt this." 
return B, 0 @@ -185,7 +185,7 @@ def clear_table(table_name): for table in tables_to_be_cleaned: describe_table(cur, table) - if input("Enter 'Y' to confirm cleaning of BOTH tables: ") in ("Y", "y"): + if input("Enter 'Y' to confirm cleaning of BOTH tables: ") in ("Y", "y"): # noqa: E501 for table in tables_to_be_cleaned: cur.execute(f"DELETE FROM {table}") db.commit() diff --git a/features.py b/features.py index 903cd5e..22007aa 100644 --- a/features.py +++ b/features.py @@ -16,7 +16,7 @@ lemma = nltk.wordnet.WordNetLemmatizer() sno = nltk.stem.SnowballStemmer('english') -line = ["xxx", "Oracle 12.2 will be released for on-premises users on 15 March 2017", 0, "S"] +line = ["xxx", "Oracle 12.2 will be released for on-premises users on 15 March 2017", 0, "S"] # noqa: E501 pos = [] # list of PartsOfSpeech output = "" # comma separated string @@ -70,7 +70,8 @@ 'VB-VBN', 'VBZ-NNP'] -# Because python dict's return key-vals in random order, provide ordered list to pass to ML models +'''Because python dict's return key-vals in random order, provide ordered + list to pass to ML models''' feature_keys = ["id", "wordCount", "stemmedCount", @@ -98,8 +99,8 @@ @logger_config.logger def strip_sentence(sentence): sentence = sentence.strip(",") - sentence = ''.join(filter(lambda x: x in string.printable, sentence)) # strip out non-alpha-numerix - sentence = sentence.translate(str.maketrans('', '', string.punctuation)) # strip punctuation + sentence = ''.join(filter(lambda x: x in string.printable, sentence)) # strip out non-alpha-numerix # noqa: E501 + sentence = sentence.translate(str.maketrans('', '', string.punctuation)) # strip punctuation # noqa: E501 return(sentence) @@ -184,7 +185,7 @@ def exists_stemmed_end_NN(stemmed): @logger_config.logger -# Go through the predefined list of start-tuples, 1 / 0 if given startTuple occurs in the list +# Go through the predefined list of start-tuples, 1 / 0 if given startTuple occurs in the list # noqa: E501 def exists_startTuple(startTuple): exists_startTuples = [] for tstring in startTuples: # startTuples defined as global var @@ -196,7 +197,7 @@ def exists_startTuple(startTuple): @logger_config.logger -# Go through the predefined list of end-tuples, 1 / 0 if given Tuple occurs in the list +# Go through the predefined list of end-tuples, 1 / 0 if given Tuple occurs in the list # noqa: E501 def exists_endTuple(endTuple): exists_endTuples = [] for tstring in endTuples: # endTuples defined as global var @@ -208,7 +209,7 @@ def exists_endTuple(endTuple): @logger_config.logger -# loop round list of triples and construct a list of binary 1/0 vals if triples occur in list +# loop round list of triples and construct a list of binary 1/0 vals if triples occur in list # noqa: E501 def exists_triples(triples, tripleSet): exists = [] for tstring in tripleSet: @@ -223,12 +224,12 @@ def exists_triples(triples, tripleSet): # Get a sentence and spit out the POS triples def get_triples(pos): list_of_triple_strings = [] - pos = [i[1] for i in pos] # extract the 2nd element of the POS tuples in list + pos = [i[1] for i in pos] # extract the 2nd element of the POS tuples in list # noqa: E501 n = len(pos) if n > 2: # need to have three items for i in range(0, n-2): - t = "-".join(pos[i:i+3]) # pull out 3 list item from counter, convert to string + t = "-".join(pos[i:i+3]) # pull out 3 list item from counter, convert to string # noqa: E501 list_of_triple_strings.append(t) return list_of_triple_strings @@ -238,7 +239,7 @@ def get_first_last_tuples(sentence): first_last_tuples = [] 
sentenceParsed = word_tokenize(sentence) pos = nltk.pos_tag(sentenceParsed) # Parts Of Speech - pos = [i[1] for i in pos] # extract the 2nd element of the POS tuples in list + pos = [i[1] for i in pos] # extract the 2nd element of the POS tuples in list # noqa: E501 n = len(pos) first = "" @@ -255,9 +256,11 @@ def get_first_last_tuples(sentence): @logger_config.logger def lemmatize(sentence): """ - pass in a sentence as a string, return just core text that has been "lematised" - stop words are removed - could effect ability to detect if this is a question or answer - - depends on import lemma = nltk.wordnet.WordNetLemmatizer() and from nltk.corpus import stopwords + pass in a sentence as a string, return just core text that has + been "lematised" stop words are removed - could effect ability to detect if + this is a question or answer - depends on import + lemma = nltk.wordnet.WordNetLemmatizer() + and from nltk.corpus import stopwords """ stop_words = set(stopwords.words('english')) word_tokens = word_tokenize(sentence) @@ -277,8 +280,10 @@ def lemmatize(sentence): def stematize(sentence): """ pass in a sentence as a string, return just core text stemmed - stop words are removed - could effect ability to detect if this is a question or answer - - depends on import sno = nltk.stem.SnowballStemmer('english') and from nltk.corpus import stopwords + stop words are removed - could effect ability to detect if this is a + question or answer - depends on import + sno = nltk.stem.SnowballStemmer('english') + and from nltk.corpus import stopwords """ stop_words = set(stopwords.words('english')) word_tokens = word_tokenize(sentence) @@ -313,8 +318,8 @@ def get_string(id, sentence, c="X"): verbBeforeNoun = exists_vb_before_nn(pos) - output = id + "," + str(wordCount) + "," + str(stemmedCount) + "," + str(qVerbCombo) + "," + str(qMark) + "," + str(verbBeforeNoun) - header = header + "id,wordCount,stemmedCount,qVerbCombo,qMark,verbBeforeNoun" + output = id + "," + str(wordCount) + "," + str(stemmedCount) + "," + str(qVerbCombo) + "," + str(qMark) + "," + str(verbBeforeNoun) # noqa: E501 + header = header + "id,wordCount,stemmedCount,qVerbCombo,qMark,verbBeforeNoun" # noqa: E501 # list of POS-TYPES to count , generate a list of counts in the CSV line for ptype in ["VBG", "VBZ", "NNP", "NN", "NNS", "NNPS", "PRP", "CD"]: @@ -338,7 +343,7 @@ def get_string(id, sentence, c="X"): header = header + "endTuple" + str(i+1) + "," # look for special Triple Combinations ## - triples = get_triples(pos) # all the triple sequences in the sentence POS list + triples = get_triples(pos) # all the triple sequences in the sentence POS list # noqa: E501 list1 = exists_triples(triples, questionTriples) total = sum(list1) @@ -364,7 +369,7 @@ def features_dict(id, sentence, c="X"): pos = get_pos(sentence) features["id"] = id - features["qMark"] = count_qmark(sentence) # count Qmarks before stripping punctuation + features["qMark"] = count_qmark(sentence) # count Qmarks before stripping punctuation # noqa: E501 sentence = strip_sentence(sentence) stemmed = stematize(sentence) startTuple, endTuple = get_first_last_tuples(sentence) @@ -388,13 +393,13 @@ def features_dict(id, sentence, c="X"): features["endTuple" + str(i)] = list1[i] # look for special Triple Combinations ## - triples = get_triples(pos) # all the triple sequences in the sentence POS list + triples = get_triples(pos) # all the triple sequences in the sentence POS list # noqa: E501 - list1 = exists_triples(triples, questionTriples) # a list of 1/0 for hits on this 
triple-set - features["qTripleScore"] = sum(list1) # add all the triple matches up to get a score + list1 = exists_triples(triples, questionTriples) # a list of 1/0 for hits on this triple-set # noqa: E501 + features["qTripleScore"] = sum(list1) # add all the triple matches up to get a score # noqa: E501 - list1 = exists_triples(triples, statementTriples) # Do same check for the Statement t-set - features["sTripleScore"] = sum(list1) # add all the triple matches up to get a score + list1 = exists_triples(triples, statementTriples) # Do same check for the Statement t-set # noqa: E501 + features["sTripleScore"] = sum(list1) # add all the triple matches up to get a score # noqa: E501 features["class"] = c # Class Type on end @@ -416,8 +421,9 @@ def features_series(features_dict): # MAIN ## if __name__ == '__main__': - # ID, WordCount, StemmedCount, Qmark, VBG, StemmedEnd, StartTuples, EndTuples, QuestionTriples, StatementTriples, Class - # [1/0] [NN-NN?] [3 x binary] [3 x binary] [10 x binary] [10 x binary] + '''ID, WordCount, StemmedCount, Qmark, VBG, StemmedEnd, StartTuples, + EndTuples, QuestionTriples, StatementTriples, Class + [1/0] [NN-NN?] [3 x binary] [3 x binary] [10 x binary] [10 x binary]''' logging.debug("Starting...") @@ -433,7 +439,7 @@ def features_series(features_dict): id = hashlib.md5(str(sentence).encode('utf-8')).hexdigest()[:16] features = features_dict(id, sentence, c) - pos = get_pos(sentence) # NLTK Parts Of Speech, duplicated just for the printout + pos = get_pos(sentence) # NLTK Parts Of Speech, duplicated just for the printout # noqa: E501 logging.debug(pos) logging.debug(features) diff --git a/featuresDump.py b/featuresDump.py index 51b40f6..eb7c23c 100644 --- a/featuresDump.py +++ b/featuresDump.py @@ -1,11 +1,11 @@ # Use the features.py module to dump out features # read in a CSV of sentences and bulk-dump to dump.csv of features -#Input CSV fmt: 1st field is sentence ID, 2nd field is text to process, 3rd field is class +# Input CSV fmt: 1st field is sentence ID, 2nd field is text to process, 3rd field is class # noqa: E501 import csv import sys import hashlib from pathlib import Path -import features # features.py is bepoke util to extract NLTK POS features from sentences +import features # features.py is bepoke util to extract NLTK POS features from sentences # noqa: E501 import logging log = logging.getLogger(__name__) @@ -18,7 +18,6 @@ logging.debug("reading input from ", FNAME) - if len(sys.argv) > 2: FOUT = Path(sys.argv[2]) else: @@ -30,43 +29,44 @@ fout = open(FOUT, 'wt', newline='') keys = ["id", -"wordCount", -"stemmedCount", -"stemmedEndNN", -"CD", -"NN", -"NNP", -"NNPS", -"NNS", -"PRP", -"VBG", -"VBZ", -"startTuple0", -"endTuple0", -"endTuple1", -"endTuple2", -"verbBeforeNoun", -"qMark", -"qVerbCombo", -"qTripleScore", -"sTripleScore", -"class"] + "wordCount", + "stemmedCount", + "stemmedEndNN", + "CD", + "NN", + "NNP", + "NNPS", + "NNS", + "PRP", + "VBG", + "VBZ", + "startTuple0", + "endTuple0", + "endTuple1", + "endTuple2", + "verbBeforeNoun", + "qMark", + "qVerbCombo", + "qTripleScore", + "sTripleScore", + "class" + ] reader = csv.reader(fin) loopCount = 0 -next(reader) #Assume we have a header +next(reader) # Assume we have a header for line in reader: sentence = line[0] - c = line[1] #class-label - id = hashlib.md5(str(sentence).encode('utf-8')).hexdigest()[:16] # generate a unique ID + c = line[1] # class-label + id = hashlib.md5(str(sentence).encode('utf-8')).hexdigest()[:16] # generate a unique ID # noqa: E501 output = "" header = "" - #get 
header and string output - #output, header = features.get_string(id,sentence,c) - f = features.features_dict(id,sentence, c) + # get header and string output + # output, header = features.get_string(id,sentence,c) + f = features.features_dict(id, sentence, c) for key in keys: value = f[key] @@ -74,11 +74,11 @@ output = output + ", " + str(value) if loopCount == 0: # only extract and print header for first dict item - header = header[1:] #strip the first ","" off - logging.debug(header) + header = header[1:] # strip the first ","" off + logging.debug(header) fout.writelines(header + '\n') - output = output[1:] #strip the first ","" off + output = output[1:] # strip the first ","" off loopCount = loopCount + 1 logging.debug(output) diff --git a/googleMapsApiModule.py b/googleMapsApiModule.py index cc14026..306eeb4 100644 --- a/googleMapsApiModule.py +++ b/googleMapsApiModule.py @@ -5,7 +5,7 @@ import requests import logging import logger_config -from datetime import datetime, date +from datetime import datetime, date # noqa: F401 import calendar @@ -17,39 +17,45 @@ @logger_config.logger def direction(origin, destination): - result = gmaps.directions(origin, destination) + result = gmaps.directions(origin, destination) # noqa address = f'origin={origin}&destination={destination}' result_url = f'{BASE_URL["direction"]}&{address.lower().replace(" ", "+")}' logging.debug(result_url) webbrowser.open_new(result_url) return result_url -@logger_config.logger -def get_timestamp(date_time): - yr,mon,day,hr,mi=map(int,date_time.split()) - d=datetime(yr,mon,day,hr,mi) - timestamp = calendar.timegm(d.timetuple()) - return timestamp -@logger_config.logger + + +@logger_config.logger +def get_timestamp(date_time): + yr, mon, day, hr, mi = map(int, date_time.split()) + d = datetime(yr, mon, day, hr, mi) + timestamp = calendar.timegm(d.timetuple()) + return timestamp + + +@logger_config.logger def get_lat_lng(place): - response = requests.get(f'https://maps.googleapis.com/maps/api/geocode/json?address={place}&key={config.key}') - resp_json_payload = response.json() - lat_lng=resp_json_payload['results'][0]['geometry']['location'] - return (lat_lng) -@logger_config.logger -def timezone(place,date_time): #format of datetime should be YYYY MM DD Hrs Mins and place should be a normal string - lat_lng=get_lat_lng(place) - timestamp=get_timestamp(date_time) - response= requests.get(f'https://maps.googleapis.com/maps/api/timezone/json?location={lat_lng["lat"]},{lat_lng["lng"]}×tamp={timestamp}&key={config.key}') - resp_dict= response.json() - for key in resp_dict: - print(f"{key} : {resp_dict[key]}") + response = requests.get(f'https://maps.googleapis.com/maps/api/geocode/json?address={place}&key={config.key}') # noqa: E501 + resp_json_payload = response.json() + lat_lng = resp_json_payload['results'][0]['geometry']['location'] + return (lat_lng) + + +@logger_config.logger +def timezone(place, date_time): # format of datetime should be YYYY MM DD Hrs Mins and place should be a normal string # noqa: E501 + lat_lng = get_lat_lng(place) + timestamp = get_timestamp(date_time) + response = requests.get(f'https://maps.googleapis.com/maps/api/timezone/json?location={lat_lng["lat"]},{lat_lng["lng"]}×tamp={timestamp}&key={config.key}') # noqa: E501 + resp_dict = response.json() + for key in resp_dict: + print(f"{key} : {resp_dict[key]}") @logger_config.logger def geocoding(search_location): result = gmaps.geocode(search_location) logging.debug("Formatted Address: "+result[0]['formatted_address']) - logging.debug("Latitude: 
"+str(result[0]['geometry']['location']['lat']) + " " + "Longitude: " + str(result[0]['geometry']['location']['lng'])) + logging.debug("Latitude: "+str(result[0]['geometry']['location']['lat']) + " " + "Longitude: " + str(result[0]['geometry']['location']['lng'])) # noqa: E501 address = search_location result_url = f'{BASE_URL["geocoding"]}={address.lower().replace(" ", "+")}' webbrowser.open_new(result_url) @@ -59,7 +65,7 @@ def geocoding(search_location): @logger_config.logger def mapsstatic(search_location): address = search_location - result_url = f'https://maps.googleapis.com/maps/api/staticmap?center={address.lower().replace(" ", "+")}&zoom=13&scale=1&size=600x350&maptype=roadmap&key={config.key}&format=png&visual_refresh=true&markers=size:mid%7Ccolor:0xff0000%7Clabel:L%7C{address.lower().replace(" ", "+")}' + result_url = f'https://maps.googleapis.com/maps/api/staticmap?center={address.lower().replace(" ", "+")}&zoom=13&scale=1&size=600x350&maptype=roadmap&key={config.key}&format=png&visual_refresh=true&markers=size:mid%7Ccolor:0xff0000%7Clabel:L%7C{address.lower().replace(" ", "+")}' # noqa: E501 logging.debug(result_url) webbrowser.open_new(result_url) return result_url @@ -76,7 +82,7 @@ def mapsstatic(search_location): @logger_config.logger def elevation(search_location): result = gmaps.geocode(search_location) - json = requests.get(f'https://maps.googleapis.com/maps/api/elevation/json?locations={result[0]["geometry"]["location"]["lat"]},{result[0]["geometry"]["location"]["lng"]}&key={config.key}').json() + json = requests.get(f'https://maps.googleapis.com/maps/api/elevation/json?locations={result[0]["geometry"]["location"]["lat"]},{result[0]["geometry"]["location"]["lng"]}&key={config.key}').json() # noqa: E501 result_value = json['results'][0]['elevation'] position = "above" if result_value > 0 else "below" print(f'{search_location} is {round(result_value,2)} metres {position} sea level') @@ -85,10 +91,10 @@ def elevation(search_location): @logger_config.logger def places(search_location): address = search_location - json = requests.get(f'{BASE_URL["places_textsearch"]}={address.lower().replace(" ", "+")}&inputtype=textquery&fields=photos,formatted_address,place_id&key={config.key}').json() + json = requests.get(f'{BASE_URL["places_textsearch"]}={address.lower().replace(" ", "+")}&inputtype=textquery&fields=photos,formatted_address,place_id&key={config.key}').json() # noqa: E501 logging.debug("Address:"+json["candidates"][0]["formatted_address"]) - details = requests.get(f'{BASE_URL["places_details"]}={json["candidates"][0]["place_id"]}&fields=rating,formatted_phone_number&key={config.key}').json() + details = requests.get(f'{BASE_URL["places_details"]}={json["candidates"][0]["place_id"]}&fields=rating,formatted_phone_number&key={config.key}').json() # noqa: E501 logging.debug("Rating:"+str(details["result"]["rating"])) logging.debug("Phone:"+details["result"]["formatted_phone_number"]) - photo = f'{BASE_URL["places_photos"]}={json["candidates"][0]["photos"][0]["photo_reference"]}&key={config.key}' - webbrowser.open_new(photo) \ No newline at end of file + photo = f'{BASE_URL["places_photos"]}={json["candidates"][0]["photos"][0]["photo_reference"]}&key={config.key}' # noqa: E501 + webbrowser.open_new(photo) diff --git a/init.py b/init.py index 6080658..95e2d6b 100644 --- a/init.py +++ b/init.py @@ -4,8 +4,7 @@ EXIT_CONVERSATION = "Bye! I'll miss you!" 
 while(True):
     received_message = input("You: ")
-    send_message, learn_response = message_to_bot(received_message, clf, learn_response)
+    send_message, learn_response = message_to_bot(received_message, clf, learn_response)  # noqa: E501
     print("MapBot: " + send_message)
     if send_message == EXIT_CONVERSATION:
-        break
-
\ No newline at end of file
+        break
diff --git a/test_googleMapsApiModule.py b/test_googleMapsApiModule.py
index 7c96781..f3f29cf 100644
--- a/test_googleMapsApiModule.py
+++ b/test_googleMapsApiModule.py
@@ -2,29 +2,30 @@
 import config
 import pytest
 from googlemaps.exceptions import ApiError
-
-class TestClass:
+
+
+class TestClass():
     def test_direction_with_valid_input(self):
-        result = googleMapsApiModule.direction('paris','brussels')
-        assert result == "https://www.google.com/maps/dir/?api=1&origin=paris&destination=brussels"
+        result = googleMapsApiModule.direction('paris', 'brussels')
+        assert result == "https://www.google.com/maps/dir/?api=1&origin=paris&destination=brussels"  # noqa: E501
 
     def test_direction_with_invalid_input(self):
         with pytest.raises(ApiError):
-            result = googleMapsApiModule.direction('kjajw','qwiuq')
-            assert result == "https://www.google.com/maps/dir/?api=1&origin=kjajw&destination=qwiuq"
+            result = googleMapsApiModule.direction('kjajw', 'qwiuq')
+            assert result == "https://www.google.com/maps/dir/?api=1&origin=kjajw&destination=qwiuq"  # noqa: E501
 
     def test_geocoding_with_valid_input(self):
         result = googleMapsApiModule.geocoding('denver')
-        assert result == "https://www.google.com/maps/search/?api=1&query=denver"
+        assert result == "https://www.google.com/maps/search/?api=1&query=denver"  # noqa: E501
 
     def test_geocoding_with_invalid_input(self):
         with pytest.raises(IndexError):
             result = googleMapsApiModule.geocoding('kahakd...')
-            assert result == "https://www.google.com/maps/search/?api=1&query=kahakd..."
+            assert result == "https://www.google.com/maps/search/?api=1&query=kahakd..."  # noqa: E501
 
     def test_mapsstatic_with_valid_input(self):
         result = googleMapsApiModule.mapsstatic('sydney')
-        assert result == "https://maps.googleapis.com/maps/api/staticmap?center=sydney&zoom=13&scale=1&size=600x350&maptype=roadmap&key="+config.key+"&format=png&visual_refresh=true&markers=size:mid%7Ccolor:0xff0000%7Clabel:L%7Csydney"
+        assert result == "https://maps.googleapis.com/maps/api/staticmap?center=sydney&zoom=13&scale=1&size=600x350&maptype=roadmap&key="+config.key+"&format=png&visual_refresh=true&markers=size:mid%7Ccolor:0xff0000%7Clabel:L%7Csydney"  # noqa: E501
 
     def test_elevation_with_valid_input(self):
         result = googleMapsApiModule.elevation('moscow')
@@ -33,4 +34,4 @@ def test_elevation_with_valid_input(self):
     def test_elevation_with_invalid_input(self):
         with pytest.raises(IndexError):
             result = googleMapsApiModule.elevation('hihih')
-            assert type(result) is float
\ No newline at end of file
+            assert type(result) is float
diff --git a/utilities.py b/utilities.py
index 4270555..e6b97b7 100644
--- a/utilities.py
+++ b/utilities.py
@@ -16,16 +16,16 @@ def setup_nltk():
 
 @logger_config.logger
 # grammar parsing
-def parse_sentence(user_input):  # returns root word, triples of StanfordDependencyParser
+def parse_sentence(user_input):  # returns root word, triples of StanfordDependencyParser  # noqa: E501
     import os
     from nltk.parse.stanford import StanfordDependencyParser
     import config
     path_to_jar = config.stanford_path_to_jar
     path_to_models_jar = config.stanford_path_to_models_jar
-    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)
+    dependency_parser = StanfordDependencyParser(path_to_jar=path_to_jar, path_to_models_jar=path_to_models_jar)  # noqa: E501
     os.environ['JAVAHOME'] = config.javahome
     result = dependency_parser.raw_parse(user_input)
-    dep = next(result) # get next item from the iterator result
+    dep = next(result)  # get next item from the iterator result
     return dep.triples(), dep.root["word"]
 
 
@@ -37,20 +37,20 @@ def classify_model():
     from sklearn.ensemble import RandomForestClassifier
     FNAME = Path('analysis/featuresDump.csv')
     df = pd.read_csv(filepath_or_buffer=FNAME, )
-    df.columns = df.columns[:].str.strip()  # Strip any leading spaces from col names
+    df.columns = df.columns[:].str.strip()  # Strip any leading spaces from col names  # noqa: E501
     df['class'] = df['class'].map(lambda x: x.strip())
     width = df.shape[1]
     # split into test and training (is_train: True / False col)
     np.random.seed(seed=1)
     df['is_train'] = np.random.uniform(0, 1, len(df)) <= .75
-    train, test = df[df['is_train'] == True], df[df['is_train'] == False]
-    features = df.columns[1:width-1]  # remove the first ID col and last col=classifier
+    train, test = df[df['is_train'] == True], df[df['is_train'] == False]  # noqa: E712, E501
+    features = df.columns[1:width-1]  # remove the first ID col and last col=classifier  # noqa: E501
     # Fit an RF Model for "class" given features
     clf = RandomForestClassifier(n_jobs=2, n_estimators=100)
     clf.fit(train[features], train['class'])
     # Predict against test set
     preds = clf.predict(test[features])
-    predout = pd.DataFrame({'id': test['id'], 'predicted': preds, 'actual': test['class']})
+    predout = pd.DataFrame({'id': test['id'], 'predicted': preds, 'actual': test['class']})  # noqa: E501, F841
     return clf
 
 
@@ -86,6 +86,6 @@ def classify_sentence(clf, user_input):
         values.append(myFeatures[key])
     s = pd.Series(values)
     width = len(s)
-    myFeatures = s[1:width-1]  # All but the last item (this is the class for supervised learning mode)
+ myFeatures = s[1:width-1] # All but the last item (this is the class for supervised learning mode) # noqa: E501 predict = clf.predict([myFeatures]) return predict[0].strip()