diff --git a/etc/testing/spelling/onto_spellchecker.py b/etc/testing/spelling/onto_spellchecker.py index 48fbb883..35037844 100644 --- a/etc/testing/spelling/onto_spellchecker.py +++ b/etc/testing/spelling/onto_spellchecker.py @@ -1,5 +1,6 @@ import argparse import json +import logging import re import sys @@ -14,7 +15,11 @@ parser.add_argument('--spell', help='Path to file with new spells', metavar='SPELL') parser.add_argument('--ontology', help='Path to ontology file', metavar='ONTOLOGY') args = parser.parse_args() - + + logging.basicConfig( + format='%(message)s', + filename='spellcheck_log.log') + spell = SpellChecker() with open(file=args.spell) as new_spell_file: new_spell_words = json.load(new_spell_file) @@ -23,7 +28,7 @@ ontology = Graph() ontology.parse(args.ontology) - bad_words_in_triples = set() + misspelled_words_in_triples = set() for (subject, predicate, object) in ontology: if ONTOLOGY_IRI_FILTER in str(subject): @@ -37,7 +42,7 @@ trunc_text = re.sub(pattern=HTTP_REGEX, repl='', string=literal_value) words = spell.split_words(text=trunc_text) for word in words: - word = word.replace("'s","") + word = word.replace("'s", "") if word == word.upper(): continue else: @@ -51,10 +56,13 @@ if word not in spell and word[:-1] not in spell and not word.lower() in spell: if len(word) > 2: triple = '|'.join([subject, predicate, object]) - bad_words_in_triples.add(word + ' in triple: ' + triple) - bad_words_in_triples = list(bad_words_in_triples) - bad_words_in_triples.sort() - for bad_word_in_triple in bad_words_in_triples: - print('Possibly mispelled word', bad_word_in_triple) - if len(bad_words_in_triples) > 0: + misspelled_words_in_triples.add('|'.join([word, triple])) + + misspelled_words_in_triples = list(misspelled_words_in_triples) + misspelled_words_in_triples.sort() + for word_in_triple in misspelled_words_in_triples: + logging.warning(msg=word_in_triple) + + if len(misspelled_words_in_triples) > 0: + print('Possible spelling errors found - for the details consult spellcheck_log.log file') sys.exit(1)