Skip to content

Commit

Permalink
Added update to the Python script
Browse files (browse the repository at this point in the history)
Signed-off-by: mereolog <[email protected]>
  • Loading branch information
mereolog committed Oct 2, 2023
1 parent 11b5aa7 commit 01a52f1
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions etc/testing/spelling/onto_spellchecker.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import argparse
import json
import logging
import re
import sys

Expand All @@ -14,7 +15,11 @@
parser.add_argument('--spell', help='Path to file with new spells', metavar='SPELL')
parser.add_argument('--ontology', help='Path to ontology file', metavar='ONTOLOGY')
args = parser.parse_args()


# Route log output to the file spellcheck_log.log, emitting only the bare
# message text for each record (no level name or timestamp prefix).
# NOTE(review): no filemode is passed, so logging.basicConfig's default
# (append) applies and entries from earlier runs stay in the file — confirm
# that accumulating results across runs is intended.
logging.basicConfig(
format='%(message)s',
filename='spellcheck_log.log')

spell = SpellChecker()
with open(file=args.spell) as new_spell_file:
new_spell_words = json.load(new_spell_file)
Expand All @@ -23,7 +28,7 @@
ontology = Graph()
ontology.parse(args.ontology)

bad_words_in_triples = set()
misspelled_words_in_triples = set()

for (subject, predicate, object) in ontology:
if ONTOLOGY_IRI_FILTER in str(subject):
Expand All @@ -37,7 +42,7 @@
trunc_text = re.sub(pattern=HTTP_REGEX, repl='', string=literal_value)
words = spell.split_words(text=trunc_text)
for word in words:
word = word.replace("'s","")
word = word.replace("'s", "")
if word == word.upper():
continue
else:
Expand All @@ -51,10 +56,13 @@
if word not in spell and word[:-1] not in spell and not word.lower() in spell:
if len(word) > 2:
triple = '|'.join([subject, predicate, object])
bad_words_in_triples.add(word + ' in triple: ' + triple)
bad_words_in_triples = list(bad_words_in_triples)
bad_words_in_triples.sort()
for bad_word_in_triple in bad_words_in_triples:
print('Possibly mispelled word', bad_word_in_triple)
if len(bad_words_in_triples) > 0:
misspelled_words_in_triples.add('|'.join([word, triple]))

misspelled_words_in_triples = list(misspelled_words_in_triples)
misspelled_words_in_triples.sort()
for word_in_triple in misspelled_words_in_triples:
logging.warning(msg=word_in_triple)

if len(misspelled_words_in_triples) > 0:
print('Possible spelling errors found - for the details consult spellcheck_log.log file')
sys.exit(1)

0 comments on commit 01a52f1

Please sign in to comment.