From 4609309b73c60318766bf9336569068e4337f24f Mon Sep 17 00:00:00 2001 From: Steve Piercy Date: Sun, 13 Mar 2022 01:19:02 -0800 Subject: [PATCH 1/5] Add valid word characters as a configuration option See https://github.com/sphinx-contrib/spelling/issues/119#issuecomment-918367773 --- sphinxcontrib/spelling/__init__.py | 2 ++ sphinxcontrib/spelling/builder.py | 1 + sphinxcontrib/spelling/checker.py | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sphinxcontrib/spelling/__init__.py b/sphinxcontrib/spelling/__init__.py index 02b9362c..ed57141f 100644 --- a/sphinxcontrib/spelling/__init__.py +++ b/sphinxcontrib/spelling/__init__.py @@ -54,6 +54,8 @@ def setup(app): app.add_config_value('spelling_filters', [], 'env') # Set a user-provided list of files to ignore app.add_config_value('spelling_exclude_patterns', [], 'env') + # Set valid word characters + app.add_config_value('spelling_valid_word_characters', 'en_US', 'env') # Choose whether or not the misspelled output should be displayed # in the terminal app.add_config_value('spelling_verbose', True, 'env') diff --git a/sphinxcontrib/spelling/builder.py b/sphinxcontrib/spelling/builder.py index 55d7449c..0ac2ca38 100644 --- a/sphinxcontrib/spelling/builder.py +++ b/sphinxcontrib/spelling/builder.py @@ -86,6 +86,7 @@ def init(self): word_list_filename=word_list, filters=f, context_line=self.config.spelling_show_whole_line, + valid_word_chars=self.config.spelling_valid_word_characters, ) def _load_filter_classes(self, filters): diff --git a/sphinxcontrib/spelling/checker.py b/sphinxcontrib/spelling/checker.py index 6e56e5b8..e61b298c 100644 --- a/sphinxcontrib/spelling/checker.py +++ b/sphinxcontrib/spelling/checker.py @@ -21,7 +21,8 @@ class SpellingChecker: """ def __init__(self, lang, suggest, word_list_filename, - tokenizer_lang='en_US', filters=None, context_line=False): + tokenizer_lang='en_US', filters=None, context_line=False, + valid_word_chars=None): if enchant_import_error is not None: 
raise RuntimeError( 'Cannot instantiate SpellingChecker ' From d24ca715dec19d2621e6b8d445fe6c865b686f52 Mon Sep 17 00:00:00 2001 From: Steve Piercy Date: Tue, 15 Mar 2022 01:51:07 -0700 Subject: [PATCH 2/5] Change configuration value of `spelling_valid_word_characters` to a tuple of strings --- sphinxcontrib/spelling/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sphinxcontrib/spelling/__init__.py b/sphinxcontrib/spelling/__init__.py index ed57141f..a70fd938 100644 --- a/sphinxcontrib/spelling/__init__.py +++ b/sphinxcontrib/spelling/__init__.py @@ -55,7 +55,7 @@ def setup(app): # Set a user-provided list of files to ignore app.add_config_value('spelling_exclude_patterns', [], 'env') # Set valid word characters - app.add_config_value('spelling_valid_word_characters', 'en_US', 'env') + app.add_config_value('spelling_valid_word_characters', ("'","-",), 'env') # Choose whether or not the misspelled output should be displayed # in the terminal app.add_config_value('spelling_verbose', True, 'env') From 86cc577d4d77f80b453631fd376e1b5b9df12065 Mon Sep 17 00:00:00 2001 From: Steve Piercy Date: Tue, 15 Mar 2022 23:48:36 -0700 Subject: [PATCH 3/5] Add unreleased history change log entry --- docs/source/history.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/source/history.rst b/docs/source/history.rst index 53da8853..f8de817f 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -6,6 +6,27 @@ unmaintained +Unreleased +========== + +Bug Fixes +--------- + +None. + +New Features +------------ + +- `#147 <https://github.com/sphinx-contrib/spelling/pull/147>`__ + Adds the ability to pass in valid word characters as the + configuration option ``spelling_valid_word_characters``, where its + value is a tuple of strings. For example, + ``spelling_valid_word_characters = ("'","-",)``. By default + PyEnchant considers only ``'`` as a valid word character. 
This pull + request also adds the hyphen ``-`` as a valid word character to + prevent PyEnchant from tokenizing hyphenated words into two words + and marking them as misspellings. + 7.3.2 ===== From 5fb96cac22edc636484e4d2356341d4496a02225 Mon Sep 17 00:00:00 2001 From: Steve Piercy Date: Tue, 15 Mar 2022 23:49:42 -0700 Subject: [PATCH 4/5] Fix lint --- docs/source/history.rst | 2 +- sphinxcontrib/spelling/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/history.rst b/docs/source/history.rst index f8de817f..90d2b1ac 100644 --- a/docs/source/history.rst +++ b/docs/source/history.rst @@ -21,7 +21,7 @@ New Features Adds the ability to pass in valid word characters as the configuration option ``spelling_valid_word_characters``, where its value is a tuple of strings. For example, - ``spelling_valid_word_characters = ("'","-",)``. By default + ``spelling_valid_word_characters = ("'", "-",)``. By default PyEnchant considers only ``'`` as a valid word character. 
This pull request also adds the hyphen ``-`` as a valid word character to prevent PyEnchant from tokenizing hyphenated words into two words diff --git a/sphinxcontrib/spelling/__init__.py b/sphinxcontrib/spelling/__init__.py index a70fd938..058633ce 100644 --- a/sphinxcontrib/spelling/__init__.py +++ b/sphinxcontrib/spelling/__init__.py @@ -55,7 +55,7 @@ def setup(app): # Set a user-provided list of files to ignore app.add_config_value('spelling_exclude_patterns', [], 'env') # Set valid word characters - app.add_config_value('spelling_valid_word_characters', ("'","-",), 'env') + app.add_config_value('spelling_valid_word_characters', ("'", "-",), 'env') # Choose whether or not the misspelled output should be displayed # in the terminal app.add_config_value('spelling_verbose', True, 'env') From dded9ce3337cd02fc2cbd969fcde1a09aede937c Mon Sep 17 00:00:00 2001 From: Steve Piercy Date: Wed, 16 Mar 2022 04:40:46 -0700 Subject: [PATCH 5/5] Add valid word spelling --- docs/source/spelling_wordlist.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/spelling_wordlist.txt b/docs/source/spelling_wordlist.txt index 7132e29a..1dee3d2d 100644 --- a/docs/source/spelling_wordlist.txt +++ b/docs/source/spelling_wordlist.txt @@ -6,6 +6,7 @@ pypi reStructuredText sphinxcontrib tokenizer +tokenizing txt wikis wordfiles