diff --git a/MANIFEST.in b/MANIFEST.in index 3dd5dae..fd272b7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -12,4 +12,5 @@ include klpt/data/test_cases_stem.json include klpt/data/test_cases_tokenize.json include klpt/data/test_cases.json include klpt/data/tokenize.json -include klpt/data/wergor.json \ No newline at end of file +include klpt/data/wergor.json +include klpt/data/kmr-Latn.att \ No newline at end of file diff --git a/README.md b/README.md index f8f6019..85e6b6d 100644 --- a/README.md +++ b/README.md @@ -20,13 +20,12 @@ Documentation + + PyPI version +

- - - ### Welcome / *Hûn bi xêr hatin* / بە خێر بێن! 🙂 @@ -247,7 +246,7 @@ The Stem module deals with various tasks, mainly through the following functions - `correct_spelling`: spell error correction - `analyze`: morphological analysis -Please note that only Sorani is supported in this version in this module. The module is based on the [Kurdish Hunspell project](https://github.com/sinaahmadi/KurdishHunspell). +The module is based on the [Kurdish Hunspell project](https://github.com/sinaahmadi/KurdishHunspell) for Sorani and the [Apertium project](https://github.com/apertium/apertium-kmr) for Kurmanji. Please note that this module is still under active development and we are aware of its current shortcomings. ```python >>> from klpt.stem import Stem @@ -258,6 +257,10 @@ False (False, ['ستاندبووت', 'سووتاندبووت', 'سووڕاندبووت', 'ڕووتاندبووت', 'فەوتاندبووت', 'بووژاندبووت']) >>> stemmer.analyze("دیتبامن") [{'pos': 'verb', 'description': 'past_stem_transitive_active', 'base': 'دیت', 'terminal_suffix': 'بامن'}] + +>>> stemmer = Stem("Kurmanji", "Latin") +>>> stemmer.analyze("dibêjim") +[{'base': 'gotin', 'description': 'vblex_tv_pri_p1_sg', 'pos': '', 'terminal_suffix': '', 'formation': ''}] ``` 📖 **Please note that a more complete documentation of the toolkit will be available soon.** diff --git a/dist/klpt-0.1.1-py3-none-any.whl b/dist/klpt-0.1.1-py3-none-any.whl new file mode 100644 index 0000000..435daa0 Binary files /dev/null and b/dist/klpt-0.1.1-py3-none-any.whl differ diff --git a/dist/klpt-0.1.1.tar.gz b/dist/klpt-0.1.1.tar.gz new file mode 100644 index 0000000..dbe7ca6 Binary files /dev/null and b/dist/klpt-0.1.1.tar.gz differ diff --git a/dist/klpt-0.1.2-py3-none-any.whl b/dist/klpt-0.1.2-py3-none-any.whl new file mode 100644 index 0000000..2126bde Binary files /dev/null and b/dist/klpt-0.1.2-py3-none-any.whl differ diff --git a/dist/klpt-0.1.2.tar.gz b/dist/klpt-0.1.2.tar.gz new file mode 100644 index 0000000..48edca6 Binary files /dev/null and 
b/dist/klpt-0.1.2.tar.gz differ diff --git a/klpt/stem.py b/klpt/stem.py index 578492b..78890f3 100644 --- a/klpt/stem.py +++ b/klpt/stem.py @@ -35,6 +35,10 @@ class Stem: (False, ['ستاندبووت', 'سووتاندبووت', 'سووڕاندبووت', 'ڕووتاندبووت', 'فەوتاندبووت', 'بووژاندبووت']) >>> stemmer.analyze("دیتبامن") [{'pos': 'verb', 'description': 'past_stem_transitive_active', 'base': 'دیت', 'terminal_suffix': 'بامن'}] + + >>> stemmer = Stem("Kurmanji", "Latin") + >>> stemmer.analyze("dibêjim") + [{'base': 'gotin', 'description': 'vblex_tv_pri_p1_sg', 'pos': '', 'terminal_suffix': '', 'formation': ''}] ``` """ diff --git a/setup.py b/setup.py index 1471a6d..f25035e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="klpt", - version="0.1.0", + version="0.1.2", description="Kurdish Language Processing Toolkit", long_description=long_description, long_description_content_type="text/markdown",