Skip to content

Commit

Permalink
tests for Armenian
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jean-Baptiste-Camps committed Dec 10, 2024
1 parent a9ad3eb commit 358236f
Showing 1 changed file with 23 additions and 0 deletions.
23 changes: 23 additions & 0 deletions tests/test_load_corpus.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -578,9 +578,18 @@ def test_normalise(self):
expected_default = "hello mr how are you doing s o"
self.assertEqual(results, expected_default)
# WHEN
results = superstyl.preproc.pipe.normalise(text, no_ascii=True)
# THEN
expected_default = "hello mr 𓀁 how are you doing ſ õ"
self.assertEqual(results, expected_default)
# WHEN
results = superstyl.preproc.pipe.normalise(text, keep_punct=True)
# THEN
expected_keeppunct = "Hello, Mr. , how are SSSS you; doing? s o"
# WHEN
results = superstyl.preproc.pipe.normalise(text, keep_punct=True, no_ascii=True)
# THEN
expected_keeppunct = "Hello, Mr. 𓀁, how are §§ you; doing? ſ õ"
self.assertEqual(results, expected_keeppunct)
# WHEN
results = superstyl.preproc.pipe.normalise(text, keep_sym=True)
Expand All @@ -600,6 +609,20 @@ def test_normalise(self):
# gives: 'Coucou 😵 💫'
# because of the way NFC normalisation is handled probably

# Test for Armenian
# GIVEN
text = " քան զսակաւս ։ Ահա նշանագրեցի"
# WHEN
results = superstyl.preproc.pipe.normalise(text, no_ascii=True)
# THEN
expected_default = "քան զսակաւս ահա նշանագրեցի"
self.assertEqual(results, expected_default)
# WHEN
results = superstyl.preproc.pipe.normalise(text, keep_punct=True, no_ascii=True)
# THEN
expected_keeppunct = "քան զսակաւս ։ Ահա նշանագրեցի"
self.assertEqual(results, expected_keeppunct)

def test_detect_lang(self):
french = "Bonjour, Monsieur, comment allez-vous?"
# NB: it fails on that !!!
Expand Down

0 comments on commit 358236f

Please sign in to comment.