From badb1cc89b2c70051c834012eb867dbd127abc05 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Camps Date: Tue, 10 Dec 2024 11:57:12 +0100 Subject: [PATCH] bug fix --- superstyl/preproc/pipe.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/superstyl/preproc/pipe.py b/superstyl/preproc/pipe.py index 8fd0c684..9c9ec254 100755 --- a/superstyl/preproc/pipe.py +++ b/superstyl/preproc/pipe.py @@ -103,12 +103,12 @@ def normalise(text, keep_punct=False, keep_sym=False, no_ascii=False): #out = re.sub(r"[\W0-9]+", " ", text.lower()) out = re.sub(r"[^\p{L}\p{M}]+", " ", text.lower()) + if no_ascii is not True: + out = unidecode.unidecode(out) + # Normalise unicode out = unicodedata.normalize("NFC", out) - if no_ascii is not True: - out = unidecode.unidecode(out) - out = re.sub(r"\s+", " ", out).strip() return out