Skip to content

Commit

Permalink
bug fix
Browse files: browse the repository at this point in the history
  • Loading branch information
Jean-Baptiste-Camps committed Dec 10, 2024
1 parent 3127f0e commit badb1cc
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions superstyl/preproc/pipe.py
Original file line number Diff line number Diff line change
Expand Up
@@ -103,12 +103,12 @@ def normalise(text, keep_punct=False, keep_sym=False, no_ascii=False):
#out = re.sub(r"[\W0-9]+", " ", text.lower())
out = re.sub(r"[^\p{L}\p{M}]+", " ", text.lower())

if no_ascii is not True:
out = unidecode.unidecode(out)

# Normalise unicode
out = unicodedata.normalize("NFC", out)

if no_ascii is not True:
out = unidecode.unidecode(out)

out = re.sub(r"\s+", " ", out).strip()

return out
Expand Down

0 comments on commit badb1cc

Please sign in to comment.