From 2805e5d49eb99db4b327ec4de20955d2aefee037 Mon Sep 17 00:00:00 2001
From: lv <992526373@qq.com>
Date: Sun, 28 Apr 2024 17:12:13 +0800
Subject: [PATCH] fix: add tests for whitespace token issue in JiebaTokenizer

---
 tests/nlu/tokenizers/test_jieba_tokenizer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/nlu/tokenizers/test_jieba_tokenizer.py b/tests/nlu/tokenizers/test_jieba_tokenizer.py
index c0628f901a87..f2d93471d84a 100644
--- a/tests/nlu/tokenizers/test_jieba_tokenizer.py
+++ b/tests/nlu/tokenizers/test_jieba_tokenizer.py
@@ -37,6 +37,11 @@ def create_jieba(config: Optional[Dict] = None) -> JiebaTokenizer:
             ["Micheal", "你好", "吗", "?"],
             [(0, 7), (7, 9), (9, 10), (10, 11)],
         ),
+        (
+            "安装 rasa 应用",
+            ["安装", "rasa", "应用"],
+            [(0, 2), (3, 7), (8, 10)],
+        ),
     ],
 )
 def test_jieba(text, expected_tokens, expected_indices):
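
Note: this patch only adds the regression test; the JiebaTokenizer change it
exercises is not part of the diff. As a rough sketch of the behavior under
test (hypothetical, not Rasa's actual implementation), the assumption is that
jieba emits the space between "安装" and "rasa" as its own token, and the
tokenizer is expected to drop such whitespace-only tokens while keeping the
character offsets of the surviving tokens intact:

    from typing import List, Tuple

    import jieba


    def tokenize_skipping_whitespace(text: str) -> List[Tuple[str, int, int]]:
        """Return jieba's (word, start, end) triples, minus whitespace-only tokens."""
        return [
            (word, start, end)
            for word, start, end in jieba.tokenize(text)
            if word.strip()  # drop tokens that are pure whitespace, e.g. " "
        ]


    if __name__ == "__main__":
        # Per the new test case, this should yield:
        # [('安装', 0, 2), ('rasa', 3, 7), ('应用', 8, 10)]
        print(tokenize_skipping_whitespace("安装 rasa 应用"))

Because the offsets come straight from jieba.tokenize rather than being
recomputed after filtering, skipping the whitespace token leaves the gaps
(0, 2) -> (3, 7) -> (8, 10) that the new expected_indices assert.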