
Commit 919310d

fix tests
omukazu committed Apr 13, 2024
1 parent 9532593 commit 919310d
Showing 5 changed files with 8 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tests/cli/test_cli.py
@@ -97,7 +97,7 @@ def test_text_input():
# ("EOD", "EOD\nEOD\n"), # TODO
("おはよう", "おはよう\nEOD\n"),
("おはよう.", "おはよう.\nEOD\n"),
("おはよう #今日も一日", "おはよう#今日も一日\nEOD\n"),
("おはよう #今日も一日", "おはよう #今日も一日\nEOD\n"),
("おはよう。\nこんにちは。\nこんばんわ。\n", "おはよう。こんにちは。こんばんわ。\nEOD\n"),
("おはよう。EOD", "おはよう。EOD\nEOD\n"),
],
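The only change in this hunk is that the expected output now keeps the space before the hashtag. Taken together, the (input, expected) pairs suggest that interactive text input is concatenated across newlines, otherwise left verbatim (including inline whitespace), and terminated with an EOD line. A minimal sketch of that reading — the chunk_text_input helper below is hypothetical and is not kwja's actual implementation:

```python
import pytest


def chunk_text_input(text: str) -> str:
    # Hypothetical stand-in, not kwja's code: join the lines, keep inline
    # whitespace verbatim, and terminate the chunk with an "EOD" line.
    return "".join(text.split("\n")) + "\nEOD\n"


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        ("おはよう #今日も一日", "おはよう #今日も一日\nEOD\n"),
        ("おはよう。\nこんにちは。\nこんばんわ。\n", "おはよう。こんにちは。こんばんわ。\nEOD\n"),
    ],
)
def test_chunk_text_input(text: str, expected: str) -> None:
    assert chunk_text_input(text) == expected
```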
4 changes: 2 additions & 2 deletions tests/data/datasets/typo_files/0.jsonl
@@ -1,2 +1,2 @@
{"pre_text": "待つの木が枯れる", "post_text": "松の木が枯れる", "kdrs": ["R:松", "D", "K", "K", "K", "K", "K", "K", "K"], "inss": ["_", "_", "_", "_", "_", "_", "_", "_", "_"]}
{"pre_text": "紹介ことなかった", "post_text": "紹介することがなかった", "kdrs": ["K", "K", "K", "K", "K", "K", "K", "K", "K"], "inss": ["_", "_", "I:する", "_", "I:が", "_", "_", "_", "_"]}
{"pre_text": "待つの木が枯れる", "post_text": "松の木が枯れる", "kdr_tags": ["R:松", "D", "K", "K", "K", "K", "K", "K", "K"], "ins_tags": ["_", "_", "_", "_", "_", "_", "_", "_", "_"]}
{"pre_text": "紹介ことなかった", "post_text": "紹介することがなかった", "kdr_tags": ["K", "K", "K", "K", "K", "K", "K", "K", "K"], "ins_tags": ["_", "_", "I:する", "_", "I:が", "_", "_", "_", "_"]}
2 changes: 0 additions & 2 deletions tests/datamodule/datasets/test_word_dataset.py
@@ -78,7 +78,6 @@ def test_encode(data_dir: Path, word_tokenizer: PreTrainedTokenizerBase, dataset
max_seq_length = 64
document_split_stride = 1
dataset = WordDataset(str(path), word_tokenizer, max_seq_length, document_split_stride, **dataset_kwargs)
- assert dataset.tokenizer_input_format == "text"
dataset.examples[1].load_discourse_document(Document.from_knp(path.joinpath("1.knp").read_text()))
num_examples = len(dataset)

@@ -368,7 +367,6 @@ def test_split_into_words_encode(
dataset = WordDataset(
str(path), split_into_words_word_tokenizer, max_seq_length, document_split_stride, **dataset_kwargs
)
- assert dataset.tokenizer_input_format == "words"
dataset.examples[1].load_discourse_document(Document.from_knp(path.joinpath("1.knp").read_text()))
num_examples = len(dataset)

2 changes: 1 addition & 1 deletion tests/utils/test_reading_prediction.py
@@ -33,7 +33,7 @@
[False, False, False, False, True],
[False, False, False, False, False],
],
["ふせい", "", "な", "にゅうりょく"],
["ふせい", "_", "な", "にゅうりょく"],
),
(
["[UNK]", "ふせい", "[ID]", "[ID]", "にゅうりょく"],
8 changes: 4 additions & 4 deletions tests/utils/test_word_normalization.py
@@ -4,7 +4,7 @@
from kwja.utils.word_normalization import (
MorphemeDenormalizer,
MorphemeNormalizer,
- get_normalized,
+ get_normalized_surf,
get_word_norm_op_tags,
)

@@ -73,7 +73,7 @@

@pytest.mark.parametrize(("surf", "ops", "expected"), wellformed_list)
def test_gen_normalized_surf(surf, ops, expected):
- assert get_normalized(surf, ops, strict=True) == expected
+ assert get_normalized_surf(surf, ops, strict=True) == expected


@pytest.mark.parametrize(("surf", "expected", "normalized"), wellformed_list)
@@ -93,12 +93,12 @@ def test_get_normalization_opns(surf, expected, normalized):
@pytest.mark.parametrize(("surf", "ops", "expected"), malformed_list)
def test_gen_normalized_surf_malformed(surf, ops, expected):
with pytest.raises(ValueError):
- get_normalized(surf, ops, strict=True)
+ get_normalized_surf(surf, ops, strict=True)


@pytest.mark.parametrize(("surf", "ops", "expected"), malformed_list)
def test_gen_normalized_surf_malformed_loose(surf, ops, expected):
- assert get_normalized(surf, ops, strict=False) == expected
+ assert get_normalized_surf(surf, ops, strict=False) == expected


def test_morpheme_normalizer():
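These hunks only track the rename of get_normalized to get_normalized_surf; the contract the tests exercise is that a malformed op sequence raises ValueError when strict=True and falls back to a best-effort surface when strict=False. A minimal sketch of that contract, using an invented op tag set ("K" = keep, "D" = delete) that is not claimed to match kwja's actual word-normalization tags:

```python
from typing import List


def toy_normalized_surf(surf: str, ops: List[str], strict: bool = True) -> str:
    """Toy stand-in (not kwja's implementation) for the strict/loose contract."""
    if len(ops) != len(surf):
        if strict:
            raise ValueError(f"got {len(ops)} ops for {len(surf)} characters")
        ops = (ops + ["K"] * len(surf))[: len(surf)]  # loose mode: pad or truncate
    pieces = []
    for char, op in zip(surf, ops):
        if op == "K":
            pieces.append(char)  # keep the character
        elif op == "D":
            continue  # drop the character
        elif strict:
            raise ValueError(f"unknown op tag: {op}")
        else:
            pieces.append(char)  # loose mode: keep the character unchanged
    return "".join(pieces)


assert toy_normalized_surf("こーんにちわ", ["K", "D", "K", "K", "K", "K"]) == "こんにちわ"
```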
