
Commit 919310d

fix tests
omukazu committed Apr 13, 2024
1 parent 9532593 commit 919310d
Showing 5 changed files with 8 additions and 10 deletions.
2 changes: 1 addition & 1 deletion tests/cli/test_cli.py
@@ -97,7 +97,7 @@ def test_text_input():
# ("EOD", "EOD\nEOD\n"), # TODO
("おはよう", "おはよう\nEOD\n"),
("おはよう.", "おはよう.\nEOD\n"),
("おはよう #今日も一日", "おはよう#今日も一日\nEOD\n"),
("おはよう #今日も一日", "おはよう #今日も一日\nEOD\n"),
("おはよう。\nこんにちは。\nこんばんわ。\n", "おはよう。こんにちは。こんばんわ。\nEOD\n"),
("おはよう。EOD", "おはよう。EOD\nEOD\n"),
],
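The only change in this hunk is that the expected output now keeps the space before the hashtag. Taken together, the (input, expected) pairs suggest that interactive text input is concatenated across newlines, otherwise left verbatim (including inline whitespace), and terminated with an EOD line. A minimal sketch of that reading — the chunk_text_input helper below is hypothetical and is not kwja's actual implementation:

```python
import pytest


def chunk_text_input(text: str) -> str:
    # Hypothetical stand-in, not kwja's code: join the lines, keep inline
    # whitespace verbatim, and terminate the chunk with an "EOD" line.
    return "".join(text.split("\n")) + "\nEOD\n"


@pytest.mark.parametrize(
    ("text", "expected"),
    [
        ("おはよう #今日も一日", "おはよう #今日も一日\nEOD\n"),
        ("おはよう。\nこんにちは。\nこんばんわ。\n", "おはよう。こんにちは。こんばんわ。\nEOD\n"),
    ],
)
def test_chunk_text_input(text: str, expected: str) -> None:
    assert chunk_text_input(text) == expected
```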
4 changes: 2 additions & 2 deletions tests/data/datasets/typo_files/0.jsonl
@@ -1,2 +1,2 @@
{"pre_text": "待つの木が枯れる", "post_text": "松の木が枯れる", "kdrs": ["R:松", "D", "K", "K", "K", "K", "K", "K", "K"], "inss": ["_", "_", "_", "_", "_", "_", "_", "_", "_"]}
{"pre_text": "紹介ことなかった", "post_text": "紹介することがなかった", "kdrs": ["K", "K", "K", "K", "K", "K", "K", "K", "K"], "inss": ["_", "_", "I:する", "_", "I:が", "_", "_", "_", "_"]}
{"pre_text": "待つの木が枯れる", "post_text": "松の木が枯れる", "kdr_tags": ["R:松", "D", "K", "K", "K", "K", "K", "K", "K"], "ins_tags": ["_", "_", "_", "_", "_", "_", "_", "_", "_"]}
{"pre_text": "紹介ことなかった", "post_text": "紹介することがなかった", "kdr_tags": ["K", "K", "K", "K", "K", "K", "K", "K", "K"], "ins_tags": ["_", "_", "I:する", "_", "I:が", "_", "_", "_", "_"]}
2 changes: 0 additions & 2 deletions tests/datamodule/datasets/test_word_dataset.py
@@ -78,7 +78,6 @@ def test_encode(data_dir: Path, word_tokenizer: PreTrainedTokenizerBase, dataset
max_seq_length = 64
document_split_stride = 1
dataset = WordDataset(str(path), word_tokenizer, max_seq_length, document_split_stride, **dataset_kwargs)
- assert dataset.tokenizer_input_format == "text"
dataset.examples[1].load_discourse_document(Document.from_knp(path.joinpath("1.knp").read_text()))
num_examples = len(dataset)

@@ -368,7 +367,6 @@ def test_split_into_words_encode(
dataset = WordDataset(
str(path), split_into_words_word_tokenizer, max_seq_length, document_split_stride, **dataset_kwargs
)
- assert dataset.tokenizer_input_format == "words"
dataset.examples[1].load_discourse_document(Document.from_knp(path.joinpath("1.knp").read_text()))
num_examples = len(dataset)

2 changes: 1 addition & 1 deletion tests/utils/test_reading_prediction.py
@@ -33,7 +33,7 @@
[False, False, False, False, True],
[False, False, False, False, False],
],
["ふせい", "", "な", "にゅうりょく"],
["ふせい", "_", "な", "にゅうりょく"],
),
(
["[UNK]", "ふせい", "[ID]", "[ID]", "にゅうりょく"],
8 changes: 4 additions & 4 deletions tests/utils/test_word_normalization.py
@@ -4,7 +4,7 @@
from kwja.utils.word_normalization import (
MorphemeDenormalizer,
MorphemeNormalizer,
- get_normalized,
+ get_normalized_surf,
get_word_norm_op_tags,
)

@@ -73,7 +73,7 @@

@pytest.mark.parametrize(("surf", "ops", "expected"), wellformed_list)
def test_gen_normalized_surf(surf, ops, expected):
- assert get_normalized(surf, ops, strict=True) == expected
+ assert get_normalized_surf(surf, ops, strict=True) == expected


@pytest.mark.parametrize(("surf", "expected", "normalized"), wellformed_list)
@@ -93,12 +93,12 @@ def test_get_normalization_opns(surf, expected, normalized):
@pytest.mark.parametrize(("surf", "ops", "expected"), malformed_list)
def test_gen_normalized_surf_malformed(surf, ops, expected):
with pytest.raises(ValueError):
- get_normalized(surf, ops, strict=True)
+ get_normalized_surf(surf, ops, strict=True)


@pytest.mark.parametrize(("surf", "ops", "expected"), malformed_list)
def test_gen_normalized_surf_malformed_loose(surf, ops, expected):
- assert get_normalized(surf, ops, strict=False) == expected
+ assert get_normalized_surf(surf, ops, strict=False) == expected


def test_morpheme_normalizer():
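These hunks only track the rename of get_normalized to get_normalized_surf; the contract the tests exercise is that a malformed op sequence raises ValueError when strict=True and falls back to a best-effort surface when strict=False. A minimal sketch of that contract, using an invented op tag set ("K" = keep, "D" = delete) that is not claimed to match kwja's actual word-normalization tags:

```python
from typing import List


def toy_normalized_surf(surf: str, ops: List[str], strict: bool = True) -> str:
    """Toy stand-in (not kwja's implementation) for the strict/loose contract."""
    if len(ops) != len(surf):
        if strict:
            raise ValueError(f"got {len(ops)} ops for {len(surf)} characters")
        ops = (ops + ["K"] * len(surf))[: len(surf)]  # loose mode: pad or truncate
    pieces = []
    for char, op in zip(surf, ops):
        if op == "K":
            pieces.append(char)  # keep the character
        elif op == "D":
            continue  # drop the character
        elif strict:
            raise ValueError(f"unknown op tag: {op}")
        else:
            pieces.append(char)  # loose mode: keep the character unchanged
    return "".join(pieces)


assert toy_normalized_surf("こーんにちわ", ["K", "D", "K", "K", "K", "K"]) == "こんにちわ"
```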
