From b17204b3f116307d7161b8873e2020b7b0571d59 Mon Sep 17 00:00:00 2001 From: ikawaha Date: Tue, 29 Dec 2020 23:18:33 +0900 Subject: [PATCH 1/3] Update --- tokenizer/token.go | 12 +------- tokenizer/token_test.go | 2 +- tokenizer/tokenizer_option_test.go | 26 ++++++++-------- tokenizer/tokenizer_test.go | 48 ++++++++++++++++++------------ 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index 3bd92fbe..4d859710 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -207,15 +207,5 @@ func (t Token) pickupFromFeatures(key string) (string, bool) { // String returns a string representation of a token. func (t Token) String() string { - return fmt.Sprintf("%q (%d: %d, %d) %v [%d]", t.Surface, t.Position, t.Start, t.End, t.Class, t.ID) -} - -// Equal returns true if tokens are equal. This function compares values other than the `Index` field. -func (t Token) Equal(v Token) bool { - return t.ID == v.ID && - t.Class == v.Class && - t.Position == v.Position && - t.Start == v.Start && - t.End == v.End && - t.Surface == v.Surface + return fmt.Sprintf("%d:%q (%d: %d, %d) %v [%d]", t.Index, t.Surface, t.Position, t.Start, t.End, t.Class, t.ID) } diff --git a/tokenizer/token_test.go b/tokenizer/token_test.go index 7e818dd5..0183f502 100644 --- a/tokenizer/token_test.go +++ b/tokenizer/token_test.go @@ -354,7 +354,7 @@ func Test_TokenString(t *testing.T) { End: 1, Surface: "テスト", } - want := `"テスト" (0: 0, 1) DUMMY [123]` + want := `0:"テスト" (0: 0, 1) DUMMY [123]` got := fmt.Sprintf("%v", tok) if got != want { t.Errorf("want %v, got %v", want, got) diff --git a/tokenizer/tokenizer_option_test.go b/tokenizer/tokenizer_option_test.go index 058795cf..c4ae7bb9 100644 --- a/tokenizer/tokenizer_option_test.go +++ b/tokenizer/tokenizer_option_test.go @@ -46,15 +46,15 @@ func Test_AnalyzeWithUserDict(t *testing.T) { } tokens := tnz.Analyze("関西国際空港", Normal) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, - {ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, + {Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, } if len(tokens) != len(expected) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok) } } @@ -86,15 +86,15 @@ func Test_AnalyzeWithSearchModeWithUserDict(t *testing.T) { tokens := tnz.Analyze("関西国際空港", Search) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, - {ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, + {Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, } if len(tokens) != len(expected) { t.Fatalf("expected %v, got %v", expected, tokens) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok) } } @@ -126,15 +126,15 @@ func Test_AnalyzeWithExtendedModeWithUserDict(t *testing.T) { tokens := tnz.Analyze("関西国際空港", Extended) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, - {ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)}, + {Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")}, } if len(tokens) != len(expected) { t.Fatalf("expected %v, got %v", expected, tokens) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok) } } @@ -158,7 +158,7 @@ func TestTokenizer_Analyze_OmitBOSEOS(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok) } } diff --git a/tokenizer/tokenizer_test.go b/tokenizer/tokenizer_test.go index 9ee95a3d..fd1b6bc6 100644 --- a/tokenizer/tokenizer_test.go +++ b/tokenizer/tokenizer_test.go @@ -15,6 +15,16 @@ const ( testDictPath = "../testdata/ipa.dict" ) +func equalTokens(lhs, rhs Token) bool { + return lhs.Index == rhs.Index && + lhs.ID == rhs.ID && + lhs.Class == rhs.Class && + lhs.Position == rhs.Position && + lhs.Start == rhs.Start && + lhs.End == rhs.End && + lhs.Surface == rhs.Surface +} + func Example_tokenize_mode() { d, err := dict.LoadDictFile(testDictPath) if err != nil { @@ -75,14 +85,14 @@ func Test_AnalyzeEmptyInput(t *testing.T) { } tokens := tnz.Analyze("", Normal) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: -1, Surface: "EOS"}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: -1, Surface: "EOS"}, } if len(tokens) != len(expected) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -100,15 +110,15 @@ func Test_Analyze(t *testing.T) { input := "関西国際空港" tokens := tnz.Analyze(input, Normal) want := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: 372978, Surface: input, Position: 0, Start: 0, End: 6, Class: TokenClass(lattice.KNOWN)}, - {ID: -1, Surface: "EOS", Position: len(input), Start: 6, End: 6}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: 372978, Surface: input, Position: 0, Start: 0, End: 6, Class: TokenClass(lattice.KNOWN)}, + {Index: 2, ID: -1, Surface: "EOS", Position: len(input), Start: 6, End: 6}, } if len(tokens) != len(want) { t.Fatalf("got %v, want %v", tokens, want) } for i, tok := range tokens { - if !tok.Equal(want[i]) { + if !equalTokens(tok, want[i]) { t.Errorf("got %+v, want %+v", tok, want[i]) } } @@ -125,15 +135,15 @@ func Test_AnalyzeUnknown(t *testing.T) { } tokens := tnz.Analyze("ポポピ", Normal) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: 34, Surface: "ポポピ", Start: 0, End: 3, Class: TokenClass(lattice.UNKNOWN)}, - {ID: -1, Surface: "EOS", Start: 3, End: 3, Position: 9}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: 34, Surface: "ポポピ", Start: 0, End: 3, Class: TokenClass(lattice.UNKNOWN)}, + {Index: 2, ID: -1, Surface: "EOS", Start: 3, End: 3, Position: 9}, } if len(tokens) != len(expected) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -189,14 +199,14 @@ func Test_AnalyzeWithSearchModeEmptyInput(t *testing.T) { } tokens := tnz.Analyze("", Search) expected := []Token{ - {ID: -1, Surface: "BOS"}, - {ID: -1, Surface: "EOS"}, + {Index: 0, ID: -1, Surface: "BOS"}, + {Index: 1, ID: -1, Surface: "EOS"}, } if len(tokens) != len(expected) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -224,7 +234,7 @@ func Test_AnalyzeWithSearchMode(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if tok.Index != expected[i].Index || !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -250,7 +260,7 @@ func Test_AnalyzeWithSearchModeUnknown(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if tok.Index != expected[i].Index || !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -275,7 +285,7 @@ func Test_AnalyzeWithExtendedModeEmpty(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if tok.Index != expected[i].Index || !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -303,7 +313,7 @@ func Test_AnalyzeWithExtendedMode(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if tok.Index != expected[i].Index || !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } @@ -331,7 +341,7 @@ func Test_AnalyzeWithExtendedModeUnknown(t *testing.T) { t.Fatalf("got %v, expected %v", tokens, expected) } for i, tok := range tokens { - if tok.Index != expected[i].Index || !tok.Equal(expected[i]) { + if !equalTokens(tok, expected[i]) { t.Errorf("got %v, expected %v", tok, expected[i]) } } From f9d969bcf5dd582052ff61a627631b63683ab2b2 Mon Sep 17 00:00:00 2001 From: ikawaha Date: Tue, 29 Dec 2020 23:20:42 +0900 Subject: [PATCH 2/3] Fix equality of tokens --- tokenizer/token.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tokenizer/token.go b/tokenizer/token.go index 4d859710..f6b12581 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -209,3 +209,10 @@ func (t Token) pickupFromFeatures(key string) (string, bool) { func (t Token) String() string { return fmt.Sprintf("%d:%q (%d: %d, %d) %v [%d]", t.Index, t.Surface, t.Position, t.Start, t.End, t.Class, t.ID) } + +// Equal returns true if tokens are equal. This function compares values other than the `Index` field. +func (t Token) Equal(v Token) bool { + return t.ID == v.ID && + t.Class == v.Class && + t.Surface == v.Surface +} From 585d8635ce392ea88cc953801cce4a1fa53cd0e2 Mon Sep 17 00:00:00 2001 From: ikawaha Date: Tue, 29 Dec 2020 23:25:38 +0900 Subject: [PATCH 3/3] Fix a comment --- tokenizer/token.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tokenizer/token.go b/tokenizer/token.go index f6b12581..0b2387aa 100644 --- a/tokenizer/token.go +++ b/tokenizer/token.go @@ -210,7 +210,7 @@ func (t Token) String() string { return fmt.Sprintf("%d:%q (%d: %d, %d) %v [%d]", t.Index, t.Surface, t.Position, t.Start, t.End, t.Class, t.ID) } -// Equal returns true if tokens are equal. This function compares values other than the `Index` field. +// Equal returns true if tokens are equal. func (t Token) Equal(v Token) bool { return t.ID == v.ID && t.Class == v.Class &&