Skip to content

Commit

Permalink
Merge pull request #228 from ikawaha/fix/update-token
Browse files — Browse the repository at this point in the history
Update
  • Loading branch information
ikawaha authored Dec 29, 2020
2 parents 9463033 + 585d863 commit 46263e8
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 38 deletions.
7 changes: 2 additions & 5 deletions tokenizer/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,12 @@ func (t Token) pickupFromFeatures(key string) (string, bool) {

// String returns a string representation of a token.
func (t Token) String() string {
return fmt.Sprintf("%q (%d: %d, %d) %v [%d]", t.Surface, t.Position, t.Start, t.End, t.Class, t.ID)
return fmt.Sprintf("%d:%q (%d: %d, %d) %v [%d]", t.Index, t.Surface, t.Position, t.Start, t.End, t.Class, t.ID)
}

// Equal returns true if tokens are equal. This function compares values other than the `Index` field.
// Equal returns true if tokens are equal.
func (t Token) Equal(v Token) bool {
return t.ID == v.ID &&
t.Class == v.Class &&
t.Position == v.Position &&
t.Start == v.Start &&
t.End == v.End &&
t.Surface == v.Surface
}
2 changes: 1 addition & 1 deletion tokenizer/token_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ func Test_TokenString(t *testing.T) {
End: 1,
Surface: "テスト",
}
want := `"テスト" (0: 0, 1) DUMMY [123]`
want := `0:"テスト" (0: 0, 1) DUMMY [123]`
got := fmt.Sprintf("%v", tok)
if got != want {
t.Errorf("want %v, got %v", want, got)
Expand Down
26 changes: 13 additions & 13 deletions tokenizer/tokenizer_option_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ func Test_AnalyzeWithUserDict(t *testing.T) {
}
tokens := tnz.Analyze("関西国際空港", Normal)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
}
if len(tokens) != len(expected) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok)
}
}
Expand Down Expand Up @@ -86,15 +86,15 @@ func Test_AnalyzeWithSearchModeWithUserDict(t *testing.T) {

tokens := tnz.Analyze("関西国際空港", Search)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
}
if len(tokens) != len(expected) {
t.Fatalf("expected %v, got %v", expected, tokens)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok)
}
}
Expand Down Expand Up @@ -126,15 +126,15 @@ func Test_AnalyzeWithExtendedModeWithUserDict(t *testing.T) {

tokens := tnz.Analyze("関西国際空港", Extended)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: 2, Surface: "関西国際空港", Start: 0, End: 6, Class: TokenClass(lattice.USER)},
{Index: 2, ID: -1, Surface: "EOS", Start: 6, End: 6, Position: len("関西国際空港")},
}
if len(tokens) != len(expected) {
t.Fatalf("expected %v, got %v", expected, tokens)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok)
}
}
Expand All @@ -158,7 +158,7 @@ func TestTokenizer_Analyze_OmitBOSEOS(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("%dth token, expected %v, got %v", i, expected[i], tok)
}
}
Expand Down
48 changes: 29 additions & 19 deletions tokenizer/tokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ const (
testDictPath = "../testdata/ipa.dict"
)

func equalTokens(lhs, rhs Token) bool {
return lhs.Index == rhs.Index &&
lhs.ID == rhs.ID &&
lhs.Class == rhs.Class &&
lhs.Position == rhs.Position &&
lhs.Start == rhs.Start &&
lhs.End == rhs.End &&
lhs.Surface == rhs.Surface
}

func Example_tokenize_mode() {
d, err := dict.LoadDictFile(testDictPath)
if err != nil {
Expand Down Expand Up @@ -75,14 +85,14 @@ func Test_AnalyzeEmptyInput(t *testing.T) {
}
tokens := tnz.Analyze("", Normal)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: -1, Surface: "EOS"},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: -1, Surface: "EOS"},
}
if len(tokens) != len(expected) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand All @@ -100,15 +110,15 @@ func Test_Analyze(t *testing.T) {
input := "関西国際空港"
tokens := tnz.Analyze(input, Normal)
want := []Token{
{ID: -1, Surface: "BOS"},
{ID: 372978, Surface: input, Position: 0, Start: 0, End: 6, Class: TokenClass(lattice.KNOWN)},
{ID: -1, Surface: "EOS", Position: len(input), Start: 6, End: 6},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: 372978, Surface: input, Position: 0, Start: 0, End: 6, Class: TokenClass(lattice.KNOWN)},
{Index: 2, ID: -1, Surface: "EOS", Position: len(input), Start: 6, End: 6},
}
if len(tokens) != len(want) {
t.Fatalf("got %v, want %v", tokens, want)
}
for i, tok := range tokens {
if !tok.Equal(want[i]) {
if !equalTokens(tok, want[i]) {
t.Errorf("got %+v, want %+v", tok, want[i])
}
}
Expand All @@ -125,15 +135,15 @@ func Test_AnalyzeUnknown(t *testing.T) {
}
tokens := tnz.Analyze("ポポピ", Normal)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: 34, Surface: "ポポピ", Start: 0, End: 3, Class: TokenClass(lattice.UNKNOWN)},
{ID: -1, Surface: "EOS", Start: 3, End: 3, Position: 9},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: 34, Surface: "ポポピ", Start: 0, End: 3, Class: TokenClass(lattice.UNKNOWN)},
{Index: 2, ID: -1, Surface: "EOS", Start: 3, End: 3, Position: 9},
}
if len(tokens) != len(expected) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand Down Expand Up @@ -189,14 +199,14 @@ func Test_AnalyzeWithSearchModeEmptyInput(t *testing.T) {
}
tokens := tnz.Analyze("", Search)
expected := []Token{
{ID: -1, Surface: "BOS"},
{ID: -1, Surface: "EOS"},
{Index: 0, ID: -1, Surface: "BOS"},
{Index: 1, ID: -1, Surface: "EOS"},
}
if len(tokens) != len(expected) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand Down Expand Up @@ -224,7 +234,7 @@ func Test_AnalyzeWithSearchMode(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if tok.Index != expected[i].Index || !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand All @@ -250,7 +260,7 @@ func Test_AnalyzeWithSearchModeUnknown(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if tok.Index != expected[i].Index || !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand All @@ -275,7 +285,7 @@ func Test_AnalyzeWithExtendedModeEmpty(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if tok.Index != expected[i].Index || !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand Down Expand Up @@ -303,7 +313,7 @@ func Test_AnalyzeWithExtendedMode(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if tok.Index != expected[i].Index || !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand Down Expand Up @@ -331,7 +341,7 @@ func Test_AnalyzeWithExtendedModeUnknown(t *testing.T) {
t.Fatalf("got %v, expected %v", tokens, expected)
}
for i, tok := range tokens {
if tok.Index != expected[i].Index || !tok.Equal(expected[i]) {
if !equalTokens(tok, expected[i]) {
t.Errorf("got %v, expected %v", tok, expected[i])
}
}
Expand Down

0 comments on commit 46263e8

Please sign in to comment.