Skip to content

Commit

Permalink
Merge pull request #182 from bab2min/dev_issue_181
Browse files Browse the repository at this point in the history
Fix sentence splitting bug on SSO tag
  • Loading branch information
bab2min authored Sep 6, 2024
2 parents 41aae65 + e195cfc commit f6a714f
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/Kiwi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,8 @@ namespace kiwi
(tokens[i - 1].tag == POSTag::so
|| tokens[i - 1].tag == POSTag::sw
|| tokens[i - 1].tag == POSTag::sp
- || tokens[i - 1].tag == POSTag::se)
+ || tokens[i - 1].tag == POSTag::se
+ || tokens[i - 1].tag == POSTag::sso)
&& tokens[i - 1].endPos() == tokens[i].position
&& tokens[i - 1].position > tokens[i - 2].endPos();
if (nestedSentEnd)
Expand Down
13 changes: 13 additions & 0 deletions test/test_cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,19 @@ TEST(KiwiCpp, IssueP131_SentenceSplitError)
EXPECT_EQ(res[1], std::make_pair((size_t)10, (size_t)12));
}

// Regression test for issue 181 (sentence splitting error).
// The input mixes plain sentences with a quotation wrapped in “ ” that itself
// contains two sentences; the splitter is expected to return five sentence
// spans, each given as a (begin, end) pair of offsets into `text`.
// NOTE(review): presumably the opening quotation mark is what gets tagged as
// SSO and triggered the original bug; confirm against the issue report.
TEST(KiwiCpp, Issue181_SentenceSplitError)
{
	// Keep this literal byte-for-byte: every expected offset below depends on it.
	const char16_t* text = u"존 슈발John Schwall은 그에 꼭 들어맞는 흥미로운 사례였다. 슈발의 아버지와 할아버지는 스테이튼 아일랜드의 소방관이었다. “제 친가 쪽의 남자들은 모두 소방관이에요. 전 다 른 일을 하고 싶었죠.” 슈발이 말했다.";
	Kiwi& kiwi = reuseKiwiInstance();
	auto res = kiwi.splitIntoSents(text);

	// Fatal assertion: if the number of sentences is wrong, stop here instead of
	// indexing past the end of `res` in the element checks below.
	ASSERT_EQ(res.size(), (size_t)5);

	EXPECT_EQ(res[0], std::make_pair((size_t)0, (size_t)38));
	EXPECT_EQ(res[1], std::make_pair((size_t)39, (size_t)72));
	// The two sentences inside the quotation are split individually.
	EXPECT_EQ(res[2], std::make_pair((size_t)73, (size_t)97));
	EXPECT_EQ(res[3], std::make_pair((size_t)98, (size_t)115));
	EXPECT_EQ(res[4], std::make_pair((size_t)116, (size_t)124));
}

TEST(KiwiCpp, AddRule)
{
Kiwi& okiwi = reuseKiwiInstance();
Expand Down

0 comments on commit f6a714f

Please sign in to comment.