Skip to content

Commit

Permalink
Merge pull request #192 from bab2min/dev/issue191
Browse files Browse the repository at this point in the history
앞쪽의 특수 문자가 형태소와 잘못 결합하여 분석되는 버그 수정
  • Loading branch information
bab2min authored Oct 12, 2024
2 parents c75659c + 5933afc commit c1da90c
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
3 changes: 2 additions & 1 deletion src/KTrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -653,7 +653,8 @@ size_t kiwi::splitByTrie(
const auto scanStart = max(endPosMap[nBeginWithMultiplier].first, (uint32_t)1), scanEnd = endPosMap[nBeginWithMultiplier].second;
const bool longestMatched = scanStart < scanEnd && any_of(out.begin() + scanStart, out.begin() + scanEnd, [&](const KGraphNode& g)
{
- return nBeginWithMultiplier == g.endPos && lastSpecialEndPos == g.endPos - (g.uform.empty() ? g.form->sizeWithoutSpace() : g.uform.size()) * posMultiplier;
+ const auto start = g.endPos - (g.uform.empty() ? g.form->sizeWithoutSpace() : g.uform.size()) * posMultiplier;
+ return nBeginWithMultiplier == g.endPos && (lastSpecialEndPos == start || specialStartPos == start);
});

// insert unknown form
Expand Down
12 changes: 12 additions & 0 deletions test/test_cpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,18 @@ TEST(KiwiCpp, UserTag)
EXPECT_EQ(tokens[7].tag, POSTag::user2);
}

// Regression test for the dev/issue191 fix: a special character that precedes
// a word (here the opening backtick) must come out as its own token instead of
// being wrongly combined with a neighbouring morpheme.
TEST(KiwiCpp, STagPrefix)
{
	Kiwi& kiwi = reuseKiwiInstance();
	auto res = kiwi.analyze(u"자신있는 지역은 `후분양`으로 나올듯 싶습니다.", Match::allWithNormalizing).first;

	// Stop here if the analysis produced fewer tokens than are inspected below,
	// so a regression shows up as a test failure rather than an out-of-range read.
	ASSERT_GE(res.size(), 6u);

	// "자신있는" is expected to split into three morphemes ...
	EXPECT_EQ(res[0].str, u"자신");
	EXPECT_EQ(res[1].str, u"있");
	EXPECT_EQ(res[2].str, u"는");
	// ... "지역은" into two ...
	EXPECT_EQ(res[3].str, u"지역");
	EXPECT_EQ(res[4].str, u"은");
	// ... and the backtick must stand alone as the sixth token.
	EXPECT_EQ(res[5].str, u"`");
}

TEST(KiwiCpp, HSDataset)
{
KiwiBuilder kw{ MODEL_PATH, 0, BuildOption::default_, };
Expand Down

0 comments on commit c1da90c

Please sign in to comment.