Skip to content

Commit

Permalink
Merge pull request #8 from bab2min/develop
Browse files Browse the repository at this point in the history
fix bug #7
  • Loading branch information
bab2min authored Dec 29, 2019
2 parents 03276f5 + 22734e2 commit 3e9fc7e
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 77 deletions.
12 changes: 7 additions & 5 deletions .github/workflows/pull_request_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ jobs:
#
# steps:
# - uses: actions/checkout@v1
# - name: Build & Test
# - name: Build
# run: |
# /opt/python/${{ matrix.cp }}/bin/python -m pip install pytest konlpy
# /opt/python/${{ matrix.cp }}/bin/python setup.py build install
# /opt/python/${{ matrix.cp }}/bin/python -m pytest test.py
# - name: Test
# run: |
# /opt/python/${{ matrix.cp }}/bin/python -m pip install pytest konlpy
# /opt/python/${{ matrix.cp }}/bin/python -m pytest --verbose test.py

build_macos:
name: Build for macOS
Expand All @@ -44,7 +46,7 @@ jobs:
- name: Test
run: |
python -m pip install pytest konlpy
python -m pytest test.py
python -m pytest --verbose test.py
build_windows:
name: Build for Windows
Expand All @@ -68,4 +70,4 @@ jobs:
- name: Test
run: |
python -m pip install pytest konlpy
python -m pytest test.py
python -m pytest --verbose test.py
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -197,13 +197,17 @@ reader와 receiver를 사용한 예시는 다음과 같습니다.
self.input = open(input, encoding='utf-8')
self.output = open(output, 'w', encoding='utf-8')

def read(self, id):
if id == 0:
def read(self, sent_id):
if sent_id == 0:
self.input.seek(0)
return self.input.readline()

def write(self, id, res):
print('Analyzed %dth row' % id)
self.iter = iter(self.input)
try:
return next(self.iter)
except StopIteration:
return None

def write(self, sent_id, res):
print('Analyzed %dth row' % sent_id)
self.output.write(' '.join(map(lambda x:x[0]+'/'+x[1], res[0][0])) + '\n')

def __del__(self):
Expand Down
2 changes: 2 additions & 0 deletions kiwipiepy.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
<None Include="kiwipiepy\__main__.py" />
<None Include="README.md" />
<None Include="setup.py" />
<None Include="x64\Debug\test.py" />
<None Include="x64\Release\test.py" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="src\core\BakedMap.hpp" />
Expand Down
12 changes: 12 additions & 0 deletions kiwipiepy.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,12 @@
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
<Filter Include="Release">
<UniqueIdentifier>{7797df98-8827-48a4-bba9-d27a0b3aa861}</UniqueIdentifier>
</Filter>
<Filter Include="Debug">
<UniqueIdentifier>{006c5263-f679-408c-9729-996ce9cc7008}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="src\KiwiPy.cpp">
Expand Down Expand Up @@ -51,6 +57,12 @@
<None Include="README.md" />
<None Include="kiwipiepy\__init__.py" />
<None Include="kiwipiepy\__main__.py" />
<None Include="x64\Release\test.py">
<Filter>Release</Filter>
</None>
<None Include="x64\Debug\test.py">
<Filter>Debug</Filter>
</None>
</ItemGroup>
<ItemGroup>
<ClInclude Include="src\core\BakedMap.hpp">
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
setup(
name='kiwipiepy',

version='0.7.3',
version='0.7.4',

description='Kiwi, the Korean Tokenizer for Python',
long_description=long_description,
Expand Down
111 changes: 62 additions & 49 deletions src/KiwiPy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,45 @@
using namespace std;
using namespace kiwi;

struct UniquePyObj
{
PyObject* obj;
UniquePyObj(PyObject* _obj = nullptr) : obj(_obj) {}
~UniquePyObj()
{
Py_XDECREF(obj);
}

UniquePyObj(const UniquePyObj&) = delete;
UniquePyObj& operator=(const UniquePyObj&) = delete;

UniquePyObj(UniquePyObj&& o)
{
std::swap(obj, o.obj);
}

UniquePyObj& operator=(UniquePyObj&& o)
{
std::swap(obj, o.obj);
return *this;
}

PyObject* get() const
{
return obj;
}

operator bool() const
{
return !!obj;
}

operator PyObject*() const
{
return obj;
}
};

#if PY_MAJOR_VERSION < 3
string PyUnicode_AsUTF8(PyObject* obj)
{
Expand Down Expand Up @@ -208,18 +247,14 @@ static PyObject* kiwi__extractWords(KiwiObject* self, PyObject* args, PyObject *
{
auto res = self->inst->extractWords([argReader](size_t id) -> u16string
{
PyObject* argList = Py_BuildValue("(n)", id);
PyObject* retVal = PyEval_CallObject(argReader, argList);
Py_DECREF(argList);
UniquePyObj argList = Py_BuildValue("(n)", id);
UniquePyObj retVal = PyEval_CallObject(argReader, argList);
if (!retVal) throw bad_exception();
if (PyObject_Not(retVal))
{
Py_DECREF(retVal);
return {};
}
auto p = Kiwi::toU16(PyUnicode_AsUTF8(retVal));
Py_DECREF(retVal);
return p;
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
}, minCnt, maxWordLen, minScore);

PyObject* retList = PyList_New(res.size());
Expand Down Expand Up @@ -256,18 +291,14 @@ static PyObject* kiwi__extractFilterWords(KiwiObject* self, PyObject* args, PyOb
{
auto res = self->inst->extractWords([argReader](size_t id) -> u16string
{
PyObject* argList = Py_BuildValue("(n)", id);
PyObject* retVal = PyEval_CallObject(argReader, argList);
Py_DECREF(argList);
UniquePyObj argList = Py_BuildValue("(n)", id);
UniquePyObj retVal = PyEval_CallObject(argReader, argList);
if (!retVal) throw bad_exception();
if (PyObject_Not(retVal))
{
Py_DECREF(retVal);
return {};
}
auto p = Kiwi::toU16(PyUnicode_AsUTF8(retVal));
Py_DECREF(retVal);
return p;
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
}, minCnt, maxWordLen, minScore);

res = self->inst->filterExtractedWords(move(res), posScore);
Expand Down Expand Up @@ -305,18 +336,14 @@ static PyObject* kiwi__extractAddWords(KiwiObject* self, PyObject* args, PyObjec
{
auto res = self->inst->extractAddWords([argReader](size_t id) -> u16string
{
PyObject* argList = Py_BuildValue("(n)", id);
PyObject* retVal = PyEval_CallObject(argReader, argList);
Py_DECREF(argList);
UniquePyObj argList = Py_BuildValue("(n)", id);
UniquePyObj retVal = PyEval_CallObject(argReader, argList);
if (!retVal) throw bad_exception();
if (PyObject_Not(retVal))
{
Py_DECREF(retVal);
return {};
}
auto p = Kiwi::toU16(PyUnicode_AsUTF8(retVal));
Py_DECREF(retVal);
return p;
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
}, minCnt, maxWordLen, minScore, posScore);

PyObject* retList = PyList_New(res.size());
Expand Down Expand Up @@ -421,7 +448,7 @@ PyObject* resToPyList(const vector<KResult>& res)
{
PyList_SetItem(rList, jdx++, Py_BuildValue("(ssnn)", Kiwi::toU8(q.str()).c_str(), tagToString(q.tag()), (size_t)q.pos(), (size_t)q.len()));
}
PyList_SetItem(retList, idx++, Py_BuildValue("(Of)", rList, p.second));
PyList_SetItem(retList, idx++, Py_BuildValue("(Nf)", rList, p.second));
}
return retList;
}
Expand All @@ -434,16 +461,16 @@ static PyObject* kiwi__analyze(KiwiObject* self, PyObject* args, PyObject *kwarg
static const char* kwlist[] = { "text", "top_n", nullptr };
if (PyArg_ParseTupleAndKeywords(args, kwargs, "s|n", (char**)kwlist, &text, &topN))
{
try
//try
{
auto res = self->inst->analyze(text, topN);
return resToPyList(res);
}
catch (const exception& e)
/*catch (const exception& e)
{
PyErr_SetString(PyExc_Exception, e.what());
return nullptr;
}
}*/
}
PyErr_Clear();
}
Expand All @@ -458,26 +485,19 @@ static PyObject* kiwi__analyze(KiwiObject* self, PyObject* args, PyObject *kwarg
if (!PyCallable_Check(receiver)) return PyErr_SetString(PyExc_TypeError, "'analyze' requires 2nd parameter as callable"), nullptr;
self->inst->analyze(topN, [&reader](size_t id)->u16string
{
PyObject* argList = Py_BuildValue("(n)", id);
PyObject* retVal = PyEval_CallObject(reader, argList);
Py_DECREF(argList);
UniquePyObj argList = Py_BuildValue("(n)", id);
UniquePyObj retVal = PyEval_CallObject(reader, argList);
if (!retVal) throw bad_exception();
if (PyObject_Not(retVal))
{
Py_DECREF(retVal);
return {};
}
auto p = Kiwi::toU16(PyUnicode_AsUTF8(retVal));
Py_DECREF(retVal);
return p;
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
}, [&receiver](size_t id, vector<KResult>&& res)
{
PyObject* l = resToPyList(res);
PyObject* argList = Py_BuildValue("(nO)", id, l);
PyObject* ret = PyEval_CallObject(receiver, argList);
UniquePyObj argList = Py_BuildValue("(nN)", id, resToPyList(res));
UniquePyObj ret = PyEval_CallObject(receiver, argList);
if(!ret) throw bad_exception();
Py_DECREF(ret);
Py_DECREF(argList);
});
Py_INCREF(Py_None);
return Py_None;
Expand Down Expand Up @@ -513,26 +533,19 @@ static PyObject* kiwi__perform(KiwiObject* self, PyObject* args, PyObject *kwarg

self->inst->perform(topN, [&reader](size_t id)->u16string
{
PyObject* argList = Py_BuildValue("(n)", id);
PyObject* retVal = PyEval_CallObject(reader, argList);
Py_DECREF(argList);
UniquePyObj argList = Py_BuildValue("(n)", id);
UniquePyObj retVal = PyEval_CallObject(reader, argList);
if (!retVal) throw bad_exception();
if (PyObject_Not(retVal))
{
Py_DECREF(retVal);
return {};
}
auto p = Kiwi::toU16(PyUnicode_AsUTF8(retVal));
Py_DECREF(retVal);
return p;
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
}, [&receiver](size_t id, vector<KResult>&& res)
{
PyObject* l = resToPyList(res);
PyObject* argList = Py_BuildValue("(nO)", id, l);
PyObject* ret = PyEval_CallObject(receiver, argList);
UniquePyObj argList = Py_BuildValue("(nN)", id, resToPyList(res));
UniquePyObj ret = PyEval_CallObject(receiver, argList);
if (!ret) throw bad_exception();
Py_DECREF(ret);
Py_DECREF(argList);
}, minCnt, maxWordLen, minScore, posScore);
Py_INCREF(Py_None);
return Py_None;
Expand Down
4 changes: 2 additions & 2 deletions src/core/KMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace kiwi

void* allocate()
{
assert(_CrtCheckMemory());
//assert(_CrtCheckMemory());
//std::lock_guard<std::mutex> lg(lock);
if (!freeList)
{
Expand All @@ -37,7 +37,7 @@ namespace kiwi
//fprintf(stderr, "deallocate %p\n", p);
*((void**)p) = freeList;
freeList = (void**)p;
assert(_CrtCheckMemory());
//assert(_CrtCheckMemory());
}

private:
Expand Down
6 changes: 4 additions & 2 deletions src/core/KModelMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,12 +468,13 @@ void KModelMgr::solidify()
trieRoot[i].val = &forms[i - 1];
}

bool once = false;
for (size_t i = (size_t)KPOSTag::SN; i < forms.size(); ++i)
{
auto& f = forms[i];
if (f.candidate.empty()) continue;
size_t realSize = f.form.size();
if (f.form.find(u'\x2665') != k_string::npos)
if (!once && f.form.find(u'\x2665') != k_string::npos)
{
realSize = f.form.find(u'\x2665') + 1;
}
Expand All @@ -482,9 +483,10 @@ void KModelMgr::solidify()
trieRoot.emplace_back();
return &trieRoot.back();
});
if (f.form.find(u'\x2665') != k_string::npos)
if (!once && f.form.find(u'\x2665') != k_string::npos)
{
f.form = f.form.substr(f.form.find(u'\x2665') + 1);
once = true;
}
}
trieRoot[0].fillFail();
Expand Down
4 changes: 2 additions & 2 deletions src/core/KTrie.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ vector<KGraphNode> KTrie::split(const k_string& str) const
return nBegin == g.lastPos && lastSpecialEndPos == g.lastPos - (g.uform.empty() ? g.form->form.size() : g.uform.size());
});

// inserting unknown form
// insert unknown form
if (nBegin > lastSpecialEndPos && !longestMatched
&& !(0x11A8 <= cand->form[0] && cand->form[0] < (0x11A7 + 28))
&& str[nBegin - space -1] != 0x11BB) // cannot ends with ¤¶
&& str[nBegin - space -1] != 0x11BB) // cannot end with final ssang-siot (U+11BB)
{
auto it2 = spacePos.find(lastSpecialEndPos - 1);
int space2 = it2 == spacePos.end() ? 0 : it2->second;
Expand Down
6 changes: 3 additions & 3 deletions src/core/Kiwi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -764,12 +764,12 @@ vector<KResult> Kiwi::analyze(const u16string & str, size_t topN) const
sents.emplace_back(chunk);
}
}
sents.emplace_back(str.end());
if(sents.back() != str.end()) sents.emplace_back(str.end());
if (sents.size() <= 1) return vector<KResult>(1);
vector<KResult> ret = analyzeSent(sents[0], sents[1], topN);
if (ret.empty())
{
ret.emplace_back();
return ret;
return vector<KResult>(1);
}
while (ret.size() < topN) ret.emplace_back(ret.back());
for (size_t i = 2; i < sents.size(); ++i)
Expand Down
Loading

0 comments on commit 3e9fc7e

Please sign in to comment.