Skip to content

Commit

Permalink
Merge pull request #11 from bab2min/develop
Browse files Browse the repository at this point in the history
prepare for 0.7.5
  • Loading branch information
bab2min authored Mar 4, 2020
2 parents 3e9fc7e + a68a422 commit fb39cb6
Show file tree
Hide file tree
Showing 12 changed files with 343 additions and 211 deletions.
42 changes: 21 additions & 21 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,28 +5,28 @@ on:
types: published

jobs:
# build_manylinux:
# name: Build for manylinux
# runs-on: ubuntu-latest
# container:
# image: docker://quay.io/pypa/manylinux1_x86_64
build_manylinux:
name: Build for manylinux
runs-on: ubuntu-latest
container:
image: docker://quay.io/pypa/manylinux1_x86_64

# steps:
# - uses: actions/checkout@v1
# - name: Deploy
# continue-on-error: True
# env:
# TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
# TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
# run: |
# for cp in cp34-cp34m cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38
# do
# /opt/python/${cp}/bin/python -m pip install twine wheel
# /opt/python/${cp}/bin/python setup.py build sdist bdist_wheel
# auditwheel repair dist/*-${cp}-linux_x86_64.whl
# done
# /opt/python/cp38-cp38/bin/python -m twine upload bdist/*.tar.gz
# /opt/python/cp38-cp38/bin/python -m twine upload wheelhouse/*.whl
steps:
  # Fetch the repository sources.
  - uses: actions/checkout@v1
  - name: Deploy
    # A failure in this step is tolerated rather than failing the job.
    continue-on-error: True
    env:
      TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
      TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
    # For every listed CPython build: install twine/wheel, build the sdist and
    # wheel, then run auditwheel on the wheel found in dist/.
    # Afterwards upload the source tarball and the repaired wheels using the
    # cp38 interpreter. The tarball is uploaded from dist/, the same directory
    # the wheel is picked up from inside the loop; the previous glob pointed at
    # bdist/, which nothing in this script creates, so that upload failed and
    # (with a fail-fast shell) the wheel upload after it was never reached.
    run: |
      for cp in cp35-cp35m cp36-cp36m cp37-cp37m cp38-cp38
      do
        /opt/python/${cp}/bin/python -m pip install twine wheel
        /opt/python/${cp}/bin/python setup.py build sdist bdist_wheel
        auditwheel repair dist/*-${cp}-linux_x86_64.whl
      done
      /opt/python/cp38-cp38/bin/python -m twine upload dist/*.tar.gz
      /opt/python/cp38-cp38/bin/python -m twine upload wheelhouse/*.whl
build_macos:
name: Build for macOS
Expand Down
38 changes: 19 additions & 19 deletions .github/workflows/pull_request_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,25 @@ on:
- master

jobs:
# build_manylinux:
# name: Build for manylinux
# runs-on: ubuntu-latest
# container:
# image: docker://quay.io/pypa/manylinux1_x86_64
# strategy:
# max-parallel: 4
# matrix:
# cp: [cp34-cp34m, cp35-cp35m, cp36-cp36m, cp37-cp37m, cp38-cp38]
#
# steps:
# - uses: actions/checkout@v1
# - name: Build
# run: |
# /opt/python/${{ matrix.cp }}/bin/python setup.py build install
# - name: Test
# run: |
# /opt/python/${{ matrix.cp }}/bin/python -m pip install pytest konlpy
# /opt/python/${{ matrix.cp }}/bin/python -m pytest --verbose test.py
# Pull-request job: builds and tests the package inside the manylinux1
# container, once for every CPython tag listed in the matrix.
build_manylinux:
name: Build for manylinux
runs-on: ubuntu-latest
container:
image: docker://quay.io/pypa/manylinux1_x86_64
strategy:
max-parallel: 4
matrix:
# Each entry is substituted into the /opt/python/<cp>/bin/python path used by the steps.
cp: [cp35-cp35m, cp36-cp36m, cp37-cp37m, cp38-cp38]

steps:
- uses: actions/checkout@v1
# Build and install the package with the interpreter selected by the matrix.
- name: Build
run: |
/opt/python/${{ matrix.cp }}/bin/python setup.py build install
# Install the test dependencies (pytest, konlpy) and run test.py with the same interpreter.
- name: Test
run: |
/opt/python/${{ matrix.cp }}/bin/python -m pip install pytest konlpy
/opt/python/${{ matrix.cp }}/bin/python -m pytest --verbose test.py
build_macos:
name: Build for macOS
Expand Down
8 changes: 4 additions & 4 deletions kiwipiepy.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@
<LinkIncremental>true</LinkIncremental>
<TargetExt>.pyd</TargetExt>
<TargetName>_kiwipiepy</TargetName>
<IncludePath>C:\Users\Administrator\AppData\Local\Programs\Python\Python37\include;$(IncludePath)</IncludePath>
<LibraryPath>C:\Users\Administrator\AppData\Local\Programs\Python\Python37\libs;$(LibraryPath)</LibraryPath>
<IncludePath>C:\Users\Administrator\AppData\Local\Programs\Python\Python38\include;$(IncludePath)</IncludePath>
<LibraryPath>C:\Users\Administrator\AppData\Local\Programs\Python\Python38\libs;$(LibraryPath)</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
Expand All @@ -130,8 +130,8 @@
<LinkIncremental>false</LinkIncremental>
<TargetExt>.pyd</TargetExt>
<TargetName>_kiwipiepy</TargetName>
<IncludePath>C:\Users\Administrator\AppData\Local\Programs\Python\Python37\include;$(IncludePath)</IncludePath>
<LibraryPath>C:\Users\Administrator\AppData\Local\Programs\Python\Python37\libs;$(LibraryPath)</LibraryPath>
<IncludePath>C:\Users\Administrator\AppData\Local\Programs\Python\Python38\include;$(IncludePath)</IncludePath>
<LibraryPath>C:\Users\Administrator\AppData\Local\Programs\Python\Python38\libs;$(LibraryPath)</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
setup(
name='kiwipiepy',

version='0.7.4',
version='0.7.5',

description='Kiwi, the Korean Tokenizer for Python',
long_description=long_description,
Expand Down
28 changes: 22 additions & 6 deletions src/KiwiPy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,19 @@ static PyObject* kiwi__extractWords(KiwiObject* self, PyObject* args, PyObject *
{
return {};
}
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
if (!PyUnicode_Check(retVal)) throw runtime_error{ "reader must return a value in 'str' type" };
auto utf8 = PyUnicode_AsUTF8(retVal);
if (!utf8) throw bad_exception();
return Kiwi::toU16(utf8);
}, minCnt, maxWordLen, minScore);

PyObject* retList = PyList_New(res.size());
size_t idx = 0;
for (auto& r : res)
{
PyList_SetItem(retList, idx++, Py_BuildValue("(sfnf)", Kiwi::toU8(r.form).c_str(), r.score, r.freq, r.posScore[KPOSTag::NNP]));
auto v = Py_BuildValue("(sfnf)", Kiwi::toU8(r.form).c_str(), r.score, r.freq, r.posScore[KPOSTag::NNP]);
if (!v) throw bad_exception();
PyList_SetItem(retList, idx++, v);
}
return retList;
}
Expand Down Expand Up @@ -298,7 +303,10 @@ static PyObject* kiwi__extractFilterWords(KiwiObject* self, PyObject* args, PyOb
{
return {};
}
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
if (!PyUnicode_Check(retVal)) throw runtime_error{ "reader must return a value in 'str' type" };
auto utf8 = PyUnicode_AsUTF8(retVal);
if (!utf8) throw bad_exception();
return Kiwi::toU16(utf8);
}, minCnt, maxWordLen, minScore);

res = self->inst->filterExtractedWords(move(res), posScore);
Expand Down Expand Up @@ -343,7 +351,10 @@ static PyObject* kiwi__extractAddWords(KiwiObject* self, PyObject* args, PyObjec
{
return {};
}
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
if (!PyUnicode_Check(retVal)) throw runtime_error{ "reader must return a value in 'str' type" };
auto utf8 = PyUnicode_AsUTF8(retVal);
if (!utf8) throw bad_exception();
return Kiwi::toU16(utf8);
}, minCnt, maxWordLen, minScore, posScore);

PyObject* retList = PyList_New(res.size());
Expand Down Expand Up @@ -492,7 +503,10 @@ static PyObject* kiwi__analyze(KiwiObject* self, PyObject* args, PyObject *kwarg
{
return {};
}
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
if (!PyUnicode_Check(retVal)) throw runtime_error{ "reader must return a value in 'str' type" };
auto utf8 = PyUnicode_AsUTF8(retVal);
if (!utf8) throw bad_exception();
return Kiwi::toU16(utf8);
}, [&receiver](size_t id, vector<KResult>&& res)
{
UniquePyObj argList = Py_BuildValue("(nN)", id, resToPyList(res));
Expand Down Expand Up @@ -540,7 +554,9 @@ static PyObject* kiwi__perform(KiwiObject* self, PyObject* args, PyObject *kwarg
{
return {};
}
return Kiwi::toU16(PyUnicode_AsUTF8(retVal));
auto utf8 = PyUnicode_AsUTF8(retVal);
if (!utf8) throw bad_exception();
return Kiwi::toU16(utf8);
}, [&receiver](size_t id, vector<KResult>&& res)
{
UniquePyObj argList = Py_BuildValue("(nN)", id, resToPyList(res));
Expand Down
13 changes: 2 additions & 11 deletions src/core/KModelMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ KModelMgr::KModelMgr(const char * modelPath)
#else
{
ifstream ifs{ modelPath + string{ "sj.morph" }, ios_base::binary };
if (ifs.fail()) throw KiwiException{ "[KModelMgr] Failed to find file '"s +modelPath + "sj.morph'." };
if (ifs.fail()) throw KiwiException{ std::string{"[KModelMgr] Failed to find file '"} + modelPath + "sj.morph'." };
ifs.seekg(0, ios_base::end);
string buffer(ifs.tellg(), 0);
ifs.seekg(0);
Expand All @@ -426,7 +426,7 @@ KModelMgr::KModelMgr(const char * modelPath)
}
{
ifstream ifs{ modelPath + string{ "sj.lang" }, ios_base::binary };
if (ifs.fail()) throw KiwiException{ "[KModelMgr] Failed to find file '"s +modelPath + "sj.lang'." };
if (ifs.fail()) throw KiwiException{ std::string{"[KModelMgr] Failed to find file '"} + modelPath + "sj.lang'." };
ifs.seekg(0, ios_base::end);
string buffer(ifs.tellg(), 0);
ifs.seekg(0);
Expand Down Expand Up @@ -474,20 +474,11 @@ void KModelMgr::solidify()
auto& f = forms[i];
if (f.candidate.empty()) continue;
size_t realSize = f.form.size();
if (!once && f.form.find(u'\x2665') != k_string::npos)
{
realSize = f.form.find(u'\x2665') + 1;
}
trieRoot[0].build(&f.form[0], realSize, &f, [this]()
{
trieRoot.emplace_back();
return &trieRoot.back();
});
if (!once && f.form.find(u'\x2665') != k_string::npos)
{
f.form = f.form.substr(f.form.find(u'\x2665') + 1);
once = true;
}
}
trieRoot[0].fillFail();

Expand Down
10 changes: 9 additions & 1 deletion src/core/KWordDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ vector<KWordDetector::WordInfo> KWordDetector::extractWords(const function<u16st
}
}

sort(cands.begin(), cands.end(), [](const auto& a, const auto& b)
sort(cands.begin(), cands.end(), [](const WordInfo& a, const WordInfo& b)
{
return a.score > b.score;
});
Expand All @@ -392,6 +392,14 @@ vector<KWordDetector::WordInfo> KWordDetector::extractWords(const function<u16st

for (auto& r : cands)
{
/*
removing unpaired surrogate
*/
if ((r.form.back() & 0xFC00) == 0xD800)
{
r.form.pop_back();
}

/*
removing hyper-matched forms
ex) correct form: ABC, matched forms: ABC, ABC_D, ABC_E ...
Expand Down
4 changes: 2 additions & 2 deletions src/core/KWordDetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,12 @@ namespace kiwi

std::reverse_iterator<const char16_t*> rbegin() const
{
return std::make_reverse_iterator(end());
return std::reverse_iterator<const char16_t*>(end());
}

std::reverse_iterator<const char16_t*> rend() const
{
return std::make_reverse_iterator(begin());
return std::reverse_iterator<const char16_t*>(begin());
}

char16_t& front()
Expand Down
37 changes: 32 additions & 5 deletions src/core/Kiwi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@
#include "KFeatureTestor.h"
#include "logPoisson.h"

namespace kiwi
{
	/*
	Heap-allocates a Ty, perfect-forwarding ctorArgs to its constructor
	(parenthesised initialisation), and returns the owning std::unique_ptr.
	Callers in this file spell it kiwi::make_unique explicitly.
	NOTE(review): presumably a stand-in for toolchains without std::make_unique - confirm.
	*/
	template<typename Ty, typename... Ts>
	std::unique_ptr<Ty> make_unique(Ts&&... ctorArgs)
	{
		std::unique_ptr<Ty> owner(new Ty(std::forward<Ts>(ctorArgs)...));
		return owner;
	}
}

using namespace std;
using namespace kiwi;

Expand All @@ -24,12 +33,12 @@ Kiwi::Kiwi(const char * modelPath, size_t _maxCache, size_t _numThread, size_t o
#else
{
ifstream ifs{ modelPath + string{ "extract.mdl" }, ios_base::binary };
if (ifs.fail()) throw KiwiException{ "[Kiwi] Failed to find model file '"s + modelPath + "extract.mdl'." };
if (ifs.fail()) throw KiwiException{ std::string{"[Kiwi] Failed to find model file '"} +modelPath + "extract.mdl'." };
detector.loadPOSModel(ifs);
detector.loadNounTailModel(ifs);
}
#endif
mdl = make_unique<KModelMgr>(modelPath);
mdl = kiwi::make_unique<KModelMgr>(modelPath);

if (options & LOAD_DEFAULT_DICT)
{
Expand All @@ -49,7 +58,7 @@ int Kiwi::addUserWord(const u16string & str, KPOSTag tag, float userScore)
int Kiwi::loadUserDictionary(const char * userDictPath)
{
ifstream ifs{ userDictPath };
if(ifs.fail()) throw KiwiException("[loadUserDictionary] Failed to open '"s + userDictPath + "'"s);
if (ifs.fail()) throw KiwiException(std::string{ "[loadUserDictionary] Failed to open '" } +userDictPath + "'");
ifs.exceptions(std::istream::badbit);
string line;
while (getline(ifs, line))
Expand Down Expand Up @@ -112,7 +121,7 @@ vector<KWordDetector::WordInfo> Kiwi::extractWords(const function<u16string(size

vector<KWordDetector::WordInfo> Kiwi::filterExtractedWords(vector<KWordDetector::WordInfo>&& words, float posThreshold) const
{
auto old = make_unique<KModelMgr>(*mdl);
auto old = kiwi::make_unique<KModelMgr>(*mdl);
swap(old, const_cast<Kiwi*>(this)->mdl);
const_cast<Kiwi*>(this)->prepare();

Expand Down Expand Up @@ -285,13 +294,31 @@ struct WordLL
MInfos morphs;
float accScore = 0;
const KNLangModel::Node* node = nullptr;

WordLL()
{
}

WordLL(const MInfos& _morphs, float _accScore, const KNLangModel::Node* _node)
: morphs{_morphs}, accScore{_accScore}, node{_node}
{
}
};

struct WordLLP
{
const MInfos* morphs = nullptr;
float accScore = 0;
const KNLangModel::Node* node = nullptr;

WordLLP()
{
}

WordLLP(const MInfos* _morphs, float _accScore, const KNLangModel::Node* _node)
: morphs{_morphs}, accScore{_accScore}, node{_node}
{
}
};

typedef vector<WordLL, pool_allocator<WordLL>> WordLLs;
Expand Down Expand Up @@ -657,7 +684,7 @@ void Kiwi::analyze(size_t topN, const function<u16string(size_t)>& reader, const

void Kiwi::perform(size_t topN, const function<u16string(size_t)>& reader, const function<void(size_t, vector<KResult>&&)>& receiver, size_t minCnt, size_t maxWordLen, float minScore, float posThreshold) const
{
auto old = make_unique<KModelMgr>(*mdl);
auto old = kiwi::make_unique<KModelMgr>(*mdl);
swap(old, const_cast<Kiwi*>(this)->mdl);
const_cast<Kiwi*>(this)->extractAddWords(reader, minCnt, maxWordLen, minScore, posThreshold);
const_cast<Kiwi*>(this)->prepare();
Expand Down
15 changes: 9 additions & 6 deletions src/core/KiwiHeader.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@

#include <memory>
#include <locale>
#include <codecvt>

#include <chrono>
#include <mutex>
Expand All @@ -41,12 +40,16 @@ namespace kiwi
{
typedef char16_t k_char;

class KiwiException : public std::exception
class KiwiException : public std::runtime_error
{
std::string msg;
public:
KiwiException(const std::string& _msg) : msg(_msg) {}
const char* what() const noexcept override { return msg.c_str(); }
using std::runtime_error::runtime_error;
};

class KiwiUnicodeException : public std::runtime_error
{
public:
using std::runtime_error::runtime_error;
};


Expand Down Expand Up @@ -78,4 +81,4 @@ namespace std
}
};
}
#endif
#endif
Loading

0 comments on commit fb39cb6

Please sign in to comment.