From a6720b8fd066377015f4e3a4b4e3e4981890b1d5 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Thu, 6 Jun 2024 14:26:13 +0900 Subject: [PATCH 1/2] pass string as split mode --- python/py_src/sudachipy/command_line.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/python/py_src/sudachipy/command_line.py b/python/py_src/sudachipy/command_line.py index 60efa4bb..07f59c19 100644 --- a/python/py_src/sudachipy/command_line.py +++ b/python/py_src/sudachipy/command_line.py @@ -19,7 +19,7 @@ import sys from pathlib import Path -from . import Dictionary, SplitMode +from . import Dictionary from . import __version__ from . import sudachipy @@ -93,13 +93,6 @@ def _command_tokenize(args, print_usage): _input_files_checker(args, print_usage) - if args.mode == "A": - mode = SplitMode.A - elif args.mode == "B": - mode = SplitMode.B - else: - mode = SplitMode.C - output = sys.stdout if args.fpath_out: output = open(args.fpath_out, "w", encoding="utf-8") @@ -121,7 +114,7 @@ def _command_tokenize(args, print_usage): # precompute output POS strings morphs = [",".join(ms) for ms in all_morphs] - tokenizer_obj = dict_.create(mode=mode) + tokenizer_obj = dict_.create(mode=args.mode) input_ = fileinput.input( args.in_files, openhook=fileinput.hook_encoded("utf-8")) run(tokenizer_obj, input_, output, print_all, morphs, is_stdout=args.fpath_out is None) @@ -216,7 +209,7 @@ def main(): parser_tk.add_argument("-r", dest="fpath_setting", metavar="file", help="the setting file in JSON format") parser_tk.add_argument( - "-m", dest="mode", choices=["A", "B", "C"], default="C", help="the mode of splitting") + "-m", dest="mode", choices="AaBbCc", default="C", help="the mode of splitting") parser_tk.add_argument("-o", dest="fpath_out", metavar="file", help="the output file") parser_tk.add_argument("-s", dest="system_dict_type", metavar='string', choices=["small", "core", "full"], @@ -258,7 +251,7 @@ def main(): required_named_ubd = parser_ubd.add_argument_group( 'required named arguments') required_named_ubd.add_argument('-s', dest='system_dic', metavar='file', required=True, - help='system dictionary path') + help='system dictionary path') parser_ubd.add_argument("in_files", metavar="file", nargs=argparse.ONE_OR_MORE, help='source files with CSV format (one or more)') parser_ubd.set_defaults(handler=_command_user_build, From fb0d661632f6c5f38dfc47a0a482dab1d671ec72 Mon Sep 17 00:00:00 2001 From: mh-northlander Date: Mon, 10 Jun 2024 09:48:16 +0900 Subject: [PATCH 2/2] add and hide imports --- python/py_src/sudachipy/__init__.py | 17 ++++++++++------- python/py_src/sudachipy/config.py | 12 ++++++------ 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/python/py_src/sudachipy/__init__.py b/python/py_src/sudachipy/__init__.py index 04e14f18..bdf67f40 100644 --- a/python/py_src/sudachipy/__init__.py +++ b/python/py_src/sudachipy/__init__.py @@ -4,23 +4,26 @@ SplitMode, MorphemeList, Morpheme, - WordInfo + WordInfo, ) +from .config import Config +from . import errors -from importlib import import_module -from importlib.util import find_spec -from pathlib import Path +from importlib import import_module as _import_module +from importlib.util import find_spec as _find_spec +from pathlib import Path as _Path __version__ = "0.6.9-a1" -_DEFAULT_RESOURCEDIR = Path(__file__).resolve().parent / 'resources' +_DEFAULT_RESOURCEDIR = _Path(__file__).resolve().parent / 'resources' _DEFAULT_SETTINGFILE = _DEFAULT_RESOURCEDIR / 'sudachi.json' _DEFAULT_RESOURCEDIR = str(_DEFAULT_RESOURCEDIR.resolve()) _DEFAULT_SETTINGFILE = str(_DEFAULT_SETTINGFILE.resolve()) def _get_absolute_dict_path(dict_type: str) -> str: - pkg_path = Path(import_module(f'sudachidict_{dict_type}').__file__).parent + pkg_path = _Path(_import_module( + f'sudachidict_{dict_type}').__file__).parent dic_path = pkg_path / 'resources' / 'system.dic' return str(dic_path.resolve()) @@ -29,7 +32,7 @@ def _find_dict_path(dict_type='core'): if dict_type not in ['small', 'core', 'full']: raise ValueError('"dict_type" must be "small", "core", or "full".') - is_installed = find_spec(f'sudachidict_{dict_type}') + is_installed = _find_spec(f'sudachidict_{dict_type}') if is_installed: return _get_absolute_dict_path(dict_type) else: diff --git a/python/py_src/sudachipy/config.py b/python/py_src/sudachipy/config.py index f0894344..c7c03aa1 100644 --- a/python/py_src/sudachipy/config.py +++ b/python/py_src/sudachipy/config.py @@ -11,11 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import dataclasses -from dataclasses import dataclass, asdict -from json import dumps +import dataclasses as _dataclasses +from json import dumps as _dumps -@dataclass + +@_dataclasses.dataclass class Config: """ SudachiPy rich configuration object. @@ -65,10 +65,10 @@ def as_jsons(self): """ Convert this Config object to the json string """ - return dumps(_filter_nulls(asdict(self))) + return _dumps(_filter_nulls(_dataclasses.asdict(self))) def update(self, **changes): - return dataclasses.replace(self, **changes) + return _dataclasses.replace(self, **changes) def _filter_nulls(data: dict) -> dict: