Python compatibility #77

Merged
35 changes: 22 additions & 13 deletions python/src/dictionary.rs
@@ -1,17 +1,17 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use std::path::PathBuf;
@@ -29,8 +29,9 @@ use crate::tokenizer::{PySplitMode, PyTokenizer};
#[pyclass(module = "sudachi.dictionary", name = "Dictionary")]
#[pyo3(text_signature = "(config_path, resource_dir)")]
#[derive(Clone)]
#[repr(transparent)]
pub struct PyDictionary {
pub(super) dictionary: Arc<JapaneseDictionary>,
pub(super) dictionary: Option<Arc<JapaneseDictionary>>,
}

#[pymethods]
@@ -50,16 +51,24 @@ impl PyDictionary {
))
})?);

Ok(Self { dictionary })
Ok(Self {
dictionary: Some(dictionary),
})
}

/// Creates a sudachi tokenizer
#[pyo3(text_signature = "($self, mode)")]
#[args(mode = "None")]
fn create(&self, mode: Option<PySplitMode>) -> PyTokenizer {
let tokenizer = StatelessTokenizer::new(self.dictionary.clone());
let tokenizer = StatelessTokenizer::new(self.dictionary.as_ref().unwrap().clone());
let mode = mode.unwrap_or(PySplitMode::C).into();

PyTokenizer::new(tokenizer, mode)
}

/// Close this dictionary
#[pyo3(text_signature = "($self)")]
fn close(&mut self) {
self.dictionary = None;
}
}
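
On the Python side, the net effect of this file's changes is that the loaded dictionary becomes droppable: `Dictionary.close()` clears the internal `Option<Arc<JapaneseDictionary>>`, while `create()` still hands each tokenizer its own `Arc` clone. A minimal usage sketch, assuming the bindings are importable as `sudachi` (per the `#[pyclass(module = ...)]` attributes) and that the `(config_path, resource_dir)` constructor arguments are optional:

```python
from sudachi import Dictionary  # top-level import path is an assumption

dictionary = Dictionary()        # assumes a default config when no paths are given
tokenizer = dictionary.create()  # mode defaults to SplitMode.C (see unwrap_or above)

dictionary.close()               # drops the Option<Arc<JapaneseDictionary>>

# A tokenizer created before close() keeps its own Arc clone and stays usable,
# but calling dictionary.create() after close() would hit the unwrap() above.
```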
7 changes: 5 additions & 2 deletions python/src/morpheme.rs
@@ -31,6 +31,7 @@ type PyMorphemeList = MorphemeList<Arc<JapaneseDictionary>>;

/// A list of morphemes
#[pyclass(module = "sudachi.morpheme", name = "MorphemeList")]
#[repr(transparent)]
pub struct PyMorphemeListWrapper {
inner: Arc<PyMorphemeList>,
}
@@ -40,9 +41,11 @@ impl PyMorphemeListWrapper {
/// Returns an empty morpheme list with dictionary
#[classmethod]
#[pyo3(text_signature = "(dict)")]
fn empty(_cls: &PyType, dict: PyDictionary) -> Self {
fn empty(_cls: &PyType, dict: &PyDictionary) -> Self {
Self {
inner: Arc::new(PyMorphemeList::empty(dict.dictionary.clone())),
inner: Arc::new(PyMorphemeList::empty(
dict.dictionary.as_ref().unwrap().clone(),
)),
}
}

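The `empty` classmethod now borrows the dictionary (`&PyDictionary`) instead of taking it by value and unwraps the optional handle, so it must be called on a dictionary that has not been closed. A rough sketch of the corresponding Python call, under the same import-path assumption:

```python
from sudachi import Dictionary, MorphemeList  # assumed to be re-exported at the top level

dictionary = Dictionary()
morphemes = MorphemeList.empty(dictionary)  # empty result list bound to this dictionary
# Calling this after dictionary.close() would panic on the unwrap() above.
```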
49 changes: 21 additions & 28 deletions python/src/tokenizer.rs
@@ -27,10 +27,15 @@ use crate::morpheme::PyMorphemeListWrapper;

/// Unit to split text
///
/// This implementation is a workaround. Waiting for the pyo3 enum feature.
/// ref: [PyO3 issue #834](https://github.com/PyO3/pyo3/issues/834).
/// A == short mode
/// B == middle mode
/// C == long mode
//
// This implementation is a workaround. Waiting for the pyo3 enum feature.
// ref: [PyO3 issue #834](https://github.com/PyO3/pyo3/issues/834).
#[pyclass(module = "sudachi.tokenizer", name = "SplitMode")]
#[derive(Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct PySplitMode {
mode: u8,
}
@@ -47,16 +52,6 @@ impl PySplitMode {
pub const C: Self = Self { mode: 2 };
}

impl From<Mode> for PySplitMode {
fn from(mode: Mode) -> Self {
match mode {
Mode::A => PySplitMode::A,
Mode::B => PySplitMode::B,
Mode::C => PySplitMode::C,
}
}
}

impl From<PySplitMode> for Mode {
fn from(mode: PySplitMode) -> Self {
match mode {
@@ -67,18 +62,6 @@ impl From<PySplitMode> for Mode {
}
}

impl std::str::FromStr for PySplitMode {
type Err = &'static str;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"A" | "a" => Ok(PySplitMode::A),
"B" | "b" => Ok(PySplitMode::B),
"C" | "c" => Ok(PySplitMode::C),
_ => Err("Mode must be one of \"A\", \"B\", or \"C\" (in lower or upper case)."),
}
}
}

#[pyclass(module = "sudachi.tokenizer", name = "Tokenizer")]
pub struct PyTokenizer {
tokenizer: StatelessTokenizer<Arc<JapaneseDictionary>>,
@@ -93,23 +76,33 @@ impl PyTokenizer {

#[pymethods]
impl PyTokenizer {
#[classattr]
#[allow(non_snake_case)]
fn SplitMode() -> PySplitMode {
PySplitMode::C
}

/// Break text into morphemes
#[pyo3(text_signature = "($self, text, /, mode, enable_debug)")]
#[args(text, mode = "None", enable_debug = "None")]
///
/// This ignores the logger provided
#[pyo3(text_signature = "($self, text, /, mode, logger)")]
#[args(text, mode = "None", logger = "None")]
#[allow(unused_variables)]
fn tokenize(
&self,
text: &str,
mode: Option<PySplitMode>,
enable_debug: Option<bool>, // want to take logger instead of debug flag
logger: Option<PyObject>,
) -> PyResult<PyMorphemeListWrapper> {
let mode: Mode = match mode {
Some(m) => m.into(),
None => self.mode,
};
let enable_debug = false;

let morphemes = self
.tokenizer
.tokenize(text, mode, enable_debug.unwrap_or(false))
.tokenize(text, mode, enable_debug)
.map_err(|e| {
PyException::new_err(format!("Error while tokenization: {}", e.to_string()))
})?
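Pulled together, the tokenizer-side changes give the Python API a `SplitMode` with `A`/`B`/`C` constants (the pyo3-enum workaround described in the comment above) and a `tokenize(text, mode, logger)` signature in which `logger` is accepted for SudachiPy compatibility but ignored. A sketch under the same import-path assumption:

```python
from sudachi import Dictionary, SplitMode  # top-level import path is an assumption

tokenizer = Dictionary().create(mode=SplitMode.B)

# The per-call mode overrides the tokenizer's default; logger is accepted but unused.
morphemes = tokenizer.tokenize("外国人参政権", mode=SplitMode.A, logger=None)
```

Note that the new `SplitMode` classattr on `Tokenizer` evaluates to the `C` mode value rather than to the `SplitMode` class itself, matching what `fn SplitMode()` above returns.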
16 changes: 16 additions & 0 deletions python/src/word_info.rs
@@ -1,3 +1,19 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use pyo3::prelude::*;

use sudachi::dic::lexicon::word_infos::WordInfo;