Skip to content

Commit

Permalink
use crate::errors to send err
Browse files Browse the repository at this point in the history
  • Loading branch information
mh-northlander committed Oct 25, 2024
1 parent d78bf49 commit c943da8
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 92 deletions.
4 changes: 2 additions & 2 deletions python/src/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ fn as_data_source<'p>(py: Python<'p>, data: &'p PyAny) -> PyResult<DataSource<'p
let data = data.downcast::<PyBytes>()?;
Ok(DataSource::Data(data.as_bytes()))
} else {
Err(pyo3::exceptions::PyValueError::new_err(format!(
"data source should can be only Path, bytes or str, was {}: {}",
errors::wrap(Err(format!(
"data source should be Path, bytes or str, was {}: {}",
data,
data.get_type()
)))
Expand Down
67 changes: 29 additions & 38 deletions python/src/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::str::FromStr;
use std::sync::Arc;
use sudachi::analysis::Mode;

use crate::errors::{wrap, wrap_ctx, SudachiError as SudachiErr};
use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::config::{Config, ConfigBuilder, SurfaceProjection};
use sudachi::dic::dictionary::JapaneseDictionary;
Expand All @@ -35,6 +34,7 @@ use sudachi::plugin::input_text::InputTextPlugin;
use sudachi::plugin::oov::OovProviderPlugin;
use sudachi::plugin::path_rewrite::PathRewritePlugin;

use crate::errors;
use crate::morpheme::{PyMorphemeListWrapper, PyProjector};
use crate::pos_matcher::PyPosMatcher;
use crate::pretokenizer::PyPretokenizer;
Expand Down Expand Up @@ -110,7 +110,7 @@ impl PyDictionary {
config: Option<&PyAny>,
) -> PyResult<Self> {
if config.is_some() && config_path.is_some() {
return Err(SudachiErr::new_err("Both config and config_path options were specified at the same time, use one of them"));
return errors::wrap(Err("Both config and config_path options were specified at the same time, use one of them"));
}

let default_config = read_default_config(py)?;
Expand All @@ -131,13 +131,10 @@ impl PyDictionary {
};

if dict_type.is_some() {
let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn(
errors::warn_deprecation(
py,
cat,
"Parameter dict_type of Dictionary() is deprecated, use dict instead",
1,
)?;
)?
}

let config_builder = match resource_dir {
Expand Down Expand Up @@ -177,12 +174,10 @@ impl PyDictionary {
}
}

let jdic = JapaneseDictionary::from_cfg(&config).map_err(|e| {
SudachiErr::new_err(format!(
"Error while constructing dictionary: {}",
e.to_string()
))
})?;
let jdic = errors::wrap_ctx(
JapaneseDictionary::from_cfg(&config),
"Error while constructing dictionary",
)?;

let pos_data = jdic
.grammar()
Expand Down Expand Up @@ -238,7 +233,7 @@ impl PyDictionary {
let mut required_fields = self.config.projection.required_subset();
let dict = self.dictionary.as_ref().unwrap().clone();
let projobj = if let Some(s) = projection {
let proj = wrap(SurfaceProjection::try_from(s.to_str()?))?;
let proj = errors::wrap(SurfaceProjection::try_from(s.to_str()?))?;
required_fields = proj.required_subset();
Some(morpheme_projection(proj, &dict))
} else {
Expand Down Expand Up @@ -301,7 +296,7 @@ impl PyDictionary {
let subset = parse_field_subset(fields)?;
if let Some(h) = handler.as_ref() {
if !h.as_ref(py).is_callable() {
return Err(SudachiErr::new_err("handler must be callable"));
return errors::wrap(Err("handler must be callable"));
}
}

Expand Down Expand Up @@ -357,12 +352,12 @@ impl PyDictionary {
// this needs to be a variable
let mut borrow = l.try_borrow_mut();
let out_list = match borrow {
Err(_) => return Err(SudachiErr::new_err("out was used twice at the same time")),
Ok(ref mut ms) => ms.internal_mut(py),
Err(_) => return errors::wrap(Err("out was used twice at the same time")),
};

out_list.clear();
wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?;
errors::wrap_ctx(out_list.lookup(surface, InfoSubset::all()), surface)?;
Ok(l)
}

Expand All @@ -380,7 +375,7 @@ impl PyDictionary {
}

fn __repr__(&self) -> PyResult<String> {
wrap(config_repr(&self.config))
errors::wrap(config_repr(&self.config))
}
}

Expand Down Expand Up @@ -413,18 +408,21 @@ fn config_repr(cfg: &Config) -> Result<String, std::fmt::Error> {

pub(crate) fn extract_mode<'py>(py: Python<'py>, mode: &'py PyAny) -> PyResult<Mode> {
if mode.is_instance_of::<PyString>() {
let mode = mode.str()?.to_str()?;
Mode::from_str(mode).map_err(|e| SudachiErr::new_err(e).into())
errors::wrap(Mode::from_str(mode.str()?.to_str()?))
} else if mode.is_instance_of::<PySplitMode>() {
let mode = mode.extract::<PySplitMode>()?;
Ok(Mode::from(mode))
} else {
Err(SudachiErr::new_err(("unknown mode", mode.into_py(py))))
errors::wrap(Err(format!(
"mode should be sudachipy.SplitMode or str, was {}: {}",
mode,
mode.get_type()
)))
}
}

fn read_config_from_fs(path: Option<&Path>) -> PyResult<ConfigBuilder> {
wrap(ConfigBuilder::from_opt_file(path))
errors::wrap(ConfigBuilder::from_opt_file(path))
}

fn read_config(config_opt: &PyAny) -> PyResult<ConfigBuilder> {
Expand All @@ -433,13 +431,13 @@ fn read_config(config_opt: &PyAny) -> PyResult<ConfigBuilder> {
// looks like json
if config_str.starts_with("{") && config_str.ends_with("}") {
let result = ConfigBuilder::from_bytes(config_str.as_bytes());
return wrap(result);
return errors::wrap(result);
}
let p = Path::new(config_str);
if p.exists() && p.is_file() {
return read_config_from_fs(Some(p));
}
return Err(SudachiErr::new_err(format!(
return errors::wrap(Err(format!(
"config file [{}] do not exist or is not a file",
p.display()
)));
Expand All @@ -450,17 +448,18 @@ fn read_config(config_opt: &PyAny) -> PyResult<ConfigBuilder> {
let cfg_as_str = config_opt.call_method0("as_jsons")?;
return read_config(cfg_as_str);
}
Err(SudachiErr::new_err((
format!("passed config was not a string, json object or sudachipy.config.Config object"),
config_opt.into_py(py),
errors::wrap(Err(format!(
"config should be sudachipy.Config or str which represents a file path or json obj, was {}: {}",
config_opt,
config_opt.get_type()
)))
}

pub(crate) fn read_default_config(py: Python) -> PyResult<ConfigBuilder> {
let path = PyModule::import(py, "sudachipy")?.getattr("_DEFAULT_SETTINGFILE")?;
let path = path.downcast::<PyString>()?.to_str()?;
let path = PathBuf::from(path);
wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
errors::wrap_ctx(ConfigBuilder::from_opt_file(Some(&path)), &path)
}

pub(crate) fn get_default_resource_dir(py: Python) -> PyResult<PathBuf> {
Expand All @@ -484,10 +483,7 @@ fn locate_system_dict(py: Python, path: &Path) -> PyResult<PathBuf> {
}
match path.to_str() {
Some(name @ ("small" | "core" | "full")) => find_dict_path(py, name),
_ => Err(SudachiErr::new_err(format!(
"invalid dictionary path {:?}",
path
))),
_ => errors::wrap(Err(format!("invalid dictionary path {:?}", path))),
}
}

Expand All @@ -509,12 +505,7 @@ fn parse_field_subset(data: Option<&PySet>) -> PyResult<InfoSubset> {
"split_a" => InfoSubset::SPLIT_A,
"split_b" => InfoSubset::SPLIT_B,
"synonym_group_id" => InfoSubset::SYNONYM_GROUP_ID,
x => {
return Err(SudachiErr::new_err(format!(
"Invalid WordInfo field name {}",
x
)))
}
x => return errors::wrap(Err(format!("Invalid WordInfo field name {}", x))),
};
}
Ok(subset)
Expand Down
8 changes: 7 additions & 1 deletion python/src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021 Works Applications Co., Ltd.
* Copyright (c) 2021-2024 Works Applications Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,6 +14,8 @@
* limitations under the License.
*/

use pyo3::exceptions::PyDeprecationWarning;
use pyo3::prelude::*;
use pyo3::{import_exception, PyResult};
use std::fmt::{Debug, Display};

Expand All @@ -33,3 +35,7 @@ pub fn wrap_ctx<T, E: Display, C: Debug + ?Sized>(v: Result<T, E>, ctx: &C) -> P
Err(e) => Err(SudachiError::new_err(format!("{:?}: {}", ctx, e))),
}
}

/// Emits a Python `DeprecationWarning` carrying `msg`.
///
/// The warning category is the `PyDeprecationWarning` type object obtained
/// through `py`; it is handed to `PyErr::warn` together with `msg` and `1`
/// as the final (stack level) argument.
///
/// # Errors
///
/// Returns whatever `PyErr::warn` returns, unchanged, so callers can
/// propagate a failure with `?`.
pub fn warn_deprecation(py: Python<'_>, msg: &str) -> PyResult<()> {
    let category = py.get_type::<PyDeprecationWarning>();
    PyErr::warn(py, &category, msg, 1)
}
31 changes: 11 additions & 20 deletions python/src/morpheme.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ use std::fmt::Write;
use std::ops::Deref;
use std::sync::Arc;

use pyo3::exceptions::{PyException, PyIndexError};
use pyo3::exceptions::PyIndexError;
use pyo3::prelude::*;
use pyo3::types::{PyList, PyString, PyTuple, PyType};

use sudachi::prelude::{Morpheme, MorphemeList};

use crate::dictionary::{extract_mode, PyDicData, PyDictionary};
use crate::errors;
use crate::projection::MorphemeProjection;
use crate::word_info::PyWordInfo;

Expand Down Expand Up @@ -92,12 +93,9 @@ impl PyMorphemeListWrapper {
#[classmethod]
#[pyo3(text_signature = "(dict: sudachipy.Dictionary) -> sudachipy.MorphemeList")]
fn empty(_cls: &PyType, py: Python, dict: &PyDictionary) -> PyResult<Self> {
let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn(
errors::warn_deprecation(
py,
cat,
"Use Tokenizer.tokenize(\"\") if you need an empty MorphemeList.",
1,
)?;

let cloned = dict.dictionary.as_ref().unwrap().clone();
Expand Down Expand Up @@ -176,9 +174,7 @@ impl PyMorphemeListWrapper {
list: slf.clone_ref(py),
index: i,
};
pymorph
.write_repr(py, &mut result)
.map_err(|_| PyException::new_err("format failed"))?;
errors::wrap_ctx(pymorph.write_repr(py, &mut result), "format failed")?;
result.push_str(",\n");
}
result.push_str("]>");
Expand Down Expand Up @@ -380,16 +376,14 @@ impl PyMorpheme {
let mut borrow = out_cell.try_borrow_mut();
let out_ref = match borrow {
Ok(ref mut v) => v.internal_mut(py),
Err(_) => return Err(PyException::new_err("out was used twice")),
Err(_) => return errors::wrap(Err("out was used twice at the same time")),
};

out_ref.clear();
let splitted = list
.internal(py)
.split_into(mode, self.index, out_ref)
.map_err(|e| {
PyException::new_err(format!("Error while splitting morpheme: {}", e.to_string()))
})?;
let splitted = errors::wrap_ctx(
list.internal(py).split_into(mode, self.index, out_ref),
"Error while splitting morpheme",
)?;

if add_single.unwrap_or(true) && !splitted {
list.internal(py)
Expand Down Expand Up @@ -433,9 +427,7 @@ impl PyMorpheme {
/// Returns the word info
#[pyo3(text_signature = "($self) -> sudachipy.WordInfo")]
fn get_word_info(&self, py: Python) -> PyResult<PyWordInfo> {
let cat = PyModule::import(py, "builtins")?.getattr("DeprecationWarning")?;
PyErr::warn(py, cat, "Users should not touch the raw WordInfo.", 1)?;

errors::warn_deprecation(py, "Users should not touch the raw WordInfo.")?;
Ok(self.morph(py).get_word_info().clone().into())
}

Expand All @@ -451,8 +443,7 @@ impl PyMorpheme {

pub fn __repr__<'py>(&'py self, py: Python<'py>) -> PyResult<String> {
let mut result = String::new();
self.write_repr(py, &mut result)
.map_err(|_| PyException::new_err("failed to format repr"))?;
errors::wrap_ctx(self.write_repr(py, &mut result), "failed to format repr")?;
Ok(result)
}
}
4 changes: 2 additions & 2 deletions python/src/pos_matcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@

use std::sync::Arc;

use pyo3::exceptions::PyException;
use pyo3::prelude::*;
use pyo3::types::{PyBool, PyIterator, PyTuple};

use sudachi::analysis::stateless_tokenizer::DictionaryAccess;
use sudachi::pos::PosMatcher;

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyMorpheme;

#[pyclass(name = "PosMatcher", module = "sudachipy")]
Expand Down Expand Up @@ -106,7 +106,7 @@ impl PyPosMatcher {
}

if start_len == data.len() {
Err(PyException::new_err(format!(
errors::wrap(Err(format!(
"POS {:?} did not match any elements",
elem.repr()?
)))
Expand Down
4 changes: 2 additions & 2 deletions python/src/pretokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
*/

use crate::dictionary::PyDicData;
use crate::errors::wrap;
use crate::errors;
use crate::morpheme::{PyMorphemeList, PyMorphemeListWrapper, PyProjector};
use pyo3::intern;
use pyo3::prelude::*;
Expand Down Expand Up @@ -49,7 +49,7 @@ impl PerThreadPreTokenizer {

pub fn tokenize(&mut self, data: &str) -> PyResult<()> {
self.tokenizer.reset().push_str(data);
wrap(self.tokenizer.do_tokenize())?;
errors::wrap(self.tokenizer.do_tokenize())?;
Ok(())
}

Expand Down
18 changes: 7 additions & 11 deletions python/src/projection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

use crate::dictionary::PyDicData;
use crate::errors;
use crate::morpheme::PyProjector;
use pyo3::types::PyString;
use pyo3::{PyResult, Python};
Expand Down Expand Up @@ -174,18 +175,13 @@ pub(crate) fn parse_projection_raw<D: DictionaryAccess>(
value: &str,
dict: &D,
) -> PyResult<(PyProjector, SurfaceProjection)> {
match SurfaceProjection::try_from(value) {
Ok(v) => {
if v == SurfaceProjection::Surface {
Ok((None, SurfaceProjection::Surface))
} else {
Ok((Some(morpheme_projection(v, dict)), v))
}
errors::wrap_ctx(SurfaceProjection::try_from(value).map(|v| {
if v == SurfaceProjection::Surface {
(None, SurfaceProjection::Surface)
} else {
(Some(morpheme_projection(v, dict)), v)
}
Err(e) => Err(crate::errors::SudachiError::new_err(format!(
"invalid surface projection: {e:?}"
))),
}
}), "invalid surface projection")
}

pub(crate) fn parse_projection_opt<D: DictionaryAccess>(
Expand Down
Loading

0 comments on commit c943da8

Please sign in to comment.