Skip to content

Commit

Permalink
Fix splitting of words containing an apostrophe ('), e.g., I've
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Oct 26, 2023
1 parent fcde4c4 commit 1705dde
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion sherpa-onnx/csrc/text-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ template bool SplitStringToFloats(const std::string &full, const char *delim,
bool omit_empty_strings,
std::vector<double> *out);

// Returns true if `c` is a punctuation character other than the apostrophe.
//
// The apostrophe (') is deliberately not reported as punctuation, so that it
// can remain part of words such as "I've".
//
// `c` is converted to unsigned char before it is handed to std::ispunct():
// the <cctype> classification functions have undefined behavior when given a
// negative value other than EOF, which is what a non-ASCII byte becomes on
// platforms where plain char is signed.
static bool IsPunct(char c) {
  return c != '\'' && std::ispunct(static_cast<unsigned char>(c)) != 0;
}

static std::vector<std::string> MergeCharactersIntoWords(
const std::vector<std::string> &words) {
std::vector<std::string> ans;
Expand All @@ -174,7 +176,7 @@ static std::vector<std::string> MergeCharactersIntoWords(
while (i < n) {
const auto &w = words[i];
if (w.size() > 1 ||
(w.size() == 1 && (std::ispunct(w[0]) || std::isspace(w[0])))) {
(w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) {
if (prev != -1) {
std::string t;
for (; prev < i; ++prev) {
Expand Down

0 comments on commit 1705dde

Please sign in to comment.