Skip to content

Commit

Permalink
Merge anon namespaces; tweak some comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlion committed Jul 19, 2024
1 parent ca648cc commit 506c7c7
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 93 deletions.
185 changes: 93 additions & 92 deletions components/core/src/clp/regex_utils/regex_translation_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,9 @@ using std::string_view;

namespace clp::regex_utils {
namespace {
// Class for storing regex translation analysis states, capture group, quantifier information, etc.
class TranslatorState;

/**
* Functions that handle current-state-specific tasks before transitioning to the next state.
*
* @param[in, out] state The object that stores translator's internal information. The primary
* state member variable is always updated if a transition occurs. Even if there's no state
* transition, other analysis info may be updated.
* @param[in, out] it The iterator that represents the current regex string scan position. May be
* updated to advance or backtrack the scan position.
* @param[out] wildcard_str The translated wildcard string. May or may not be updated.
* @param[in] config The translator config.
* @return 0 (clp::regex_utils::ErrorCode::Success) upon successful operation. Otherwise, return
* related error code.
* Class for storing regex translation analysis states, capture group, quantifier information, etc.
*/
using StateTransitionFuncSig
= auto(TranslatorState& state,
string_view::const_iterator& it,
string& wildcard_str,
RegexToWildcardTranslatorConfig const& config) -> error_code;
// Appends literal characters to the wildcard string; meta-characters either trigger a state
// transition or make the regex untranslatable.
[[nodiscard]] StateTransitionFuncSig normal_state_transition;
// Attempts to translate regex wildcard patterns that start with `.` (`.*`, `.+`, `.`).
[[nodiscard]] StateTransitionFuncSig dot_state_transition;
// Disallows further characters after an end anchor has been encountered.
[[nodiscard]] StateTransitionFuncSig end_state_transition;
// Performs any special handling required once the whole regex string has been scanned.
[[nodiscard]] StateTransitionFuncSig final_state_cleanup;

class TranslatorState {
public:
/**
Expand Down Expand Up @@ -76,65 +53,56 @@ class TranslatorState {
// Members
RegexPatternState m_state{RegexPatternState::NORMAL};
};
} // namespace

// Convenience overload: translates `regex_str` with a config built from `{false, false}`.
// NOTE(review): presumably both flags being false disables every optional translator
// behaviour (e.g. prefix/suffix wildcards) -- confirm against the config's constructor.
auto regex_to_wildcard(string_view regex_str) -> OUTCOME_V2_NAMESPACE::std_result<string> {
    RegexToWildcardTranslatorConfig const default_config{false, false};
    return regex_to_wildcard(regex_str, default_config);
}

// Translates `regex_str` into a wildcard string by scanning it once and dispatching each
// position to the handler for the translator's current state.
//
// Returns the translated wildcard string on success; otherwise returns the first non-zero error
// code reported by a state handler or by the final cleanup step.
auto regex_to_wildcard(string_view regex_str, RegexToWildcardTranslatorConfig const& config)
        -> OUTCOME_V2_NAMESPACE::std_result<string> {
    // An empty regex translates to an empty wildcard string.
    if (regex_str.empty()) {
        return string{};
    }

    TranslatorState state;
    string wildcard_str;
    auto it = regex_str.cbegin();

    // A leading start anchor is consumed without emitting anything. Without one, a multichar
    // wildcard prefix is emitted only when the config asks for prefix/suffix wildcards.
    bool const starts_with_anchor{cRegexStartAnchor == *it};
    if (starts_with_anchor) {
        ++it;
    } else if (config.add_prefix_suffix_wildcards()) {
        wildcard_str += cZeroOrMoreCharsWildcard;
    }

    // The handlers take `it` by reference and may move it themselves; the loop then advances it
    // by one after every successful step.
    auto const regex_end = regex_str.cend();
    for (; regex_end != it; ++it) {
        error_code ec{};
        switch (state.get_state()) {
            case TranslatorState::RegexPatternState::NORMAL:
                ec = normal_state_transition(state, it, wildcard_str, config);
                break;
            case TranslatorState::RegexPatternState::DOT:
                ec = dot_state_transition(state, it, wildcard_str, config);
                break;
            case TranslatorState::RegexPatternState::END:
                ec = end_state_transition(state, it, wildcard_str, config);
                break;
            default:
                ec = ErrorCode::IllegalState;
                break;
        }
        if (ec) {
            return ec;
        }
    }

    // Give the state machine a chance to handle whatever state the scan ended in.
    error_code cleanup_ec{final_state_cleanup(state, it, wildcard_str, config)};
    if (cleanup_ec) {
        return cleanup_ec;
    }
    return wildcard_str;
}
/**
* Signature of the functions that handle current-state-specific tasks before transitioning to the
* next state.
*
* @param[in, out] state The object that stores the translator's internal information. The primary
* state member variable is always updated if a transition occurs. Even if there's no state
* transition, other analysis info may be updated.
* @param[in, out] it The iterator that represents the current regex string scan position. May be
* updated to advance or backtrack the scan position.
* @param[out] wildcard_str The translated wildcard string. May or may not be updated.
* @param[in] config The translator config.
* @return clp::regex_utils::ErrorCode
*/
using StateTransitionFuncSig
= auto(TranslatorState& state,
string_view::const_iterator& it,
string& wildcard_str,
RegexToWildcardTranslatorConfig const& config) -> error_code;

namespace {
/**
* Treats each character literally and directly appends it to the wildcard string, unless it is a
* meta-character.
*
* Each meta-character either triggers a state transition or makes the regex string untranslatable.
*/
[[nodiscard]] StateTransitionFuncSig normal_state_transition;

/**
* Attempts to translate regex wildcard patterns that start with the `.` character.
*
* Performs the following translations if possible:
* <ul>
* <li> `.*` gets translated into `*`</li>
* <li> `.+` gets translated into `?*`</li>
* <li> `.` gets translated into `?`</li>
* </ul>
*/
[[nodiscard]] StateTransitionFuncSig dot_state_transition;

/**
* Disallows the appearance of any other characters after an end anchor has been encountered in
* the string.
*/
[[nodiscard]] StateTransitionFuncSig end_state_transition;

/**
* Performs the special handling that states other than the NORMAL state may require after the
* whole regex string has been scanned and processed.
*/
[[nodiscard]] StateTransitionFuncSig final_state_cleanup;

auto normal_state_transition(
TranslatorState& state,
string_view::const_iterator& it,
Expand Down Expand Up @@ -168,16 +136,6 @@ auto normal_state_transition(
return ErrorCode::Success;
}

/**
* Attempts to translate regex wildcard patterns that start with `.` character.
*
* Performs the following translation if possible:
* <ul>
* <li> `.*` gets translated into `*`</li>
* <li> `.+` gets translated into `?*`</li>
* <li> `.` gets translated into `?`</li>
* </ul>
*/
auto dot_state_transition(
TranslatorState& state,
string_view::const_iterator& it,
Expand All @@ -201,9 +159,6 @@ auto dot_state_transition(
return ErrorCode::Success;
}

/**
* Disallows the appearances of other characters after encountering an end anchor in the string.
*/
auto end_state_transition(
[[maybe_unused]] TranslatorState& state,
string_view::const_iterator& it,
Expand All @@ -216,10 +171,6 @@ auto end_state_transition(
return ErrorCode::Success;
}

/**
* States other than the NORMAL state may require special handling after the whole regex string has
* been scanned and processed.
*/
auto final_state_cleanup(
TranslatorState& state,
[[maybe_unused]] string_view::const_iterator& it,
Expand All @@ -244,4 +195,54 @@ auto final_state_cleanup(
return ErrorCode::Success;
}
} // namespace

// Convenience overload: translates `regex_str` with a config built from `{false, false}`.
// NOTE(review): presumably both flags being false disables every optional translator
// behaviour (e.g. prefix/suffix wildcards) -- confirm against the config's constructor.
auto regex_to_wildcard(string_view regex_str) -> OUTCOME_V2_NAMESPACE::std_result<string> {
    RegexToWildcardTranslatorConfig const default_config{false, false};
    return regex_to_wildcard(regex_str, default_config);
}

// Translates `regex_str` into a wildcard string by scanning it once and dispatching each
// position to the handler for the translator's current state.
//
// Returns the translated wildcard string on success; otherwise returns the first non-zero error
// code reported by a state handler or by the final cleanup step.
auto regex_to_wildcard(string_view regex_str, RegexToWildcardTranslatorConfig const& config)
        -> OUTCOME_V2_NAMESPACE::std_result<string> {
    // An empty regex translates to an empty wildcard string.
    if (regex_str.empty()) {
        return string{};
    }

    TranslatorState state;
    string wildcard_str;
    auto it = regex_str.cbegin();

    // A leading start anchor is consumed without emitting anything. Without one, a multichar
    // wildcard prefix is emitted only when the config asks for prefix/suffix wildcards.
    bool const starts_with_anchor{cRegexStartAnchor == *it};
    if (starts_with_anchor) {
        ++it;
    } else if (config.add_prefix_suffix_wildcards()) {
        wildcard_str += cZeroOrMoreCharsWildcard;
    }

    // The handlers take `it` by reference and may move it themselves; the loop then advances it
    // by one after every successful step.
    auto const regex_end = regex_str.cend();
    for (; regex_end != it; ++it) {
        error_code ec{};
        switch (state.get_state()) {
            case TranslatorState::RegexPatternState::NORMAL:
                ec = normal_state_transition(state, it, wildcard_str, config);
                break;
            case TranslatorState::RegexPatternState::DOT:
                ec = dot_state_transition(state, it, wildcard_str, config);
                break;
            case TranslatorState::RegexPatternState::END:
                ec = end_state_transition(state, it, wildcard_str, config);
                break;
            default:
                ec = ErrorCode::IllegalState;
                break;
        }
        if (ec) {
            return ec;
        }
    }

    // Give the state machine a chance to handle whatever state the scan ended in.
    error_code cleanup_ec{final_state_cleanup(state, it, wildcard_str, config)};
    if (cleanup_ec) {
        return cleanup_ec;
    }
    return wildcard_str;
}
} // namespace clp::regex_utils
2 changes: 1 addition & 1 deletion components/core/tests/test-regex_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ TEST_CASE("regex_to_wildcard", "[regex_utils][regex_to_wildcard]") {
REQUIRE((regex_to_wildcard(". xyz ^.* zyx .").error() == ErrorCode::IllegalCaret));
}

// Test anchors and prefix/suffix wildcards
TEST_CASE("regex_to_wildcard_anchor_config", "[regex_utils][regex_to_wildcard][anchor_config]") {
// Test anchors and prefix/suffix wildcards
RegexToWildcardTranslatorConfig const config{false, true};
REQUIRE(((regex_to_wildcard("^", config).value() == "*")));
REQUIRE((regex_to_wildcard("$", config).value() == "*"));
Expand Down

0 comments on commit 506c7c7

Please sign in to comment.