forked from y-scope/clp
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add initial implementation of regex_utils library containing a regex …
…to wildcard string translator and a corresponding std::error_code enum and category. (y-scope#482) Co-authored-by: Bingran Hu <[email protected]> Co-authored-by: davidlion <[email protected]> Co-authored-by: Lin Zhihao <[email protected]> Co-authored-by: Kirk Rodrigues <[email protected]>
- Loading branch information
1 parent
24e4690
commit 44aaff9
Showing
11 changed files
with
611 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Header files that belong to the regex_utils library.
set(REGEX_UTILS_HEADER_LIST
        "constants.hpp"
        "ErrorCode.hpp"
        "regex_translation_utils.hpp"
        "RegexToWildcardTranslatorConfig.hpp"
)

# The library target, built from its sources plus the headers listed above.
add_library(regex_utils
        ErrorCode.cpp
        regex_translation_utils.cpp
        ${REGEX_UTILS_HEADER_LIST}
)

# Namespaced alias for the same target.
add_library(clp::regex_utils ALIAS regex_utils)

# Include roots used only when compiling this target.
target_include_directories(regex_utils PRIVATE ../ "${PROJECT_SOURCE_DIR}/submodules")

# Compile this target with at least C++20.
target_compile_features(regex_utils PRIVATE cxx_std_20)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
#include "regex_utils/ErrorCode.hpp" | ||
|
||
#include <string> | ||
#include <string_view> | ||
#include <system_error> | ||
|
||
namespace clp::regex_utils { | ||
using std::error_code; | ||
|
||
namespace { | ||
using std::error_category; | ||
using std::string; | ||
using std::string_view; | ||
|
||
/**
 * Class for giving the error codes more detailed string descriptions.
 *
 * Marked `final` since it isn't meant to be derived from.
 */
class ErrorCodeCategory final : public error_category {
public:
    /**
     * @return The class of errors.
     */
    [[nodiscard]] auto name() const noexcept -> char const* override;

    /**
     * @param ev The error code encoded in int.
     * @return The descriptive message for the error.
     */
    [[nodiscard]] auto message(int ev) const -> string override;
};
|
||
/**
 * @return The name of this error category.
 */
auto ErrorCodeCategory::name() const noexcept -> char const* {
    constexpr char const* cCategoryName{"regex utility"};
    return cCategoryName;
}
|
||
auto ErrorCodeCategory::message(int ev) const -> string { | ||
switch (static_cast<ErrorCode>(ev)) { | ||
case ErrorCode::Success: | ||
return "Success."; | ||
|
||
case ErrorCode::IllegalState: | ||
return "Unrecognized state."; | ||
|
||
case ErrorCode::UntranslatableStar: | ||
return "Unable to express regex quantifier `*` in wildcard, which repeats a token for " | ||
"zero or more occurences, unless it is combined with a wildcard `.`"; | ||
|
||
case ErrorCode::UntranslatablePlus: | ||
return "Unable to express regex quantifier `+` in wildcard, which repeats a token for " | ||
"one or more occurences, unless it is combined with a wildcard `.`"; | ||
|
||
case ErrorCode::UnsupportedQuestionMark: | ||
return "Unable to express regex quantifier `?` in wildcard, which makes the preceding " | ||
"token optional, unless the translator supports returning a list of possible " | ||
"wildcard translations."; | ||
|
||
case ErrorCode::UnsupportedPipe: | ||
return "Unable to express regex OR `|` in wildcard, which allows the query string to " | ||
"match a single token out of a series of options, unless the translator " | ||
"supports returning a list of possible wildcard translations."; | ||
|
||
case ErrorCode::IllegalCaret: | ||
return "Failed to translate due to start anchor `^` in the middle of the string."; | ||
|
||
case ErrorCode::IllegalDollarSign: | ||
return "Failed to translate due to end anchor `$` in the middle of the string."; | ||
|
||
case ErrorCode::UnmatchedParenthesis: | ||
return "Unmatched opening `(` or closing `)`."; | ||
|
||
default: | ||
return "(unrecognized error)"; | ||
} | ||
} | ||
|
||
// The category instance referenced by the error codes that `make_error_code` creates.
ErrorCodeCategory const cErrorCodeCategoryInstance{};
} // namespace | ||
|
||
/**
 * @param ec An error code enum.
 * @return An std::error_code holding the integer value of `ec` and this library's error category.
 */
auto make_error_code(ErrorCode ec) -> error_code {
    return error_code{static_cast<int>(ec), cErrorCodeCategoryInstance};
}
} // namespace clp::regex_utils |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
#ifndef CLP_REGEX_UTILS_ERRORCODE_HPP | ||
#define CLP_REGEX_UTILS_ERRORCODE_HPP | ||
|
||
#include <cstdint> | ||
#include <system_error> | ||
#include <type_traits> | ||
|
||
namespace clp::regex_utils { | ||
/**
 * Enum class for propagating and handling various regex utility errors.
 * More detailed descriptions can be found in ErrorCode.cpp.
 */
enum class ErrorCode : std::uint8_t {
    // No error.
    Success = 0,
    // Unrecognized state.
    IllegalState,
    // Regex quantifier `*` that can't be expressed in wildcard.
    UntranslatableStar,
    // Regex quantifier `+` that can't be expressed in wildcard.
    UntranslatablePlus,
    // Regex quantifier `?`, which can't be expressed in a single wildcard translation.
    UnsupportedQuestionMark,
    // Regex OR `|`, which can't be expressed in a single wildcard translation.
    UnsupportedPipe,
    // Start anchor `^` in the middle of the string.
    IllegalCaret,
    // End anchor `$` in the middle of the string.
    IllegalDollarSign,
    // Unmatched opening `(` or closing `)`.
    UnmatchedParenthesis,
};
|
||
/** | ||
* Wrapper function to turn a regular enum class into an std::error_code. | ||
* | ||
* @param ec An error code enum. | ||
* @return The corresponding std::error_code type variable. | ||
*/ | ||
[[nodiscard]] auto make_error_code(ErrorCode ec) -> std::error_code; | ||
} // namespace clp::regex_utils | ||
|
||
namespace std { | ||
template <> | ||
struct is_error_code_enum<clp::regex_utils::ErrorCode> : true_type {}; | ||
} // namespace std | ||
|
||
#endif // CLP_REGEX_UTILS_ERRORCODE_HPP |
46 changes: 46 additions & 0 deletions
46
components/core/src/clp/regex_utils/RegexToWildcardTranslatorConfig.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
#ifndef CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP | ||
#define CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP | ||
|
||
namespace clp::regex_utils { | ||
/**
 * Allows users to customize and fine tune how to translate a regex string to wildcard.
 *
 * This class won't affect the core logic and state transition mechanics of the regex to wildcard
 * translator, but it can make the translator more versatile. For detailed descriptions of how each
 * option should be used, see the getter function docstrings.
 */
class RegexToWildcardTranslatorConfig {
public:
    /**
     * @param case_insensitive_wildcard See `case_insensitive_wildcard()`.
     * @param add_prefix_suffix_wildcards See `add_prefix_suffix_wildcards()`.
     */
    RegexToWildcardTranslatorConfig(
            bool case_insensitive_wildcard,
            bool add_prefix_suffix_wildcards
    )
            : m_case_insensitive_wildcard{case_insensitive_wildcard},
              m_add_prefix_suffix_wildcards{add_prefix_suffix_wildcards} {}

    /**
     * @return True if the final translated wildcard string will be fed into a case-insensitive
     * wildcard analyzer. In such cases, we can safely translate charset patterns such as [aA] [Bb]
     * into singular lowercase characters a, b.
     */
    [[nodiscard]] auto case_insensitive_wildcard() const -> bool {
        return m_case_insensitive_wildcard;
    }

    /**
     * @return True if in the absence of starting or ending anchors in the regex string, we append
     * prefix or suffix zero or more characters wildcards. In other words, this config is true if
     * the search is a substring search, and false if the search is an exact search.
     */
    [[nodiscard]] auto add_prefix_suffix_wildcards() const -> bool {
        return m_add_prefix_suffix_wildcards;
    }

private:
    // Variables
    bool m_case_insensitive_wildcard;
    bool m_add_prefix_suffix_wildcards;
};
} // namespace clp::regex_utils | ||
|
||
#endif // CLP_REGEX_UTILS_REGEXTOWILDCARDTRANSLATORCONFIG_HPP |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#ifndef CLP_REGEX_UTILS_CONSTANTS_HPP | ||
#define CLP_REGEX_UTILS_CONSTANTS_HPP | ||
|
||
namespace clp::regex_utils { | ||
// NOTE: These are `inline` so that every translation unit including this header refers to the same
// entities rather than each getting its own internal-linkage copy.

// Wildcard meta characters
inline constexpr char cZeroOrMoreCharsWildcard{'*'};
inline constexpr char cSingleCharWildcard{'?'};

// Regex meta characters
inline constexpr char cRegexZeroOrMore{'*'};
inline constexpr char cRegexOneOrMore{'+'};
inline constexpr char cRegexZeroOrOne{'?'};
inline constexpr char cRegexStartAnchor{'^'};
inline constexpr char cRegexEndAnchor{'$'};
inline constexpr char cEscapeChar{'\\'};
inline constexpr char cCharsetNegate{'^'};
} // namespace clp::regex_utils | ||
|
||
#endif // CLP_REGEX_UTILS_CONSTANTS_HPP |
Oops, something went wrong.