Apply suggestions from code review

Co-authored-by: kirkrodrigues <[email protected]>
y-scope · Jun 26, 2024 · 6696553 · 6696553
1 parent db6b089
commit 6696553
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 34 deletions.
diff --git a/components/core/src/clp/ffi/utils.hpp b/components/core/src/clp/ffi/utils.hpp
@@ -9,11 +9,11 @@
 
 namespace clp::ffi {
 /**
- * Validates whether the given string is UTF-8 encoded, and escapes any characters to generate to
- * make the string compatible with JSON specification.
+ * Validates whether the given string is UTF-8 encoded, and escapes any characters to make the
+ * string compatible with the JSON specification.
  * @param raw The raw string to escape.
  * @return The escaped string on success.
- * @return std::nullopt if the string contains none-UTF8 encoded byte sequence.
+ * @return std::nullopt if the string contains any non-UTF-8-encoded byte sequences.
  */
 [[nodiscard]] auto validate_and_escape_utf8_string(std::string_view raw
 ) -> std::optional<std::string>;
@@ -27,8 +27,7 @@ namespace clp::ffi {
 /**
  * Validates whether the given string is UTF-8 encoded, optionally escaping ASCII characters using
  * the given handler.
- * @tparam EscapeHandler Method to optionally escape any ASCII character in the string. Signature:
- * (std::string_view::const_iterator it_ascii_char) -> void
+ * @tparam EscapeHandler Method to optionally escape any ASCII character in the string.
  * @param src
  * @param escape_handler
  * @return Whether the input is a valid UTF-8 encoded string.
@@ -39,25 +38,16 @@ generic_validate_utf8_string(std::string_view src, EscapeHandler escape_handler)
 
 namespace utils_hpp {
 /**
- * Validates whether the given byte is a valid UTF-8 header with continuation bytes, and set code
- * point and code point range accordingly.
- * The valid code point range is defined as following:
- * .----------------------------------------------------------.
- * | Continuation Length | First Code Point | Last Code Point |
- * |---------------------|------------------|-----------------|
- * | 1 Byte              | 0x80             | 0x7FF           |
- * | 2 Byte              | 0x800            | 0xFFFF          |
- * | 3 Byte              | 0x10000          | 0x10FFFF        |
- * |---------------------|------------------|-----------------|
- * @param header Input byte to validate
- * @param num_continuation_bytes Outputs the number of continuation bytes corresponded to the header
- * byte, if the header is valid.
- * @param code_point Outputs the code extracted from the header byte, if the header is valid.
- * @param code_point_lower_bound Outputs the lower bound of the valid code point range corresponded
- * with the header byte, if the header if valid.
- * @param code_point_upper_bound Outputs the upper bound of the valid code point range corresponded
- * with the header byte, if the header if valid.
- * @return Whether the input byte is a valid header byte.
+ * Validates whether the given byte is a valid lead byte for a multi-byte UTF-8 character, parses
+ * the byte, and returns the parsed properties as well as associated properties.
+ * @param header Byte to validate.
+ * @param num_continuation_bytes Returns the number of continuation bytes expected.
+ * @param code_point Returns the code point bits parsed from the lead byte.
+ * @param code_point_lower_bound Returns the lower bound of the code point range for the UTF-8
+ * character.
+ * @param code_point_upper_bound Returns the upper bound of the code point range for the UTF-8
+ * character.
+ * @return Whether the input byte is a valid lead byte for a multi-byte UTF-8 character.
  */
 [[nodiscard]] auto validate_header_byte_and_set_code_point(
         uint8_t header,
@@ -75,16 +65,16 @@ namespace utils_hpp {
 
 /*
  * @param byte
- * @return Whether the input byte is a valid UTF-8 continuation byte. A valid UTF-8 continuation
- * byte should match 0b10xx_xxxx.
+ * @return Whether the input byte is a valid UTF-8 continuation byte.
  */
 [[nodiscard]] auto is_valid_utf8_continuation_byte(uint8_t byte) -> bool;
 
 /**
- * Updates the code point by applying the payload of the given continuation byte.
+ * Parses the code-point bits from the given continuation byte and combines them with the given
+ * code point.
  * @param code_point
  * @param continuation_byte
- * @return Updated code point.
+ * @return The updated code point.
  */
 [[nodiscard]] auto update_code_point(uint32_t code_point, uint8_t continuation_byte) -> uint32_t;
 }  // namespace utils_hpp
@@ -130,7 +120,7 @@ auto generic_validate_utf8_string(std::string_view src, EscapeHandler escape_han
     }
 
     if (0 != num_continuation_bytes_to_validate) {
-        // Incomplete continuation byte sequence
+        // Incomplete UTF-8 character
         return false;
     }
 

diff --git a/components/core/tests/test-ffi_utils.cpp b/components/core/tests/test-ffi_utils.cpp
@@ -20,14 +20,14 @@ namespace {
  * Gets an expected escaped string by first convert the raw string into a json string and then dumps
  * the a printable string using nlohmann::json.
  * @param raw
- * @return Escaped string dumped by nlohmann::json, with surrounding '"' dropped.
+ * @return The input string after escaping any characters that are invalid in JSON strings.
  */
 [[nodiscard]] auto get_expected_escaped_string(std::string_view raw) -> std::string;
 
 /**
- * Generates a UTF-8 encoded byte sequence of a given code point with the given number of
- * continuation bytes. The range of the code point is not validated, which means the generated byte
- * sequence can be overlong.
+ * Generates a UTF-8 encoded byte sequence with the given code point and number of continuation
+ * bytes. The range of the code point is not validated, which means the generated byte sequence can
+ * be invalid (overlong or exceeding the valid range of UTF-8 code points).
  * @param code_point
  * @param num_continuation_bytes
  * @return The encoded UTF-8 byte sequence.
@@ -98,7 +98,7 @@ TEST_CASE("escape_utf8_string_basic", "[ffi][utils]") {
     actual = validate_and_escape_utf8_string(test_str);
     REQUIRE((actual.has_value() && actual.value() == get_expected_escaped_string(test_str)));
 
-    // Test valid UTF8 chars with continuation bytes
+    // Test valid UTF-8 chars with continuation bytes
     std::vector<std::string> const valid_utf8{
             "\n",
             "\xF0\xA0\x80\x8F",  // https://en.wiktionary.org/wiki/%F0%A0%80%8F
@@ -147,6 +147,7 @@ TEST_CASE("escape_utf8_string_with_invalid_continuation", "[ffi][utils]") {
     test_str = valid_utf8_byte_sequence;
     constexpr char cInvalidHeaderByte{'\xFF'};
     test_str.front() = cInvalidHeaderByte;
+    REQUIRE((false == is_utf8_encoded(test_str)));
     REQUIRE((false == validate_and_escape_utf8_string(test_str).has_value()));
 
     // Test invalid continuation bytes