diff --git a/crates/swc_icu_messageformat_parser/src/parser.rs b/crates/swc_icu_messageformat_parser/src/parser.rs index c7d3feec..653be8da 100644 --- a/crates/swc_icu_messageformat_parser/src/parser.rs +++ b/crates/swc_icu_messageformat_parser/src/parser.rs @@ -831,8 +831,14 @@ impl<'s> Parser<'s> { while !self.is_eof() && is_potential_element_name_char(self.char()) { self.bump(); } - - &self.message[start_offset..self.offset()] + #[cfg(feature = "utf16")] + return Box::leak( + self.message_utf16[start_offset..self.offset()] + .to_string() + .into_boxed_str(), + ); + #[cfg(not(feature = "utf16"))] + return &self.message[start_offset..self.offset()]; } fn parse_literal(&self, nesting_level: usize, parent_arg_type: &str) -> Result { @@ -1735,9 +1741,16 @@ impl<'s> Parser<'s> { if self.is_eof() { return None; } - self.message[self.offset() + self.char().len_utf8()..] + + #[cfg(feature = "utf16")] + return self.message_utf16[self.offset() + self.char().len_utf16()..] .chars() - .next() + .next(); + + #[cfg(not(feature = "utf16"))] + return self.message[self.offset() + self.char().len_utf8()..] + .chars() + .next(); } /// Returns true if the next call to `bump` would return false. 
diff --git a/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 b/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 new file mode 100644 index 00000000..0d4408eb --- /dev/null +++ b/crates/swc_icu_messageformat_parser/tests/fixtures/unicode_1 @@ -0,0 +1,43 @@ +<a>ö🚀</a> +--- +{} +--- +{ + "err": null, + "val": [ + { + "children": [ + { + "location": { + "end": { + "column": 6, + "line": 1, + "offset": 6 + }, + "start": { + "column": 4, + "line": 1, + "offset": 3 + } + }, + "type": 0, + "value": "ö🚀" + } + ], + "location": { + "end": { + "column": 10, + "line": 1, + "offset": 10 + }, + "start": { + "column": 1, + "line": 1, + "offset": 0 + } + }, + "type": 8, + "value": "a" + } + ] +} \ No newline at end of file diff --git a/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs b/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs index ac3e0fef..2845569f 100644 --- a/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs +++ b/crates/swc_icu_messageformat_parser/tests/run_parser_e2e.rs @@ -38,6 +38,7 @@ fn read_sections(file: PathBuf) -> TestFixtureSections { fixture("tests/fixtures/treat_unicode_nbsp_as_whitespace") )] #[cfg_attr(feature = "utf16", fixture("tests/fixtures/trivial_2"))] +#[cfg_attr(feature = "utf16", fixture("tests/fixtures/unicode_1"))] #[fixture("tests/fixtures/uppercase_tag_1")] #[fixture("tests/fixtures/expect_number_arg_skeleton_token_1")] #[fixture("tests/fixtures/self_closing_tag_1")]