diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c
index daa4f3dec..75511e8fe 100644
--- a/src/xkbcomp/rules.c
+++ b/src/xkbcomp/rules.c
@@ -1101,13 +1101,39 @@ read_rules_file(struct xkb_context *ctx,
 
     /* Skip UTF-8 encoded BOM (U+FEFF) */
     /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
-    scanner_str(&scanner, "\xef\xbb\xbf", 3);
+    if (scanner_str(&scanner, "\xef\xbb\xbf", 3) || scanner.len < 2) {
+        goto initial;
+    }
 
-    ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
+    /* Early detection of wrong file encoding, e.g. UTF-16 or UTF-32 */
+    if (scanner.s[0] == '\0' || scanner.s[1] == '\0') {
+        if (scanner.s[0] != '\0')
+            scanner.token_column++;
+        scanner_err(&scanner, "unexpected NULL character.");
+        goto encoding_error;
+    }
+    if (!is_ascii(scanner.s[0])) {
+        scanner_err(&scanner, "unexpected non-ASCII character.");
+        goto encoding_error;
+    }
 
+initial:
+    ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
     unmap_file(string, size);
+
 out:
     return ret;
+
+encoding_error:
+    scanner_err(&scanner,
+        "This could be a file encoding issue. "
+        "Supported encodings must be backward compatible with ASCII.");
+    scanner_err(&scanner,
+        "E.g. ISO/CEI 8859 and UTF-8 are supported "
+        "but UTF-16, UTF-32 and CP1026 are not.");
+    /* Unlike the path through `initial`, nothing else unmaps the file here. */
+    unmap_file(string, size);
+    return false;
 }
 
 bool
diff --git a/test/data/rules/utf-16be_with_bom b/test/data/rules/utf-16be_with_bom
new file mode 100644
index 000000000..ea44babc5
Binary files /dev/null and b/test/data/rules/utf-16be_with_bom differ
diff --git a/test/data/rules/utf-16le_with_bom b/test/data/rules/utf-16le_with_bom
new file mode 100644
index 000000000..9faf37e9a
Binary files /dev/null and b/test/data/rules/utf-16le_with_bom differ
diff --git a/test/data/rules/utf-32be b/test/data/rules/utf-32be
new file mode 100644
index 000000000..588e32e07
Binary files /dev/null and b/test/data/rules/utf-32be differ
diff --git a/test/rules-file.c b/test/rules-file.c
index 302aa6878..726ec89bf 100644
--- a/test/rules-file.c
+++ b/test/rules-file.c
@@ -106,6 +106,42 @@ main(int argc, char *argv[])
     };
     assert(test_rules(ctx, &test_utf_8_with_bom));
 
+    struct test_data test_utf_16le_with_bom = {
+        .rules = "utf-16le_with_bom",
+
+        .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+        .options = "my_option",
+
+        .keycodes = "my_keycodes", .types = "my_types",
+        .compat = "my_compat|some:compat",
+        .symbols = "my_symbols+extra_variant",
+    };
+    assert(!test_rules(ctx, &test_utf_16le_with_bom));
+
+    struct test_data test_utf_16be_with_bom = {
+        .rules = "utf-16be_with_bom",
+
+        .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+        .options = "my_option",
+
+        .keycodes = "my_keycodes", .types = "my_types",
+        .compat = "my_compat|some:compat",
+        .symbols = "my_symbols+extra_variant",
+    };
+    assert(!test_rules(ctx, &test_utf_16be_with_bom));
+
+    struct test_data test_utf_32be = {
+        .rules = "utf-32be",
+
+        .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+        .options = "my_option",
+
+        .keycodes = "my_keycodes", .types = "my_types",
+        .compat = "my_compat|some:compat",
+        .symbols = "my_symbols+extra_variant",
+    };
+    assert(!test_rules(ctx, &test_utf_32be));
+
     struct test_data test1 = {
         .rules = "simple",