Skip to content

Commit

Permalink
rules: early detection of invalid encoding
Browse files (browse the repository at this point in the history)
  • Loading branch information
wismill committed Oct 30, 2023
1 parent 43b13fc commit 28d8e81
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 2 deletions.
28 changes: 26 additions & 2 deletions src/xkbcomp/rules.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -1101,13 +1101,37 @@ read_rules_file(struct xkb_context *ctx,

/* Skip UTF-8 encoded BOM (U+FEFF) */
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
scanner_str(&scanner, "\xef\xbb\xbf", 3);
if (scanner_str(&scanner, "\xef\xbb\xbf", 3) || scanner.len < 2) {
goto initial;
}

ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
/* Early detection of wrong file encoding, e.g. UTF-16 or UTF-32 */
if (scanner.s[0] == '\0' || scanner.s[1] == '\0') {
if (scanner.s[0] != '\0')
scanner.token_column++;
scanner_err(&scanner, "unexpected NULL character.");
goto encoding_error;
}
if (!is_ascii(scanner.s[0])) {
scanner_err(&scanner, "unexpected non-ASCII character.");
goto encoding_error;
}

initial:
ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
unmap_file(string, size);

out:
return ret;

encoding_error:
scanner_err(&scanner,
"This could be a file encoding issue. "
"Supported encodings must be backward compatible with ASCII.");
scanner_err(&scanner,
"E.g. ISO/CEI 8859 and UTF-8 are supported "
"but UTF-16, UTF-32 and CP1026 are not.");
return false;
}

bool
Expand Down
Binary file added test/data/rules/utf-16be_with_bom
Binary file not shown.
Binary file added test/data/rules/utf-16le_with_bom
Binary file not shown.
Binary file added test/data/rules/utf-32be
Binary file not shown.
36 changes: 36 additions & 0 deletions test/rules-file.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -106,6 +106,42 @@ main(int argc, char *argv[])
};
assert(test_rules(ctx, &test_utf_8_with_bom));

struct test_data test_utf_16le_with_bom = {
.rules = "utf-16le_with_bom",

.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",

.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_16le_with_bom));

struct test_data test_utf_16be_with_bom = {
.rules = "utf-16be_with_bom",

.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",

.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_16be_with_bom));

struct test_data test_utf_32be = {
.rules = "utf-32be",

.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",

.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(!test_rules(ctx, &test_utf_32be));

struct test_data test1 = {
.rules = "simple",

Expand Down

0 comments on commit 28d8e81

Please sign in to comment.