Skip to content

Commit

Permalink
rules: skip leading UTF-8 encoded BOM (U+FEFF)
Browse files Browse the repository at this point in the history
A leading BOM is legal in UTF-8 and is used as a signature — an indication
that an otherwise unmarked text file is encoded in UTF-8.
See https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
  • Loading branch information
wismill committed Oct 30, 2023
1 parent 8d66817 commit a1cc806
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/xkbcomp/rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,10 @@ read_rules_file(struct xkb_context *ctx,

scanner_init(&scanner, matcher->ctx, string, size, path, NULL);

/* Skip a leading UTF-8 encoded BOM (U+FEFF), i.e. the bytes EF BB BF. */
/* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
scanner_str(&scanner, "\xef\xbb\xbf", 3);

ret = matcher_match(matcher, &scanner, include_depth, string, size, path);

unmap_file(string, size);
Expand Down
22 changes: 22 additions & 0 deletions test/data/rules/utf-8_with_bom
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// NOTE: this file is encoded in UTF-8 with a leading BOM (U+FEFF)
! model = keycodes
my_model = my_keycodes
* = default_keycodes

! layout variant = symbols
my_layout my_variant = my_symbols+extra_variant

! layout = symbols
my_layout = my_symbols
* = default_symbols

! model = types
my_model = my_types
* = default_types

! model = compat
my_model = my_compat
* = default_compat

! option = compat
my_option = |some:compat
12 changes: 12 additions & 0 deletions test/rules-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ main(int argc, char *argv[])
ctx = test_get_context(0);
assert(ctx);

struct test_data test_utf_8_with_bom = {
.rules = "utf-8_with_bom",

.model = "my_model", .layout = "my_layout", .variant = "my_variant",
.options = "my_option",

.keycodes = "my_keycodes", .types = "my_types",
.compat = "my_compat|some:compat",
.symbols = "my_symbols+extra_variant",
};
assert(test_rules(ctx, &test_utf_8_with_bom));

struct test_data test1 = {
.rules = "simple",

Expand Down

0 comments on commit a1cc806

Please sign in to comment.