From adef3743dc3aa4b266815b54dcf7a96a3e4f0297 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Sun, 29 Oct 2023 07:20:29 +0100
Subject: [PATCH 1/3] Compose: skip leading UTF-8 encoded BOM (U+FEFF)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A leading BOM is legal and is used as a signature — an indication that
an otherwise unmarked text file is in UTF-8.
See: https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
---
 src/compose/parser.c |  4 ++++
 test/compose.c       | 11 +++++++++++
 2 files changed, 15 insertions(+)
diff --git a/src/compose/parser.c b/src/compose/parser.c
index 5545a33f8..6740f21b2 100644
--- a/src/compose/parser.c
+++ b/src/compose/parser.c
@@ -534,6 +534,10 @@ parse(struct xkb_compose_table *table, struct scanner *s,
     production.mods = 0;
     production.modmask = 0;
 
+    /* Skip UTF-8 encoded BOM (U+FEFF) */
+    /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+    scanner_str(s, "\xef\xbb\xbf", 3);
+
     /* fallthrough */
 
 initial_eol:
diff --git a/test/compose.c b/test/compose.c
index 8c633d704..d7192f67c 100644
--- a/test/compose.c
+++ b/test/compose.c
@@ -172,6 +172,16 @@ test_compose_seq_buffer(struct xkb_context *ctx, const char *buffer, ...)
     return ok;
 }
 
+static void
+test_compose_utf8_bom(struct xkb_context *ctx)
+{
+    const char *buffer = "\xef\xbb\xbf<A> : X"; /* "\xef\xbb\xbf" is U+FEFF (BOM) encoded in UTF-8; it must be skipped by the Compose parser */
+    assert(test_compose_seq_buffer(ctx, buffer,
+        XKB_KEY_A, XKB_COMPOSE_FEED_ACCEPTED, XKB_COMPOSE_COMPOSED, "X", XKB_KEY_X,
+        XKB_KEY_NoSymbol));
+}
+
+
 static void
 test_seqs(struct xkb_context *ctx)
 {
@@ -723,6 +733,7 @@ main(int argc, char *argv[])
     unsetenv("XLOCALEDIR");
 #endif
 
+    test_compose_utf8_bom(ctx);
     test_seqs(ctx);
     test_conflicting(ctx);
     test_XCOMPOSEFILE(ctx);

From 8d6681759e5f063f849dcfb8a7dcd497c688d1e0 Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Sun, 29 Oct 2023 07:31:34 +0100
Subject: [PATCH 2/3] xkbcomp: skip leading UTF-8 encoded BOM (U+FEFF)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A leading BOM is legal and is used as a signature — an indication that
an otherwise unmarked text file is in UTF-8.
See: https://www.unicode.org/faq/utf_bom.html#bom5 for further
details.
---
 src/xkbcomp/scanner.c |  5 +++++
 test/buffercomp.c     | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/src/xkbcomp/scanner.c b/src/xkbcomp/scanner.c
index 57babbb20..8bff375df 100644
--- a/src/xkbcomp/scanner.c
+++ b/src/xkbcomp/scanner.c
@@ -199,6 +199,11 @@ XkbParseString(struct xkb_context *ctx, const char *string, size_t len,
 {
     struct scanner scanner;
     scanner_init(&scanner, ctx, string, len, file_name, NULL);
+
+    /* Skip UTF-8 encoded BOM (U+FEFF) */
+    /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+    scanner_str(&scanner, "\xef\xbb\xbf", 3);
+
     return parse(ctx, &scanner, map);
 }
 
diff --git a/test/buffercomp.c b/test/buffercomp.c
index 9a7603654..b9b5e9d8d 100644
--- a/test/buffercomp.c
+++ b/test/buffercomp.c
@@ -78,6 +78,18 @@ main(int argc, char *argv[])
     keymap = test_compile_buffer(ctx, "", 0);
     assert(!keymap);
 
+    /* Accept UTF-8 encoded BOM (U+FEFF) */
+    const char *bom =
+        "\xef\xbb\xbfxkb_keymap {"
+        "  xkb_keycodes { include \"evdev\" };"
+        "  xkb_types { include \"complete\" };"
+        "  xkb_compat { include \"complete\" };"
+        "  xkb_symbols { include \"pc\" };"
+        "};";
+    keymap = test_compile_buffer(ctx, bom, strlen(bom));
+    assert(keymap);
+    xkb_keymap_unref(keymap);
+
     /* Make sure we can recompile our output for a normal keymap from rules. */
     keymap = test_compile_rules(ctx, NULL, NULL,
                                 "ru,ca,de,us", ",multix,neo,intl", NULL);

From a1cc8062362f2d7b06f257c9f1feba768d52fe1a Mon Sep 17 00:00:00 2001
From: Pierre Le Marre <dev@wismill.eu>
Date: Sun, 29 Oct 2023 07:44:39 +0100
Subject: [PATCH 3/3] rules: skip leading UTF-8 encoded BOM (U+FEFF)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A leading BOM is legal and is used as a signature — an indication that
an otherwise unmarked text file is in UTF-8.
See: https://www.unicode.org/faq/utf_bom.html#bom5 for further details.
---
 src/xkbcomp/rules.c            |  4 ++++
 test/data/rules/utf-8_with_bom | 22 ++++++++++++++++++++++
 test/rules-file.c              | 12 ++++++++++++
 3 files changed, 38 insertions(+)
 create mode 100644 test/data/rules/utf-8_with_bom

diff --git a/src/xkbcomp/rules.c b/src/xkbcomp/rules.c
index f5d9c4960..daa4f3dec 100644
--- a/src/xkbcomp/rules.c
+++ b/src/xkbcomp/rules.c
@@ -1099,6 +1099,10 @@ read_rules_file(struct xkb_context *ctx,
 
     scanner_init(&scanner, matcher->ctx, string, size, path, NULL);
 
+    /* Skip UTF-8 encoded BOM (U+FEFF) */
+    /* See: https://www.unicode.org/faq/utf_bom.html#bom5 */
+    scanner_str(&scanner, "\xef\xbb\xbf", 3);
+
     ret = matcher_match(matcher, &scanner, include_depth, string, size, path);
 
     unmap_file(string, size);
diff --git a/test/data/rules/utf-8_with_bom b/test/data/rules/utf-8_with_bom
new file mode 100644
index 000000000..a3c3a73ab
--- /dev/null
+++ b/test/data/rules/utf-8_with_bom
@@ -0,0 +1,22 @@
+﻿// NOTE: this file is encoded in UTF-8 with a leading BOM (U+FEFF)
+! model         = keycodes
+  my_model      = my_keycodes
+  *             = default_keycodes
+
+! layout        variant    = symbols
+  my_layout     my_variant = my_symbols+extra_variant
+
+! layout        = symbols
+  my_layout     = my_symbols
+  *             = default_symbols
+
+! model         = types
+  my_model      = my_types
+  *             = default_types
+
+! model         = compat
+  my_model      = my_compat
+  *             = default_compat
+
+! option        = compat
+  my_option     = |some:compat
diff --git a/test/rules-file.c b/test/rules-file.c
index d217ba960..302aa6878 100644
--- a/test/rules-file.c
+++ b/test/rules-file.c
@@ -94,6 +94,18 @@ main(int argc, char *argv[])
     ctx = test_get_context(0);
     assert(ctx);
 
+    struct test_data test_utf_8_with_bom = {
+        .rules = "utf-8_with_bom",
+
+        .model = "my_model", .layout = "my_layout", .variant = "my_variant",
+        .options = "my_option",
+
+        .keycodes = "my_keycodes", .types = "my_types",
+        .compat = "my_compat|some:compat",
+        .symbols = "my_symbols+extra_variant",
+    };
+    assert(test_rules(ctx, &test_utf_8_with_bom));
+
     struct test_data test1 = {
         .rules = "simple",