From 3fda20957ea56f84423357d5501a1c250451f01a Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Fri, 12 Jul 2024 09:17:34 +0200 Subject: [PATCH] keysyms: Require only 5 bytes for UTF-8 encoding Require only 5 bytes for the buffer of `xkb_keysym_to_utf8`, as UTF-8 encodes code points in up to 4 bytes, plus 1 byte for the terminating NUL byte. The previous standard, [RFC 2279] (1998), allowed up to 6 bytes per code point, but has been superseded by [RFC 3629] (2003). [RFC 2279]: https://datatracker.ietf.org/doc/html/rfc2279 [RFC 3629]: https://datatracker.ietf.org/doc/html/rfc3629 --- changes/api/418.bugfix.md | 5 +++++ include/xkbcommon/xkbcommon.h | 2 +- src/compose/state.c | 2 +- src/keysym-utf.c | 4 +++- src/keysym.h | 3 +++ src/keysym.h.jinja | 3 +++ src/state.c | 2 +- test/keysym.c | 5 +++-- test/utf8.c | 3 ++- tools/tools-common.c | 5 ++++- 10 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 changes/api/418.bugfix.md diff --git a/changes/api/418.bugfix.md b/changes/api/418.bugfix.md new file mode 100644 index 000000000..3d39df466 --- /dev/null +++ b/changes/api/418.bugfix.md @@ -0,0 +1,5 @@ +`xkb_keysym_to_utf8`: Require only 5 bytes for the buffer, as UTF-8 encodes code +points in up to 4 bytes, plus 1 byte for the terminating NUL byte. +The previous standard, [RFC 2279](https://datatracker.ietf.org/doc/html/rfc2279) +(1998), allowed up to 6 bytes per code point, but has been superseded by +[RFC 3629](https://datatracker.ietf.org/doc/html/rfc3629) (2003). diff --git a/include/xkbcommon/xkbcommon.h b/include/xkbcommon/xkbcommon.h index e3631df8e..f4c512ed1 100644 --- a/include/xkbcommon/xkbcommon.h +++ b/include/xkbcommon/xkbcommon.h @@ -494,7 +494,7 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags); * * @param[in] keysym The keysym. * @param[out] buffer A buffer to write the UTF-8 string into. - * @param[in] size The size of buffer. Must be at least 7. + * @param[in] size The size of buffer. Must be at least 5. 
* * @returns The number of bytes written to the buffer (including the * terminating byte). If the keysym does not have a Unicode diff --git a/src/compose/state.c b/src/compose/state.c index ca8b8f16f..7bf77935b 100644 --- a/src/compose/state.c +++ b/src/compose/state.c @@ -168,7 +168,7 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state, /* If there's no string specified, but only a keysym, try to do the * most helpful thing. */ if (node->leaf.utf8 == 0 && node->leaf.keysym != XKB_KEY_NoSymbol) { - char utf8[7]; + char utf8[XKB_KEYSYM_UTF8_MAX_SIZE]; int ret; ret = xkb_keysym_to_utf8(node->leaf.keysym, utf8, sizeof(utf8)); diff --git a/src/keysym-utf.c b/src/keysym-utf.c index 46bf777e7..f1a054ef7 100644 --- a/src/keysym-utf.c +++ b/src/keysym-utf.c @@ -954,7 +954,9 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size) { uint32_t codepoint; - if (size < 7) + /* Unicode code point takes up to 4 bytes in UTF-8, plus the + * NULL-terminating byte. */ + if (size < XKB_KEYSYM_UTF8_MAX_SIZE) return -1; codepoint = xkb_keysym_to_utf32(keysym); diff --git a/src/keysym.h b/src/keysym.h index 10d2c8a22..965c9bf65 100644 --- a/src/keysym.h +++ b/src/keysym.h @@ -78,6 +78,9 @@ #define XKB_KEYSYM_UNICODE_MAX 0x0110ffff /** Maximum keysym name length */ #define XKB_KEYSYM_NAME_MAX_SIZE 27 +/** Maximum bytes to encode the Unicode representation of a keysym in UTF-8: + * 4 bytes + NULL-terminating byte */ +#define XKB_KEYSYM_UTF8_MAX_SIZE 5 bool xkb_keysym_is_assigned(xkb_keysym_t ks); diff --git a/src/keysym.h.jinja b/src/keysym.h.jinja index d94ef8595..f29e33541 100644 --- a/src/keysym.h.jinja +++ b/src/keysym.h.jinja @@ -78,6 +78,9 @@ #define XKB_KEYSYM_UNICODE_MAX 0x0110ffff /** Maximum keysym name length */ #define XKB_KEYSYM_NAME_MAX_SIZE {{ XKB_KEYSYM_NAME_MAX_SIZE }} +/** Maximum bytes to encode the Unicode representation of a keysym in UTF-8: + * 4 bytes + NULL-terminating byte */ +#define XKB_KEYSYM_UTF8_MAX_SIZE 5 bool 
xkb_keysym_is_assigned(xkb_keysym_t ks); diff --git a/src/state.c b/src/state.c index 543658bef..99b302265 100644 --- a/src/state.c +++ b/src/state.c @@ -1022,7 +1022,7 @@ xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t kc, const xkb_keysym_t *syms; int nsyms; int offset; - char tmp[7]; + char tmp[XKB_KEYSYM_UTF8_MAX_SIZE]; sym = get_one_sym_for_string(state, kc); if (sym != XKB_KEY_NoSymbol) { diff --git a/test/keysym.c b/test/keysym.c index d6c99e9de..71b30cb09 100644 --- a/test/keysym.c +++ b/test/keysym.c @@ -170,7 +170,7 @@ test_keysym(xkb_keysym_t keysym, const char *expected) static int test_utf8(xkb_keysym_t keysym, const char *expected) { - char s[8]; + char s[XKB_KEYSYM_UTF8_MAX_SIZE]; int ret; ret = xkb_keysym_to_utf8(keysym, s, sizeof(s)); @@ -366,9 +366,10 @@ main(void) /* Check assigned keysyms bounds */ assert((int32_t)XKB_KEYSYM_MIN_ASSIGNED <= (int32_t)ks && ks <= XKB_KEYSYM_MAX_ASSIGNED); /* Check utf8 */ + /* Older implementation required 7 bytes for old UTF-8 (see RFC 2279) */ char utf8[7]; int needed = xkb_keysym_to_utf8(ks, utf8, sizeof(utf8)); - assert(0 <= needed && needed <= 5); + assert(0 <= needed && needed <= XKB_KEYSYM_UTF8_MAX_SIZE); /* Check maximum name length */ char name[XKB_KEYSYM_NAME_MAX_SIZE]; needed = xkb_keysym_iterator_get_name(iter, name, sizeof(name)); diff --git a/test/utf8.c b/test/utf8.c index 3a224e725..e90dcdab1 100644 --- a/test/utf8.c +++ b/test/utf8.c @@ -29,6 +29,7 @@ #include #include +#include "src/keysym.h" #include "test.h" #include "utf8.h" #include "utils.h" @@ -155,7 +156,7 @@ test_is_valid_utf8(void) static void check_utf32_to_utf8(uint32_t unichar, int expected_length, const char *expected) { - char buffer[7]; + char buffer[XKB_KEYSYM_UTF8_MAX_SIZE]; int length; length = utf32_to_utf8(unichar, buffer); diff --git a/tools/tools-common.c b/tools/tools-common.c index 8eb3f4bfd..bf086a3a3 100644 --- a/tools/tools-common.c +++ b/tools/tools-common.c @@ -157,7 +157,10 @@ 
tools_print_keycode_state(const char *prefix, xkb_keysym_t sym; const xkb_keysym_t *syms; int nsyms; - char s[MAX(XKB_COMPOSE_MAX_STRING_SIZE, XKB_KEYSYM_NAME_MAX_SIZE)]; +#define BUFFER_SIZE MAX(XKB_COMPOSE_MAX_STRING_SIZE, XKB_KEYSYM_NAME_MAX_SIZE) + assert(XKB_KEYSYM_UTF8_MAX_SIZE <= BUFFER_SIZE); + char s[BUFFER_SIZE]; +#undef BUFFER_SIZE xkb_layout_index_t layout; enum xkb_compose_status status;