Skip to content

Commit

Permalink
keysyms: Require only 5 bytes for UTF-8 encoding
Browse files Browse the repository at this point in the history
Require only 5 bytes for the buffer of `xkb_keysym_to_utf8`: UTF-8
encodes a code point in at most 4 bytes, plus 1 byte for the terminating
NUL byte.

The previous standard, [RFC 2279] (1998), allowed up to 6 bytes per code
point, but it has been superseded by [RFC 3629] (2003), which restricts
UTF-8 to at most 4 bytes per code point.

[RFC 2279]: https://datatracker.ietf.org/doc/html/rfc2279
[RFC 3629]: https://datatracker.ietf.org/doc/html/rfc3629
  • Loading branch information
wismill committed Jul 22, 2024
1 parent 1d8a25d commit 3fda209
Show file tree
Hide file tree
Showing 10 changed files with 26 additions and 8 deletions.
5 changes: 5 additions & 0 deletions changes/api/418.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
`xkb_keysym_to_utf8`: Require only 5 bytes for the buffer, as UTF-8 encodes a
code point in at most 4 bytes, plus 1 byte for the terminating NUL byte.
The previous standard, [RFC 2279](https://datatracker.ietf.org/doc/html/rfc2279)
(1998), allowed up to 6 bytes per code point, but it has been superseded by
[RFC 3629](https://datatracker.ietf.org/doc/html/rfc3629) (2003).
2 changes: 1 addition & 1 deletion include/xkbcommon/xkbcommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ xkb_keysym_from_name(const char *name, enum xkb_keysym_flags flags);
*
* @param[in] keysym The keysym.
* @param[out] buffer A buffer to write the UTF-8 string into.
* @param[in] size The size of buffer. Must be at least 7.
* @param[in] size The size of buffer. Must be at least 5.
*
* @returns The number of bytes written to the buffer (including the
* terminating byte). If the keysym does not have a Unicode
Expand Down
2 changes: 1 addition & 1 deletion src/compose/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state,
/* If there's no string specified, but only a keysym, try to do the
* most helpful thing. */
if (node->leaf.utf8 == 0 && node->leaf.keysym != XKB_KEY_NoSymbol) {
char utf8[7];
char utf8[XKB_KEYSYM_UTF8_MAX_SIZE];
int ret;

ret = xkb_keysym_to_utf8(node->leaf.keysym, utf8, sizeof(utf8));
Expand Down
4 changes: 3 additions & 1 deletion src/keysym-utf.c
Original file line number Diff line number Diff line change
Expand Up @@ -954,7 +954,9 @@ xkb_keysym_to_utf8(xkb_keysym_t keysym, char *buffer, size_t size)
{
uint32_t codepoint;

if (size < 7)
/* A Unicode code point takes up to 4 bytes in UTF-8, plus one byte
* for the terminating NUL. */
if (size < XKB_KEYSYM_UTF8_MAX_SIZE)
return -1;

codepoint = xkb_keysym_to_utf32(keysym);
Expand Down
3 changes: 3 additions & 0 deletions src/keysym.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@
#define XKB_KEYSYM_UNICODE_MAX 0x0110ffff
/** Maximum keysym name length */
#define XKB_KEYSYM_NAME_MAX_SIZE 27
/** Maximum number of bytes needed to encode the Unicode representation of a
* keysym in UTF-8: 4 bytes for the code point + 1 terminating NUL byte */
#define XKB_KEYSYM_UTF8_MAX_SIZE 5

bool
xkb_keysym_is_assigned(xkb_keysym_t ks);
Expand Down
3 changes: 3 additions & 0 deletions src/keysym.h.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@
#define XKB_KEYSYM_UNICODE_MAX 0x0110ffff
/** Maximum keysym name length */
#define XKB_KEYSYM_NAME_MAX_SIZE {{ XKB_KEYSYM_NAME_MAX_SIZE }}
/** Maximum number of bytes needed to encode the Unicode representation of a
* keysym in UTF-8: 4 bytes for the code point + 1 terminating NUL byte */
#define XKB_KEYSYM_UTF8_MAX_SIZE 5

bool
xkb_keysym_is_assigned(xkb_keysym_t ks);
Expand Down
2 changes: 1 addition & 1 deletion src/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,7 +1022,7 @@ xkb_state_key_get_utf8(struct xkb_state *state, xkb_keycode_t kc,
const xkb_keysym_t *syms;
int nsyms;
int offset;
char tmp[7];
char tmp[XKB_KEYSYM_UTF8_MAX_SIZE];

sym = get_one_sym_for_string(state, kc);
if (sym != XKB_KEY_NoSymbol) {
Expand Down
5 changes: 3 additions & 2 deletions test/keysym.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ test_keysym(xkb_keysym_t keysym, const char *expected)
static int
test_utf8(xkb_keysym_t keysym, const char *expected)
{
char s[8];
char s[XKB_KEYSYM_UTF8_MAX_SIZE];
int ret;

ret = xkb_keysym_to_utf8(keysym, s, sizeof(s));
Expand Down Expand Up @@ -366,9 +366,10 @@ main(void)
/* Check assigned keysyms bounds */
assert((int32_t)XKB_KEYSYM_MIN_ASSIGNED <= (int32_t)ks && ks <= XKB_KEYSYM_MAX_ASSIGNED);
/* Check utf8 */
/* Older implementation required 7 bytes for old UTF-8 (see RFC 2279) */
char utf8[7];
int needed = xkb_keysym_to_utf8(ks, utf8, sizeof(utf8));
assert(0 <= needed && needed <= 5);
assert(0 <= needed && needed <= XKB_KEYSYM_UTF8_MAX_SIZE);
/* Check maximum name length */
char name[XKB_KEYSYM_NAME_MAX_SIZE];
needed = xkb_keysym_iterator_get_name(iter, name, sizeof(name));
Expand Down
3 changes: 2 additions & 1 deletion test/utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <stddef.h>
#include <string.h>

#include "src/keysym.h"
#include "test.h"
#include "utf8.h"
#include "utils.h"
Expand Down Expand Up @@ -155,7 +156,7 @@ test_is_valid_utf8(void)

static void
check_utf32_to_utf8(uint32_t unichar, int expected_length, const char *expected) {
char buffer[7];
char buffer[XKB_KEYSYM_UTF8_MAX_SIZE];
int length;

length = utf32_to_utf8(unichar, buffer);
Expand Down
5 changes: 4 additions & 1 deletion tools/tools-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,10 @@ tools_print_keycode_state(const char *prefix,
xkb_keysym_t sym;
const xkb_keysym_t *syms;
int nsyms;
char s[MAX(XKB_COMPOSE_MAX_STRING_SIZE, XKB_KEYSYM_NAME_MAX_SIZE)];
#define BUFFER_SIZE MAX(XKB_COMPOSE_MAX_STRING_SIZE, XKB_KEYSYM_NAME_MAX_SIZE)
assert(XKB_KEYSYM_UTF8_MAX_SIZE <= BUFFER_SIZE);
char s[BUFFER_SIZE];
#undef BUFFER_SIZE
xkb_layout_index_t layout;
enum xkb_compose_status status;

Expand Down

0 comments on commit 3fda209

Please sign in to comment.