From 031c1dd8755ff38a9f28a2beecd05c7a359df3ed Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 1 Sep 2024 08:32:21 +0200 Subject: [PATCH 1/5] compose: Add Compose table dump internal API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move `print_compose_table_entry` to own file and add a `file` argument to select output. - Add `xkb_compose_table_dump` to dump a Compose table. This change is needed in order to share the feature “dump a Compose table” between tests and tools. --- meson.build | 12 +++++- src/compose/dump.c | 92 +++++++++++++++++++++++++++++++++++++++ src/compose/dump.h | 91 +++------------------------------------ src/compose/escape.h | 95 +++++++++++++++++++++++++++++++++++++++++ src/compose/table.h | 4 +- test/compose.c | 2 +- tools/compile-compose.c | 48 ++------------------- 7 files changed, 209 insertions(+), 135 deletions(-) create mode 100644 src/compose/dump.c create mode 100644 src/compose/escape.h diff --git a/meson.build b/meson.build index 01a7c87f8..60c8df2b9 100644 --- a/meson.build +++ b/meson.build @@ -503,6 +503,9 @@ if build_tools # Tool: compose executable('xkbcli-compile-compose', 'tools/compile-compose.c', + 'src/compose/dump.c', + 'src/compose/dump.h', + 'src/compose/escape.h', dependencies: tools_dep, install: true, install_dir: dir_libexec) @@ -765,7 +768,14 @@ test( ) test( 'compose', - executable('test-compose', 'test/compose.c', dependencies: test_dep), + executable( + 'test-compose', + 'test/compose.c', + 'src/compose/dump.c', + 'src/compose/dump.h', + 'src/compose/escape.h', + dependencies: test_dep + ), env: test_env, ) test( diff --git a/src/compose/dump.c b/src/compose/dump.c new file mode 100644 index 000000000..60ccbc0bc --- /dev/null +++ b/src/compose/dump.c @@ -0,0 +1,92 @@ +/* + * Copyright © 2021 Ran Benita + * Copyright © 2023 Pierre Le Marre + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "config.h" + +#include "src/darray.h" +#include +#include +#include + +#include "xkbcommon/xkbcommon-compose.h" +#include "parser.h" +#include "escape.h" +#include "dump.h" +#include "src/keysym.h" +#include "src/utils.h" + +bool +print_compose_table_entry(FILE *file, struct xkb_compose_table_entry *entry) +{ + size_t nsyms; + const xkb_keysym_t *syms = xkb_compose_table_entry_sequence(entry, &nsyms); + char buf[XKB_KEYSYM_NAME_MAX_SIZE]; + for (size_t i = 0; i < nsyms; i++) { + xkb_keysym_get_name(syms[i], buf, sizeof(buf)); + fprintf(file, "<%s>", buf); + if (i + 1 < nsyms) { + fprintf(file, " "); + } + } + fprintf(file, " : "); + const char *utf8 = xkb_compose_table_entry_utf8(entry); + if (*utf8 != '\0') { + char *escaped = escape_utf8_string_literal(utf8); + if (!escaped) { + fprintf(stderr, "ERROR: Cannot escape the string: allocation error\n"); + return false; + } else { + fprintf(file, " \"%s\"", escaped); + free(escaped); + } + } + const xkb_keysym_t keysym = xkb_compose_table_entry_keysym(entry); + if (keysym != XKB_KEY_NoSymbol) { + xkb_keysym_get_name(keysym, buf, sizeof(buf)); + fprintf(file, " %s", buf); + } + fprintf(file, "\n"); + return true; +} + +bool +xkb_compose_table_dump(FILE *file, struct xkb_compose_table *table) +{ + struct xkb_compose_table_entry *entry; + struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(table); + + if (!iter) + return false; + + bool ok = true; + while ((entry = xkb_compose_table_iterator_next(iter))) { + if (!print_compose_table_entry(file, entry)) { + ok = false; + break; + } + } + + xkb_compose_table_iterator_free(iter); + return ok; +} diff --git a/src/compose/dump.h b/src/compose/dump.h index 76990f3d0..ab7891acb 100644 --- a/src/compose/dump.h +++ b/src/compose/dump.h @@ -1,95 +1,14 @@ -/* - * Copyright © 2023 Pierre Le Marre - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - #ifndef COMPOSE_DUMP_H #define COMPOSE_DUMP_H #include "config.h" -#include - -#include "src/utils.h" +#include "table.h" -/* Ad-hoc escaping for UTF-8 string - * - * Note that it only escapes the strict minimum to get a valid Compose file. - * It also escapes hexadecimal digits after an hexadecimal escape. This is not - * strictly needed by the current implementation: "\x0abcg" parses as "␊bcg", - * but better be cautious than sorry and produce "\x0a\x62\x63g" instead. - * In the latter string there is no ambiguity and no need to know the maximum - * number of digits supported by the escape sequence. - */ -static inline char* -escape_utf8_string_literal(const char *from) -{ - const size_t length = strlen(from); - /* Longest escape is converting ASCII character to "\xNN" */ - char* to = calloc(4 * length + 1, sizeof(*to)); - if (!to) - return NULL; +bool +print_compose_table_entry(FILE *file, struct xkb_compose_table_entry *entry); - size_t t = 0; - bool previous_is_hex_escape = false; - uint8_t nbytes = 0; - for (size_t f = 0; f < length;) { - if ((unsigned char) from[f] < 0x80) { - /* ASCII */ - if (from[f] <= 0x10 || from[f] == 0x7f || - (is_xdigit(from[f]) && previous_is_hex_escape)) - { - /* Control character or - hexadecimal digit following an hexadecimal escape */ - snprintf_safe(&to[t], 5, "\\x%02x", from[f]); - t += 4; - previous_is_hex_escape = true; - } else if (from[f] == '"' || from[f] == '\\') { - /* Quote and backslash */ - snprintf_safe(&to[t], 3, "\\%c", from[f]); - t += 2; - previous_is_hex_escape = false; - } else { - /* Other characters */ - to[t++] = from[f]; - previous_is_hex_escape = false; - } - f++; - continue; - } - /* Test next byte for the next Unicode codepoint’s bytes count */ - else if ((unsigned char) from[f] < 0xe0) - nbytes = 2; - else if ((unsigned char) from[f] < 0xf0) - nbytes = 3; - else - nbytes = 4; - memcpy(&to[t], &from[f], nbytes); - t += nbytes; - f += nbytes; - previous_is_hex_escape = false; - } - to[t++] = '\0'; - return realloc(to, t); -} +bool +xkb_compose_table_dump(FILE *file, struct xkb_compose_table *table); #endif diff --git a/src/compose/escape.h b/src/compose/escape.h new file mode 100644 index 000000000..2fe14e8ce --- /dev/null +++ b/src/compose/escape.h @@ -0,0 +1,95 @@ +/* + * Copyright © 2023 Pierre Le Marre + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + +#ifndef COMPOSE_ESCAPE_H +#define COMPOSE_ESCAPE_H + +#include "config.h" + +#include + +#include "src/utils.h" + +/* Ad-hoc escaping for UTF-8 string + * + * Note that it only escapes the strict minimum to get a valid Compose file. + * It also escapes hexadecimal digits after an hexadecimal escape. This is not + * strictly needed by the current implementation: "\x0abcg" parses as "␊bcg", + * but better be cautious than sorry and produce "\x0a\x62\x63g" instead. + * In the latter string there is no ambiguity and no need to know the maximum + * number of digits supported by the escape sequence. + */ +static inline char* +escape_utf8_string_literal(const char *from) +{ + const size_t length = strlen(from); + /* Longest escape is converting ASCII character to "\xNN" */ + char* to = calloc(4 * length + 1, sizeof(*to)); + if (!to) + return NULL; + + size_t t = 0; + bool previous_is_hex_escape = false; + uint8_t nbytes = 0; + for (size_t f = 0; f < length;) { + if ((unsigned char) from[f] < 0x80) { + /* ASCII */ + if (from[f] <= 0x10 || from[f] == 0x7f || + (is_xdigit(from[f]) && previous_is_hex_escape)) + { + /* Control character or + hexadecimal digit following an hexadecimal escape */ + snprintf_safe(&to[t], 5, "\\x%02x", from[f]); + t += 4; + previous_is_hex_escape = true; + } else if (from[f] == '"' || from[f] == '\\') { + /* Quote and backslash */ + snprintf_safe(&to[t], 3, "\\%c", from[f]); + t += 2; + previous_is_hex_escape = false; + } else { + /* Other characters */ + to[t++] = from[f]; + previous_is_hex_escape = false; + } + f++; + continue; + } + /* Test next byte for the next Unicode codepoint’s bytes count */ + else if ((unsigned char) from[f] < 0xe0) + nbytes = 2; + else if ((unsigned char) from[f] < 0xf0) + nbytes = 3; + else + nbytes = 4; + memcpy(&to[t], &from[f], nbytes); + t += nbytes; + f += nbytes; + previous_is_hex_escape = false; + } + to[t++] = '\0'; + return realloc(to, t); +} + +#endif diff --git a/src/compose/table.h b/src/compose/table.h index f6904a1c2..5615db29a 100644 --- a/src/compose/table.h +++ b/src/compose/table.h @@ -25,8 +25,8 @@ #define COMPOSE_COMPOSE_H #include "xkbcommon/xkbcommon-compose.h" -#include "utils.h" -#include "context.h" +#include "src/utils.h" +#include "src/context.h" /* * The compose table data structure is a ternary search tree. diff --git a/test/compose.c b/test/compose.c index 905b1af1b..56842911b 100644 --- a/test/compose.c +++ b/test/compose.c @@ -31,7 +31,7 @@ #include "src/utf8.h" #include "src/keysym.h" #include "src/compose/parser.h" -#include "src/compose/dump.h" +#include "src/compose/escape.h" static const char * compose_status_string(enum xkb_compose_status status) diff --git a/tools/compile-compose.c b/tools/compile-compose.c index 6f37d5762..bcc970233 100644 --- a/tools/compile-compose.c +++ b/tools/compile-compose.c @@ -53,40 +53,6 @@ usage(FILE *fp, char *progname) " LC_ALL, LC_TYPE and LANG.\n"); } -static bool -print_compose_table_entry(struct xkb_compose_table_entry *entry) -{ - size_t nsyms; - const xkb_keysym_t *syms = xkb_compose_table_entry_sequence(entry, &nsyms); - char buf[XKB_KEYSYM_NAME_MAX_SIZE]; - for (size_t i = 0; i < nsyms; i++) { - xkb_keysym_get_name(syms[i], buf, sizeof(buf)); - printf("<%s>", buf); - if (i + 1 < nsyms) { - printf(" "); - } - } - printf(" : "); - const char *utf8 = xkb_compose_table_entry_utf8(entry); - if (*utf8 != '\0') { - char *escaped = escape_utf8_string_literal(utf8); - if (!escaped) { - fprintf(stderr, "ERROR: Cannot escape the string: allocation error\n"); - return false; - } else { - printf(" \"%s\"", escaped); - free(escaped); - } - } - const xkb_keysym_t keysym = xkb_compose_table_entry_keysym(entry); - if (keysym != XKB_KEY_NoSymbol) { - xkb_keysym_get_name(keysym, buf, sizeof(buf)); - printf(" %s", buf); - } - printf("\n"); - return true; -} - int main(int argc, char *argv[]) { @@ -175,18 +141,10 @@ main(int argc, char *argv[]) } } - struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(compose_table); - struct xkb_compose_table_entry *entry; - while ((entry = xkb_compose_table_iterator_next(iter))) { - if (!print_compose_table_entry(entry)) { - ret = EXIT_FAILURE; - goto entry_error; - } - } - ret = EXIT_SUCCESS; + ret = xkb_compose_table_dump(stdout, compose_table) + ? EXIT_SUCCESS + : EXIT_FAILURE; -entry_error: - xkb_compose_table_iterator_free(iter); out: xkb_compose_table_unref(compose_table); file_error: From ddc58a1ab8b4df6c0eb7c7eb9c293076967ecdd3 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 1 Sep 2024 09:07:00 +0200 Subject: [PATCH 2/5] compose: Add quickcheck test for traversal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test against the `foreach` reference implementation: - Suffle compose file lines randomly; - Compare traversal entry by entry. The `foreach` Compose traversal implementation is based on Ran’s work: https://github.com/bluetech/libxkbcommon/commit/f7f3c3c385fdc9ae91135f95e4b10f072603b812 --- meson.build | 6 +++ test/compose-iter.c | 78 +++++++++++++++++++++++++++++++++++++ test/compose-iter.h | 26 +++++++++++++ test/compose.c | 93 ++++++++++++++++++++++++++++++++++++++++++-- test/shuffle-lines.c | 89 ++++++++++++++++++++++++++++++++++++++++++ test/shuffle-lines.h | 13 +++++++ 6 files changed, 302 insertions(+), 3 deletions(-) create mode 100644 test/compose-iter.c create mode 100644 test/compose-iter.h create mode 100644 test/shuffle-lines.c create mode 100644 test/shuffle-lines.h diff --git a/meson.build b/meson.build index 60c8df2b9..037f40811 100644 --- a/meson.build +++ b/meson.build @@ -771,6 +771,10 @@ test( executable( 'test-compose', 'test/compose.c', + 'test/shuffle-lines.c', + 'test/shuffle-lines.h', + 'test/compose-iter.c', + 'test/compose-iter.h', 'src/compose/dump.c', 'src/compose/dump.h', 'src/compose/escape.h', @@ -863,6 +867,8 @@ if valgrind.found() '--track-origins=yes', '--gen-suppressions=all', '--error-exitcode=99'], + # This is used in some tests, to avoid excessive run time. + env: {'RUNNING_VALGRIND': '1'}, timeout_multiplier : 10) else message('valgrind not found, disabling valgrind test setup') diff --git a/test/compose-iter.c b/test/compose-iter.c new file mode 100644 index 000000000..4d5fa4e29 --- /dev/null +++ b/test/compose-iter.c @@ -0,0 +1,78 @@ +/* + * Copyright © 2022 Ran Benita + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "config.h" + +#include "src/darray.h" +#include +#include +#include + +#include "xkbcommon/xkbcommon-compose.h" +#include "src/compose/escape.h" +#include "src/compose/parser.h" +#include "src/keysym.h" +#include "src/utils.h" +#include "test/compose-iter.h" + +/* Reference implentation of Compose table traversal */ +static void +for_each_helper(struct xkb_compose_table *table, + xkb_compose_table_iter_t iter, + void *data, + xkb_keysym_t *syms, + size_t nsyms, + uint16_t p) +{ + if (!p) { + return; + } + const struct compose_node *node = &darray_item(table->nodes, p); + for_each_helper(table, iter, data, syms, nsyms, node->lokid); + syms[nsyms++] = node->keysym; + if (node->is_leaf) { + struct xkb_compose_table_entry entry = { + .sequence = syms, + .sequence_length = nsyms, + .keysym = node->leaf.keysym, + .utf8 = &darray_item(table->utf8, node->leaf.utf8), + }; + iter(&entry, data); + } else { + for_each_helper(table, iter, data, syms, nsyms, node->internal.eqkid); + } + nsyms--; + for_each_helper(table, iter, data, syms, nsyms, node->hikid); +} + +XKB_EXPORT void +xkb_compose_table_for_each(struct xkb_compose_table *table, + xkb_compose_table_iter_t iter, + void *data) +{ + if (darray_size(table->nodes) <= 1) { + return; + } + xkb_keysym_t syms[MAX_LHS_LEN]; + for_each_helper(table, iter, data, syms, 0, 1); +} diff --git a/test/compose-iter.h b/test/compose-iter.h new file mode 100644 index 000000000..320cc06f1 --- /dev/null +++ b/test/compose-iter.h @@ -0,0 +1,26 @@ +#ifndef COMPOSE_LEGACY_ITER_H +#define COMPOSE_LEGACY_ITER_H + +#include "config.h" +#include "src/compose/table.h" + +/** + * The iterator function type used by xkb_compose_table_for_each(). + */ +typedef void +(*xkb_compose_table_iter_t)(struct xkb_compose_table_entry *entry, + void *data); + +/** + * Run a specified function for every valid entry in the table. + * + * The entries are returned in lexicographic order of the left-hand + * side of entries. This does not correspond to the order in which + * the entries appear in the Compose file. + */ +void +xkb_compose_table_for_each(struct xkb_compose_table *table, + xkb_compose_table_iter_t iter, + void *data); + +#endif diff --git a/test/compose.c b/test/compose.c index 56842911b..2bf509099 100644 --- a/test/compose.c +++ b/test/compose.c @@ -32,6 +32,9 @@ #include "src/keysym.h" #include "src/compose/parser.h" #include "src/compose/escape.h" +#include "src/compose/dump.h" +#include "test/shuffle-lines.h" +#include "test/compose-iter.h" static const char * compose_status_string(enum xkb_compose_status status) @@ -717,8 +720,45 @@ test_eq_entry(struct xkb_compose_table_entry *entry, xkb_keysym_t keysym, const return ok; } +static bool +test_eq_entries(struct xkb_compose_table_entry *entry1, struct xkb_compose_table_entry *entry2) +{ + if (!entry1 || !entry2) + goto error; + bool ok = true; + if (entry1->keysym != entry2->keysym || + !streq_null(entry1->utf8, entry2->utf8) || + entry1->sequence_length != entry2->sequence_length) + ok = false; + for (size_t k = 0; k < entry1->sequence_length; k++) { + if (entry1->sequence[k] != entry2->sequence[k]) + ok = false; + } + if (ok) + return true; +error: +#define print_entry(msg, entry) \ + fprintf(stderr, msg); \ + if (entry) \ + print_compose_table_entry(stderr, entry); \ + else \ + fprintf(stderr, "\n"); + print_entry("Expected: ", entry1); + print_entry("Got: ", entry2); +#undef print_entry + return false; +} + static void -test_traverse(struct xkb_context *ctx) +compose_traverse_fn(struct xkb_compose_table_entry *entry_ref, void *data) +{ + struct xkb_compose_table_iterator *iter = (struct xkb_compose_table_iterator *)data; + struct xkb_compose_table_entry *entry = xkb_compose_table_iterator_next(iter); + assert(test_eq_entries(entry_ref, entry)); +} + +static void +test_traverse(struct xkb_context *ctx, size_t quickcheck_loops) { struct xkb_compose_table *table; struct xkb_compose_table_iterator *iter; @@ -796,6 +836,34 @@ test_traverse(struct xkb_context *ctx) xkb_compose_table_iterator_free(iter); xkb_compose_table_unref(table); + + /* QuickCheck: shuffle compose file lines and compare against + * reference implementation */ + char *input = test_read_file("locale/en_US.UTF-8/Compose"); + assert(input); + struct text_line lines[6000]; + size_t input_length = strlen(input); + size_t lines_count = split_lines(input, input_length, lines, ARRAY_SIZE(lines)); + /* Note: we may add additional new line char */ + char *shuffled = calloc(input_length + 1, sizeof(char)); + assert(shuffled); + for (size_t k = 0; k < quickcheck_loops; k++) { + size_t shuffled_length = shuffle_lines(lines, lines_count, shuffled); + table = xkb_compose_table_new_from_buffer(ctx, shuffled, shuffled_length, "", + XKB_COMPOSE_FORMAT_TEXT_V1, + XKB_COMPOSE_COMPILE_NO_FLAGS); + assert(table); + + iter = xkb_compose_table_iterator_new(table); + assert(iter); + xkb_compose_table_for_each(table, compose_traverse_fn, iter); + assert(xkb_compose_table_iterator_next(iter) == NULL); + xkb_compose_table_iterator_free(iter); + + xkb_compose_table_unref(table); + } + free(shuffled); + free(input); } static void @@ -938,6 +1006,15 @@ test_encode_escape_sequences(struct xkb_context *ctx) # undef MAX_CODE_POINTS_COUNT } +/* CLI positional arguments: + * 1. Seed for the pseudo-random generator: + * - Leave it unset or set it to “-” to use current time. + * - Use an integer to set it explicitly. + * 2. Number of quickcheck loops: + * - Leave it unset to use the default. It depends if the `RUNNING_VALGRIND` + * environment variable is set. + * - Use an integer to set it explicitly. + */ int main(int argc, char *argv[]) { @@ -950,7 +1027,7 @@ main(int argc, char *argv[]) /* Initialize pseudo-random generator with program arg or current time */ int seed; - if (argc == 2) { + if (argc >= 2 && !streq(argv[1], "-")) { seed = atoi(argv[1]); } else { seed = (int)time(NULL); @@ -958,6 +1035,16 @@ main(int argc, char *argv[]) fprintf(stderr, "Seed for the pseudo-random generator: %d\n", seed); srand(seed); + /* Determine number of loops for quickchecks */ + size_t quickcheck_loops = 100; /* Default */ + if (argc > 2) { + /* From command-line */ + quickcheck_loops = (size_t)atoi(argv[2]); + } else if (getenv("RUNNING_VALGRIND") != NULL) { + /* Reduce if running Valgrind */ + quickcheck_loops = quickcheck_loops / 20; + } + /* * Ensure no environment variables but “top_srcdir” is set. This ensures * that user Compose file paths are unset before the tests and set @@ -985,7 +1072,7 @@ main(int argc, char *argv[]) test_modifier_syntax(ctx); test_include(ctx); test_override(ctx); - test_traverse(ctx); + test_traverse(ctx, quickcheck_loops); test_string_length(ctx); test_decode_escape_sequences(ctx); test_encode_escape_sequences(ctx); diff --git a/test/shuffle-lines.c b/test/shuffle-lines.c new file mode 100644 index 000000000..12b2fabe9 --- /dev/null +++ b/test/shuffle-lines.c @@ -0,0 +1,89 @@ +/* + * Copyright © 2024 Pierre Le Marre + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "config.h" + +#include +#include +#include "src/utils.h" +#include "test/shuffle-lines.h" + +/* Split string into lines */ +size_t +split_lines(const char *input, size_t input_length, + struct text_line *output, size_t output_length) +{ + const char *start = input; + char *next; + size_t l; + size_t i = 0; + + for (l = 0; i < input_length && l < output_length && *start != '\0'; l++) { + /* Look for newline character */ + next = strchr(start, 0x0a); + output[l].start = start; + if (next == NULL) { + /* Not found: add the rest of the string */ + output[l++].length = strlen(start); + break; + } + output[l].length = (size_t)(next - start) + 1; + start = next + 1; + i += output[l].length; + } + return l; +} + +size_t +shuffle_lines(struct text_line *lines, size_t length, char *output) +{ + /* Shuffle lines in-place using Fisher–Yates algorithm. + * See: https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle */ + + assert(length < RAND_MAX); + char *out = output; + if (length > 1) { + /* 1. Set the current i to the last line. + * 2. Take a random line j before the current line i. + * 3. Swap the lines i and j. + * 4. Append line i to the output. + * 5. If i is the first line, stop. Else decrease i and go to 2). + */ + for (size_t i = length - 1; i > 0; i--) { + /* Swap current line with random line before it */ + size_t j = (size_t)(rand() % (i+1)); + struct text_line tmp = lines[j]; + lines[j] = lines[i]; + lines[i] = tmp; + /* Append current line */ + memcpy(out, lines[i].start, lines[i].length); + out += lines[i].length; + /* Ensure line ends with newline */ + if (out[-1] != '\n') { + out[0] = '\n'; + out++; + } + } + } + return (size_t)(out - output); +} diff --git a/test/shuffle-lines.h b/test/shuffle-lines.h new file mode 100644 index 000000000..d0c596bf5 --- /dev/null +++ b/test/shuffle-lines.h @@ -0,0 +1,13 @@ +#include + +struct text_line { + const char *start; + size_t length; +}; + +size_t +split_lines(const char *input, size_t input_length, + struct text_line *output, size_t output_length); + +size_t +shuffle_lines(struct text_line *lines, size_t length, char *output); From b8271a626605bf53e889b38a23dbea72dd774f3a Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Sun, 1 Sep 2024 10:57:40 +0200 Subject: [PATCH 3/5] compose: Add roundtrip test parse/dump --- meson.build | 3 +++ test/compose.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/meson.build b/meson.build index 037f40811..550443583 100644 --- a/meson.build +++ b/meson.build @@ -133,6 +133,9 @@ if cc.has_header_symbol('stdio.h', 'asprintf', prefix: system_ext_define) elif cc.has_header_symbol('stdio.h', 'vasprintf', prefix: system_ext_define) configh_data.set('HAVE_VASPRINTF', 1) endif +if cc.has_header_symbol('stdio.h', 'open_memstream', prefix: system_ext_define) + configh_data.set('HAVE_OPEN_MEMSTREAM', 1) +endif if cc.has_header_symbol('stdlib.h', 'secure_getenv', prefix: system_ext_define) configh_data.set('HAVE_SECURE_GETENV', 1) elif cc.has_header_symbol('stdlib.h', '__secure_getenv', prefix: system_ext_define) diff --git a/test/compose.c b/test/compose.c index 2bf509099..5876206ca 100644 --- a/test/compose.c +++ b/test/compose.c @@ -24,6 +24,7 @@ #include "config.h" #include #include +#include #include "xkbcommon/xkbcommon-compose.h" @@ -1006,6 +1007,63 @@ test_encode_escape_sequences(struct xkb_context *ctx) # undef MAX_CODE_POINTS_COUNT } +/* Roundtrip check: check that a table parsed from a file and the table parsed + * from the dump of the previous table are identical */ +static void +test_roundtrip(struct xkb_context *ctx) +{ +/* TODO: add support for systems without open_memstream */ +#if HAVE_OPEN_MEMSTREAM + bool ok = false; + + /* Parse reference file */ + char *input = test_read_file("locale/en_US.UTF-8/Compose"); + assert(input); + size_t input_length = strlen(input); + struct xkb_compose_table *ref_table = xkb_compose_table_new_from_buffer( + ctx, input, input_length, "", + XKB_COMPOSE_FORMAT_TEXT_V1, + XKB_COMPOSE_COMPILE_NO_FLAGS + ); + free(input); + assert(ref_table); + + /* Dump reference Compose table */ + char *output; + size_t output_length = 0; + FILE *output_file = open_memstream(&output, &output_length); + assert(output_file); + + ok = xkb_compose_table_dump(output_file, ref_table); + fclose(output_file); + assert(input); + + if (!ok) { + free(output); + xkb_compose_table_unref(ref_table); + exit(TEST_SETUP_FAILURE); + } + + /* Parse dumped table */ + struct xkb_compose_table *table = xkb_compose_table_new_from_buffer( + ctx, output, output_length, "", + XKB_COMPOSE_FORMAT_TEXT_V1, + XKB_COMPOSE_COMPILE_NO_FLAGS + ); + free(output); + assert(table); + + /* Check roundtrip by comparing table entries */ + struct xkb_compose_table_iterator *iter = xkb_compose_table_iterator_new(table); + xkb_compose_table_for_each(ref_table, compose_traverse_fn, iter); + assert(xkb_compose_table_iterator_next(iter) == NULL); + + xkb_compose_table_iterator_free(iter); + xkb_compose_table_unref(table); + xkb_compose_table_unref(ref_table); +#endif +} + /* CLI positional arguments: * 1. Seed for the pseudo-random generator: * - Leave it unset or set it to “-” to use current time. @@ -1076,6 +1134,7 @@ main(int argc, char *argv[]) test_string_length(ctx); test_decode_escape_sequences(ctx); test_encode_escape_sequences(ctx); + test_roundtrip(ctx); xkb_context_unref(ctx); return 0; From c83b56f199e5b6adaecf5e4e44f55874aca81944 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Tue, 30 Jul 2024 14:45:35 +0200 Subject: [PATCH 4/5] compose: Add optional foreach traversal to benchmark This allow us to compare the iterator API to the fastest implementation, `foreach`. --- bench/compose-traversal.c | 33 ++++++++++++++++++++++++++------- meson.build | 10 +++++++++- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/bench/compose-traversal.c b/bench/compose-traversal.c index be24beba7..11ff64365 100644 --- a/bench/compose-traversal.c +++ b/bench/compose-traversal.c @@ -23,27 +23,40 @@ #include "config.h" +#include #include #include "xkbcommon/xkbcommon-compose.h" +#include "../test/compose-iter.h" #include "../test/test.h" #include "bench.h" #define BENCHMARK_ITERATIONS 1000 +static void +compose_fn(struct xkb_compose_table_entry *entry, void *data) +{ + assert (entry); +} + +/* Benchmark compose traversal using: + * • the internal recursive function `xkb_compose_table_for_each` if `foreach` is + * is passed as argument to the program; + * • else the iterator API (`xkb_compose_table_iterator_new`, …). + */ int -main(void) +main(int argc, char *argv[]) { struct xkb_context *ctx; char *path; FILE *file; struct xkb_compose_table *table; - struct xkb_compose_table_iterator *iter; - struct xkb_compose_table_entry *entry; struct bench bench; char *elapsed; + bool use_foreach_impl = (argc > 1 && strcmp(argv[1], "foreach") == 0); + ctx = test_get_context(CONTEXT_NO_FLAG); assert(ctx); @@ -68,11 +81,17 @@ main(void) bench_start(&bench); for (int i = 0; i < BENCHMARK_ITERATIONS; i++) { - iter = xkb_compose_table_iterator_new(table); - while ((entry = xkb_compose_table_iterator_next(iter))) { - assert (entry); + if (use_foreach_impl) { + xkb_compose_table_for_each(table, compose_fn, NULL); + } else { + struct xkb_compose_table_iterator *iter; + struct xkb_compose_table_entry *entry; + iter = xkb_compose_table_iterator_new(table); + while ((entry = xkb_compose_table_iterator_next(iter))) { + assert (entry); + } + xkb_compose_table_iterator_free(iter); } - xkb_compose_table_iterator_free(iter); } bench_stop(&bench); diff --git a/meson.build b/meson.build index 550443583..ff3a731d7 100644 --- a/meson.build +++ b/meson.build @@ -917,7 +917,15 @@ benchmark( ) benchmark( 'compose-traversal', - executable('bench-compose-traversal', 'bench/compose-traversal.c', dependencies: test_dep), + executable( + 'bench-compose-traversal', + 'bench/compose-traversal.c', + 'bench/bench.h', + 'test/compose-iter.c', + 'test/compose-iter.h', + 'test/test.h', + dependencies: test_dep + ), env: bench_env, ) benchmark( From 582d68b5bdf6860a9bb38cff3a1659ce81888703 Mon Sep 17 00:00:00 2001 From: Pierre Le Marre Date: Mon, 8 Jul 2024 09:19:00 +0200 Subject: [PATCH 5/5] compose: Make iteration API faster New implementation without explicit direction --- src/compose/table.c | 186 ++++++++++++++++++++++++-------------------- src/compose/table.h | 3 +- 2 files changed, 105 insertions(+), 84 deletions(-) diff --git a/src/compose/table.c b/src/compose/table.c index 40926159c..14261b2e5 100644 --- a/src/compose/table.c +++ b/src/compose/table.c @@ -242,17 +242,13 @@ xkb_compose_table_entry_utf8(struct xkb_compose_table_entry *entry) return entry->utf8; } -enum node_direction { - NODE_LEFT = 0, - NODE_DOWN, - NODE_RIGHT, - NODE_UP -}; +#if MAX_COMPOSE_NODES_LOG2 > 31 +#error "Cannot implement bit field xkb_compose_table_iterator_pending_node.offset" +#endif -struct xkb_compose_table_iterator_cursor { - uint32_t node_offset:30; /* WARNING: ensure it fits MAX_COMPOSE_NODES */ - uint8_t direction:2; /* enum node_direction: current direction - * traversing the tree */ +struct xkb_compose_table_iterator_pending_node { + uint32_t offset:31; + bool processed:1; }; struct xkb_compose_table_iterator { @@ -260,7 +256,7 @@ struct xkb_compose_table_iterator { /* Current entry */ struct xkb_compose_table_entry entry; /* Stack of pending nodes to process */ - darray(struct xkb_compose_table_iterator_cursor) cursors; + darray(struct xkb_compose_table_iterator_pending_node) pending_nodes; }; XKB_EXPORT struct xkb_compose_table_iterator * @@ -282,16 +278,26 @@ xkb_compose_table_iterator_new(struct xkb_compose_table *table) iter->entry.sequence = sequence; iter->entry.sequence_length = 0; - darray_init(iter->cursors); - /* Add first cursor only if there is at least one non-dummy node */ - if (darray_size(iter->table->nodes) > 1) { - const struct xkb_compose_table_iterator_cursor cursor = { - .direction = NODE_LEFT, - /* Offset 0 is a dummy null entry, skip it. */ - .node_offset = 1 - }; - darray_append(iter->cursors, cursor); + darray_init(iter->pending_nodes); + /* Short-circuit if table contains only the dummy entry */ + if (darray_size(table->nodes) == 1) { + return iter; } + /* Add root node */ + struct xkb_compose_table_iterator_pending_node pending = { + .offset = 1, + .processed = false + }; + darray_append(iter->pending_nodes, pending); + const struct compose_node *node = &darray_item(iter->table->nodes, + pending.offset); + + /* Find the first left-most node and store intermediate nodes as pending */ + while (node->lokid) { + pending.offset = node->lokid; + darray_append(iter->pending_nodes, pending); + node = &darray_item(iter->table->nodes, pending.offset); + }; return iter; } @@ -300,7 +306,7 @@ XKB_EXPORT void xkb_compose_table_iterator_free(struct xkb_compose_table_iterator *iter) { xkb_compose_table_unref(iter->table); - darray_free(iter->cursors); + darray_free(iter->pending_nodes); free(iter->entry.sequence); free(iter); } @@ -308,75 +314,89 @@ xkb_compose_table_iterator_free(struct xkb_compose_table_iterator *iter) XKB_EXPORT struct xkb_compose_table_entry * xkb_compose_table_iterator_next(struct xkb_compose_table_iterator *iter) { - /* - * This function takes the following recursive traversal function, - * and makes it non-recursive and resumable. The iter->cursors stack - * is analogous to the call stack, and cursor->direction to the - * instruction pointer of a stack frame. - * - * traverse(xkb_keysym_t *sequence, size_t sequence_length, uint16_t p) { - * if (!p) return - * // cursor->direction == NODE_LEFT - * node = &darray_item(table->nodes, p) - * traverse(sequence, sequence_length, node->lokid) - * // cursor->direction == NODE_DOWN - * sequence[sequence_length++] = node->keysym - * if (node->is_leaf) - * emit(sequence, sequence_length, node->leaf.keysym, table->utf[node->leaf.utf8]) - * else - * traverse(sequence, sequence_length, node->internal.eqkid) - * sequence_length-- - * // cursor->direction == NODE_RIGHT - * traverse(sequence, sequence_length, node->hikid) - * // cursor->direction == NODE_UP - * } + /* Traversal algorithm (simplified): + * 1. Resume last pending node from the stack as the current pending node. + * 2. If the node is not yet processed, go to 5. + * 3. Remove node from the stack and remove last keysym from the entry. + * 4. If there is a right arrow, set it as the current pending node + * (unprocessed) and go to 6; else go to 1. + * 5. Follow down arrow: set the pending node as processed, then: + * a) if it is a leaf, complete the entry and return it. + * b) else append the child node to the stack and set it as the current + * pending node. + * 6. Find the next left-most arrow and store intermediate pending nodes. + * 7. Go to 5. */ - struct xkb_compose_table_iterator_cursor *cursor; + struct xkb_compose_table_iterator_pending_node *pending; const struct compose_node *node; - while (!darray_empty(iter->cursors)) { - cursor = &darray_item(iter->cursors, darray_size(iter->cursors) - 1); - node = &darray_item(iter->table->nodes, cursor->node_offset); + /* Iterator is empty if there is no pending nodes */ + if (unlikely(darray_empty(iter->pending_nodes))) { + return NULL; + } - switch (cursor->direction) { - case NODE_LEFT: - cursor->direction = NODE_DOWN; - if (node->lokid) { - struct xkb_compose_table_iterator_cursor new_cursor = {node->lokid, NODE_LEFT}; - darray_append(iter->cursors, new_cursor); - } - break; - - case NODE_DOWN: - cursor->direction = NODE_RIGHT; - assert (iter->entry.sequence_length <= MAX_LHS_LEN); - iter->entry.sequence[iter->entry.sequence_length] = node->keysym; - iter->entry.sequence_length++; - if (node->is_leaf) { - iter->entry.keysym = node->leaf.keysym; - iter->entry.utf8 = &darray_item(iter->table->utf8, node->leaf.utf8); - return &iter->entry; - } else { - struct xkb_compose_table_iterator_cursor new_cursor = {node->internal.eqkid, NODE_LEFT}; - darray_append(iter->cursors, new_cursor); - } - break; - - case NODE_RIGHT: - cursor->direction = NODE_UP; - iter->entry.sequence_length--; - if (node->hikid) { - struct xkb_compose_table_iterator_cursor new_cursor = {node->hikid, NODE_LEFT}; - darray_append(iter->cursors, new_cursor); + /* Resume to the last element in the stack */ + pending = &darray_item(iter->pending_nodes, + darray_size(iter->pending_nodes) - 1); + node = &darray_item(iter->table->nodes, pending->offset); + + struct xkb_compose_table_iterator_pending_node new = { + .processed = false, + .offset = 0 + }; + + /* Remove processed leaves until an unprocessed right arrow or parent + * is found, then update entry accordingly */ + while (pending->processed) { + /* Remove last keysym */ + iter->entry.sequence_length--; + if (node->hikid) { + /* Follow right arrow: replace current pending node */ + pending->processed = false; + pending->offset = node->hikid; + /* Process child’s left arrow */ + goto node_left; + } else { + /* Remove processed node */ + darray_remove_last(iter->pending_nodes); + if (unlikely(darray_empty(iter->pending_nodes))) { + /* No more nodes to process */ + return NULL; } - break; - - case NODE_UP: - darray_remove_last(iter->cursors); - break; + /* Get parent node */ + pending = &darray_item(iter->pending_nodes, + darray_size(iter->pending_nodes) - 1); + node = &darray_item(iter->table->nodes, pending->offset); } } - return NULL; + while (1) { + /* Follow down arrow */ + pending->processed = true; + iter->entry.sequence[iter->entry.sequence_length] = node->keysym; + iter->entry.sequence_length++; + if (node->is_leaf) { + /* Leaf: return entry */ + iter->entry.keysym = node->leaf.keysym; + iter->entry.utf8 = &darray_item(iter->table->utf8, node->leaf.utf8); + return &iter->entry; + } + /* Not a leaf: process child node */ + new.offset = node->internal.eqkid; + darray_append(iter->pending_nodes, new); + pending = &darray_item(iter->pending_nodes, + darray_size(iter->pending_nodes) - 1); +node_left: + node = &darray_item(iter->table->nodes, pending->offset); + /* Find the next left-most arrow and store intermediate pending nodes */ + while (node->lokid) { + /* Follow left arrow */ + new.offset = node->lokid; + darray_append(iter->pending_nodes, new); + pending = &darray_item(iter->pending_nodes, + darray_size(iter->pending_nodes) - 1); + node = &darray_item(iter->table->nodes, new.offset); + } + } } diff --git a/src/compose/table.h b/src/compose/table.h index 5615db29a..c8a6579a2 100644 --- a/src/compose/table.h +++ b/src/compose/table.h @@ -77,7 +77,8 @@ /* 7 nodes for every potential Unicode character and then some should be * enough for all purposes. */ -#define MAX_COMPOSE_NODES (1 << 23) +#define MAX_COMPOSE_NODES_LOG2 23 +#define MAX_COMPOSE_NODES (1 << MAX_COMPOSE_NODES_LOG2) struct compose_node { xkb_keysym_t keysym;