Skip to content

Commit

Permalink
Compose: allow overlapping sequences
Browse files Browse the repository at this point in the history
Currently xkbcommon does not support overlapping Compose sequences,
unlike GTK and ibus. This means that a sequence that is a prefix of a
longer one is simply discarded with a warning.

This is unfortunate:
- There is a discrepancy for users of xkbcommon, e.g. Qt-based apps.
- It is impossible to have a sequence that is the prefix of another one.
  As a consequence, if one imports e.g. the system locale Compose file
  (as many – most ? – custom Compose files do), some sequences become
  impossible.

  Example: I used `<Multi_key> <minus> <period> : "•" U2022` but
  an upstream change in libX11 added `<Multi_key> <minus> <period> <e> : "ė̄"`,
  so I had to introduce an alternative sequence for Qt apps, although
  the previous one works in GTK-based apps.

This commit introduces the following new API:
- `xkb_compose_compile_flags`:
  `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`: Allow overlapping sequences
- `xkb_compose_status`:
  `XKB_COMPOSE_CANDIDATE`: A complete sequence has been matched, but a
  longer sequence also exists.
  `XKB_COMPOSE_CANDIDATE_ACCEPTED`: The last sequence was accepted due to
  an unmatched keysym.
  • Loading branch information
wismill committed Nov 1, 2023
1 parent e9fc965 commit 0a4f1a9
Show file tree
Hide file tree
Showing 13 changed files with 421 additions and 95 deletions.
67 changes: 54 additions & 13 deletions include/xkbcommon/xkbcommon-compose.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,23 @@ extern "C" {
* @page compose-conflicting Conflicting Sequences
* @parblock
*
* To avoid ambiguity, a sequence is not allowed to be a prefix of another.
* Sequences of length 1 are allowed.
*
* <b><em>Without</em> `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`</b>
*
* To avoid ambiguity, a sequence is *not* allowed to be a prefix of another.
* In such a case, the conflict is resolved thus:
*
* 1. A longer sequence overrides a shorter one.
* 2. An equal sequence overrides an existing one.
* 3. A shorter sequence does not override a longer one.
*
* Sequences of length 1 are allowed.
* <b><em>With</em> `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`</b>
*
* Overlapping sequences of different lengths are *allowed* to co-exist.
* Conflicts are resolved with the following rule:
*
* 1. An equal sequence overrides an existing one.
*
* @endparblock
*/
Expand Down Expand Up @@ -141,7 +150,11 @@ struct xkb_compose_state;
/** Flags affecting Compose file compilation. */
enum xkb_compose_compile_flags {
/** Do not apply any flags. */
XKB_COMPOSE_COMPILE_NO_FLAGS = 0
XKB_COMPOSE_COMPILE_NO_FLAGS = 0,
/** Allow overlapping sequences
* @since 1.7.0
*/
XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES
};

/** The recognized Compose file formats. */
Expand Down Expand Up @@ -450,7 +463,7 @@ enum xkb_compose_state_flags {
* @param flags
* Optional flags for the compose state, or 0.
*
* @returns A new compose state, or NULL on failure.
* @returns A new compose state, or `NULL` on failure.
*
* @memberof xkb_compose_state
*/
Expand All @@ -471,7 +484,7 @@ xkb_compose_state_ref(struct xkb_compose_state *state);
/**
* Release a reference on a compose state object, and possibly free it.
*
* @param state The object. If NULL, do nothing.
* @param state The object. If `NULL`, do nothing.
*
* @memberof xkb_compose_state
*/
Expand Down Expand Up @@ -499,6 +512,16 @@ enum xkb_compose_status {
XKB_COMPOSE_NOTHING,
/** In the middle of a sequence. */
XKB_COMPOSE_COMPOSING,
/** A complete sequence has been matched, but a longer sequence also exists.
*
* @since 1.7.0
*/
XKB_COMPOSE_CANDIDATE,
/** The last sequence was accepted due to an unmatched keysym.
*
* @since 1.7.0
*/
XKB_COMPOSE_CANDIDATE_ACCEPTED,
/** A complete sequence has been matched. */
XKB_COMPOSE_COMPOSED,
/** The last sequence was cancelled due to an unmatched keysym. */
Expand All @@ -524,27 +547,45 @@ enum xkb_compose_feed_result {
* have no effect on the status or otherwise.
*
* The following is a description of the possible status transitions, in
* the format CURRENT STATUS => NEXT STATUS, given a non-ignored input
* keysym `keysym`:
* the format `CURRENT STATUS` => `NEXT STATUS`, given a non-ignored
* input keysym `keysym`:
*
@verbatim
NOTHING or CANCELLED or COMPOSED =>
NOTHING or CANCELLED or COMPOSED or CANDIDATE_ACCEPTED =>
NOTHING if keysym does not start a sequence.
COMPOSING if keysym starts a sequence.
CANDIDATE if keysym starts and terminates a single-keysym sequence,
but a longer sequence also exists.
COMPOSED if keysym starts and terminates a single-keysym sequence.
COMPOSING =>
COMPOSING if keysym advances any of the currently possible
sequences but does not terminate any of them.
CANDIDATE if keysym terminates one of the currently possible
sequences, but a longer sequence also exists.
COMPOSED if keysym terminates one of the currently possible
sequences.
CANCELLED if keysym does not advance any of the currently
possible sequences.
CANDIDATE =>
COMPOSING if keysym advances any of the currently possible
sequences but does not terminate any of them.
CANDIDATE if keysym terminates one of the currently possible
sequences, but a longer sequence also exists.
COMPOSED if keysym terminates one of the currently possible
sequences.
CANDIDATE_ACCEPTED
if keysym does not advance any of the currently
possible sequences, but a candidate was proposed previously.
@endverbatim
*
* @note `CANDIDATE` and `CANDIDATE_ACCEPTED` are only possible when compiling
* using `XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES`.
*
* The current Compose formats do not support multiple-keysyms.
* Therefore, if you are using a function such as xkb_state_key_get_syms()
* and it returns more than one keysym, consider feeding XKB_KEY_NoSymbol
* and it returns more than one keysym, consider feeding `XKB_KEY_NoSymbol`
* instead.
*
* @param state
Expand All @@ -565,7 +606,7 @@ xkb_compose_state_feed(struct xkb_compose_state *state,
/**
* Reset the Compose sequence state machine.
*
* The status is set to XKB_COMPOSE_NOTHING, and the current sequence
* The status is set to `XKB_COMPOSE_NOTHING`, and the current sequence
* is discarded.
*
* @memberof xkb_compose_state
Expand All @@ -586,7 +627,7 @@ xkb_compose_state_get_status(struct xkb_compose_state *state);
* Get the result Unicode/UTF-8 string for a composed sequence.
*
* See @ref compose-overview for more details. This function is only
* useful when the status is XKB_COMPOSE_COMPOSED.
* useful when the status is `XKB_COMPOSE_COMPOSED` or `XKB_COMPOSE_CANDIDATE`.
*
* @param[in] state
* The compose state.
Expand Down Expand Up @@ -618,10 +659,10 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state,
* Get the result keysym for a composed sequence.
*
* See @ref compose-overview for more details. This function is only
* useful when the status is XKB_COMPOSE_COMPOSED.
* useful when the status is `XKB_COMPOSE_COMPOSED` or `XKB_COMPOSE_CANDIDATE`.
*
* @returns The result keysym. If the sequence is not complete, or does
* not specify a result keysym, returns XKB_KEY_NoSymbol.
* not specify a result keysym, returns `XKB_KEY_NoSymbol`.
*
* @memberof xkb_compose_state
**/
Expand Down
102 changes: 84 additions & 18 deletions src/compose/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,13 +347,23 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
uint32_t curr = darray_size(table->nodes) == 1 ? 0 : 1;
uint32_t *pptr = NULL;
struct compose_node *node = NULL;
bool allow_overlapping;

/* Warn before potentially going over the limit, discard silently after. */
if (darray_size(table->nodes) + production->len + MAX_LHS_LEN > MAX_COMPOSE_NODES)
// TODO: adapt limit if overlapping is disallowed?
/*
* Warn before potentially going over the limit, discard silently after.
*
* We may add up to production->len * 2 - 1 nodes:
* • one node per keysym in the sequence
* • plus one node per keysym for overlap, except for the last node.
*/
if (darray_size(table->nodes) + production->len * 2 - 1 + MAX_LHS_LEN > MAX_COMPOSE_NODES)
scanner_warn(s, "too many sequences for one Compose file; will ignore further lines");
if (darray_size(table->nodes) + production->len >= MAX_COMPOSE_NODES)
if (darray_size(table->nodes) + production->len * 2 - 1 >= MAX_COMPOSE_NODES)
return;

allow_overlapping = !!(table->flags & XKB_COMPOSE_COMPILE_OVERLAPPING_SEQUENCES);

/*
* Insert the sequence to the ternary search tree, creating new nodes as
* needed.
Expand All @@ -376,8 +386,9 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
.lokid = 0,
.hikid = 0,
.internal = {
.eqkid = 0,
.resid = 0,
.is_leaf = false,
.eqkid = 0,
},
};
curr = darray_size(table->nodes);
Expand All @@ -397,47 +408,102 @@ add_production(struct xkb_compose_table *table, struct scanner *s,
pptr = &node->hikid;
curr = node->hikid;
} else if (!last) {
/* Adding intermediate node */
if (node->is_leaf) {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
node->internal.eqkid = 0;
/* Existing leaf */
if (allow_overlapping) {
/* Backup overlapping sequence result */
struct compose_node overlapping = {
.keysym = node->keysym,
.lokid = 0,
.hikid = 0,
.leaf = node->leaf
};
darray_append(table->nodes, overlapping);
node = &darray_item(table->nodes, curr);
node->internal.resid = darray_size(table->nodes) - 1;
} else {
scanner_warn(s, "a sequence already exists which is a prefix of this sequence; overriding");
node->internal.resid = 0;
}
/* Reset node */
node->internal.is_leaf = false;
node->internal.eqkid = 0;
}
lhs_pos++;
pptr = &node->internal.eqkid;
curr = node->internal.eqkid;
} else {
/* Adding the last node of the sequence and the result */
struct compose_node *result = NULL;
bool has_previous_leaf;
if (node->is_leaf) {
/* Existing leaf */
has_previous_leaf = true;
result = node;
} else if (node->internal.eqkid != 0) {
/* Existing non-leaf */
if (!allow_overlapping) {
scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
return;
} else if (node->internal.resid) {
/* Reuse existing overlapping sequence result */
result = &darray_item(table->nodes, node->internal.resid);
has_previous_leaf = true;
} else {
/* Create a new overlapping sequence result */
node->internal.resid = darray_size(table->nodes);
struct compose_node overlapping = {
.keysym = node->keysym,
.lokid = 0,
.hikid = 0,
.leaf = {
.utf8 = 0,
.is_leaf = true,
.keysym = XKB_KEY_NoSymbol
}
};
darray_append(table->nodes, overlapping);
node = &darray_item(table->nodes, curr);
result = &darray_item(table->nodes,
node->internal.resid);
has_previous_leaf = false;
}
} else {
/* New leaf */
has_previous_leaf = false;
node->is_leaf = true;
result = node;
}
if (has_previous_leaf) {
bool same_string =
(node->leaf.utf8 == 0 && !production->has_string) ||
(result->leaf.utf8 == 0 && !production->has_string) ||
(
node->leaf.utf8 != 0 && production->has_string &&
streq(&darray_item(table->utf8, node->leaf.utf8),
result->leaf.utf8 != 0 && production->has_string &&
streq(&darray_item(table->utf8, result->leaf.utf8),
production->string)
);
bool same_keysym =
(node->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
(result->leaf.keysym == XKB_KEY_NoSymbol && !production->has_keysym) ||
(
node->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
node->leaf.keysym == production->keysym
result->leaf.keysym != XKB_KEY_NoSymbol && production->has_keysym &&
result->leaf.keysym == production->keysym
);
if (same_string && same_keysym) {
scanner_warn(s, "this compose sequence is a duplicate of another; skipping line");
return;
} else {
scanner_warn(s, "this compose sequence already exists; overriding");
}
} else if (node->internal.eqkid != 0) {
scanner_warn(s, "this compose sequence is a prefix of another; skipping line");
return;
}
node->is_leaf = true;
result->is_leaf = true;
if (production->has_string) {
node->leaf.utf8 = darray_size(table->utf8);
result->leaf.utf8 = darray_size(table->utf8);
darray_append_items(table->utf8, production->string,
strlen(production->string) + 1);
}
if (production->has_keysym) {
node->leaf.keysym = production->keysym;
result->leaf.keysym = production->keysym;
}
return;
}
Expand Down
33 changes: 24 additions & 9 deletions src/compose/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,18 @@ xkb_compose_state_get_status(struct xkb_compose_state *state)
prev_node = &darray_item(state->table->nodes, state->prev_context);
node = &darray_item(state->table->nodes, state->context);

if (state->context == 0 && !prev_node->is_leaf)
return XKB_COMPOSE_CANCELLED;

if (state->context == 0)
if (state->context == 0) {
if (!prev_node->is_leaf)
return prev_node->internal.resid
? XKB_COMPOSE_CANDIDATE_ACCEPTED
: XKB_COMPOSE_CANCELLED;
return XKB_COMPOSE_NOTHING;
}

if (!node->is_leaf)
return XKB_COMPOSE_COMPOSING;
return node->internal.resid
? XKB_COMPOSE_CANDIDATE
: XKB_COMPOSE_COMPOSING;

return XKB_COMPOSE_COMPOSED;
}
Expand All @@ -162,8 +166,14 @@ xkb_compose_state_get_utf8(struct xkb_compose_state *state,
const struct compose_node *node =
&darray_item(state->table->nodes, state->context);

if (!node->is_leaf)
goto fail;
if (!node->is_leaf) {
if (node->internal.resid) {
node = &darray_item(state->table->nodes,
node->internal.resid);
} else {
goto fail;
}
}

/* If there's no string specified, but only a keysym, try to do the
* most helpful thing. */
Expand Down Expand Up @@ -195,7 +205,12 @@ xkb_compose_state_get_one_sym(struct xkb_compose_state *state)
{
const struct compose_node *node =
&darray_item(state->table->nodes, state->context);
if (!node->is_leaf)
if (node->is_leaf) {
return node->leaf.keysym;
} else if (node->internal.resid) {
return darray_item(state->table->nodes,
node->internal.resid).leaf.keysym;
} else {
return XKB_KEY_NoSymbol;
return node->leaf.keysym;
}
}
Loading

0 comments on commit 0a4f1a9

Please sign in to comment.