Skip to content

Commit

Permalink
wip and recognize word-aligned
Browse files Browse the repository at this point in the history
  • Loading branch information
gnprice committed Dec 7, 2024
1 parent c009101 commit 8e2dda7
Showing 1 changed file with 48 additions and 38 deletions.
86 changes: 48 additions & 38 deletions lib/model/emoji.dart
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,19 @@ class EmojiStoreImpl with EmojiStore {
/// (Rather vacuous for the moment; this structure will
/// gain more substance in an upcoming commit.)
enum EmojiMatchQuality {
exact, prefix, other;
/// The query matches the whole emoji name (or the literal emoji itself).
exact,

/// The query matches a prefix of the emoji name, but not the whole name.
prefix,

/// The query matches starting at the start of a word in the emoji name,
/// but not the start of the whole name.
wordAligned,

/// The query matches somewhere in the emoji name,
/// but not at the start of any word.
other;

/// The best possible quality of match.
static const best = exact;
Expand Down Expand Up @@ -374,26 +386,30 @@ class EmojiAutocompleteView extends AutocompleteView<EmojiAutocompleteQuery, Emo
return match == null ? null : EmojiAutocompleteResult(match, candidate);
}

static const _numResultRanks = 7;
static const _numResultRanks = 9;

// Compare sort_emojis in Zulip web:shared/src/typeahead.ts .
// Differences in behavior include:
// Behavior differences we might copy:
// * Web ranks each name of a Unicode emoji separately.
// * Web recognizes a word-aligned match starting after [ /-] as well as [_].
//
// Behavior differences that web should probably fix (of varying impact):
// * Among popular emoji with non-exact matches,
// web doesn't prioritize by quality of match; we do.
// * OTOH web only counts an emoji as "popular" for ranking if the query
// is a prefix of a word in the emoji's name or alias.
// TODO reconcile that; requiring word boundary may be helpful;
// rejecting query that spans words seems unhelpful.
// * TODO web ranks matches starting at a word boundary ahead of other
// non-prefix matches; we don't yet.
// * Web distinguishes prefix matches by case-sensitive vs. not; we don't.
// That doesn't seem helpful for emoji search.
// web doesn't prioritize prefix over word-aligned; we do.
// (This affects just one case: for query "o",
// we put :octopus: before :working_on_it:.)
// * Web only counts an emoji as "popular" for ranking if the query
// is a prefix of a single word in the name; so "thumbs_" or "working_on_i"
// lose the ranking boost for :thumbs_up: and :working_on_it: respectively.
// * Web starts with only case-sensitive exact matches ("perfect matches"),
// and puts case-insensitive exact matches just ahead of prefix matches;
// it also distinguishes prefix matches by case-sensitive vs. not.
// We use case-insensitive matches throughout;
// case seems unhelpful for emoji search.
// * Web suppresses Unicode emoji names shadowed by a realm emoji
// only if the latter is also a match for the query. That mostly works,
// because emoji with the same name will mostly both match or both not;
// but it breaks if the Unicode emoji was a literal match.
// TODO(web): that's a bug
static int _rankResult(EmojiAutocompleteResult result) {
if (result.matchQuality == EmojiMatchQuality.exact) {
return 0;
Expand All @@ -408,10 +424,10 @@ class EmojiAutocompleteView extends AutocompleteView<EmojiAutocompleteQuery, Emo
ReactionType.unicodeEmoji => false,
};
return switch (result.matchQuality) {
EmojiMatchQuality.exact => throw Error(), // handled above
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 4,
// TODO word-boundary vs. not
EmojiMatchQuality.other => isPopular ? 2 : isCustomEmoji ? 5 : 6,
EmojiMatchQuality.exact => throw Error(), // handled above
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 5,
EmojiMatchQuality.wordAligned => isPopular ? 2 : isCustomEmoji ? 4 : 6,
EmojiMatchQuality.other => isCustomEmoji ? 7 : 8,
};
}

Expand Down Expand Up @@ -460,29 +476,23 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
}

EmojiMatchQuality? _matchName(String emojiName) {
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
if (_nameMatches(emojiName)) return EmojiMatchQuality.other;
return null;
}

// Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts .
bool _nameMatches(String emojiName) {
// Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts .
// TODO(#1067) this assumes emojiName is already lower-case (and no diacritics)
const String separator = '_';

if (!_adjusted.contains(separator)) {
// If the query is a single token (doesn't contain a separator),
// the match can be anywhere in the string.
return emojiName.contains(_adjusted);
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
const String sep = '_';
if (emojiName.contains(sep + _adjusted)) return EmojiMatchQuality.wordAligned;
if (_adjusted.contains(sep)) {
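// If there is a separator in the query, then we
// require the match to start at the start of a token.
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
// but not 'b_cd_ef'.)
// Both token-aligned cases (the start of the name, and just after
// a separator) were already checked above, so there is no match.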
return null;
}

// If there is a separator in the query, then we
// require the match to start at the start of a token.
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
// but not 'b_cd_ef'.)
assert(!emojiName.startsWith(_adjusted)); // checked before calling this method
return emojiName.contains(separator + _adjusted);
// If the query is a single token (doesn't contain a separator),
// the match can be anywhere in the string.
if (emojiName.contains(_adjusted)) return EmojiMatchQuality.other;
return null;
}

@override
Expand Down

0 comments on commit 8e2dda7

Please sign in to comment.