From 37652dbc9370492e000758f75b73aee7397f324a Mon Sep 17 00:00:00 2001 From: Greg Price Date: Fri, 6 Dec 2024 21:45:33 -0800 Subject: [PATCH] emoji: Rank by quality of match (exact, prefix, other) Fixes part of #1068. --- lib/model/emoji.dart | 79 +++++++++++++++----- test/model/emoji_test.dart | 144 +++++++++++++++++++++++++++---------- 2 files changed, 167 insertions(+), 56 deletions(-) diff --git a/lib/model/emoji.dart b/lib/model/emoji.dart index eb12d9904f..2e365f812f 100644 --- a/lib/model/emoji.dart +++ b/lib/model/emoji.dart @@ -223,6 +223,9 @@ class EmojiStoreImpl with EmojiStore { } List _generateAllCandidates() { + // See also [EmojiAutocompleteQuery._rankResult]; + // that ranking takes precedence over the order of this list. + // // Compare `emoji_picker.rebuild_catalog` in Zulip web; // `composebox_typeahead.update_emoji_data` which receives its output; // and `emoji.update_emojis` which builds part of its input. @@ -321,20 +324,30 @@ class EmojiStoreImpl with EmojiStore { } /// The quality of an emoji's match to an autocomplete query. -/// -/// (Rather vacuous for the moment; this structure will -/// gain more substance in an upcoming commit.) enum EmojiMatchQuality { - match; + /// The query matches the whole emoji name (or the literal emoji itself). + exact, + + /// The query matches a prefix of the emoji name, but not the whole name. + prefix, + + /// The query matches somewhere in the emoji name, but not at the start. + other; /// The best possible quality of match. - static const best = match; + static const best = exact; /// The better of the two given qualities of match, /// where null represents no match at all. static EmojiMatchQuality? bestOf(EmojiMatchQuality? a, EmojiMatchQuality? b) { if (b == null) return a; - return b; + if (a == null) return b; + return compare(a, b) <= 0 ? a : b; + } + + /// Comparator that puts better matches first. + static int compare(EmojiMatchQuality a, EmojiMatchQuality b) { + return Enum.compareByIndex(a, b); } } @@ -404,11 +417,11 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { // Compare get_emoji_matcher in Zulip web:shared/src/typeahead.ts . @visibleForTesting EmojiMatchQuality? match(EmojiCandidate candidate) { - if (_adjusted == '') return EmojiMatchQuality.match; + if (_adjusted == '') return EmojiMatchQuality.prefix; if (candidate.emojiDisplay case UnicodeEmojiDisplay(:var emojiUnicode)) { if (_adjusted == emojiUnicode) { - return EmojiMatchQuality.match; + return EmojiMatchQuality.exact; } } @@ -421,13 +434,20 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { } EmojiMatchQuality? _matchName(String emojiName) { - return _nameMatches(emojiName) ? EmojiMatchQuality.match : null; - } + // Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts + // for a Boolean version of this logic (match vs. no match), + // and triage_raw in the same file web:shared/src/typeahead.ts + // for the finer distinctions. + // See also commentary in [_rankResult]. - // Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts . - bool _nameMatches(String emojiName) { // TODO(#1067) this assumes emojiName is already lower-case (and no diacritics) + if (emojiName == _adjusted) return EmojiMatchQuality.exact; + if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix; + if (_nameMatches(emojiName)) return EmojiMatchQuality.other; + return null; + } + bool _nameMatches(String emojiName) { if (!_adjusted.contains(_separator)) { // If the query is a single token (doesn't contain a separator), // the match can be anywhere in the string. @@ -438,21 +458,46 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { // require the match to start at the start of a token. // (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef', // but not 'b_cd_ef'.) - return emojiName.startsWith(_adjusted) - || emojiName.contains(_sepAdjusted); + assert(!emojiName.startsWith(_adjusted)); // checked before calling this method + return emojiName.contains(_sepAdjusted); } /// A measure of the result's quality in the context of the query, /// ranked from 0 (best) to one less than [_numResultRanks]. static int _rankResult(EmojiMatchQuality matchQuality) { - // TODO(#1068): rank emoji results (popular, realm, other; exact match, prefix, other) + // See also [EmojiStoreImpl._generateAllCandidates]; + // emoji which this function ranks equally + // will appear in the order they were put in by that method. + // + // Compare sort_emojis in Zulip web: + // https://github.com/zulip/zulip/blob/83a121c7e/web/shared/src/typeahead.ts#L322-L382 + // + // Behavior differences we should or might copy, TODO(#1068): + // * Web ranks popular emoji > custom emoji > others; we don't yet. + // * Web ranks matches starting at a word boundary ahead of + // other non-prefix matches; we don't yet. + // * Web ranks each name of a Unicode emoji separately. + // + // Behavior differences that web should probably fix, TODO(web): + // * Web starts with only case-sensitive exact matches ("perfect matches"), + // and puts case-insensitive exact matches just ahead of prefix matches; + // it also distinguishes prefix matches by case-sensitive vs. not. + // We use case-insensitive matches throughout; + // case seems unhelpful for emoji search. + // * Web suppresses Unicode emoji names shadowed by a realm emoji + // only if the latter is also a match for the query. That mostly works, + // because emoji with the same name will mostly both match or both not; + // but it breaks if the Unicode emoji was a literal match. + return switch (matchQuality) { - EmojiMatchQuality.match => 0, + EmojiMatchQuality.exact => 0, + EmojiMatchQuality.prefix => 1, + EmojiMatchQuality.other => 2, }; } /// The number of possible values returned by [_rankResult]. - static const _numResultRanks = 1; + static const _numResultRanks = 3; @override String toString() { diff --git a/test/model/emoji_test.dart b/test/model/emoji_test.dart index 054d9ca5dc..416b6ea7b7 100644 --- a/test/model/emoji_test.dart +++ b/test/model/emoji_test.dart @@ -307,9 +307,44 @@ void main() { check(view.results).single.which( isUnicodeResult(names: ['smile'])); }); + + Future> resultsOf( + String query, { + Map realmEmoji = const {}, + Map>? unicodeEmoji, + }) async { + final store = prepare(realmEmoji: realmEmoji, unicodeEmoji: unicodeEmoji); + final view = EmojiAutocompleteView.init(store: store, + query: EmojiAutocompleteQuery(query)); + bool done = false; + view.addListener(() { done = true; }); + await Future(() {}); + check(done).isTrue(); + return view.results; + } + + test('results end-to-end', () async { + final unicodeEmoji = { + '1f4d3': ['notebook'], '1f516': ['bookmark'], '1f4d6': ['book']}; + + // Empty query -> base ordering. + check(await resultsOf('', unicodeEmoji: unicodeEmoji)).deepEquals([ + isUnicodeResult(names: ['notebook']), + isUnicodeResult(names: ['bookmark']), + isUnicodeResult(names: ['book']), + isZulipResult(), + ]); + + // With query, exact match precedes prefix match precedes other. + check(await resultsOf('book', unicodeEmoji: unicodeEmoji)).deepEquals([ + isUnicodeResult(names: ['book']), + isUnicodeResult(names: ['bookmark']), + isUnicodeResult(names: ['notebook']), + ]); + }); }); - group('EmojiAutocompleteQuery.match', () { + group('EmojiAutocompleteQuery', () { EmojiCandidate unicode(List names, {String? emojiCode}) { emojiCode ??= '10ffff'; return EmojiCandidate(emojiType: ReactionType.unicodeEmoji, @@ -333,63 +368,71 @@ void main() { } test('one-word query matches anywhere in name', () { - check(matchOfName('', 'smile')).match; - check(matchOfName('s', 'smile')).match; - check(matchOfName('sm', 'smile')).match; - check(matchOfName('smile', 'smile')).match; - check(matchOfName('m', 'smile')).match; - check(matchOfName('mile', 'smile')).match; - check(matchOfName('e', 'smile')).match; + check(matchOfName('', 'smile')).prefix; + check(matchOfName('s', 'smile')).prefix; + check(matchOfName('sm', 'smile')).prefix; + check(matchOfName('smile', 'smile')).exact; + check(matchOfName('m', 'smile')).other; + check(matchOfName('mile', 'smile')).other; + check(matchOfName('e', 'smile')).other; check(matchOfName('smiley', 'smile')).none; check(matchOfName('a', 'smile')).none; - check(matchOfName('o', 'open_book')).match; - check(matchOfName('open', 'open_book')).match; - check(matchOfName('pe', 'open_book')).match; - check(matchOfName('boo', 'open_book')).match; - check(matchOfName('ok', 'open_book')).match; + check(matchOfName('o', 'open_book')).prefix; + check(matchOfName('open', 'open_book')).prefix; + check(matchOfName('pe', 'open_book')).other; + check(matchOfName('boo', 'open_book')).other; + check(matchOfName('ok', 'open_book')).other; }); test('multi-word query matches from start of a word', () { - check(matchOfName('open_', 'open_book')).match; - check(matchOfName('open_b', 'open_book')).match; - check(matchOfName('open_book', 'open_book')).match; + check(matchOfName('open_', 'open_book')).prefix; + check(matchOfName('open_b', 'open_book')).prefix; + check(matchOfName('open_book', 'open_book')).exact; check(matchOfName('pen_', 'open_book')).none; check(matchOfName('n_b', 'open_book')).none; - check(matchOfName('blue_dia', 'large_blue_diamond')).match; + check(matchOfName('blue_dia', 'large_blue_diamond')).other; }); test('spaces in query behave as underscores', () { - check(matchOfName('open ', 'open_book')).match; - check(matchOfName('open b', 'open_book')).match; - check(matchOfName('open book', 'open_book')).match; + check(matchOfName('open ', 'open_book')).prefix; + check(matchOfName('open b', 'open_book')).prefix; + check(matchOfName('open book', 'open_book')).exact; check(matchOfName('pen ', 'open_book')).none; check(matchOfName('n b', 'open_book')).none; - check(matchOfName('blue dia', 'large_blue_diamond')).match; + check(matchOfName('blue dia', 'large_blue_diamond')).other; }); test('query is lower-cased', () { - check(matchOfName('Smi', 'smile')).match; + check(matchOfName('Smi', 'smile')).prefix; }); test('query matches aliases same way as primary name', () { - check(matchOfNames('a', ['a', 'b'])).match; - check(matchOfNames('b', ['a', 'b'])).match; + check(matchOfNames('a', ['a', 'b'])).exact; + check(matchOfNames('b', ['a', 'b'])).exact; check(matchOfNames('c', ['a', 'b'])).none; - check(matchOfNames('pe', ['x', 'open_book'])).match; - check(matchOfNames('ok', ['x', 'open_book'])).match; + check(matchOfNames('pe', ['x', 'open_book'])).other; + check(matchOfNames('ok', ['x', 'open_book'])).other; - check(matchOfNames('open_', ['x', 'open_book'])).match; - check(matchOfNames('open b', ['x', 'open_book'])).match; + check(matchOfNames('open_', ['x', 'open_book'])).prefix; + check(matchOfNames('open b', ['x', 'open_book'])).prefix; check(matchOfNames('pen_', ['x', 'open_book'])).none; - check(matchOfNames('Smi', ['x', 'smile'])).match; + check(matchOfNames('Smi', ['x', 'smile'])).prefix; + }); + + test('best match among name and aliases prevails', () { + check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact; + check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix; + check(matchOfNames('a', ['ba', 'ab'])).prefix; + check(matchOfNames('a', ['ba', 'x'])).other; + check(matchOfNames('a', ['x', 'y', 'z'])).none; }); test('query matches literal Unicode value', () { @@ -403,13 +446,13 @@ void main() { check(matchOfLiteral('1f642', aka: '1f642', '1f642')).none; // Matching the Unicode value the code describes does countโ€ฆ - check(matchOfLiteral('๐Ÿ™‚', aka: '\u{1f642}', '1f642')).match; + check(matchOfLiteral('๐Ÿ™‚', aka: '\u{1f642}', '1f642')).exact; // โ€ฆ and failing to match it doesn't make a match. check(matchOfLiteral('๐Ÿ™', aka: '\u{1f641}', '1f642')).none; // Multi-code-point emoji work fine. check(matchOfLiteral('๐Ÿณโ€๐ŸŒˆ', aka: '\u{1f3f3}\u{200d}\u{1f308}', - '1f3f3-200d-1f308')).match; + '1f3f3-200d-1f308')).exact; // Only exact matches count; no partial matches. check(matchOfLiteral('๐Ÿณ', aka: '\u{1f3f3}', '1f3f3-200d-1f308')).none; @@ -430,11 +473,11 @@ void main() { resolvedStillUrl: eg.realmUrl.resolve('/emoji/1-still.png'))); } - check(matchOf('eqeq', realmCandidate('eqeq'))).match; - check(matchOf('open_', realmCandidate('open_book'))).match; + check(matchOf('eqeq', realmCandidate('eqeq'))).exact; + check(matchOf('open_', realmCandidate('open_book'))).prefix; check(matchOf('n_b', realmCandidate('open_book'))).none; - check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).match; - check(matchOf('Smi', realmCandidate('smile'))).match; + check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other; + check(matchOf('Smi', realmCandidate('smile'))).prefix; }); test('can match Zulip extra emoji', () { @@ -446,11 +489,32 @@ void main() { emojiType: ReactionType.zulipExtraEmoji, emojiCode: 'zulip', emojiName: 'zulip')); - check(matchOf('z', zulipCandidate)).match; - check(matchOf('Zulip', zulipCandidate)).match; - check(matchOf('p', zulipCandidate)).match; + check(matchOf('z', zulipCandidate)).prefix; + check(matchOf('Zulip', zulipCandidate)).exact; + check(matchOf('p', zulipCandidate)).other; check(matchOf('x', zulipCandidate)).none; }); + + int? rankOf(String query, EmojiCandidate candidate) { + return EmojiAutocompleteQuery(query).testCandidate(candidate)?.rank; + } + + void checkPrecedes(String query, EmojiCandidate a, EmojiCandidate b) { + check(rankOf(query, a)!).isLessThan(rankOf(query, b)!); + } + + test('ranks exact before prefix before other match', () { + checkPrecedes('o', unicode(['o']), unicode(['onion'])); + checkPrecedes('o', unicode(['onion']), unicode(['book'])); + }); + + test('full list of ranks', () { + check([ + rankOf('o', unicode(['o'])), // exact + rankOf('o', unicode(['onion'])), // prefix + rankOf('o', unicode(['book'])), // other + ]).deepEquals([0, 1, 2]); + }); }); } @@ -476,7 +540,9 @@ extension EmojiCandidateChecks on Subject { } extension EmojiMatchQualityChecks on Subject { - void get match => equals(EmojiMatchQuality.match); + void get exact => equals(EmojiMatchQuality.exact); + void get prefix => equals(EmojiMatchQuality.prefix); + void get other => equals(EmojiMatchQuality.other); void get none => isNull(); }