From d3681263736604b60b3d20bfbe2dbdaf709e9cec Mon Sep 17 00:00:00 2001 From: Greg Price Date: Sat, 7 Dec 2024 01:09:03 -0800 Subject: [PATCH] emoji: Recognize word-aligned matches in ranking --- lib/model/emoji.dart | 50 +++++++++++++++--------------- test/model/emoji_test.dart | 62 ++++++++++++++++++++++++++++---------- 2 files changed, 70 insertions(+), 42 deletions(-) diff --git a/lib/model/emoji.dart b/lib/model/emoji.dart index 376bf9a232..a456836889 100644 --- a/lib/model/emoji.dart +++ b/lib/model/emoji.dart @@ -342,7 +342,15 @@ enum EmojiMatchQuality { /// The query matches a prefix of the emoji name, but not the whole name. prefix, - /// The query matches somewhere in the emoji name, but not at the start. + /// The query matches starting at the start of a word in the emoji name, + /// but not the start of the whole name. + /// + /// For example a name "ab_cd_ef" would match queries "c" or "cd_e" + /// at this level, but not a query "b_cd_ef". + wordAligned, + + /// The query matches somewhere in the emoji name, + /// but not at the start of any word. other; /// The best possible quality of match. @@ -453,25 +461,17 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { // See also commentary in [_rankResult] above. // TODO(#1067) this assumes emojiName is already lower-case (and no diacritics) - if (emojiName == _adjusted) return EmojiMatchQuality.exact; - if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix; - if (_nameMatches(emojiName)) return EmojiMatchQuality.other; - return null; - } - - bool _nameMatches(String emojiName) { + if (emojiName == _adjusted) return EmojiMatchQuality.exact; + if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix; + if (emojiName.contains(_sepAdjusted)) return EmojiMatchQuality.wordAligned; if (!_adjusted.contains(_separator)) { // If the query is a single token (doesn't contain a separator), - // the match can be anywhere in the string. - return emojiName.contains(_adjusted); + // allow a match anywhere in the string, too. + if (emojiName.contains(_adjusted)) return EmojiMatchQuality.other; + } else { + // Otherwise, require at least a word-aligned match. } - - // If there is a separator in the query, then we - // require the match to start at the start of a token. - // (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef', - // but not 'b_cd_ef'.) - assert(!emojiName.startsWith(_adjusted)); // checked before calling this method - return emojiName.contains(_sepAdjusted); + return null; } /// A measure of the result's quality in the context of the query, @@ -479,11 +479,9 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { static int _rankResult(EmojiMatchQuality matchQuality, EmojiCandidate candidate) { // Compare sort_emojis in Zulip web:shared/src/typeahead.ts . // - // Behavior differences we should or might copy, TODO(#1068): - // * Web ranks matches starting at a word boundary ahead of - // other non-prefix matches; we don't yet. - // * Relatedly, web favors popular emoji only upon a word-aligned match. + // Behavior differences we might copy, TODO: // * Web ranks each name of a Unicode emoji separately. + // * Web recognizes a word-aligned match starting after [ /-] as well as [_]. // // Behavior differences that web should probably fix, TODO(web): // * Among popular emoji with non-exact matches, @@ -512,15 +510,15 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery { ReactionType.unicodeEmoji => false, }; return switch (matchQuality) { - EmojiMatchQuality.exact => 0, - EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 4, - // TODO word-boundary vs. not - EmojiMatchQuality.other => isPopular ? 2 : isCustomEmoji ? 5 : 6, + EmojiMatchQuality.exact => 0, + EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 5, + EmojiMatchQuality.wordAligned => isPopular ? 2 : isCustomEmoji ? 4 : 6, + EmojiMatchQuality.other => isCustomEmoji ? 7 : 8, }; } /// The number of possible values returned by [_rankResult]. - static const _numResultRanks = 7; + static const _numResultRanks = 9; @override String toString() { diff --git a/test/model/emoji_test.dart b/test/model/emoji_test.dart index ced905b04e..c60421d5bf 100644 --- a/test/model/emoji_test.dart +++ b/test/model/emoji_test.dart @@ -347,7 +347,7 @@ void main() { check(matchOfName('o', 'open_book')).prefix; check(matchOfName('open', 'open_book')).prefix; check(matchOfName('pe', 'open_book')).other; - check(matchOfName('boo', 'open_book')).other; + check(matchOfName('boo', 'open_book')).wordAligned; check(matchOfName('ok', 'open_book')).other; }); @@ -359,7 +359,7 @@ void main() { check(matchOfName('pen_', 'open_book')).none; check(matchOfName('n_b', 'open_book')).none; - check(matchOfName('blue_dia', 'large_blue_diamond')).other; + check(matchOfName('blue_dia', 'large_blue_diamond')).wordAligned; }); test('spaces in query behave as underscores', () { @@ -370,7 +370,7 @@ void main() { check(matchOfName('pen ', 'open_book')).none; check(matchOfName('n b', 'open_book')).none; - check(matchOfName('blue dia', 'large_blue_diamond')).other; + check(matchOfName('blue dia', 'large_blue_diamond')).wordAligned; }); test('query is lower-cased', () { @@ -389,13 +389,17 @@ void main() { check(matchOfNames('open b', ['x', 'open_book'])).prefix; check(matchOfNames('pen_', ['x', 'open_book'])).none; + check(matchOfNames('blue_dia', ['x', 'large_blue_diamond'])).wordAligned; + check(matchOfNames('Smi', ['x', 'smile'])).prefix; }); test('best match among name and aliases prevails', () { - check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact; - check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix; - check(matchOfNames('a', ['ba', 'ab'])).prefix; + check(matchOfNames('a', ['ab', 'a', 'b_a', 'ba', 'x'])).exact; + check(matchOfNames('a', ['ba', 'ab', 'b_a', 'x'])).prefix; + check(matchOfNames('a', ['ba', 'ab', 'b_a'])).prefix; + check(matchOfNames('a', ['ba', 'b_a', 'x'])).wordAligned; + check(matchOfNames('a', ['b_a', 'ba'])).wordAligned; check(matchOfNames('a', ['ba', 'x'])).other; check(matchOfNames('a', ['x', 'y', 'z'])).none; }); @@ -441,7 +445,7 @@ void main() { check(matchOf('eqeq', realmCandidate('eqeq'))).exact; check(matchOf('open_', realmCandidate('open_book'))).prefix; check(matchOf('n_b', realmCandidate('open_book'))).none; - check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other; + check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).wordAligned; check(matchOf('Smi', realmCandidate('smile'))).prefix; }); @@ -476,10 +480,12 @@ void main() { final octopus = unicode(['octopus'], emojiCode: '1f419'); final workingOnIt = unicode(['working_on_it'], emojiCode: '1f6e0'); + final love = unicode(['love'], emojiCode: '2764'); // aka :heart: - test('ranks exact before prefix before other match', () { + test('ranks match quality exact/prefix/word-aligned/other', () { checkPrecedes('o', unicode(['o']), unicode(['onion'])); - checkPrecedes('o', unicode(['onion']), unicode(['book'])); + checkPrecedes('o', unicode(['onion']), unicode(['squared_ok'])); + checkPrecedes('o', unicode(['squared_ok']), unicode(['book'])); }); test('ranks popular before realm before other Unicode', () { @@ -498,28 +504,51 @@ void main() { checkPrecedes('o', octopus, realmCandidate('open_book')); }); - test('ranks popular-vs-not more significant than prefix/other', () { - // Popular other beats realm prefix. + test('ranks popular-vs-not more significant than prefix/word-aligned', () { + // Popular word-aligned beats realm prefix. checkPrecedes('o', workingOnIt, realmCandidate('open_book')); }); - test('ranks prefix/other more significant than custom/other', () { - // Generic Unicode prefix beats realm other. - checkPrecedes('o', unicode(['ok']), realmCandidate('yo')); + test('ranks popular as if generic when non-word-aligned', () { + // Generic word-aligned beats popular other. + checkPrecedes('o', unicode(['squared_ok']), love); + // Popular other ranks below even custom other… + checkPrecedes('o', realmCandidate('yo'), love); + // … and same as generic Unicode other. + checkSameRank('o', love, unicode(['book'])); + + // And that emoji really does count as popular, + // beating custom emoji when both have a prefix match. + checkPrecedes('l', love, realmCandidate('logs')); + }); + + test('ranks custom/other more significant than prefix/word-aligned', () { + // Custom word-aligned beats generic prefix. + checkPrecedes('o', realmCandidate('laughing_blue_octopus'), + unicode(['ok'])); + }); + + test('ranks word-aligned/other more significant than custom/other', () { + // Generic Unicode word-aligned beats realm other. + checkPrecedes('o', unicode(['squared_ok']), realmCandidate('yo')); }); test('full list of ranks', () { check([ rankOf('o', unicode(['o'])), // exact (generic) rankOf('o', octopus), // prefix popular - rankOf('o', workingOnIt), // other popular + rankOf('o', workingOnIt), // word-aligned popular rankOf('o', realmCandidate('open_book')), // prefix realm rankOf('z', zulipCandidate()), // == prefix :zulip: + rankOf('y', realmCandidate('thank_you')), // word-aligned realm + // (word-aligned :zulip: is impossible because the name is one word) rankOf('o', unicode(['ok'])), // prefix generic + rankOf('o', unicode(['squared_ok'])), // word-aligned generic rankOf('o', realmCandidate('yo')), // other realm rankOf('p', zulipCandidate()), // == other :zulip: rankOf('o', unicode(['book'])), // other generic - ]).deepEquals([0, 1, 2, 3, 3, 4, 5, 5, 6]); + rankOf('o', love), // == other popular + ]).deepEquals([0, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8, 8]); }); }); } @@ -548,6 +577,7 @@ extension EmojiCandidateChecks on Subject { extension EmojiMatchQualityChecks on Subject { void get exact => equals(EmojiMatchQuality.exact); void get prefix => equals(EmojiMatchQuality.prefix); + void get wordAligned => equals(EmojiMatchQuality.wordAligned); void get other => equals(EmojiMatchQuality.other); void get none => isNull(); }