Skip to content

Commit

Permalink
emoji: Recognize word-aligned matches in ranking
Browse files Browse the repository at this point in the history
Fixes zulip#1068.
  • Loading branch information
gnprice committed Dec 9, 2024
1 parent bb8935a commit a885520
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 42 deletions.
50 changes: 24 additions & 26 deletions lib/model/emoji.dart
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,15 @@ enum EmojiMatchQuality {
/// The query matches a prefix of the emoji name, but not the whole name.
prefix,

/// The query matches somewhere in the emoji name, but not at the start.
/// The query matches starting at the start of a word in the emoji name,
/// but not the start of the whole name.
///
/// For example a name "ab_cd_ef" would match queries "c" or "cd_e"
/// at this level, but not a query "b_cd_ef".
wordAligned,

/// The query matches somewhere in the emoji name,
/// but not at the start of any word.
other;

/// The best possible quality of match.
Expand Down Expand Up @@ -490,25 +498,17 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
// See also commentary in [_rankResult].

// TODO(#1067) this assumes emojiName is already lower-case (and no diacritics)
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
if (_nameMatches(emojiName)) return EmojiMatchQuality.other;
return null;
}

bool _nameMatches(String emojiName) {
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
if (emojiName.contains(_sepAdjusted)) return EmojiMatchQuality.wordAligned;
if (!_adjusted.contains(_separator)) {
// If the query is a single token (doesn't contain a separator),
// the match can be anywhere in the string.
return emojiName.contains(_adjusted);
// allow a match anywhere in the string, too.
if (emojiName.contains(_adjusted)) return EmojiMatchQuality.other;
} else {
// Otherwise, require at least a word-aligned match.
}

// If there is a separator in the query, then we
// require the match to start at the start of a token.
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
// but not 'b_cd_ef'.)
assert(!emojiName.startsWith(_adjusted)); // checked before calling this method
return emojiName.contains(_sepAdjusted);
return null;
}

/// A measure of the result's quality in the context of the query,
Expand All @@ -521,11 +521,9 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
// Compare sort_emojis in Zulip web:
// https://github.com/zulip/zulip/blob/83a121c7e/web/shared/src/typeahead.ts#L322-L382
//
// Behavior differences we should or might copy, TODO(#1068):
// * Web ranks matches starting at a word boundary ahead of
// other non-prefix matches; we don't yet.
// * Relatedly, web favors popular emoji only upon a word-aligned match.
// Behavior differences we might copy, TODO:
// * Web ranks each name of a Unicode emoji separately.
// * Web recognizes a word-aligned match starting after [ /-] as well as [_].
//
// Behavior differences that web should probably fix, TODO(web):
// * Among popular emoji with non-exact matches,
Expand Down Expand Up @@ -554,15 +552,15 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
ReactionType.unicodeEmoji => false,
};
return switch (matchQuality) {
EmojiMatchQuality.exact => 0,
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 4,
// TODO word-boundary vs. not
EmojiMatchQuality.other => isPopular ? 2 : isCustomEmoji ? 5 : 6,
EmojiMatchQuality.exact => 0,
EmojiMatchQuality.prefix => isPopular ? 1 : isCustomEmoji ? 3 : 5,
EmojiMatchQuality.wordAligned => isPopular ? 2 : isCustomEmoji ? 4 : 6,
EmojiMatchQuality.other => isCustomEmoji ? 7 : 8,
};
}

/// The number of possible values returned by [_rankResult].
static const _numResultRanks = 7;
static const _numResultRanks = 9;

@override
String toString() {
Expand Down
62 changes: 46 additions & 16 deletions test/model/emoji_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ void main() {
check(matchOfName('o', 'open_book')).prefix;
check(matchOfName('open', 'open_book')).prefix;
check(matchOfName('pe', 'open_book')).other;
check(matchOfName('boo', 'open_book')).other;
check(matchOfName('boo', 'open_book')).wordAligned;
check(matchOfName('ok', 'open_book')).other;
});

Expand All @@ -396,7 +396,7 @@ void main() {
check(matchOfName('pen_', 'open_book')).none;
check(matchOfName('n_b', 'open_book')).none;

check(matchOfName('blue_dia', 'large_blue_diamond')).other;
check(matchOfName('blue_dia', 'large_blue_diamond')).wordAligned;
});

test('spaces in query behave as underscores', () {
Expand All @@ -407,7 +407,7 @@ void main() {
check(matchOfName('pen ', 'open_book')).none;
check(matchOfName('n b', 'open_book')).none;

check(matchOfName('blue dia', 'large_blue_diamond')).other;
check(matchOfName('blue dia', 'large_blue_diamond')).wordAligned;
});

test('query is lower-cased', () {
Expand All @@ -426,13 +426,17 @@ void main() {
check(matchOfNames('open b', ['x', 'open_book'])).prefix;
check(matchOfNames('pen_', ['x', 'open_book'])).none;

check(matchOfNames('blue_dia', ['x', 'large_blue_diamond'])).wordAligned;

check(matchOfNames('Smi', ['x', 'smile'])).prefix;
});

test('best match among name and aliases prevails', () {
check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact;
check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix;
check(matchOfNames('a', ['ba', 'ab'])).prefix;
check(matchOfNames('a', ['ab', 'a', 'b_a', 'ba', 'x'])).exact;
check(matchOfNames('a', ['ba', 'ab', 'b_a', 'x'])).prefix;
check(matchOfNames('a', ['ba', 'ab', 'b_a'])).prefix;
check(matchOfNames('a', ['ba', 'b_a', 'x'])).wordAligned;
check(matchOfNames('a', ['b_a', 'ba'])).wordAligned;
check(matchOfNames('a', ['ba', 'x'])).other;
check(matchOfNames('a', ['x', 'y', 'z'])).none;
});
Expand Down Expand Up @@ -478,7 +482,7 @@ void main() {
check(matchOf('eqeq', realmCandidate('eqeq'))).exact;
check(matchOf('open_', realmCandidate('open_book'))).prefix;
check(matchOf('n_b', realmCandidate('open_book'))).none;
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other;
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).wordAligned;
check(matchOf('Smi', realmCandidate('smile'))).prefix;
});

Expand Down Expand Up @@ -513,10 +517,12 @@ void main() {

final octopus = unicode(['octopus'], emojiCode: '1f419');
final workingOnIt = unicode(['working_on_it'], emojiCode: '1f6e0');
final love = unicode(['love'], emojiCode: '2764'); // aka :heart:

test('ranks exact before prefix before other match', () {
test('ranks match quality exact/prefix/word-aligned/other', () {
checkPrecedes('o', unicode(['o']), unicode(['onion']));
checkPrecedes('o', unicode(['onion']), unicode(['book']));
checkPrecedes('o', unicode(['onion']), unicode(['squared_ok']));
checkPrecedes('o', unicode(['squared_ok']), unicode(['book']));
});

test('ranks popular before realm before other Unicode', () {
Expand All @@ -535,28 +541,51 @@ void main() {
checkPrecedes('o', octopus, realmCandidate('open_book'));
});

test('ranks popular-vs-not more significant than prefix/other', () {
// Popular other beats realm prefix.
test('ranks popular-vs-not more significant than prefix/word-aligned', () {
// Popular word-aligned beats realm prefix.
checkPrecedes('o', workingOnIt, realmCandidate('open_book'));
});

test('ranks prefix/other more significant than custom/other', () {
// Generic Unicode prefix beats realm other.
checkPrecedes('o', unicode(['ok']), realmCandidate('yo'));
// A popular emoji gets its ranking boost only for a prefix or word-aligned
// match.  For a plain substring ("other") match, the rank depends only on
// whether the emoji is a custom emoji, so a popular emoji is treated
// like any other Unicode emoji.
test('ranks popular as if generic when non-word-aligned', () {
// Generic word-aligned beats popular other.
checkPrecedes('o', unicode(['squared_ok']), love);
// Popular other ranks below even custom other…
checkPrecedes('o', realmCandidate('yo'), love);
// … and same as generic Unicode other.
checkSameRank('o', love, unicode(['book']));

// And that emoji really does count as popular,
// beating custom emoji when both have a prefix match.
checkPrecedes('l', love, realmCandidate('logs'));
});

// In this test's name, "custom/other" refers to the kind of emoji
// (a realm's custom emoji vs. anything else), not to the match
// quality [EmojiMatchQuality.other].
test('ranks custom/other more significant than prefix/word-aligned', () {
// Custom word-aligned beats generic prefix.
checkPrecedes('o', realmCandidate('laughing_blue_octopus'),
unicode(['ok']));
});

// In this test's name, "word-aligned/other" refers to match quality
// ([EmojiMatchQuality.wordAligned] vs. [EmojiMatchQuality.other]),
// while "custom/other" refers to the kind of emoji
// (a realm's custom emoji vs. anything else).
test('ranks word-aligned/other more significant than custom/other', () {
// Generic Unicode word-aligned beats realm other.
checkPrecedes('o', unicode(['squared_ok']), realmCandidate('yo'));
});

test('full list of ranks', () {
check([
rankOf('o', unicode(['o'])), // exact (generic)
rankOf('o', octopus), // prefix popular
rankOf('o', workingOnIt), // other popular
rankOf('o', workingOnIt), // word-aligned popular
rankOf('o', realmCandidate('open_book')), // prefix realm
rankOf('z', zulipCandidate()), // == prefix :zulip:
rankOf('y', realmCandidate('thank_you')), // word-aligned realm
// (word-aligned :zulip: is impossible because the name is one word)
rankOf('o', unicode(['ok'])), // prefix generic
rankOf('o', unicode(['squared_ok'])), // word-aligned generic
rankOf('o', realmCandidate('yo')), // other realm
rankOf('p', zulipCandidate()), // == other :zulip:
rankOf('o', unicode(['book'])), // other generic
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 5, 6]);
rankOf('o', love), // == other popular
]).deepEquals([0, 1, 2, 3, 3, 4, 5, 6, 7, 7, 8, 8]);
});
});
}
Expand Down Expand Up @@ -585,6 +614,7 @@ extension EmojiCandidateChecks on Subject<EmojiCandidate> {
/// Shorthand expectations on a nullable [EmojiMatchQuality] subject.
///
/// There is one getter for each [EmojiMatchQuality] value,
/// plus [none] for a null result.
extension EmojiMatchQualityChecks on Subject<EmojiMatchQuality?> {
/// Expects the value to equal [EmojiMatchQuality.exact].
void get exact => equals(EmojiMatchQuality.exact);
/// Expects the value to equal [EmojiMatchQuality.prefix].
void get prefix => equals(EmojiMatchQuality.prefix);
/// Expects the value to equal [EmojiMatchQuality.wordAligned].
void get wordAligned => equals(EmojiMatchQuality.wordAligned);
/// Expects the value to equal [EmojiMatchQuality.other].
void get other => equals(EmojiMatchQuality.other);
/// Expects the value to be null, i.e. no [EmojiMatchQuality] at all.
void get none => isNull();
}
Expand Down

0 comments on commit a885520

Please sign in to comment.