Skip to content

Commit

Permalink
emoji: Rank by quality of match (exact, prefix, other)
Browse files Browse the repository at this point in the history
Fixes part of zulip#1068.
  • Loading branch information
gnprice committed Dec 9, 2024
1 parent 604ed1e commit 37652db
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 56 deletions.
79 changes: 62 additions & 17 deletions lib/model/emoji.dart
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ class EmojiStoreImpl with EmojiStore {
}

List<EmojiCandidate> _generateAllCandidates() {
// See also [EmojiAutocompleteQuery._rankResult];
// that ranking takes precedence over the order of this list.
//
// Compare `emoji_picker.rebuild_catalog` in Zulip web;
// `composebox_typeahead.update_emoji_data` which receives its output;
// and `emoji.update_emojis` which builds part of its input.
Expand Down Expand Up @@ -321,20 +324,30 @@ class EmojiStoreImpl with EmojiStore {
}

/// The quality of an emoji's match to an autocomplete query.
///
/// (Rather vacuous for the moment; this structure will
/// gain more substance in an upcoming commit.)
enum EmojiMatchQuality {
match;
/// The query matches the whole emoji name (or the literal emoji itself).
exact,

/// The query matches a prefix of the emoji name, but not the whole name.
prefix,

/// The query matches somewhere in the emoji name, but not at the start.
other;

/// The best possible quality of match.
static const best = match;
static const best = exact;

/// The better of the two given qualities of match,
/// where null represents no match at all.
static EmojiMatchQuality? bestOf(EmojiMatchQuality? a, EmojiMatchQuality? b) {
if (b == null) return a;
return b;
if (a == null) return b;
return compare(a, b) <= 0 ? a : b;
}

/// Comparator that puts better matches first.
static int compare(EmojiMatchQuality a, EmojiMatchQuality b) {
return Enum.compareByIndex(a, b);
}
}

Expand Down Expand Up @@ -404,11 +417,11 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
// Compare get_emoji_matcher in Zulip web:shared/src/typeahead.ts .
@visibleForTesting
EmojiMatchQuality? match(EmojiCandidate candidate) {
if (_adjusted == '') return EmojiMatchQuality.match;
if (_adjusted == '') return EmojiMatchQuality.prefix;

if (candidate.emojiDisplay case UnicodeEmojiDisplay(:var emojiUnicode)) {
if (_adjusted == emojiUnicode) {
return EmojiMatchQuality.match;
return EmojiMatchQuality.exact;
}
}

Expand All @@ -421,13 +434,20 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
}

EmojiMatchQuality? _matchName(String emojiName) {
return _nameMatches(emojiName) ? EmojiMatchQuality.match : null;
}
// Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts
// for a Boolean version of this logic (match vs. no match),
// and triage_raw in the same file web:shared/src/typeahead.ts
// for the finer distinctions.
// See also commentary in [_rankResult].

// Compare query_matches_string_in_order in Zulip web:shared/src/typeahead.ts .
bool _nameMatches(String emojiName) {
// TODO(#1067) this assumes emojiName is already lower-case (and no diacritics)
if (emojiName == _adjusted) return EmojiMatchQuality.exact;
if (emojiName.startsWith(_adjusted)) return EmojiMatchQuality.prefix;
if (_nameMatches(emojiName)) return EmojiMatchQuality.other;
return null;
}

bool _nameMatches(String emojiName) {
if (!_adjusted.contains(_separator)) {
// If the query is a single token (doesn't contain a separator),
// the match can be anywhere in the string.
Expand All @@ -438,21 +458,46 @@ class EmojiAutocompleteQuery extends ComposeAutocompleteQuery {
// require the match to start at the start of a token.
// (E.g. for 'ab_cd_ef', query could be 'ab_c' or 'cd_ef',
// but not 'b_cd_ef'.)
return emojiName.startsWith(_adjusted)
|| emojiName.contains(_sepAdjusted);
assert(!emojiName.startsWith(_adjusted)); // checked before calling this method
return emojiName.contains(_sepAdjusted);
}

/// A measure of the result's quality in the context of the query,
/// ranked from 0 (best) to one less than [_numResultRanks].
static int _rankResult(EmojiMatchQuality matchQuality) {
// TODO(#1068): rank emoji results (popular, realm, other; exact match, prefix, other)
// See also [EmojiStoreImpl._generateAllCandidates];
// emoji which this function ranks equally
// will appear in the order they were put in by that method.
//
// Compare sort_emojis in Zulip web:
// https://github.com/zulip/zulip/blob/83a121c7e/web/shared/src/typeahead.ts#L322-L382
//
// Behavior differences we should or might copy, TODO(#1068):
// * Web ranks popular emoji > custom emoji > others; we don't yet.
// * Web ranks matches starting at a word boundary ahead of
// other non-prefix matches; we don't yet.
// * Web ranks each name of a Unicode emoji separately.
//
// Behavior differences that web should probably fix, TODO(web):
// * Web starts with only case-sensitive exact matches ("perfect matches"),
// and puts case-insensitive exact matches just ahead of prefix matches;
// it also distinguishes prefix matches by case-sensitive vs. not.
// We use case-insensitive matches throughout;
// case seems unhelpful for emoji search.
// * Web suppresses Unicode emoji names shadowed by a realm emoji
// only if the latter is also a match for the query. That mostly works,
// because emoji with the same name will mostly both match or both not;
// but it breaks if the Unicode emoji was a literal match.

return switch (matchQuality) {
EmojiMatchQuality.match => 0,
EmojiMatchQuality.exact => 0,
EmojiMatchQuality.prefix => 1,
EmojiMatchQuality.other => 2,
};
}

/// The number of possible values returned by [_rankResult].
static const _numResultRanks = 1;
static const _numResultRanks = 3;

@override
String toString() {
Expand Down
144 changes: 105 additions & 39 deletions test/model/emoji_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,44 @@ void main() {
check(view.results).single.which(
isUnicodeResult(names: ['smile']));
});

Future<Iterable<EmojiAutocompleteResult>> resultsOf(
String query, {
Map<String, String> realmEmoji = const {},
Map<String, List<String>>? unicodeEmoji,
}) async {
final store = prepare(realmEmoji: realmEmoji, unicodeEmoji: unicodeEmoji);
final view = EmojiAutocompleteView.init(store: store,
query: EmojiAutocompleteQuery(query));
bool done = false;
view.addListener(() { done = true; });
await Future(() {});
check(done).isTrue();
return view.results;
}

test('results end-to-end', () async {
final unicodeEmoji = {
'1f4d3': ['notebook'], '1f516': ['bookmark'], '1f4d6': ['book']};

// Empty query -> base ordering.
check(await resultsOf('', unicodeEmoji: unicodeEmoji)).deepEquals([
isUnicodeResult(names: ['notebook']),
isUnicodeResult(names: ['bookmark']),
isUnicodeResult(names: ['book']),
isZulipResult(),
]);

// With query, exact match precedes prefix match precedes other.
check(await resultsOf('book', unicodeEmoji: unicodeEmoji)).deepEquals([
isUnicodeResult(names: ['book']),
isUnicodeResult(names: ['bookmark']),
isUnicodeResult(names: ['notebook']),
]);
});
});

group('EmojiAutocompleteQuery.match', () {
group('EmojiAutocompleteQuery', () {
EmojiCandidate unicode(List<String> names, {String? emojiCode}) {
emojiCode ??= '10ffff';
return EmojiCandidate(emojiType: ReactionType.unicodeEmoji,
Expand All @@ -333,63 +368,71 @@ void main() {
}

test('one-word query matches anywhere in name', () {
check(matchOfName('', 'smile')).match;
check(matchOfName('s', 'smile')).match;
check(matchOfName('sm', 'smile')).match;
check(matchOfName('smile', 'smile')).match;
check(matchOfName('m', 'smile')).match;
check(matchOfName('mile', 'smile')).match;
check(matchOfName('e', 'smile')).match;
check(matchOfName('', 'smile')).prefix;
check(matchOfName('s', 'smile')).prefix;
check(matchOfName('sm', 'smile')).prefix;
check(matchOfName('smile', 'smile')).exact;
check(matchOfName('m', 'smile')).other;
check(matchOfName('mile', 'smile')).other;
check(matchOfName('e', 'smile')).other;

check(matchOfName('smiley', 'smile')).none;
check(matchOfName('a', 'smile')).none;

check(matchOfName('o', 'open_book')).match;
check(matchOfName('open', 'open_book')).match;
check(matchOfName('pe', 'open_book')).match;
check(matchOfName('boo', 'open_book')).match;
check(matchOfName('ok', 'open_book')).match;
check(matchOfName('o', 'open_book')).prefix;
check(matchOfName('open', 'open_book')).prefix;
check(matchOfName('pe', 'open_book')).other;
check(matchOfName('boo', 'open_book')).other;
check(matchOfName('ok', 'open_book')).other;
});

test('multi-word query matches from start of a word', () {
check(matchOfName('open_', 'open_book')).match;
check(matchOfName('open_b', 'open_book')).match;
check(matchOfName('open_book', 'open_book')).match;
check(matchOfName('open_', 'open_book')).prefix;
check(matchOfName('open_b', 'open_book')).prefix;
check(matchOfName('open_book', 'open_book')).exact;

check(matchOfName('pen_', 'open_book')).none;
check(matchOfName('n_b', 'open_book')).none;

check(matchOfName('blue_dia', 'large_blue_diamond')).match;
check(matchOfName('blue_dia', 'large_blue_diamond')).other;
});

test('spaces in query behave as underscores', () {
check(matchOfName('open ', 'open_book')).match;
check(matchOfName('open b', 'open_book')).match;
check(matchOfName('open book', 'open_book')).match;
check(matchOfName('open ', 'open_book')).prefix;
check(matchOfName('open b', 'open_book')).prefix;
check(matchOfName('open book', 'open_book')).exact;

check(matchOfName('pen ', 'open_book')).none;
check(matchOfName('n b', 'open_book')).none;

check(matchOfName('blue dia', 'large_blue_diamond')).match;
check(matchOfName('blue dia', 'large_blue_diamond')).other;
});

test('query is lower-cased', () {
check(matchOfName('Smi', 'smile')).match;
check(matchOfName('Smi', 'smile')).prefix;
});

test('query matches aliases same way as primary name', () {
check(matchOfNames('a', ['a', 'b'])).match;
check(matchOfNames('b', ['a', 'b'])).match;
check(matchOfNames('a', ['a', 'b'])).exact;
check(matchOfNames('b', ['a', 'b'])).exact;
check(matchOfNames('c', ['a', 'b'])).none;

check(matchOfNames('pe', ['x', 'open_book'])).match;
check(matchOfNames('ok', ['x', 'open_book'])).match;
check(matchOfNames('pe', ['x', 'open_book'])).other;
check(matchOfNames('ok', ['x', 'open_book'])).other;

check(matchOfNames('open_', ['x', 'open_book'])).match;
check(matchOfNames('open b', ['x', 'open_book'])).match;
check(matchOfNames('open_', ['x', 'open_book'])).prefix;
check(matchOfNames('open b', ['x', 'open_book'])).prefix;
check(matchOfNames('pen_', ['x', 'open_book'])).none;

check(matchOfNames('Smi', ['x', 'smile'])).match;
check(matchOfNames('Smi', ['x', 'smile'])).prefix;
});

test('best match among name and aliases prevails', () {
check(matchOfNames('a', ['ab', 'a', 'ba', 'x'])).exact;
check(matchOfNames('a', ['ba', 'ab', 'x'])).prefix;
check(matchOfNames('a', ['ba', 'ab'])).prefix;
check(matchOfNames('a', ['ba', 'x'])).other;
check(matchOfNames('a', ['x', 'y', 'z'])).none;
});

test('query matches literal Unicode value', () {
Expand All @@ -403,13 +446,13 @@ void main() {
check(matchOfLiteral('1f642', aka: '1f642', '1f642')).none;

// Matching the Unicode value the code describes does count…
check(matchOfLiteral('🙂', aka: '\u{1f642}', '1f642')).match;
check(matchOfLiteral('🙂', aka: '\u{1f642}', '1f642')).exact;
// … and failing to match it doesn't make a match.
check(matchOfLiteral('🙁', aka: '\u{1f641}', '1f642')).none;

// Multi-code-point emoji work fine.
check(matchOfLiteral('🏳‍🌈', aka: '\u{1f3f3}\u{200d}\u{1f308}',
'1f3f3-200d-1f308')).match;
'1f3f3-200d-1f308')).exact;
// Only exact matches count; no partial matches.
check(matchOfLiteral('🏳', aka: '\u{1f3f3}',
'1f3f3-200d-1f308')).none;
Expand All @@ -430,11 +473,11 @@ void main() {
resolvedStillUrl: eg.realmUrl.resolve('/emoji/1-still.png')));
}

check(matchOf('eqeq', realmCandidate('eqeq'))).match;
check(matchOf('open_', realmCandidate('open_book'))).match;
check(matchOf('eqeq', realmCandidate('eqeq'))).exact;
check(matchOf('open_', realmCandidate('open_book'))).prefix;
check(matchOf('n_b', realmCandidate('open_book'))).none;
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).match;
check(matchOf('Smi', realmCandidate('smile'))).match;
check(matchOf('blue dia', realmCandidate('large_blue_diamond'))).other;
check(matchOf('Smi', realmCandidate('smile'))).prefix;
});

test('can match Zulip extra emoji', () {
Expand All @@ -446,11 +489,32 @@ void main() {
emojiType: ReactionType.zulipExtraEmoji,
emojiCode: 'zulip', emojiName: 'zulip'));

check(matchOf('z', zulipCandidate)).match;
check(matchOf('Zulip', zulipCandidate)).match;
check(matchOf('p', zulipCandidate)).match;
check(matchOf('z', zulipCandidate)).prefix;
check(matchOf('Zulip', zulipCandidate)).exact;
check(matchOf('p', zulipCandidate)).other;
check(matchOf('x', zulipCandidate)).none;
});

int? rankOf(String query, EmojiCandidate candidate) {
return EmojiAutocompleteQuery(query).testCandidate(candidate)?.rank;
}

void checkPrecedes(String query, EmojiCandidate a, EmojiCandidate b) {
check(rankOf(query, a)!).isLessThan(rankOf(query, b)!);
}

test('ranks exact before prefix before other match', () {
checkPrecedes('o', unicode(['o']), unicode(['onion']));
checkPrecedes('o', unicode(['onion']), unicode(['book']));
});

test('full list of ranks', () {
check([
rankOf('o', unicode(['o'])), // exact
rankOf('o', unicode(['onion'])), // prefix
rankOf('o', unicode(['book'])), // other
]).deepEquals([0, 1, 2]);
});
});
}

Expand All @@ -476,7 +540,9 @@ extension EmojiCandidateChecks on Subject<EmojiCandidate> {
}

extension EmojiMatchQualityChecks on Subject<EmojiMatchQuality?> {
void get match => equals(EmojiMatchQuality.match);
void get exact => equals(EmojiMatchQuality.exact);
void get prefix => equals(EmojiMatchQuality.prefix);
void get other => equals(EmojiMatchQuality.other);
void get none => isNull();
}

Expand Down

0 comments on commit 37652db

Please sign in to comment.