From 2fdc207a58a98771a059c05cd0a906adb1a6a008 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Wed, 15 Nov 2023 17:27:33 -0800 Subject: [PATCH] Add unit tests for prefix and prefix phrase queries Signed-off-by: Rishabh Maurya --- .../mapper/MatchOnlyTextFieldMapper.java | 20 ++- .../mapper/MatchOnlyTextFieldMapperTests.java | 169 ++++++++++++++++++ 2 files changed, 181 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java index 2fb8db13f6f5a..6b5055d07fd39 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -270,17 +270,21 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, List> termArray = getTermsFromTokenStream(stream); BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (int i = 0; i < termArray.size(); i++) { - BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); - for (Term term : termArray.get(i)) { - if (i == termArray.size() - 1) { - MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); - mqb.add(term); - disjunctions.add(mqb, BooleanClause.Occur.SHOULD); + if (i == termArray.size() - 1) { + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); + mqb.add(termArray.get(i).toArray(new Term[0])); + builder.add(mqb, BooleanClause.Occur.FILTER); + } else { + if (termArray.get(i).size() > 1) { + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : termArray.get(i)) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } else { - disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + builder.add(new TermQuery(termArray.get(i).get(0)), BooleanClause.Occur.FILTER); } } - builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); } return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); } diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java index 1f78ea3abcff8..998906fd7b500 100644 --- a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -44,9 +44,11 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; import org.opensearch.core.common.Strings; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.SourceFieldMatchQuery; @@ -173,6 +175,173 @@ public void testIndexPrefixIndexTypes() throws IOException {} @Override public void testFastPhrasePrefixes() throws IOException {} + public void testPhrasePrefixes() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field"); + { + b.field("type", textFieldName); + b.field("analyzer", "my_stop_analyzer"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + b.startObject("synfield"); + { + b.field("type", textFieldName); + b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "here")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "three")); + mqbFilter.add(new Term("field", "words")); + mqbFilter.add(new Term("field", "here")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.setSlop(1); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "singleton")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(mqb, BooleanClause.Occur.FILTER).build(), + mqb, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, is(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "stopword")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "sparkle")); + mqbFilter.add(new Term[] { new Term("field", "stopword") }, 2); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "motor")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "two")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.setSlop(1); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + // { + // Query q = new MatchPhrasePrefixQueryBuilder("synfield", "three dog word").toQuery(queryShardContext); + // MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + // mqb.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); + // MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + // mqbFilter.add(new Term("synfield", "motor")); + // mqbFilter.add(new Term[] {new Term("synfield", "dogs"), new Term("synfield", "dog")}); + // Query expected = new SourceFieldMatchQuery( + // new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), BooleanClause.Occur.FILTER) + // .add(mqb, BooleanClause.Occur.FILTER) + // .add(new TermQuery(new Term("synfield", "word")), BooleanClause.Occur.FILTER) + // .build(), + // mqbFilter, + // mapperService.fieldType("synfield"), + // queryShardContext + // ); + // assertThat(q, equalTo(mpq)); + // } + } + @Override public void testFastPhraseMapping() throws IOException {}