diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 92ee8067ee4a0..da89b191d0953 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -38,11 +38,23 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.RegexpQuery; +import org.apache.lucene.search.TermInSetQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Operations; +import org.opensearch.OpenSearchException; import org.opensearch.common.Nullable; +import org.opensearch.common.lucene.BytesRefs; import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.lucene.search.AutomatonQueries; +import org.opensearch.common.unit.Fuzziness; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.analysis.IndexAnalyzers; import org.opensearch.index.analysis.NamedAnalyzer; @@ -62,6 +74,8 @@ import java.util.Objects; import java.util.function.Supplier; +import static org.opensearch.search.SearchService.ALLOW_EXPENSIVE_QUERIES; + /** * A field mapper for keywords. This mapper accepts strings and indexes them as-is. * @@ -317,7 +331,7 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S @Override public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) { if (format != null) { - throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats."); + throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't " + "support formats."); } return new SourceValueFetcher(name(), context, nullValue) { @@ -372,17 +386,220 @@ protected BytesRef indexedValueForSearch(Object value) { return getTextSearchInfo().getSearchAnalyzer().normalize(name(), value.toString()); } + @Override + public Query termsQuery(List values, QueryShardContext context) { + failIfNotIndexedAndNoDocValues(); + // has index and doc_values enabled + if (isSearchable() && hasDocValues()) { + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + bytesRefs[i] = indexedValueForSearch(values.get(i)); + } + Query indexQuery = new TermInSetQuery(name(), bytesRefs); + Query dvQuery = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + // if we only have doc_values enabled, we construct a new query with doc_values re-written + if (hasDocValues()) { + BytesRef[] bytesRefs = new BytesRef[values.size()]; + for (int i = 0; i < bytesRefs.length; i++) { + bytesRefs[i] = indexedValueForSearch(values.get(i)); + } + return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, name(), bytesRefs); + } + // has index enabled, we're going to return the query as is + return super.termsQuery(values, context); + } + + @Override + public Query prefixQuery( + String value, + @Nullable MultiTermQuery.RewriteMethod method, + boolean caseInsensitive, + QueryShardContext context + ) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[prefix] queries cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false. For optimised prefix queries on text " + + "fields please enable [index_prefixes]." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context); + Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context); + } + return super.prefixQuery(value, method, caseInsensitive, context); + } + + @Override + public Query regexpQuery( + String value, + int syntaxFlags, + int matchFlags, + int maxDeterminizedStates, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[regexp] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + Query dvQuery = super.regexpQuery( + value, + syntaxFlags, + matchFlags, + maxDeterminizedStates, + MultiTermQuery.DOC_VALUES_REWRITE, + context + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new RegexpQuery( + new Term(name(), indexedValueForSearch(value)), + syntaxFlags, + matchFlags, + RegexpQuery.DEFAULT_PROVIDER, + maxDeterminizedStates, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context); + } + + @Override + public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[range] queries on [text] or [keyword] fields cannot be executed when '" + + ALLOW_EXPENSIVE_QUERIES.getKey() + + "' is set to false." + ); + } + failIfNotIndexedAndNoDocValues(); + if (isSearchable() && hasDocValues()) { + Query indexQuery = new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + Query dvQuery = new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper, + MultiTermQuery.DOC_VALUES_REWRITE + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return new TermRangeQuery( + name(), + lowerTerm == null ? null : indexedValueForSearch(lowerTerm), + upperTerm == null ? null : indexedValueForSearch(upperTerm), + includeLower, + includeUpper + ); + } + + @Override + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + failIfNotIndexedAndNoDocValues(); + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[fuzzy] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context); + Query dvQuery = super.fuzzyQuery( + value, + fuzziness, + prefixLength, + maxExpansions, + transpositions, + MultiTermQuery.DOC_VALUES_REWRITE, + context + ); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + return new FuzzyQuery( + new Term(name(), indexedValueForSearch(value)), + fuzziness.asDistance(BytesRefs.toString(value)), + prefixLength, + maxExpansions, + transpositions, + MultiTermQuery.DOC_VALUES_REWRITE + ); + } + return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, context); + } + @Override public Query wildcardQuery( String value, @Nullable MultiTermQuery.RewriteMethod method, - boolean caseInsensitve, + boolean caseInsensitive, QueryShardContext context ) { - // keyword field types are always normalized, so ignore case sensitivity and force normalize the wildcard + if (context.allowExpensiveQueries() == false) { + throw new OpenSearchException( + "[wildcard] queries cannot be executed when '" + ALLOW_EXPENSIVE_QUERIES.getKey() + "' is set to " + "false." + ); + } + failIfNotIndexedAndNoDocValues(); + // keyword field types are always normalized, so ignore case sensitivity and force normalize the + // wildcard // query text - return super.wildcardQuery(value, method, caseInsensitve, true, context); + if (isSearchable() && hasDocValues()) { + Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context); + Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context); + return new IndexOrDocValuesQuery(indexQuery, dvQuery); + } + if (hasDocValues()) { + Term term; + value = normalizeWildcardPattern(name(), value, getTextSearchInfo().getSearchAnalyzer()); + term = new Term(name(), value); + if (caseInsensitive) { + return AutomatonQueries.caseInsensitiveWildcardQuery(term, method); + } + return new WildcardQuery(term, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, MultiTermQuery.DOC_VALUES_REWRITE); + } + return super.wildcardQuery(value, method, caseInsensitive, true, context); } + } private final boolean indexed; @@ -422,8 +639,10 @@ protected KeywordFieldMapper( this.indexAnalyzers = builder.indexAnalyzers; } - /** Values that have more chars than the return value of this method will - * be skipped at parsing time. */ + /** + * Values that have more chars than the return value of this method will + * be skipped at parsing time. + */ public int ignoreAbove() { return ignoreAbove; } diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index 997835f712038..62acad99074c2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -269,6 +269,21 @@ public Query fuzzyQuery( ); } + // Fuzzy Query with re-write method + public Query fuzzyQuery( + Object value, + Fuzziness fuzziness, + int prefixLength, + int maxExpansions, + boolean transpositions, + @Nullable MultiTermQuery.RewriteMethod method, + QueryShardContext context + ) { + throw new IllegalArgumentException( + "Can only use fuzzy queries on keyword and text fields - not on [" + name + "] which is of type [" + typeName() + "]" + ); + } + // Case sensitive form of prefix query public final Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, QueryShardContext context) { return prefixQuery(value, method, false, context); @@ -433,6 +448,15 @@ protected final void failIfNotIndexed() { } } + protected final void failIfNotIndexedAndNoDocValues() { + // we fail if a field is both not indexed and does not have doc_values enabled + if (isIndexed == false && hasDocValues() == false) { + throw new IllegalArgumentException( + "Cannot search on field [" + name() + "] since it is both not indexed," + " and does not have doc_values enabled." + ); + } + } + public boolean eagerGlobalOrdinals() { return eagerGlobalOrdinals; } diff --git a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java index 0d8ef6784a28c..393c448330142 100644 --- a/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/KeywordFieldTypeTests.java @@ -44,13 +44,17 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.Operations; import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.cluster.metadata.IndexMetadata; @@ -128,14 +132,29 @@ public void testTermsQuery() { List terms = new ArrayList<>(); terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); - assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); + Query expected = new IndexOrDocValuesQuery( + new TermInSetQuery("field", terms), + new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms) + ); + assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null)); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + Query expectedIndex = new TermInSetQuery("field", terms); + assertEquals(expectedIndex, onlyIndexed.termsQuery(Arrays.asList("foo", "bar"), null)); + + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query expectedDocValues = new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms); + assertEquals(expectedDocValues, onlyDocValues.termsQuery(Arrays.asList("foo", "bar"), null)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); } public void testExistsQuery() { @@ -157,9 +176,36 @@ public void testExistsQuery() { public void testRangeQuery() { MappedFieldType ft = new KeywordFieldType("field"); + + Query indexExpected = new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false); + Query dvExpected = new TermRangeQuery( + "field", + BytesRefs.toBytesRef("foo"), + BytesRefs.toBytesRef("bar"), + true, + false, + MultiTermQuery.DOC_VALUES_REWRITE + ); + + Query expected = new IndexOrDocValuesQuery(indexExpected, dvExpected); + Query actual = ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC); + assertEquals(expected, actual); + + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC)); + + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) + ); + assertEquals( - new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), - ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() ); OpenSearchException ee = expectThrows( @@ -175,16 +221,37 @@ public void testRangeQuery() { public void testRegexpQuery() { MappedFieldType ft = new KeywordFieldType("field"); assertEquals( - new RegexpQuery(new Term("field", "foo.*")), + new IndexOrDocValuesQuery( + new RegexpQuery(new Term("field", "foo.*")), + new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE) + ), ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) ); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query indexExpected = new RegexpQuery(new Term("field", "foo.*")); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query dvExpected = new RegexpQuery( + new Term("field", "foo.*"), + 0, + 0, + RegexpQuery.DEFAULT_PROVIDER, + 10, + MultiTermQuery.DOC_VALUES_REWRITE + ); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); OpenSearchException ee = expectThrows( OpenSearchException.class, @@ -200,12 +267,26 @@ public void testFuzzyQuery() { ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new KeywordFieldType("field", false, true, Collections.emptyMap()); + Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC)); + + Query dvExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals( + dvExpected, + onlyDocValues.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC) + ); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, - () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) + () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC) + ); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); OpenSearchException ee = expectThrows( OpenSearchException.class, @@ -214,6 +295,47 @@ public void testFuzzyQuery() { assertEquals("[fuzzy] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); } + public void testWildCardQuery() { + MappedFieldType ft = new KeywordFieldType("field"); + Query expected = new IndexOrDocValuesQuery( + new WildcardQuery(new Term("field", new BytesRef("foo*"))), + new WildcardQuery( + new Term("field", new BytesRef("foo*")), + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, + MultiTermQuery.DOC_VALUES_REWRITE + ) + ); + assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*"))); + MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap()); + assertEquals(indexExpected, onlyIndexed.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)); + + Query dvExpected = new WildcardQuery( + new Term("field", new BytesRef("foo*")), + Operations.DEFAULT_DETERMINIZE_WORK_LIMIT, + MultiTermQuery.DOC_VALUES_REWRITE + ); + MappedFieldType onlyDocValues = new KeywordFieldType("field", false, true, Collections.emptyMap()); + assertEquals(dvExpected, onlyDocValues.wildcardQuery("foo*", MultiTermQuery.DOC_VALUES_REWRITE, MOCK_QSC)); + + MappedFieldType unsearchable = new KeywordFieldType("field", false, false, Collections.emptyMap()); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) + ); + assertEquals( + "Cannot search on field [field] since it is both not indexed, and does not have doc_values " + "enabled.", + e.getMessage() + ); + + OpenSearchException ee = expectThrows( + OpenSearchException.class, + () -> ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_DISALLOW_EXPENSIVE) + ); + assertEquals("[wildcard] queries cannot be executed when 'search.allow_expensive_queries' is set to false.", ee.getMessage()); + } + public void testNormalizeQueries() { MappedFieldType ft = new KeywordFieldType("field"); assertEquals(new TermQuery(new Term("field", new BytesRef("FOO"))), ft.termQuery("FOO", null));