From a494ddec3f4dc8145dcc2ddf41f9f5f635bf4711 Mon Sep 17 00:00:00 2001 From: kkewwei Date: Sat, 3 Aug 2024 02:51:55 +0800 Subject: [PATCH] support rangeQuery and regexpQuery in constant_keyword field type (#14711) --------- Signed-off-by: kkewwei --- CHANGELOG.md | 1 + .../test/index/110_constant_keyword.yml | 282 +++++++++++++++++- .../index/mapper/ConstantFieldType.java | 2 +- .../mapper/ConstantKeywordFieldMapper.java | 66 ++++ .../mapper/ConstantKeywordFieldTypeTests.java | 54 ++++ 5 files changed, 394 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11f21afd5fadf..abf7c5018bb9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add basic aggregation support for derived fields ([#14618](https://github.com/opensearch-project/OpenSearch/pull/14618)) - Add ThreadContextPermission for markAsSystemContext and allow core to perform the method ([#15016](https://github.com/opensearch-project/OpenSearch/pull/15016)) - Add ThreadContextPermission for stashAndMergeHeaders and stashWithOrigin ([#15039](https://github.com/opensearch-project/OpenSearch/pull/15039)) +- Add `rangeQuery` and `regexpQuery` for `constant_keyword` field type ([#14711](https://github.com/opensearch-project/OpenSearch/pull/14711)) - [Range Queries] Add new approximateable query framework to short-circuit range queries ([#13788](https://github.com/opensearch-project/OpenSearch/pull/13788)) ### Dependencies diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml index f4f8b3752bec8..1c50187534026 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/index/110_constant_keyword.yml @@ -1,17 +1,13 @@ +# The test setup includes two parts: +# part1: test mapping 
and indexing +# part2: test query --- -# The test setup includes: -# - Create index with constant_keyword field type -# - Check mapping -# - Index two example documents -# - Search -# - Delete Index when connection is teardown - -"Mappings and Supported queries": +"Mappings and Indexing": - skip: version: " - 2.15.99" reason: "fixed in 2.16.0" - # Create index with constant_keyword field type + # Create indices with constant_keyword field type - do: indices.create: index: test @@ -22,7 +18,7 @@ type: "constant_keyword" value: "1" - # Index document + # Index documents to test integer and string are both ok. - do: index: index: test @@ -39,6 +35,7 @@ "genre": 1 } + # Refresh - do: indices.refresh: index: test @@ -54,6 +51,7 @@ # Verify Document Count - do: search: + index: test body: { query: { match_all: {} @@ -68,3 +66,267 @@ - do: indices.delete: index: test + +--- +"Queries": + - skip: + version: " - 2.99.99" + reason: "rangeQuery and regexpQuery are supported in 3.0.0 in main branch" + + - do: + indices.create: + index: test1 + body: + mappings: + properties: + genre: + type: "constant_keyword" + value: "d3efault" + + # Index documents to test query. 
+ - do: + index: + index: test1 + id: 1 + body: { + "genre": "d3efault" + } + + # Refresh + - do: + indices.refresh: + index: test1 + + # Test rangeQuery + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + gte: "d3efault" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + "include_lower": "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + lte: "d3efault" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + to: "d3efault", + include_upper: "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "false", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "true", + include_upper: "false" + } + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: null, + to: null + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efault", + to: "d3efault", + include_lower: "true", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + range: { + genre: { + from: "d3efaul", + to: "d3efault1", + include_lower: "true", + include_upper: "true" + } + } + } + } + + - length: { hits.hits: 1 } + + # Test regexpQuery + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d.*" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + 
"genre":"d\\defau[a-z]?t" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d\\defa[a-z]?t" + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d3efa[a-z]{3,3}" + } + } + } + + - length: { hits.hits: 1 } + + - do: + search: + index: test1 + body: { + query: { + regexp: { + "genre":"d3efa[a-z]{4,4}" + } + } + } + + - length: { hits.hits: 0 } + + - do: + search: + index: test1 + body: { + query: { + match_all: {} + } + } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.genre: "d3efault" } + + # Delete Index when connection is teardown + - do: + indices.delete: + index: test1 diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java index a28a6369b1aa4..cc581651e5295 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/ConstantFieldType.java @@ -76,7 +76,7 @@ public final boolean isAggregatable() { */ protected abstract boolean matches(String pattern, boolean caseInsensitive, QueryShardContext context); - private static String valueToString(Object value) { + static String valueToString(Object value) { return value instanceof BytesRef ? 
((BytesRef) value).utf8ToString() : value.toString();
     }
 
diff --git a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
index 2edd817f61f61..02c2214c18e72 100644
--- a/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/ConstantKeywordFieldMapper.java
@@ -9,10 +9,21 @@
 package org.opensearch.index.mapper;
 
 import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.RegexpQuery;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.ByteRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 import org.opensearch.OpenSearchParseException;
+import org.opensearch.common.Nullable;
 import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.common.geo.ShapeRelation;
+import org.opensearch.common.lucene.BytesRefs;
 import org.opensearch.common.regex.Regex;
+import org.opensearch.common.time.DateMathParser;
 import org.opensearch.index.fielddata.IndexFieldData;
 import org.opensearch.index.fielddata.plain.ConstantIndexFieldData;
 import org.opensearch.index.query.QueryShardContext;
@@ -20,6 +31,7 @@
 import org.opensearch.search.lookup.SearchLookup;
 
 import java.io.IOException;
+import java.time.ZoneId;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
@@ -122,6 +134,98 @@ public Query existsQuery(QueryShardContext context) {
             return new MatchAllDocsQuery();
         }
 
+        /**
+         * Answers a range query against the single value shared by every document of this field.
+         * <p>
+         * The range either contains the constant value, in which case all documents match, or it does
+         * not, in which case none do. Bounds are compared as {@link BytesRef}s, i.e. in unsigned byte
+         * order, rather than with {@link String#compareTo(String)}: the latter orders by UTF-16 code
+         * unit, which disagrees with UTF-8 byte order once supplementary characters are involved.
+         *
+         * @param lowerTerm    lower bound, or {@code null} when the range has no lower bound
+         * @param upperTerm    upper bound, or {@code null} when the range has no upper bound
+         * @param includeLower whether a value equal to {@code lowerTerm} is inside the range
+         * @param includeUpper whether a value equal to {@code upperTerm} is inside the range
+         * @param relation     unused
+         * @param timeZone     unused
+         * @param parser       unused
+         * @param context      unused
+         * @return {@link MatchAllDocsQuery} if the constant value is inside the range, {@link MatchNoDocsQuery} otherwise
+         */
+        @Override
+        public Query rangeQuery(
+            Object lowerTerm,
+            Object upperTerm,
+            boolean includeLower,
+            boolean includeUpper,
+            ShapeRelation relation,
+            ZoneId timeZone,
+            DateMathParser parser,
+            QueryShardContext context
+        ) {
+            final BytesRef lower = lowerTerm == null ? null : BytesRefs.toBytesRef(lowerTerm);
+            final BytesRef upper = upperTerm == null ? null : BytesRefs.toBytesRef(upperTerm);
+
+            // An inverted range (lower > upper) cannot contain any value.
+            if (lower != null && upper != null && lower.compareTo(upper) > 0) {
+                return new MatchNoDocsQuery();
+            }
+
+            final BytesRef constant = BytesRefs.toBytesRef(value);
+            if (lower != null) {
+                final int cmp = lower.compareTo(constant);
+                // Out of range when the value sorts below the bound, or equals an exclusive bound.
+                if (cmp > 0 || (cmp == 0 && includeLower == false)) {
+                    return new MatchNoDocsQuery();
+                }
+            }
+            if (upper != null) {
+                final int cmp = upper.compareTo(constant);
+                // Mirror image of the lower-bound check.
+                if (cmp < 0 || (cmp == 0 && includeUpper == false)) {
+                    return new MatchNoDocsQuery();
+                }
+            }
+            return new MatchAllDocsQuery();
+        }
+
+        /**
+         * Answers a regular-expression query against the single value shared by every document of this field.
+         * <p>
+         * The pattern is compiled into an automaton that is run over the bytes of the constant value:
+         * all documents match when the automaton accepts those bytes, none otherwise.
+         *
+         * @param value                 the regular expression; it shadows the field's constant value, which
+         *                              is therefore read through {@code this.value}
+         * @param syntaxFlags           syntax flags handed to {@link RegExp}
+         * @param matchFlags            match flags handed to {@link RegExp}
+         * @param maxDeterminizedStates limit handed to the automaton conversion
+         * @param method                unused
+         * @param context               unused
+         * @return {@link MatchAllDocsQuery} if the pattern accepts the constant value, {@link MatchNoDocsQuery} otherwise
+         */
+        @Override
+        public Query regexpQuery(
+            String value,
+            int syntaxFlags,
+            int matchFlags,
+            int maxDeterminizedStates,
+            @Nullable MultiTermQuery.RewriteMethod method,
+            QueryShardContext context
+        ) {
+            Automaton automaton = new RegExp(value, syntaxFlags, matchFlags).toAutomaton(
+                RegexpQuery.DEFAULT_PROVIDER,
+                maxDeterminizedStates
+            );
+            ByteRunAutomaton byteRunAutomaton = new ByteRunAutomaton(automaton);
+            BytesRef valueBytes = BytesRefs.toBytesRef(this.value);
+            if (byteRunAutomaton.run(valueBytes.bytes, valueBytes.offset, valueBytes.length)) {
+                return new MatchAllDocsQuery();
+            } else {
+                return new MatchNoDocsQuery();
+            }
+        }
+
         @Override
         public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier searchLookup) {
             return new ConstantIndexFieldData.Builder(fullyQualifiedIndexName, name(), CoreValuesSourceType.BYTES);
diff --git a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
index 235811539a299..266d79fb8e8b8 100644
--- a/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
+++ b/server/src/test/java/org/opensearch/index/mapper/ConstantKeywordFieldTypeTests.java
@@ -10,6 +10,8 @@
 
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.MatchNoDocsQuery;
+import
org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.common.regex.Regex;
@@ -61,6 +63,58 @@ public void testExistsQuery() {
         assertEquals(new MatchAllDocsQuery(), ft.existsQuery(createContext()));
     }
 
+    public void testRangeQuery() { // NOTE(review): expectations imply the shared `ft` holds "default" (declared outside this hunk)
+        Query actual = ft.rangeQuery("default", null, true, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual); // lower bound "default" inclusive, no upper bound
+
+        actual = ft.rangeQuery("default", null, false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual); // same lower bound made exclusive
+
+        actual = ft.rangeQuery(null, "default", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual); // no lower bound, upper bound "default" inclusive
+
+        actual = ft.rangeQuery(null, "default", false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual); // same upper bound made exclusive
+
+        actual = ft.rangeQuery("default", "default", false, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual); // identical bounds, lower one exclusive
+
+        actual = ft.rangeQuery("default", "default", true, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), actual); // identical bounds, upper one exclusive
+
+        actual = ft.rangeQuery(null, null, false, false, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual); // unbounded on both sides
+
+        actual = ft.rangeQuery("default", "default", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual); // identical bounds, both inclusive
+
+        actual = ft.rangeQuery("defaul", "default1", true, true, null, null, null, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), actual); // bounds "defaul".."default1", both inclusive
+    }
+
+    public void testRegexpQuery() {
+        final ConstantKeywordFieldMapper.ConstantKeywordFieldType ft = new ConstantKeywordFieldMapper.ConstantKeywordFieldType(
+            "field",
+            "d3efault"
+        ); // local fixture with constant value "d3efault"; hides the `ft` used by the other tests
+        // `d.*`: a literal 'd' followed by anything
+        Query query = ft.regexpQuery("d.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+        // `\d` consumes the '3' and the optional `[a-z]?` consumes the 'l'
+        query = ft.regexpQuery("d\\defau[a-z]?t", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+
+        // same pattern without the 'u': it spans at most seven characters, so the eight-character value is rejected
+        query = ft.regexpQuery("d\\defa[a-z]?t", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), query);
+        // `[a-z]{3,3}`: exactly three letters ("ult") after "d3efa"
+        query = ft.regexpQuery("d3efa[a-z]{3,3}", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchAllDocsQuery(), query);
+        // `[a-z]{4,4}`: four letters are required but only three remain
+        query = ft.regexpQuery("d3efa[a-z]{4,4}", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC);
+        assertEquals(new MatchNoDocsQuery(), query);
+    }
+
     private QueryShardContext createContext() {
         IndexMetadata indexMetadata = IndexMetadata.builder("index")
             .settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))