diff --git a/CHANGELOG.md b/CHANGELOG.md index 026606ff57d65..2303ac4eba673 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -176,6 +176,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) +- Improved performance of numeric exact-match queries ([#11209](https://github.com/opensearch-project/OpenSearch/pull/11209)) - Change error message when per shard document limit is breached ([#11312](https://github.com/opensearch-project/OpenSearch/pull/11312)) - Improve boolean parsing performance ([#11308](https://github.com/opensearch-project/OpenSearch/pull/11308)) - Interpret byte array as primitive using VarHandles ([#11362](https://github.com/opensearch-project/OpenSearch/pull/11362)) diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index 7be241017f683..400d867296e5f 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -199,9 +199,9 @@ public String typeName() { @Override public Query termQuery(Object value, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); long scaledValue = Math.round(scale(value)); - Query query = NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue); + Query query = NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue, hasDocValues(), 
isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -210,13 +210,18 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); List scaledValues = new ArrayList<>(values.size()); for (Object value : values) { long scaledValue = Math.round(scale(value)); scaledValues.add(scaledValue); } - Query query = NumberFieldMapper.NumberType.LONG.termsQuery(name(), Collections.unmodifiableList(scaledValues)); + Query query = NumberFieldMapper.NumberType.LONG.termsQuery( + name(), + Collections.unmodifiableList(scaledValues), + hasDocValues(), + isSearchable() + ); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -225,7 +230,7 @@ public Query termsQuery(List values, QueryShardContext context) { @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); Long lo = null; if (lowerTerm != null) { double dValue = scale(lowerTerm); @@ -242,7 +247,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower } hi = Math.round(Math.floor(dValue)); } - Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), context); + Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), isSearchable(), context); if (boost() != 1f) { query = new BoostQuery(query, boost()); } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java index be12c49321b87..d83811e6668eb 100644 --- a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java +++ 
b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java @@ -34,11 +34,13 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -63,7 +65,9 @@ public void testTermQuery() { ); double value = (randomDouble() * 2 - 1) * 10000; long scaledValue = Math.round(value * ft.getScalingFactor()); - assertEquals(LongPoint.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, null)); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery("scaled_float", scaledValue); + Query query = new IndexOrDocValuesQuery(LongPoint.newExactQuery("scaled_float", scaledValue), dvQuery); + assertEquals(query, ft.termQuery(value, null)); } public void testTermsQuery() { @@ -75,7 +79,7 @@ public void testTermsQuery() { long scaledValue1 = Math.round(value1 * ft.getScalingFactor()); double value2 = (randomDouble() * 2 - 1) * 10000; long scaledValue2 = Math.round(value2 * ft.getScalingFactor()); - assertEquals(LongPoint.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null)); + assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null)); } public void testRangeQuery() throws IOException { @@ -112,7 +116,16 @@ public void testRangeQuery() throws IOException { Double u = randomBoolean() ? 
null : (randomDouble() * 2 - 1) * 10000; boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery( + "double", + l, + u, + includeLower, + includeUpper, + false, + true, + MOCK_QSC + ); Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper, MOCK_QSC); assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ)); } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml new file mode 100644 index 0000000000000..f3281e35ac8e6 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml @@ -0,0 +1,1147 @@ +setup: + - skip: + features: [ "headers" ] + version: " - 2.99.99" + reason: "searching with only doc_values was added in 3.0.0" +--- +"search on fields with both index and doc_values enabled": + - do: + indices.create: + index: test-iodvq + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: true + byte: + type: byte + index: true + doc_values: true + double: + type: double + index: true + doc_values: true + float: + type: float + index: true + doc_values: true + half_float: + type: half_float + index: true + doc_values: true + integer: + type: integer + index: true + doc_values: true + long: + type: long + index: true + doc_values: true + short: + type: short + index: true + doc_values: true + unsigned_long: + type: unsigned_long + index: true + doc_values: true + + - do: + bulk: + index: test-iodvq + refresh: true + body: + - '{"index": {"_index": "test-iodvq", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, 
"long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-iodvq", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-iodvq", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: 
test-iodvq + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + 
range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only index enabled": + - do: + indices.create: + index: test-index + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: false + byte: + type: byte + index: true + doc_values: false + double: + type: double + index: true + doc_values: false + float: + type: float + index: true + doc_values: false + half_float: + type: half_float + index: true + doc_values: false + integer: + type: integer + index: true + doc_values: false + long: + type: long + index: true + doc_values: false + short: + type: short + index: true + doc_values: false + unsigned_long: + type: unsigned_long + index: true + doc_values: false + + - do: + bulk: + index: test-index + refresh: true + body: + - '{"index": {"_index": "test-index", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-index", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": 
"401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-index", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + half_float: [ 
400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + short: { + gte: 151, + lte: 152 + 
}, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only doc_values enabled": + - do: + indices.create: + index: test-doc-values + body: + mappings: + properties: + some_keyword: + type: keyword + index: false + doc_values: true + byte: + type: byte + index: false + doc_values: true + double: + type: double + index: false + doc_values: true + float: + type: float + index: false + doc_values: true + half_float: + type: half_float + index: false + doc_values: true + integer: + type: integer + index: false + doc_values: true + long: + type: long + index: false + doc_values: true + short: + type: short + index: false + doc_values: true + unsigned_long: + type: unsigned_long + index: false + doc_values: true + + - do: + bulk: + index: test-doc-values + refresh: true + body: + - '{"index": {"_index": "test-doc-values", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-doc-values", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-doc-values", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, 
"double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + 
terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: 
true + index: test-doc-values + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml deleted file mode 100644 index 8829e7b100fdd..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -"search on keyword fields with doc_values enabled": - - do: - indices.create: - index: test - body: - mappings: - properties: - "some_keyword": - type: "keyword" - index: true - doc_values: true - - - do: - bulk: - index: test - refresh: true - body: - - '{"index": {"_index": "test", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data" }' - - '{ "index": { "_index": "test", "_id": "2" }}' - - '{ "some_keyword": "400" }' - - '{ "index": { "_index": "test", "_id": "3" } }' - - '{ "some_keyword": "5" }' - - - do: - search: - index: test - body: - query: - prefix: - some_keyword: "ing" - - - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } - - - do: - search: - index: test - body: - query: - range: { - "some_keyword": { - "lt": 500 - } } - - - match: { hits.total.value: 2 } diff --git a/server/src/main/java/org/apache/lucene/util/LongHashSet.java b/server/src/main/java/org/apache/lucene/util/LongHashSet.java new file mode 100644 index 0000000000000..a463e8a189585 --- /dev/null +++ 
b/server/src/main/java/org/apache/lucene/util/LongHashSet.java @@ -0,0 +1,136 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.util; + +import org.apache.lucene.util.packed.PackedInts; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +/** Set of longs, optimized for docvalues usage */ +public final class LongHashSet implements Accountable { + private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); + + private static final long MISSING = Long.MIN_VALUE; + + final long[] table; + final int mask; + final boolean hasMissingValue; + final int size; + /** minimum value in the set, or Long.MAX_VALUE for an empty set */ + public final long minValue; + /** maximum value in the set, or Long.MIN_VALUE for an empty set */ + public final long maxValue; + + /** Construct a set. Values must be in sorted order. */ + public LongHashSet(long[] values) { + int tableSize = Math.toIntExact(values.length * 3L / 2); + tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 + assert tableSize >= values.length * 3L / 2; + table = new long[tableSize]; + Arrays.fill(table, MISSING); + mask = tableSize - 1; + boolean hasMissingValue = false; + int size = 0; + long previousValue = Long.MIN_VALUE; // for assert + for (long value : values) { + if (value == MISSING) { + size += hasMissingValue ? 0 : 1; + hasMissingValue = true; + } else if (add(value)) { + ++size; + } + assert value >= previousValue : "values must be provided in sorted order"; + previousValue = value; + } + this.hasMissingValue = hasMissingValue; + this.size = size; + this.minValue = values.length == 0 ? Long.MAX_VALUE : values[0]; + this.maxValue = values.length == 0 ? 
Long.MIN_VALUE : values[values.length - 1]; + } + + private boolean add(long l) { + assert l != MISSING; + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + table[i] = l; + return true; + } else if (table[i] == l) { + // already added + return false; + } + } + } + + /** + * check for membership in the set. + * + * <p>
You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before + * calling this. + */ + public boolean contains(long l) { + if (l == MISSING) { + return hasMissingValue; + } + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + return false; + } else if (table[i] == l) { + return true; + } + } + } + + /** returns a stream of all values contained in this set */ + LongStream stream() { + LongStream stream = Arrays.stream(table).filter(v -> v != MISSING); + if (hasMissingValue) { + stream = LongStream.concat(LongStream.of(MISSING), stream); + } + return stream; + } + + @Override + public int hashCode() { + return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table)); + } + + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof LongHashSet) { + LongHashSet that = (LongHashSet) obj; + return size == that.size + && minValue == that.minValue + && maxValue == that.maxValue + && mask == that.mask + && hasMissingValue == that.hasMissingValue + && Arrays.equals(table, that.table); + } + return false; + } + + @Override + public String toString() { + return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]")); + } + + /** number of elements in the set */ + public int size() { + return size; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table); + } +} diff --git a/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java new file mode 100644 index 0000000000000..669dbb1e1bfc7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java @@ -0,0 +1,176 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * 
this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.document;
+
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchNoDocsQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.LongHashSet;
+
+import java.io.IOException;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * The {@link org.apache.lucene.document.SortedNumericDocValuesSetQuery} implementation for unsigned long numeric data type.
+ * <p>
+ * Queried values are reduced to their raw 64-bit pattern and compared with unsigned semantics
+ * ({@link Long#compareUnsigned(long, long)}). A document matches when at least one of its values is in the set.
+ *
+ * @opensearch.internal
+ */
+public abstract class SortedUnsignedLongDocValuesSetQuery extends Query {
+
+    private final String field;
+    private final LongHashSet numbers;
+    /** Smallest queried value under unsigned comparison (raw 64-bit pattern). */
+    private final long minUnsigned;
+    /** Largest queried value under unsigned comparison (raw 64-bit pattern). */
+    private final long maxUnsigned;
+
+    /**
+     * @param field   name of the doc-values field to match against, must not be {@code null}
+     * @param numbers values to match; only the low-order 64 bits of each value are used. The array is not modified.
+     */
+    SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) {
+        this.field = Objects.requireNonNull(field);
+        // Work on a private copy so the caller's (varargs) array is never reordered.
+        final long[] bits = Arrays.stream(numbers).mapToLong(BigInteger::longValue).toArray();
+        // Track the unsigned bounds here instead of reading them from LongHashSet, so the range
+        // pre-check does not depend on how that class derives its own min/max.
+        long lo = -1L; // all bits set: the largest unsigned value
+        long hi = 0L;
+        for (long bit : bits) {
+            if (Long.compareUnsigned(bit, lo) < 0) {
+                lo = bit;
+            }
+            if (Long.compareUnsigned(bit, hi) > 0) {
+                hi = bit;
+            }
+        }
+        this.minUnsigned = lo;
+        this.maxUnsigned = hi;
+        // NOTE(review): LongHashSet is presumed to expect its input in natural (signed) long order - confirm.
+        Arrays.sort(bits);
+        this.numbers = new LongHashSet(bits);
+    }
+
+    /**
+     * Renders the query as {@code <field>: <values>}.
+     *
+     * @param defaultField the default field of the enclosing query; not used, the field of this query is always printed
+     */
+    @Override
+    public String toString(String defaultField) {
+        return field + ": " + numbers;
+    }
+
+    @Override
+    public void visit(QueryVisitor visitor) {
+        if (visitor.acceptField(field)) {
+            visitor.visitLeaf(this);
+        }
+    }
+
+    /** Short-circuits to a {@link MatchNoDocsQuery} when there is nothing to match. */
+    @Override
+    public Query rewrite(IndexSearcher indexSearcher) throws IOException {
+        if (numbers.size() == 0) {
+            return new MatchNoDocsQuery();
+        }
+        return super.rewrite(indexSearcher);
+    }
+
+    @Override
+    public boolean equals(Object other) {
+        if (sameClassAs(other) == false) {
+            return false;
+        }
+        SortedUnsignedLongDocValuesSetQuery that = (SortedUnsignedLongDocValuesSetQuery) other;
+        return field.equals(that.field) && numbers.equals(that.numbers);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(classHash(), field, numbers);
+    }
+
+    /**
+     * Supplies the doc values to test for one segment.
+     *
+     * @return the values of {@code field} in {@code reader}, or {@code null} when no scorer should be created for the segment
+     */
+    abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException;
+
+    /** Returns {@code true} if the raw 64-bit {@code value} is one of the queried numbers. */
+    private boolean matchesValue(long value) {
+        // Two cheap unsigned comparisons reject most values before the hash lookup.
+        return Long.compareUnsigned(value, minUnsigned) >= 0
+            && Long.compareUnsigned(value, maxUnsigned) <= 0
+            && numbers.contains(value);
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+        return new ConstantScoreWeight(this, boost) {
+
+            @Override
+            public boolean isCacheable(LeafReaderContext ctx) {
+                return DocValues.isCacheable(ctx, field);
+            }
+
+            @Override
+            public Scorer scorer(LeafReaderContext context) throws IOException {
+                SortedNumericDocValues values = getValues(context.reader(), field);
+                if (values == null) {
+                    return null;
+                }
+                final NumericDocValues singleton = DocValues.unwrapSingleton(values);
+                final TwoPhaseIterator iterator;
+                if (singleton != null) {
+                    // Single-valued segment: test the one value directly.
+                    iterator = new TwoPhaseIterator(singleton) {
+                        @Override
+                        public boolean matches() throws IOException {
+                            return matchesValue(singleton.longValue());
+                        }
+
+                        @Override
+                        public float matchCost() {
+                            return 5; // 2 comparisons, possible lookup in the set
+                        }
+                    };
+                } else {
+                    iterator = new TwoPhaseIterator(values) {
+                        @Override
+                        public boolean matches() throws IOException {
+                            // Every value is checked. Stopping at the first value above the unsigned maximum
+                            // would only be safe if a document's values were ordered by unsigned comparison.
+                            // NOTE(review): doc values are presumably ordered as signed longs - confirm.
+                            final int count = values.docValueCount();
+                            for (int i = 0; i < count; i++) {
+                                if (matchesValue(values.nextValue())) {
+                                    return true;
+                                }
+                            }
+                            return false;
+                        }
+
+                        @Override
+                        public float matchCost() {
+                            return 5; // 2 comparisons, possible lookup in the set
+                        }
+                    };
+                }
+                return new ConstantScoreScorer(this, score(), scoreMode, iterator);
+            }
+        };
+    }
+
+    /**
+     * Creates a doc-values-only query matching documents that have at least one of {@code values} in {@code field}.
+     */
+    public static Query newSlowSetQuery(String field, BigInteger... values) {
+        return new SortedUnsignedLongDocValuesSetQuery(field, values) {
+            @Override
+            SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException {
+                FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+                if (info == null) {
+                    // Queries have some optimizations when one sub scorer returns null rather
+                    // than a scorer that does not match any documents
+                    return null;
+                }
+                return DocValues.getSortedNumeric(reader, field);
+            }
+        };
+    }
+
+    /**
+     * Creates a doc-values-only query matching documents that have exactly {@code value} in {@code field},
+     * expressed as a range query whose lower and upper bounds are both {@code value}.
+     */
+    public static Query newSlowExactQuery(String field, BigInteger value) {
+        return new SortedUnsignedLongDocValuesRangeQuery(field, value, value) {
+            @Override
+            SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException {
+                FieldInfo info = reader.getFieldInfos().fieldInfo(field);
+                if (info == null) {
+                    // Queries have some optimizations when one sub scorer returns null rather
+                    // than a scorer that does not match any documents
+                    return null;
+                }
+                return DocValues.getSortedNumeric(reader, field);
+            }
+        };
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
index 204e7bc4c16ab..524d2b0e0dd38 100644
--- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
@@ -37,6 +37,7 @@
 
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FloatField;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.LongPoint;
@@ -61,6 +62,7 @@
 import org.opensearch.core.xcontent.XContentParser;
 import org.opensearch.core.xcontent.XContentParser.Token;
 import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery;
+import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery;
 import
org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData.NumericType; import org.opensearch.index.fielddata.plain.SortedNumericIndexFieldData; @@ -201,18 +203,39 @@ public Float parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { float v = parse(value, false); + if (isSearchable && hasDocValues) { + Query query = HalfFloatPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, HalfFloatPoint.halfFloatToSortableShort(v)); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, HalfFloatPoint.halfFloatToSortableShort(v)); + } return HalfFloatPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { float[] v = new float[values.size()]; + long points[] = new long[v.length]; for (int i = 0; i < values.size(); ++i) { v[i] = parse(values.get(i), false); + if (hasDocValues) { + points[i] = HalfFloatPoint.halfFloatToSortableShort(v[i]); + } + } + if (isSearchable && hasDocValues) { + Query query = HalfFloatPoint.newSetQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowSetQuery(field, points); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); } return HalfFloatPoint.newSetQuery(field, v); + } @Override @@ -223,6 +246,7 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { float l = Float.NEGATIVE_INFINITY; @@ -241,16 +265,23 @@ public Query rangeQuery( } u = 
HalfFloatPoint.nextDown(u);
                 }
-                Query query = HalfFloatPoint.newRangeQuery(field, l, u);
-                if (hasDocValues) {
+                if (isSearchable && hasDocValues) {
+                    Query query = HalfFloatPoint.newRangeQuery(field, l, u);
                     Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(
                         field,
                         HalfFloatPoint.halfFloatToSortableShort(l),
                         HalfFloatPoint.halfFloatToSortableShort(u)
                     );
-                    query = new IndexOrDocValuesQuery(query, dvQuery);
+                    return new IndexOrDocValuesQuery(query, dvQuery);
+                }
+                if (hasDocValues) {
+                    return SortedNumericDocValuesField.newSlowRangeQuery(
+                        field,
+                        HalfFloatPoint.halfFloatToSortableShort(l),
+                        HalfFloatPoint.halfFloatToSortableShort(u)
+                    );
                 }
-                return query;
+                return HalfFloatPoint.newRangeQuery(field, l, u);
             }
 
             @Override
@@ -309,18 +340,39 @@ public Float parse(XContentParser parser, boolean coerce) throws IOException {
             }
 
             @Override
-            public Query termQuery(String field, Object value) {
+            public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) {
                 float v = parse(value, false);
+                if (isSearchable && hasDocValues) {
+                    Query query = FloatPoint.newExactQuery(field, v);
+                    Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.floatToSortableInt(v));
+                    return new IndexOrDocValuesQuery(query, dvQuery);
+                }
+                if (hasDocValues) {
+                    return SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.floatToSortableInt(v));
+                }
                 return FloatPoint.newExactQuery(field, v);
             }
 
             @Override
-            public Query termsQuery(String field, List values) {
+            public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) {
                 float[] v = new float[values.size()];
+                long points[] = new long[v.length];
                 for (int i = 0; i < values.size(); ++i) {
                     v[i] = parse(values.get(i), false);
+                    if (hasDocValues) {
+                        points[i] = NumericUtils.floatToSortableInt(v[i]);
+                    }
+                }
+                if (isSearchable && hasDocValues) {
+                    return new IndexOrDocValuesQuery(
+                        FloatPoint.newSetQuery(field, v),
+                        SortedNumericDocValuesField.newSlowSetQuery(field, points)
+                    );
+                }
+                if (hasDocValues) {
+                    return SortedNumericDocValuesField.newSlowSetQuery(field, points);
                 }
-                return FloatPoint.newSetQuery(field, v);
+                return FloatPoint.newSetQuery(field, v); // no doc values on this path: use the points-only query, like the other numeric types
             }
 
             @Override
@@ -331,6 +383,7 @@ public Query rangeQuery(
                 boolean includeLower,
                 boolean includeUpper,
                 boolean hasDocValues,
+                boolean isSearchable,
                 QueryShardContext context
             ) {
                 float l = Float.NEGATIVE_INFINITY;
@@ -347,16 +400,23 @@ public Query rangeQuery(
                         u = FloatPoint.nextDown(u);
                     }
                 }
-                Query query = FloatPoint.newRangeQuery(field, l, u);
-                if (hasDocValues) {
+                if (isSearchable && hasDocValues) {
+                    Query query = FloatPoint.newRangeQuery(field, l, u);
                     Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(
                         field,
                         NumericUtils.floatToSortableInt(l),
                         NumericUtils.floatToSortableInt(u)
                     );
-                    query = new IndexOrDocValuesQuery(query, dvQuery);
+                    return new IndexOrDocValuesQuery(query, dvQuery);
                 }
-                return query;
+                if (hasDocValues) {
+                    return SortedNumericDocValuesField.newSlowRangeQuery(
+                        field,
+                        NumericUtils.floatToSortableInt(l),
+                        NumericUtils.floatToSortableInt(u)
+                    );
+                }
+                return FloatPoint.newRangeQuery(field, l, u);
             }
 
             @Override
@@ -406,16 +466,37 @@ public Double parse(XContentParser parser, boolean coerce) throws IOException {
             }
 
             @Override
-            public Query termQuery(String field, Object value) {
+            public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) {
                 double v = parse(value, false);
+                if (isSearchable && hasDocValues) {
+                    Query query = DoublePoint.newExactQuery(field, v);
+                    Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.doubleToSortableLong(v));
+                    return new IndexOrDocValuesQuery(query, dvQuery);
+                }
+                if (hasDocValues) {
+                    return SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.doubleToSortableLong(v));
+                }
                 return DoublePoint.newExactQuery(field, v);
             }
 
             @Override
-            public Query termsQuery(String field, List values)
{ + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { double[] v = new double[values.size()]; + long points[] = new long[v.length]; for (int i = 0; i < values.size(); ++i) { v[i] = parse(values.get(i), false); + if (hasDocValues) { + points[i] = NumericUtils.doubleToSortableLong(v[i]); + } + } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + DoublePoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, points) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); } return DoublePoint.newSetQuery(field, v); } @@ -428,19 +509,27 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return doubleRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> { - Query query = DoublePoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = DoublePoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery( field, NumericUtils.doubleToSortableLong(l), NumericUtils.doubleToSortableLong(u) ); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); } - return query; + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowRangeQuery( + field, + NumericUtils.doubleToSortableLong(l), + NumericUtils.doubleToSortableLong(u) + ); + } + return DoublePoint.newRangeQuery(field, l, u); }); } @@ -504,13 +593,13 @@ public Short parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { - return INTEGER.termQuery(field, value); + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termQuery(field, value, hasDocValues, isSearchable); } @Override - public Query 
termsQuery(String field, List values) { - return INTEGER.termsQuery(field, values); + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termsQuery(field, values, hasDocValues, isSearchable); } @Override @@ -521,9 +610,10 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { - return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, context); + return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, isSearchable, context); } @Override @@ -571,13 +661,13 @@ public Short parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { - return INTEGER.termQuery(field, value); + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termQuery(field, value, hasDocValues, isSearchable); } @Override - public Query termsQuery(String field, List values) { - return INTEGER.termsQuery(field, values); + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termsQuery(field, values, hasDocValues, isSearchable); } @Override @@ -588,9 +678,10 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { - return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, context); + return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, isSearchable, context); } @Override @@ -638,16 +729,24 @@ public Integer parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean 
hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } int v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = IntPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, v); + } return IntPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { int[] v = new int[values.size()]; int upTo = 0; @@ -664,6 +763,21 @@ public Query termsQuery(String field, List values) { if (upTo != v.length) { v = Arrays.copyOf(v, upTo); } + long points[] = new long[v.length]; + if (hasDocValues) { + for (int i = 0; i < v.length; i++) { + points[i] = v[i]; + } + } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + IntPoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, points) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); + } return IntPoint.newSetQuery(field, v); } @@ -675,6 +789,7 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { int l = Integer.MIN_VALUE; @@ -704,15 +819,23 @@ public Query rangeQuery( --u; } } - Query query = IntPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = IntPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(field)) { query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } + return query; } - return 
query; + if (hasDocValues) { + Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); + if (context.indexSortedOnField(field)) { + query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + } + return query; + } + return IntPoint.newRangeQuery(field, l, u); } @Override @@ -752,17 +875,28 @@ public Long parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } long v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = LongPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, v); + + } return LongPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { long[] v = new long[values.size()]; + int upTo = 0; for (int i = 0; i < values.size(); i++) { @@ -778,6 +912,16 @@ public Query termsQuery(String field, List values) { if (upTo != v.length) { v = Arrays.copyOf(v, upTo); } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + LongPoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, v) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, v); + + } return LongPoint.newSetQuery(field, v); } @@ -789,18 +933,28 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return longRangeQuery(lowerTerm, upperTerm, includeLower, 
includeUpper, (l, u) -> { - Query query = LongPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = LongPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(field)) { query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } + return query; } - return query; + if (hasDocValues) { + Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); + if (context.indexSortedOnField(field)) { + query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + } + return query; + } + return LongPoint.newRangeQuery(field, l, u); + }); } @@ -841,16 +995,24 @@ public BigInteger parse(XContentParser parser, boolean coerce) throws IOExceptio } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } BigInteger v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = BigIntegerPoint.newExactQuery(field, v); + Query dvQuery = SortedUnsignedLongDocValuesSetQuery.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedUnsignedLongDocValuesSetQuery.newSlowExactQuery(field, v); + } return BigIntegerPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocvalues, boolean isSearchable) { BigInteger[] v = new BigInteger[values.size()]; int upTo = 0; @@ -868,6 +1030,14 @@ public Query termsQuery(String field, List values) { v = Arrays.copyOf(v, upTo); } + if (isSearchable && hasDocvalues) { + Query query = BigIntegerPoint.newSetQuery(field, v); + 
Query dvQuery = SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocvalues) { + return SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); + } return BigIntegerPoint.newSetQuery(field, v); } @@ -879,15 +1049,19 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return unsignedLongRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> { - Query query = BigIntegerPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = BigIntegerPoint.newRangeQuery(field, l, u); Query dvQuery = SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); } - return query; + if (hasDocValues) { + return SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u); + } + return BigIntegerPoint.newRangeQuery(field, l, u); }); } @@ -941,9 +1115,9 @@ public final TypeParser parser() { return parser; } - public abstract Query termQuery(String field, Object value); + public abstract Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable); - public abstract Query termsQuery(String field, List values); + public abstract Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable); public abstract Query rangeQuery( String field, @@ -952,6 +1126,7 @@ public abstract Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ); @@ -1226,8 +1401,8 @@ public NumericType numericType() { @Override public Query termQuery(Object value, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.termQuery(name(), value); + failIfNotIndexedAndNoDocValues(); + Query query = 
type.termQuery(name(), value, hasDocValues(), isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -1236,8 +1411,8 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.termsQuery(name(), values); + failIfNotIndexedAndNoDocValues(); + Query query = type.termsQuery(name(), values, hasDocValues(), isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -1246,8 +1421,17 @@ public Query termsQuery(List values, QueryShardContext context) { @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.rangeQuery(name(), lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues(), context); + failIfNotIndexedAndNoDocValues(); + Query query = type.rangeQuery( + name(), + lowerTerm, + upperTerm, + includeLower, + includeUpper, + hasDocValues(), + isSearchable(), + context + ); if (boost() != 1f) { query = new BoostQuery(query, boost()); } diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index 3c30bb81a9a32..af852b12e7a30 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -66,6 +66,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.MappedFieldType.Relation; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; @@ -118,15 
+119,27 @@ public void testIsFieldWithinQuery() throws IOException { public void testIntegerTermsQueryWithDecimalPart() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); - assertEquals(IntPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1, 2.1), null)); - assertEquals(IntPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1.0, 2.1), null)); + assertEquals( + new IndexOrDocValuesQuery(IntPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1, 2.1), null) + ); + assertEquals( + new IndexOrDocValuesQuery(IntPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1.0, 2.1), null) + ); assertTrue(ft.termsQuery(Arrays.asList(1.1, 2.1), null) instanceof MatchNoDocsQuery); } public void testLongTermsQueryWithDecimalPart() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.LONG); - assertEquals(LongPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1, 2.1), null)); - assertEquals(LongPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1.0, 2.1), null)); + assertEquals( + new IndexOrDocValuesQuery(LongPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1, 2.1), null) + ); + assertEquals( + new IndexOrDocValuesQuery(LongPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1.0, 2.1), null) + ); assertTrue(ft.termsQuery(Arrays.asList(1.1, 2.1), null) instanceof MatchNoDocsQuery); } @@ -151,16 +164,18 @@ public void testLongTermQueryWithDecimalPart() { } private static MappedFieldType unsearchable() { - return new NumberFieldType("field", NumberType.LONG, false, false, true, true, null, Collections.emptyMap()); + return new NumberFieldType("field", NumberType.LONG, false, false, false, true, null, 
Collections.emptyMap()); } public void testTermQuery() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.LONG); - assertEquals(LongPoint.newExactQuery("field", 42), ft.termQuery("42", null)); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery("field", 42); + Query query = new IndexOrDocValuesQuery(LongPoint.newExactQuery("field", 42), dvQuery); + assertEquals(query, ft.termQuery("42", null)); MappedFieldType unsearchable = unsearchable(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("42", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testRangeQueryWithNegativeBounds() { @@ -380,7 +395,7 @@ public void testLongRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testUnsignedLongRangeQuery() { @@ -396,7 +411,23 @@ public void testUnsignedLongRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); + } + + public void testUnsignedLongTermsQuery() { + MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.UNSIGNED_LONG); + Query expected = new IndexOrDocValuesQuery( + BigIntegerPoint.newSetQuery("field", 
BigInteger.valueOf(1), BigInteger.valueOf(3)), + SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery("field", BigInteger.valueOf(1), BigInteger.valueOf(3)) + ); + assertEquals(expected, ft.termsQuery(List.of("1", "3"), MOCK_QSC)); + + MappedFieldType unsearchable = unsearchable(); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.termsQuery(List.of("1", "3"), MOCK_QSC) + ); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testDoubleRangeQuery() { @@ -416,7 +447,7 @@ public void testDoubleRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testConversions() { @@ -518,8 +549,8 @@ public void testHalfFloatRange() throws IOException { float u = (randomFloat() * 2 - 1) * 65504; boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query floatQ = NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper, false, MOCK_QSC); - Query halfFloatQ = NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query floatQ = NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper, false, true, MOCK_QSC); + Query halfFloatQ = NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper, false, true, MOCK_QSC); assertEquals(searcher.count(floatQ), searcher.count(halfFloatQ)); } IOUtils.close(reader, dir); @@ -549,8 +580,17 @@ public void testUnsignedLongRange() throws IOException { BigInteger u = randomUnsignedLong(); boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query 
unsignedLongQ = NumberType.UNSIGNED_LONG.rangeQuery("unsigned_long", l, u, includeLower, includeUpper, false, MOCK_QSC); - Query doubleQ = NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query unsignedLongQ = NumberType.UNSIGNED_LONG.rangeQuery( + "unsigned_long", + l, + u, + includeLower, + includeUpper, + false, + true, + MOCK_QSC + ); + Query doubleQ = NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, true, MOCK_QSC); assertEquals(searcher.count(doubleQ), searcher.count(unsignedLongQ)); } IOUtils.close(reader, dir); @@ -558,21 +598,23 @@ public void testUnsignedLongRange() throws IOException { public void testNegativeZero() { assertEquals( - NumberType.DOUBLE.rangeQuery("field", null, -0d, true, true, false, MOCK_QSC), - NumberType.DOUBLE.rangeQuery("field", null, +0d, true, false, false, MOCK_QSC) + NumberType.DOUBLE.rangeQuery("field", null, -0d, true, true, false, true, MOCK_QSC), + NumberType.DOUBLE.rangeQuery("field", null, +0d, true, false, false, true, MOCK_QSC) ); assertEquals( - NumberType.FLOAT.rangeQuery("field", null, -0f, true, true, false, MOCK_QSC), - NumberType.FLOAT.rangeQuery("field", null, +0f, true, false, false, MOCK_QSC) + NumberType.FLOAT.rangeQuery("field", null, -0f, true, true, false, true, MOCK_QSC), + NumberType.FLOAT.rangeQuery("field", null, +0f, true, false, false, true, MOCK_QSC) ); assertEquals( - NumberType.HALF_FLOAT.rangeQuery("field", null, -0f, true, true, false, MOCK_QSC), - NumberType.HALF_FLOAT.rangeQuery("field", null, +0f, true, false, false, MOCK_QSC) + NumberType.HALF_FLOAT.rangeQuery("field", null, -0f, true, true, false, true, MOCK_QSC), + NumberType.HALF_FLOAT.rangeQuery("field", null, +0f, true, false, false, true, MOCK_QSC) ); - assertFalse(NumberType.DOUBLE.termQuery("field", -0d).equals(NumberType.DOUBLE.termQuery("field", +0d))); - assertFalse(NumberType.FLOAT.termQuery("field", -0f).equals(NumberType.FLOAT.termQuery("field", +0f))); - 
assertFalse(NumberType.HALF_FLOAT.termQuery("field", -0f).equals(NumberType.HALF_FLOAT.termQuery("field", +0f))); + assertFalse(NumberType.DOUBLE.termQuery("field", -0d, true, true).equals(NumberType.DOUBLE.termQuery("field", +0d, true, true))); + assertFalse(NumberType.FLOAT.termQuery("field", -0f, true, true).equals(NumberType.FLOAT.termQuery("field", +0f, true, true))); + assertFalse( + NumberType.HALF_FLOAT.termQuery("field", -0f, true, true).equals(NumberType.HALF_FLOAT.termQuery("field", +0f, true, true)) + ); } // Make sure we construct the IndexOrDocValuesQuery objects with queries that match @@ -628,6 +670,7 @@ public void doTestDocValueRangeQueries(NumberType type, Supplier valueSu randomBoolean(), randomBoolean(), true, + true, MOCK_QSC ); assertThat(query, instanceOf(IndexOrDocValuesQuery.class)); @@ -708,6 +751,7 @@ public void doTestIndexSortRangeQueries(NumberType type, Supplier valueS randomBoolean(), randomBoolean(), true, + true, context ); assertThat(query, instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)); diff --git a/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java index 3ac9bce840a20..c5bdf9b586df1 100644 --- a/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; @@ -112,6 +113,7 @@ protected void doAssertLuceneQuery(TermQueryBuilder queryBuilder, Query query, Q either(instanceOf(TermQuery.class)).or(instanceOf(PointRangeQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) .or(instanceOf(AutomatonQuery.class)) + 
.or(instanceOf(IndexOrDocValuesQuery.class)) ); MappedFieldType mapper = context.fieldMapper(queryBuilder.fieldName()); if (query instanceof TermQuery) { diff --git a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java index b587bec2d5343..32bf290627b63 100644 --- a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; @@ -135,6 +136,7 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, either(instanceOf(TermInSetQuery.class)).or(instanceOf(PointInSetQuery.class)) .or(instanceOf(ConstantScoreQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) + .or(instanceOf(IndexOrDocValuesQuery.class)) ); if (query instanceof ConstantScoreQuery) { assertThat(((ConstantScoreQuery) query).getQuery(), instanceOf(BooleanQuery.class));