diff --git a/CHANGELOG.md b/CHANGELOG.md index 506e7b1f35f0c..850749dbf4c95 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add took time to request nodes stats ([#15054](https://github.com/opensearch-project/OpenSearch/pull/15054)) - [Workload Management] Add Get QueryGroup API Logic ([14709](https://github.com/opensearch-project/OpenSearch/pull/14709)) - [Workload Management] QueryGroup resource tracking framework changes ([#13897](https://github.com/opensearch-project/OpenSearch/pull/13897)) +- Support filtering on a large list encoded by bitmap ([#14774](https://github.com/opensearch-project/OpenSearch/pull/14774)) - Add slice execution listeners to SearchOperationListener interface ([#15153](https://github.com/opensearch-project/OpenSearch/pull/15153)) ### Dependencies diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml new file mode 100644 index 0000000000000..d728070adb188 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/370_bitmap_filtering.yml @@ -0,0 +1,184 @@ +--- +setup: + - skip: + version: " - 2.99.99" + reason: The bitmap filtering feature is available in 2.17 and later. + - do: + indices.create: + index: students + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + student_id: + type: integer + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "students", "_id": "1" } } + - { "name": "Jane Doe", "student_id": 111 } + - { "index": { "_index": "students", "_id": "2" } } + - { "name": "Mary Major", "student_id": 222 } + - { "index": { "_index": "students", "_id": "3" } } + - { "name": "John Doe", "student_id": 333 } + - do: + indices.create: + index: classes + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + enrolled: + type: binary + store: true + - do: + bulk: + refresh: true + body: + - { "index": { "_index": "classes", "_id": "101" } } + - { "enrolled": "OjAAAAEAAAAAAAEAEAAAAG8A3gA=" } # 111,222 + - { "index": { "_index": "classes", "_id": "102" } } + - { "enrolled": "OjAAAAEAAAAAAAAAEAAAAG8A" } # 111 + - { "index": { "_index": "classes", "_id": "103" } } + - { "enrolled": "OjAAAAEAAAAAAAAAEAAAAE0B" } # 333 + - { "index": { "_index": "classes", "_id": "104" } } + - { "enrolled": "OjAAAAEAAAAAAAEAEAAAAN4ATQE=" } # 222,333 + - do: + cluster.health: + wait_for_status: green + +--- +"Terms lookup on a binary field with bitmap": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + } + - match: { hits.total: 2 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + - match: { hits.hits.1._source.name: Mary Major } + - match: { hits.hits.1._source.student_id: 222 } + +--- +"Terms query accepting bitmap as value": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "terms": { + "student_id": ["OjAAAAEAAAAAAAEAEAAAAG8A3gA="], + "value_type": "bitmap" + } + } + } + - match: { hits.total: 2 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + - match: { hits.hits.1._source.name: Mary Major } + - match: { hits.hits.1._source.student_id: 222 } + +--- +"Boolean must bitmap filtering": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "bool": { + "must": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ], + "must_not": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "104", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ] + } + } + } + - match: { hits.total: 1 } + - match: { hits.hits.0._source.name: Jane Doe } + - match: { hits.hits.0._source.student_id: 111 } + +--- +"Boolean should bitmap filtering": + - do: + search: + rest_total_hits_as_int: true + index: students + body: { + "query": { + "bool": { + "should": [ + { + "terms": { + "student_id": { + "index": "classes", + "id": "101", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + }, + { + "terms": { + "student_id": { + "index": "classes", + "id": "104", + "path": "enrolled", + "store": true + }, + "value_type": "bitmap" + } + } + ] + } + } + } + - match: { hits.total: 3 } + - match: { hits.hits.0._source.name: Mary Major } + - match: { hits.hits.0._source.student_id: 222 } + - match: { hits.hits.1._source.name: Jane Doe } + - match: { hits.hits.1._source.student_id: 111 } + - match: { hits.hits.2._source.name: John Doe } + - match: { hits.hits.2._source.student_id: 333 } diff --git a/server/build.gradle b/server/build.gradle index 10e0e35dd2bef..1634d91346e71 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -125,6 +125,9 @@ dependencies { api "com.google.protobuf:protobuf-java:${versions.protobuf}" api "jakarta.annotation:jakarta.annotation-api:${versions.jakarta_annotation}" + // https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap + implementation 'org.roaringbitmap:RoaringBitmap:1.1.0' + testImplementation(project(":test:framework")) { // tests use the locally compiled version of server exclude group: 'org.opensearch', module: 'server' diff --git a/server/licenses/RoaringBitmap-1.1.0.jar.sha1 b/server/licenses/RoaringBitmap-1.1.0.jar.sha1 new file mode 100644 index 0000000000000..bf34e11b92710 --- /dev/null +++ b/server/licenses/RoaringBitmap-1.1.0.jar.sha1 @@ -0,0 +1 @@ +9607213861158ae7060234d93ee9c9cb19f494d1 \ No newline at end of file diff --git a/server/licenses/RoaringBitmap-LICENSE.txt b/server/licenses/RoaringBitmap-LICENSE.txt new file mode 100644 index 0000000000000..3bdd0036295a6 --- /dev/null +++ b/server/licenses/RoaringBitmap-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2013-2016 the RoaringBitmap authors + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/server/licenses/RoaringBitmap-NOTICE.txt b/server/licenses/RoaringBitmap-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java index a58db51780826..1933d9ebfdc6c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/SearchQueryIT.java @@ -52,6 +52,7 @@ import org.opensearch.common.time.DateFormatter; import org.opensearch.common.unit.Fuzziness; import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.rest.RestStatus; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; @@ -66,6 +67,7 @@ import org.opensearch.index.query.QueryBuilders; import org.opensearch.index.query.RangeQueryBuilder; import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.index.query.WildcardQueryBuilder; import org.opensearch.index.query.WrapperQueryBuilder; import org.opensearch.index.query.functionscore.ScoreFunctionBuilders; @@ -84,6 +86,7 @@ import java.io.IOException; import java.io.Reader; +import java.nio.ByteBuffer; import java.time.Instant; import java.time.ZoneId; import java.time.ZoneOffset; @@ -98,6 +101,8 @@ import java.util.concurrent.ExecutionException; import java.util.regex.Pattern; +import org.roaringbitmap.RoaringBitmap; + import static java.util.Collections.singletonMap; import static org.opensearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.opensearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; @@ -1157,6 +1162,41 @@ public void testTermsQuery() throws Exception { assertHitCount(searchResponse, 0L); } + public void testTermsQueryWithBitmapDocValuesQuery() throws Exception { + assertAcked( + prepareCreate("products").setMapping( + jsonBuilder().startObject() + .startObject("properties") + .startObject("product") + .field("type", "integer") + .field("index", false) + .endObject() + .endObject() + .endObject() + ) + ); + indexRandom( + true, + client().prepareIndex("products").setId("1").setSource("product", 1), + client().prepareIndex("products").setId("2").setSource("product", 2), + client().prepareIndex("products").setId("3").setSource("product", new int[] { 1, 3 }), + client().prepareIndex("products").setId("4").setSource("product", 4) + ); + + RoaringBitmap r = new RoaringBitmap(); + r.add(1); + r.add(4); + byte[] array = new byte[r.serializedSizeInBytes()]; + r.serialize(ByteBuffer.wrap(array)); + BytesArray bitmap = new BytesArray(array); + // directly building the terms query builder, so pass in the bitmap value as BytesArray + SearchResponse searchResponse = client().prepareSearch("products") + .setQuery(constantScoreQuery(termsQuery("product", bitmap).valueType(TermsQueryBuilder.ValueType.BITMAP))) + .get(); + assertHitCount(searchResponse, 3L); + assertSearchHits(searchResponse, "1", "3", "4"); + } + public void testTermsLookupFilter() throws Exception { assertAcked(prepareCreate("lookup").setMapping("terms", "type=text", "other", "type=text")); indexRandomForConcurrentSearch("lookup"); diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 27e62c3746a8e..9286b5c64b5f2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -46,8 +46,10 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; @@ -58,6 +60,7 @@ import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; @@ -68,13 +71,16 @@ import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.DocValueFormat; import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.query.BitmapDocValuesQuery; import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.time.ZoneId; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -82,6 +88,8 @@ import java.util.function.Function; import java.util.function.Supplier; +import org.roaringbitmap.RoaringBitmap; + /** * A {@link FieldMapper} for numeric types: byte, short, int, long, float and double. * @@ -822,6 +830,24 @@ public Query termsQuery(String field, List values, boolean hasDocValues, return IntPoint.newSetQuery(field, v); } + @Override + public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable, boolean hasDocValues) { + RoaringBitmap bitmap = new RoaringBitmap(); + try { + bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array())); + } catch (Exception e) { + throw new IllegalArgumentException("Failed to deserialize the bitmap.", e); + } + + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); + } + if (isSearchable) { + return bitmapIndexQuery(field, bitmap); + } + return new BitmapDocValuesQuery(field, bitmap); + } + @Override public Query rangeQuery( String field, @@ -1174,6 +1200,10 @@ public final TypeParser parser() { public abstract Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable); + public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable, boolean hasDocValues) { + throw new IllegalArgumentException("Field [" + name + "] of type [" + typeName() + "] does not support bitmap queries"); + } + public abstract Query rangeQuery( String field, Object lowerTerm, @@ -1415,6 +1445,40 @@ public static Query unsignedLongRangeQuery( } return builder.apply(l, u); } + + static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) { + final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); + return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { + + final Iterator iterator = bitmap.iterator(); + + @Override + public BytesRef next() { + int value; + if (iterator.hasNext()) { + value = iterator.next(); + } else { + return null; + } + IntPoint.encodeDimension(value, encoded.bytes, 0); + return encoded; + } + }) { + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.isEmpty()) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + protected String toString(byte[] value) { + assert value.length == Integer.BYTES; + return Integer.toString(IntPoint.decodeDimension(value, 0)); + } + }; + } } /** @@ -1495,6 +1559,11 @@ public Query termsQuery(List values, QueryShardContext context) { return query; } + public Query bitmapQuery(BytesArray bitmap) { + failIfNotIndexedAndNoDocValues(); + return type.bitmapQuery(name(), bitmap, isSearchable(), hasDocValues()); + } + @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { failIfNotIndexedAndNoDocValues(); diff --git a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java index ac0ca3919ea38..4b92d6a1f5460 100644 --- a/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/TermsQueryBuilder.java @@ -37,14 +37,17 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.opensearch.Version; import org.opensearch.action.get.GetRequest; import org.opensearch.client.Client; import org.opensearch.common.SetOnce; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.support.XContentMapValues; +import org.opensearch.core.ParseField; import org.opensearch.core.action.ActionListener; import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; @@ -53,6 +56,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.ConstantFieldType; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.indices.TermsLookup; import java.io.IOException; @@ -60,6 +64,7 @@ import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -82,6 +87,39 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { private final TermsLookup termsLookup; private final Supplier> supplier; + private static final ParseField VALUE_TYPE_FIELD = new ParseField("value_type"); + private ValueType valueType = ValueType.DEFAULT; + + /** + * Terms query may accept different types of value + *

+ * This flag is used to decide how to parse the value and build query upon later + */ + public enum ValueType { + DEFAULT("default"), + BITMAP("bitmap"); + + private final String type; + + ValueType(String type) { + this.type = type; + } + + static ValueType fromString(String type) { + for (ValueType valueType : ValueType.values()) { + if (valueType.type.equalsIgnoreCase(type)) { + return valueType; + } + } + throw new IllegalArgumentException(type + " is not valid " + VALUE_TYPE_FIELD); + } + } + + public TermsQueryBuilder valueType(ValueType valueType) { + this.valueType = valueType; + return this; + } + public TermsQueryBuilder(String fieldName, TermsLookup termsLookup) { this(fieldName, null, termsLookup); } @@ -187,6 +225,11 @@ public TermsQueryBuilder(String fieldName, Iterable values) { this.supplier = null; } + private TermsQueryBuilder(String fieldName, Iterable values, ValueType valueType) { + this(fieldName, values); + this.valueType = valueType; + } + private TermsQueryBuilder(String fieldName, Supplier> supplier) { this.fieldName = fieldName; this.values = null; @@ -194,6 +237,11 @@ private TermsQueryBuilder(String fieldName, Supplier> supplier) { this.supplier = supplier; } + private TermsQueryBuilder(String fieldName, Supplier> supplier, ValueType valueType) { + this(fieldName, supplier); + this.valueType = valueType; + } + /** * Read from a stream. */ @@ -203,6 +251,9 @@ public TermsQueryBuilder(StreamInput in) throws IOException { termsLookup = in.readOptionalWriteable(TermsLookup::new); values = (List) in.readGenericValue(); this.supplier = null; + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + valueType = in.readEnum(ValueType.class); + } } @Override @@ -213,6 +264,9 @@ protected void doWriteTo(StreamOutput out) throws IOException { out.writeString(fieldName); out.writeOptionalWriteable(termsLookup); out.writeGenericValue(values); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeEnum(valueType); + } } public String fieldName() { @@ -360,6 +414,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep builder.field(fieldName, convertBack(values)); } printBoostAndQueryName(builder); + if (valueType != ValueType.DEFAULT) { + builder.field(VALUE_TYPE_FIELD.getPreferredName(), valueType.type); + } builder.endObject(); } @@ -371,6 +428,8 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc String queryName = null; float boost = AbstractQueryBuilder.DEFAULT_BOOST; + String valueTypeStr = ValueType.DEFAULT.type; + XContentParser.Token token; String currentFieldName = null; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -406,6 +465,8 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc boost = parser.floatValue(); } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { queryName = parser.text(); + } else if (VALUE_TYPE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { + valueTypeStr = parser.text(); } else { throw new ParsingException( parser.getTokenLocation(), @@ -430,7 +491,18 @@ public static TermsQueryBuilder fromXContent(XContentParser parser) throws IOExc ); } - return new TermsQueryBuilder(fieldName, values, termsLookup).boost(boost).queryName(queryName); + ValueType valueType = ValueType.fromString(valueTypeStr); + if (valueType == ValueType.BITMAP) { + if (values != null && values.size() == 1 && values.get(0) instanceof BytesRef) { + values.set(0, new BytesArray(Base64.getDecoder().decode(((BytesRef) values.get(0)).utf8ToString()))); + } else if (termsLookup == null) { + throw new IllegalArgumentException( + "Invalid value for bitmap type: Expected a single-element array with a base64 encoded serialized bitmap." + ); + } + } + + return new TermsQueryBuilder(fieldName, values, termsLookup).boost(boost).queryName(queryName).valueType(valueType); } static List parseValues(XContentParser parser) throws IOException { @@ -473,17 +545,37 @@ protected Query doToQuery(QueryShardContext context) throws IOException { if (fieldType == null) { throw new IllegalStateException("Rewrite first"); } + if (valueType == ValueType.BITMAP) { + if (values.size() == 1 && values.get(0) instanceof BytesArray) { + if (fieldType instanceof NumberFieldMapper.NumberFieldType) { + return ((NumberFieldMapper.NumberFieldType) fieldType).bitmapQuery((BytesArray) values.get(0)); + } + } + } return fieldType.termsQuery(values, context); } private void fetch(TermsLookup termsLookup, Client client, ActionListener> actionListener) { GetRequest getRequest = new GetRequest(termsLookup.index(), termsLookup.id()); getRequest.preference("_local").routing(termsLookup.routing()); + if (termsLookup.store()) { + getRequest.storedFields(termsLookup.path()); + } client.get(getRequest, ActionListener.delegateFailure(actionListener, (delegatedListener, getResponse) -> { List terms = new ArrayList<>(); - if (getResponse.isSourceEmpty() == false) { // extract terms only if the doc source exists - List extractedValues = XContentMapValues.extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); - terms.addAll(extractedValues); + if (termsLookup.store()) { + List values = getResponse.getField(termsLookup.path()).getValues(); + if (values.size() != 1 && valueType == ValueType.BITMAP) { + throw new IllegalArgumentException( + "Invalid value for bitmap type: Expected a single base64 encoded serialized bitmap." + ); + } + terms.addAll(values); + } else { + if (getResponse.isSourceEmpty() == false) { // extract terms only if the doc source exists + List extractedValues = XContentMapValues.extractRawValues(termsLookup.path(), getResponse.getSourceAsMap()); + terms.addAll(extractedValues); + } } delegatedListener.onResponse(terms); })); @@ -491,7 +583,7 @@ private void fetch(TermsLookup termsLookup, Client client, ActionListener> supplier = new SetOnce<>(); queryRewriteContext.registerAsyncAction((client, listener) -> fetch(termsLookup, client, ActionListener.map(listener, list -> { supplier.set(list); return null; }))); - return new TermsQueryBuilder(this.fieldName, supplier::get); + return new TermsQueryBuilder(this.fieldName, supplier::get, valueType); } if (values == null || values.isEmpty()) { diff --git a/server/src/main/java/org/opensearch/indices/TermsLookup.java b/server/src/main/java/org/opensearch/indices/TermsLookup.java index 37533c0809d7a..7337ed31ce41e 100644 --- a/server/src/main/java/org/opensearch/indices/TermsLookup.java +++ b/server/src/main/java/org/opensearch/indices/TermsLookup.java @@ -87,6 +87,9 @@ public TermsLookup(StreamInput in) throws IOException { path = in.readString(); index = in.readString(); routing = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_2_17_0)) { + store = in.readBoolean(); + } } @Override @@ -98,6 +101,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(path); out.writeString(index); out.writeOptionalString(routing); + if (out.getVersion().onOrAfter(Version.V_2_17_0)) { + out.writeBoolean(store); + } } public String index() { @@ -121,6 +127,17 @@ public TermsLookup routing(String routing) { return this; } + private boolean store; + + public boolean store() { + return store; + } + + public TermsLookup store(boolean store) { + this.store = store; + return this; + } + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("terms_lookup", args -> { String index = (String) args[0]; String id = (String) args[1]; @@ -132,6 +149,7 @@ public TermsLookup routing(String routing) { PARSER.declareString(constructorArg(), new ParseField("id")); PARSER.declareString(constructorArg(), new ParseField("path")); PARSER.declareString(TermsLookup::routing, new ParseField("routing")); + PARSER.declareBoolean(TermsLookup::store, new ParseField("store")); } public static TermsLookup parseTermsLookup(XContentParser parser) throws IOException { @@ -151,12 +169,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (routing != null) { builder.field("routing", routing); } + if (store) { + builder.field("store", true); + } return builder; } @Override public int hashCode() { - return Objects.hash(index, id, path, routing); + return Objects.hash(index, id, path, routing, store); } @Override @@ -171,6 +192,7 @@ public boolean equals(Object obj) { return Objects.equals(index, other.index) && Objects.equals(id, other.id) && Objects.equals(path, other.path) - && Objects.equals(routing, other.routing); + && Objects.equals(routing, other.routing) + && Objects.equals(store, other.store); } } diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java new file mode 100644 index 0000000000000..dfa5fc4567f80 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -0,0 +1,152 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; + +import java.io.IOException; +import java.util.Objects; + +import org.roaringbitmap.RoaringBitmap; + +/** + * Filter with bitmap + *

+ * Similar to Lucene SortedNumericDocValuesSetQuery + */ +public class BitmapDocValuesQuery extends Query implements Accountable { + + final String field; + final RoaringBitmap bitmap; + final long min; + final long max; + + public BitmapDocValuesQuery(String field, RoaringBitmap bitmap) { + this.field = field; + this.bitmap = bitmap; + if (!bitmap.isEmpty()) { + min = bitmap.first(); + max = bitmap.last(); + } else { + min = 0; // final field + max = 0; + } + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedNumericDocValues values = DocValues.getSortedNumeric(context.reader(), field); + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + final TwoPhaseIterator iterator; + if (singleton != null) { + iterator = new TwoPhaseIterator(singleton) { + @Override + public boolean matches() throws IOException { + long value = singleton.longValue(); + return value >= min && value <= max && bitmap.contains((int) value); + } + + @Override + public float matchCost() { + return 5; // 2 comparisons, possible lookup in the bitmap + } + }; + } else { + iterator = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + int count = values.docValueCount(); + for (int i = 0; i < count; i++) { + final long value = values.nextValue(); + if (value < min) { + continue; + } else if (value > max) { + return false; // values are sorted, terminate + } else if (bitmap.contains((int) value)) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + return 5; // 2 comparisons, possible lookup in the bitmap + } + }; + } + return new ConstantScoreScorer(this, score(), scoreMode, iterator); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, field); + } + }; + } + + @Override + public String toString(String field) { + // bitmap may contain high cardinality, so choose to not show the actual values in it + return field + " cardinality: " + bitmap.getLongCardinality(); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (bitmap.isEmpty()) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + BitmapDocValuesQuery that = (BitmapDocValuesQuery) other; + return field.equals(that.field) && bitmap.equals(that.bitmap); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, bitmap); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.shallowSizeOfInstance(BitmapDocValuesQuery.class) + RamUsageEstimator.sizeOfObject(field) + + RamUsageEstimator.sizeOfObject(bitmap); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index 96487db6dd512..b27ef49303205 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -62,6 +62,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.util.BigArrays; import org.opensearch.common.util.io.IOUtils; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; @@ -73,18 +74,22 @@ import org.opensearch.index.mapper.NumberFieldMapper.NumberType; import org.opensearch.index.query.QueryShardContext; import org.opensearch.search.MultiValueMode; +import org.opensearch.search.query.BitmapDocValuesQuery; import org.junit.Before; import java.io.ByteArrayInputStream; import java.io.IOException; import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.function.Supplier; +import org.roaringbitmap.RoaringBitmap; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.equalTo; @@ -947,4 +952,33 @@ public void testFetchSourceValue() throws IOException { assertEquals(Collections.singletonList(2.71f), fetchSourceValue(nullValueMapper, "")); assertEquals(Collections.singletonList(2.71f), fetchSourceValue(nullValueMapper, null)); } + + public void testBitmapQuery() throws IOException { + RoaringBitmap r = new RoaringBitmap(); + byte[] array = new byte[r.serializedSizeInBytes()]; + r.serialize(ByteBuffer.wrap(array)); + BytesArray bitmap = new BytesArray(array); + + NumberFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); + assertEquals( + new IndexOrDocValuesQuery(NumberType.bitmapIndexQuery("field", r), new BitmapDocValuesQuery("field", r)), + ft.bitmapQuery(bitmap) + ); + + ft = new NumberFieldType("field", NumberType.INTEGER, false, false, true, true, null, Collections.emptyMap()); + assertEquals(new BitmapDocValuesQuery("field", r), ft.bitmapQuery(bitmap)); + + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig()); + DirectoryReader reader = DirectoryReader.open(w); + assertEquals(new MatchNoDocsQuery(), ft.bitmapQuery(bitmap).rewrite(newSearcher(reader))); + reader.close(); + w.close(); + dir.close(); + + NumberType type = randomValueOtherThan(NumberType.INTEGER, () -> randomFrom(NumberType.values())); + ft = new NumberFieldMapper.NumberFieldType("field", type); + NumberFieldType finalFt = ft; + assertThrows(IllegalArgumentException.class, () -> finalFt.bitmapQuery(bitmap)); + } } diff --git a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java index 7b45308a523c8..ce3eeb0e3bf69 100644 --- a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java @@ -46,6 +46,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.action.get.GetRequest; import org.opensearch.action.get.GetResponse; +import org.opensearch.common.document.DocumentField; import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.common.ParsingException; @@ -59,15 +60,20 @@ import org.junit.Before; import java.io.IOException; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; +import org.roaringbitmap.RoaringBitmap; + import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.instanceOf; @@ -120,10 +126,9 @@ protected TermsQueryBuilder doCreateTestQueryBuilder() { } private TermsLookup randomTermsLookup() { - // Randomly choose between a typeless terms lookup and one with an explicit type to make sure we are TermsLookup lookup = new TermsLookup(randomAlphaOfLength(10), randomAlphaOfLength(10), termsPath); - // testing both cases. lookup.routing(randomBoolean() ? randomAlphaOfLength(10) : null); + lookup.store(randomBoolean()); return lookup; } @@ -245,7 +250,17 @@ public GetResponse executeGet(GetRequest getRequest) { } catch (IOException ex) { throw new OpenSearchException("boom", ex); } - return new GetResponse(new GetResult(getRequest.index(), getRequest.id(), 0, 1, 0, true, new BytesArray(json), null, null)); + Map documentField = new HashMap<>(); + List nonNullTerms = new ArrayList<>(); + for (Object obj : randomTerms) { + if (obj != null) { + nonNullTerms.add(obj); + } + } + documentField.put(termsPath, new DocumentField(termsPath, nonNullTerms)); + return new GetResponse( + new GetResult(getRequest.index(), getRequest.id(), 0, 1, 0, true, new BytesArray(json), documentField, null) + ); } public void testNumeric() throws IOException { @@ -388,4 +403,50 @@ protected QueryBuilder parseQuery(XContentParser parser) throws IOException { } } + public void testFromJsonWithValueType() throws IOException { + String json = "{\n" + + " \"terms\": {\n" + + " \"student_id\": [\"OjAAAAEAAAAAAAEAEAAAAG8A3gA=\"],\n" + + " \"boost\" : 1.0,\n" + + " \"value_type\": \"bitmap\"\n" + + " }\n" + + "}"; + + TermsQueryBuilder parsed = (TermsQueryBuilder) parseQuery(json); + checkGeneratedJson(json, parsed); + assertEquals(json, 1, parsed.values().size()); + } + + public void testFromJsonWithValueTypeFail() { + String json = "{\n" + + " \"terms\": {\n" + + " \"student_id\": [\"OjAAAAEAAAAAAAEAEAAAAG8A3gA=\", \"2\"],\n" + + " \"boost\" : 1.0,\n" + + " \"value_type\": \"bitmap\"\n" + + " }\n" + + "}"; + + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> parseQuery(json)); + assertEquals( + "Invalid value for bitmap type: Expected a single-element array with a base64 encoded serialized bitmap.", + e.getMessage() + ); + } + + public void testTermsLookupBitmap() throws IOException { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(111); + bitmap.add(333); + byte[] array = new byte[bitmap.serializedSizeInBytes()]; + bitmap.serialize(ByteBuffer.wrap(array)); + randomTerms = List.of(new BytesArray(array)); // this will be fetched back by terms lookup + + TermsQueryBuilder query = new TermsQueryBuilder(INT_FIELD_NAME, randomTermsLookup().store(true)).valueType( + TermsQueryBuilder.ValueType.BITMAP + ); + QueryShardContext context = createShardContext(); + QueryBuilder rewritten = rewriteQuery(query, new QueryShardContext(context)); + Query luceneQuery = rewritten.toQuery(context); + assertTrue(luceneQuery instanceof IndexOrDocValuesQuery); + } } diff --git a/server/src/test/java/org/opensearch/indices/TermsLookupTests.java b/server/src/test/java/org/opensearch/indices/TermsLookupTests.java index 8a7867729f2c1..3f45bc86104dd 100644 --- a/server/src/test/java/org/opensearch/indices/TermsLookupTests.java +++ b/server/src/test/java/org/opensearch/indices/TermsLookupTests.java @@ -48,12 +48,15 @@ public void testTermsLookup() { String id = randomAlphaOfLengthBetween(1, 10); String path = randomAlphaOfLengthBetween(1, 10); String routing = randomAlphaOfLengthBetween(1, 10); + boolean store = randomBoolean(); TermsLookup termsLookup = new TermsLookup(index, id, path); termsLookup.routing(routing); + termsLookup.store(store); assertEquals(index, termsLookup.index()); assertEquals(id, termsLookup.id()); assertEquals(path, termsLookup.path()); assertEquals(routing, termsLookup.routing()); + assertEquals(store, termsLookup.store()); } public void testIllegalArguments() { @@ -109,6 +112,6 @@ public void testXContentParsing() throws IOException { public static TermsLookup randomTermsLookup() { return new TermsLookup(randomAlphaOfLength(10), randomAlphaOfLength(10), randomAlphaOfLength(10).replace('.', '_')).routing( randomBoolean() ? randomAlphaOfLength(10) : null - ); + ).store(randomBoolean()); } } diff --git a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java new file mode 100644 index 0000000000000..6e293d1ec69fd --- /dev/null +++ b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java @@ -0,0 +1,149 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.Weight; +import org.apache.lucene.store.Directory; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import org.roaringbitmap.RoaringBitmap; + +public class BitmapDocValuesQueryTests extends OpenSearchTestCase { + private Directory dir; + private IndexWriter w; + private DirectoryReader reader; + private IndexSearcher searcher; + + @Before + public void initSearcher() throws IOException { + dir = newDirectory(); + w = new IndexWriter(dir, newIndexWriterConfig()); + } + + @After + public void closeAllTheReaders() throws IOException { + reader.close(); + w.close(); + dir.close(); + } + + public void testScore() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(1); + bitmap.add(4); + BitmapDocValuesQuery query = new BitmapDocValuesQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + List actual = new LinkedList<>(); + for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + // use doc values to get the actual value of the matching docs and assert + // cannot directly check the docId because test can randomize segment numbers + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); + Scorer scorer = weight.scorer(leaf); + DocIdSetIterator disi = scorer.iterator(); + int docId; + while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } + } + } + List expected = List.of(1, 4); + assertEquals(expected, actual); + } + + public void testScoreMutilValues() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(3); + BitmapDocValuesQuery query = new BitmapDocValuesQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(); + for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + // use doc values to get the actual value of the matching docs and assert + // cannot directly check the docId because test can randomize segment numbers + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); + Scorer scorer = weight.scorer(leaf); + DocIdSetIterator disi = scorer.iterator(); + int docId; + while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } + } + } + Set expected = Set.of(2, 3); + assertEquals(expected, actual); + } +}