diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 3fc0ce085d63e..0b62e17e6a020 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -79,6 +79,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Objects; @@ -829,7 +830,7 @@ public Query termsQuery(String field, List values, boolean hasDocValues, } @Override - public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable) { + public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable, boolean hasDocValues) { RoaringBitmap bitmap = new RoaringBitmap(); try { bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array())); @@ -837,9 +838,12 @@ public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearcha throw new IllegalArgumentException("Failed to deserialize the bitmap.", e); } - if (isSearchable) { + if (isSearchable && hasDocValues) { return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap)); } + if (isSearchable) { + return bitmapIndexQuery(field, bitmap); + } return new BitmapDocValuesQuery(field, bitmap); } @@ -1195,7 +1199,7 @@ public final TypeParser parser() { public abstract Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable); - public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable) { + public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable, boolean hasDocValues) { throw new IllegalArgumentException("Field [" + name + "] of type [" + typeName() + "] does not support bitmap queries"); } @@ -1445,16 +1449,17 @@ static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) { final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]); return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() { - long upto; + final Iterator iterator = bitmap.iterator(); @Override public BytesRef next() { - upto = bitmap.nextValue((int) upto); - if (upto == -1) { + int value; + if (iterator.hasNext()) { + value = iterator.next(); + } else { return null; } - IntPoint.encodeDimension((int) upto, encoded.bytes, 0); - upto++; + IntPoint.encodeDimension(value, encoded.bytes, 0); return encoded; } }) { @@ -1546,7 +1551,8 @@ public Query termsQuery(List values, QueryShardContext context) { } public Query bitmapQuery(BytesArray bitmap) { - return type.bitmapQuery(name(), bitmap, isSearchable()); + failIfNotIndexedAndNoDocValues(); + return type.bitmapQuery(name(), bitmap, isSearchable(), hasDocValues()); } @Override diff --git a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java index 3dba89ac29e9c..e09a724c8486a 100644 --- a/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java +++ b/server/src/main/java/org/opensearch/search/query/BitmapDocValuesQuery.java @@ -106,7 +106,8 @@ public boolean isCacheable(LeafReaderContext ctx) { @Override public String toString(String field) { - return field + ": " + bitmap.toString(); + // bitmap may contain high cardinality, so choose to not show the actual values in it + return field + " cardinality: " + bitmap.getLongCardinality(); } @Override diff --git a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java index 9c0ff2086a49d..6e293d1ec69fd 100644 --- a/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java +++ b/server/src/test/java/org/opensearch/search/query/BitmapDocValuesQueryTests.java @@ -12,8 +12,10 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreMode; @@ -25,8 +27,10 @@ import org.junit.Before; import java.io.IOException; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Set; import org.roaringbitmap.RoaringBitmap; @@ -40,7 +44,16 @@ public class BitmapDocValuesQueryTests extends OpenSearchTestCase { public void initSearcher() throws IOException { dir = newDirectory(); w = new IndexWriter(dir, newIndexWriterConfig()); + } + + @After + public void closeAllTheReaders() throws IOException { + reader.close(); + w.close(); + dir.close(); + } + public void testScore() throws IOException { Document d = new Document(); d.add(new IntField("product_id", 1, Field.Store.NO)); w.addDocument(d); @@ -60,16 +73,7 @@ public void initSearcher() throws IOException { w.commit(); reader = DirectoryReader.open(w); searcher = newSearcher(reader); - } - @After - public void closeAllTheReaders() throws IOException { - reader.close(); - w.close(); - dir.close(); - } - - public void testScore() throws IOException { RoaringBitmap bitmap = new RoaringBitmap(); bitmap.add(1); bitmap.add(4); @@ -79,14 +83,67 @@ public void testScore() throws IOException { List actual = new LinkedList<>(); for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + // use doc values to get the actual value of the matching docs and assert + // cannot directly check the docId because test can randomize segment numbers + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); + Scorer scorer = weight.scorer(leaf); + DocIdSetIterator disi = scorer.iterator(); + int docId; + while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } + } + } + List expected = List.of(1, 4); + assertEquals(expected, actual); + } + + public void testScoreMutilValues() throws IOException { + Document d = new Document(); + d.add(new IntField("product_id", 1, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 2, Field.Store.NO)); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 3, Field.Store.NO)); + w.addDocument(d); + + d = new Document(); + d.add(new IntField("product_id", 4, Field.Store.NO)); + w.addDocument(d); + + w.commit(); + reader = DirectoryReader.open(w); + searcher = newSearcher(reader); + + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.add(3); + BitmapDocValuesQuery query = new BitmapDocValuesQuery("product_id", bitmap); + + Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f); + + Set actual = new HashSet<>(); + for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) { + // use doc values to get the actual value of the matching docs and assert + // cannot directly check the docId because test can randomize segment numbers + SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id"); Scorer scorer = weight.scorer(leaf); DocIdSetIterator disi = scorer.iterator(); int docId; while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - actual.add(docId); + dv.advanceExact(docId); + for (int count = 0; count < dv.docValueCount(); ++count) { + actual.add((int) dv.nextValue()); + } } } - List expected = List.of(0, 3); - assertEquals(actual, expected); + Set expected = Set.of(2, 3); + assertEquals(expected, actual); } }