Skip to content

Commit

Permalink
Merge branch '12341-bitmap-filtering-push' into 12341-bitmap-filtering
Browse files: browse the repository at this point in the history
  • Loading branch information
bowenlan-amzn committed Aug 6, 2024
2 parents 7704005 + 7700b75 commit 3bce711
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -829,17 +830,20 @@ public Query termsQuery(String field, List<Object> values, boolean hasDocValues,
}

// Builds the bitmap query for this number type: deserializes a RoaringBitmap from the bytes in
// bitmapArray, then returns a query over the points index (bitmapIndexQuery), over doc values
// (BitmapDocValuesQuery), or an IndexOrDocValuesQuery wrapping both.
// Throws IllegalArgumentException (with the original exception as cause) when deserialization fails.
// NOTE(review): this span is a rendered diff without +/- markers, so pre- and post-change lines
// appear together. Presumably the three-argument signature and the first `if (isSearchable) {`
// are the replaced lines - confirm against the repository.
@Override
public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable) {
public Query bitmapQuery(String field, BytesArray bitmapArray, boolean isSearchable, boolean hasDocValues) {
RoaringBitmap bitmap = new RoaringBitmap();
try {
// NOTE(review): wraps the whole backing array; assumes the BytesArray starts at offset 0 and
// spans the full array - TODO confirm for sliced inputs
bitmap.deserialize(ByteBuffer.wrap(bitmapArray.array()));
} catch (Exception e) {
throw new IllegalArgumentException("Failed to deserialize the bitmap.", e);
}

if (isSearchable) {
if (isSearchable && hasDocValues) {
// both flags set: hand the index query and the doc-values query to IndexOrDocValuesQuery
return new IndexOrDocValuesQuery(bitmapIndexQuery(field, bitmap), new BitmapDocValuesQuery(field, bitmap));
}
if (isSearchable) {
// searchable but no doc values: only the index-based query is returned
return bitmapIndexQuery(field, bitmap);
}
// not searchable: the doc-values query is the only one returned
return new BitmapDocValuesQuery(field, bitmap);
}

Expand Down Expand Up @@ -1195,7 +1199,7 @@ public final TypeParser parser() {

// Abstract hook: every concrete type must supply its own terms query for a field and a list of values.
// The two flags presumably report whether the field has doc values and whether it is indexed - confirm against callers.
public abstract Query termsQuery(String field, List<Object> values, boolean hasDocValues, boolean isSearchable);

// Default implementation: always throws IllegalArgumentException stating that bitmap queries are
// not supported, so only types that override this method can answer them.
// The message is built from `name` and typeName(), not from the `field` argument.
// NOTE(review): two signatures are rendered because this is a diff without +/- markers;
// presumably the four-argument form (with hasDocValues) is the current one - confirm.
public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable) {
public Query bitmapQuery(String field, BytesArray bitmap, boolean isSearchable, boolean hasDocValues) {
throw new IllegalArgumentException("Field [" + name + "] of type [" + typeName() + "] does not support bitmap queries");
}

Expand Down Expand Up @@ -1445,16 +1449,17 @@ static PointInSetQuery bitmapIndexQuery(String field, RoaringBitmap bitmap) {
final BytesRef encoded = new BytesRef(new byte[Integer.BYTES]);
return new PointInSetQuery(field, 1, Integer.BYTES, new PointInSetQuery.Stream() {

long upto;
final Iterator<Integer> iterator = bitmap.iterator();

// Supplies the next bitmap value to the enclosing PointInSetQuery: the value is written with
// IntPoint.encodeDimension into the shared `encoded` buffer and that same BytesRef is returned;
// null is returned when there is no further value.
// NOTE(review): rendered diff without +/- markers - the `upto` / bitmap.nextValue lines and the
// `iterator` lines look like two alternative versions of the same logic (presumably the iterator
// form is the current one); confirm against the repository.
// NOTE(review): PointInSetQuery presumably expects values in sorted encoded order - confirm that
// the bitmap iterator's order matches IntPoint's encoding for values with the high bit set.
@Override
public BytesRef next() {
upto = bitmap.nextValue((int) upto);
if (upto == -1) {
int value;
if (iterator.hasNext()) {
value = iterator.next();
} else {
// no more values: null is the end-of-stream result
return null;
}
IntPoint.encodeDimension((int) upto, encoded.bytes, 0);
upto++;
// overwrite the reused buffer with the current value
IntPoint.encodeDimension(value, encoded.bytes, 0);
return encoded;
}
}) {
Expand Down Expand Up @@ -1546,7 +1551,8 @@ public Query termsQuery(List values, QueryShardContext context) {
}

// Builds the bitmap query for this field by delegating to `type`, passing the field name, the
// serialized bitmap and the field's searchable / doc-values flags.
// NOTE(review): two return statements are rendered because this is a diff without +/- markers;
// presumably the three-argument call is the replaced line - confirm.
public Query bitmapQuery(BytesArray bitmap) {
return type.bitmapQuery(name(), bitmap, isSearchable());
// presumably rejects fields that are neither indexed nor have doc values - helper is not visible here
failIfNotIndexedAndNoDocValues();
return type.bitmapQuery(name(), bitmap, isSearchable(), hasDocValues());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ public boolean isCacheable(LeafReaderContext ctx) {

// Text form of this query: the field followed either by the bitmap's own toString() or by its
// cardinality (getLongCardinality()).
// NOTE(review): both return statements are rendered because this is a diff without +/- markers;
// presumably the cardinality form is the current one, as the inline comment explains - confirm.
@Override
public String toString(String field) {
return field + ": " + bitmap.toString();
// bitmap may contain high cardinality, so choose to not show the actual values in it
return field + " cardinality: " + bitmap.getLongCardinality();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreMode;
Expand All @@ -25,8 +27,10 @@
import org.junit.Before;

import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

import org.roaringbitmap.RoaringBitmap;

Expand All @@ -40,7 +44,16 @@ public class BitmapDocValuesQueryTests extends OpenSearchTestCase {
public void initSearcher() throws IOException {
// Despite the name, only the directory and the index writer are created here;
// the reader and searcher are opened by the tests after they have added their documents.
this.dir = newDirectory();
this.w = new IndexWriter(this.dir, newIndexWriterConfig());
}

// Releases the per-test Lucene resources in reverse order of creation: reader, writer, directory.
// The reader is opened inside each test body, so it is still null when a test fails before
// reaching DirectoryReader.open; the guard keeps that failure from being masked by a
// NullPointerException thrown here.
@After
public void closeAllTheReaders() throws IOException {
if (reader != null) {
reader.close();
}
w.close();
dir.close();
}

public void testScore() throws IOException {
Document d = new Document();
d.add(new IntField("product_id", 1, Field.Store.NO));
w.addDocument(d);
Expand All @@ -60,16 +73,7 @@ public void initSearcher() throws IOException {
w.commit();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);
}

// Closes the reader, the index writer and the directory after each test, in that order.
// NOTE(review): this is the second rendering of the same method in this diff view (no +/- markers);
// presumably it is the pre-move position of the method - confirm against the repository.
@After
public void closeAllTheReaders() throws IOException {
reader.close();
w.close();
dir.close();
}

public void testScore() throws IOException {
RoaringBitmap bitmap = new RoaringBitmap();
bitmap.add(1);
bitmap.add(4);
Expand All @@ -79,14 +83,67 @@ public void testScore() throws IOException {

List<Integer> actual = new LinkedList<>();
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
// use doc values to get the actual value of the matching docs and assert
// cannot directly check the docId because test can randomize segment numbers
SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id");
Scorer scorer = weight.scorer(leaf);
DocIdSetIterator disi = scorer.iterator();
int docId;
while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
dv.advanceExact(docId);
for (int count = 0; count < dv.docValueCount(); ++count) {
actual.add((int) dv.nextValue());
}
}
}
List<Integer> expected = List.of(1, 4);
assertEquals(expected, actual);
}

// Multi-valued case: indexes four documents whose product_id values are 1, {2, 3}, 3 and 4,
// runs a BitmapDocValuesQuery with a bitmap containing only 3, and asserts on the doc values
// of every matching document.
// NOTE(review): "Mutil" in the method name looks like a typo for "Multi".
// NOTE(review): rendered diff without +/- markers - `actual.add(docId);` and the
// `List<Integer> expected` / `assertEquals(actual, expected)` pair look like leftover pre-change
// lines (they would redeclare `expected` and mix doc ids into a set of values); confirm.
public void testScoreMutilValues() throws IOException {
Document d = new Document();
d.add(new IntField("product_id", 1, Field.Store.NO));
w.addDocument(d);

// the only document carrying two values for the field
d = new Document();
d.add(new IntField("product_id", 2, Field.Store.NO));
d.add(new IntField("product_id", 3, Field.Store.NO));
w.addDocument(d);

d = new Document();
d.add(new IntField("product_id", 3, Field.Store.NO));
w.addDocument(d);

d = new Document();
d.add(new IntField("product_id", 4, Field.Store.NO));
w.addDocument(d);

// commit, then open the reader and searcher on top of the writer
w.commit();
reader = DirectoryReader.open(w);
searcher = newSearcher(reader);

RoaringBitmap bitmap = new RoaringBitmap();
bitmap.add(3);
BitmapDocValuesQuery query = new BitmapDocValuesQuery("product_id", bitmap);

Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);

// a set, so a value seen in several matching documents is recorded once
Set<Integer> actual = new HashSet<>();
for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
// use doc values to get the actual value of the matching docs and assert
// cannot directly check the docId because test can randomize segment numbers
SortedNumericDocValues dv = DocValues.getSortedNumeric(leaf.reader(), "product_id");
Scorer scorer = weight.scorer(leaf);
DocIdSetIterator disi = scorer.iterator();
int docId;
while ((docId = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
actual.add(docId);
// position the doc values on the matching document and collect all of its values
dv.advanceExact(docId);
for (int count = 0; count < dv.docValueCount(); ++count) {
actual.add((int) dv.nextValue());
}
}
}
List<Integer> expected = List.of(0, 3);
assertEquals(actual, expected);
// expected {2, 3}: presumably the two documents containing 3 match, and the first of them also carries 2
Set<Integer> expected = Set.of(2, 3);
assertEquals(expected, actual);
}
}

0 comments on commit 3bce711

Please sign in to comment.