From 01580daccf2df903e4de31faa0ba38d134425609 Mon Sep 17 00:00:00 2001 From: Finn Date: Thu, 15 Aug 2024 10:04:47 -0700 Subject: [PATCH] Remove filter rewrite optimization for range aggregations when segment is not effective match all (#15194) --------- Signed-off-by: Finn Carroll (cherry picked from commit ef87b397e3c4d9571e6a29a96e438c3e006a0ea7) --- CHANGELOG.md | 2 + .../search.aggregation/360_date_histogram.yml | 91 ++++++++++ .../test/search.aggregation/40_range.yml | 79 +++++++++ .../filterrewrite/AggregatorBridge.java | 102 ++++++++++++ .../DateHistogramAggregatorBridge.java | 157 ++++++++++++++++++ .../filterrewrite/RangeAggregatorBridge.java | 95 +++++++++++ .../bucket/range/RangeAggregator.java | 10 +- .../bucket/range/RangeAggregatorTests.java | 129 +++++++++++++- 8 files changed, 657 insertions(+), 8 deletions(-) create mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java create mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java create mode 100644 server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 90ef2781705db..0b05fb087d80a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed +- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194)) + ### Security [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/57cd81da11e5cb831029719f0394e40aff68ced2...2.16 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml index 0ea9d3de00926..8c8a98b2db22c 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml @@ -61,3 +61,94 @@ setup: - match: { aggregations.histo.buckets.8.doc_count: 1 } - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" } - match: { aggregations.histo.buckets.12.doc_count: 1 } + +--- +"Date histogram aggregation w/ filter query test": + - skip: + version: " - 2.99.99" + reason: Backport fix to 2.16 + + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}' + + - do: + search: + index: dhisto-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + weekHisto: + date_histogram: + field: date + calendar_interval: week + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.weekHisto.buckets.0.doc_count: 2 } + - match: { aggregations.weekHisto.buckets.1.doc_count: 1 } + +--- +"Date histogram aggregation w/ shared field range test": + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: 
+ - '{"index": {}}' + - '{"date": "2024-10-31"}' + - '{"index": {}}' + - '{"date": "2024-11-11"}' + - '{"index": {}}' + - '{"date": "2024-11-28"}' + - '{"index": {}}' + - '{"date": "2024-12-25"}' + - '{"index": {}}' + - '{"date": "2025-01-01"}' + - '{"index": {}}' + - '{"date": "2025-02-14"}' + + - do: + search: + index: dhisto-agg-w-query + body: + profile: true + query: + range: + date: + gte: "2024-01-01" + lt: "2025-01-01" + aggregations: + monthHisto: + date_histogram: + field: date + calendar_interval: month + _source: false + + - match: { hits.total.value: 4 } + - match: { aggregations.monthHisto.buckets.0.doc_count: 1 } + - match: { aggregations.monthHisto.buckets.1.doc_count: 2 } + - match: { aggregations.monthHisto.buckets.2.doc_count: 1 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 } + - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 80aad96ce1f6b..1e1d2b0706d6b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -673,3 +673,82 @@ setup: - match: { aggregations.my_range.buckets.3.from: 1.5 } - is_false: aggregations.my_range.buckets.3.to - match: { aggregations.my_range.buckets.3.doc_count: 2 } + +--- +"Filter query w/ aggregation test": + - skip: + version: " - 2.99.99" + reason: Backport fix to 2.16 + + - do: + bulk: + refresh: true + index: range-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "v": -10, "date": "2024-10-29"}' + - '{"index": {}}' + - '{"routing": "route1", "v": -5, "date": "2024-10-30"}' + - '{"index": {}}' + - '{"routing": "route1", "v": 10, "date": "2024-10-31"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 15, "date": "2024-11-01"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 20, "date": "2024-11-02"}' + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + NegPosAgg: + range: + field: v + keyed: true + ranges: + - to: 0 + key: "0" + - from: 0 + key: "1" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 } + - match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 } + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + HalloweenAgg: + date_range: + field: date + format: "yyyy-MM-dd" + keyed: true + ranges: + - to: "2024-11-01" + key: "to-october" + - from: "2024-11-01" + key: "from-september" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 } + - match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java new file mode 100644 index 0000000000000..145a60373b4f3 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.bucket.filterrewrite; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.function.BiConsumer; +import java.util.function.Consumer; + +/** + * This abstract class provides a bridge between an aggregator and the optimization context, allowing + * the aggregator to provide data and optimize the aggregation process. + * + *
<p>
The main purpose of this class is to encapsulate the aggregator-specific optimization + * logic and provide access to the data in the Aggregator that is required for optimization, while keeping the optimization + * business logic separate from the aggregator implementation. + * + *
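<p>For illustration only, a minimal sketch of the usage pattern described below (the subclass name and method + * bodies are assumed for this example and are not part of this patch; the remaining abstract methods are omitted): + * <pre>{@code + * class ExampleBridge extends AggregatorBridge { + *     protected boolean canOptimize() { + *         return fieldType != null; // aggregator-specific eligibility check + *     } + *     protected void prepare() throws IOException { + *         setRanges.accept(computeExampleRanges()); // hand precomputed ranges to the optimization context + *     } + * } + * }</pre> + *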
<p>
To use this class to optimize an aggregator, subclass it in this package + * and put any aggregator-specific optimization business logic in the subclass. Then implement that subclass in the aggregator + * to provide the data needed for the optimization. + * + * @opensearch.internal + */ +public abstract class AggregatorBridge { + + /** + * The field type associated with this aggregator bridge. + */ + MappedFieldType fieldType; + + /** + * Consumer used to hand the precomputed ranges to the optimization context. + */ + Consumer<Ranges> setRanges; + + void setRangesConsumer(Consumer<Ranges> setRanges) { + this.setRanges = setRanges; + } + + /** + * Checks whether the aggregator can be optimized. + *
<p>
+ * This method should be implemented by the concrete aggregator, which has access to the fields required for the check. + * + * @return {@code true} if the aggregator can be optimized, {@code false} otherwise. + * The result will be saved in the optimization context. + */ + protected abstract boolean canOptimize(); + + /** + * Prepares the optimization at shard level after confirming that the aggregator is optimizable. + *
<p>
+ * For example, it may compute the ranges from the aggregation that the optimization will use later. + *
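For illustration, the date histogram bridge added later in this patch prepares its ranges exactly this way + * (abridged from DateHistogramAggregatorBridge): + * <pre>{@code + * protected void buildRanges(SearchContext context) throws IOException { + *     long[] bounds = Helper.getDateHistoAggBounds(context, fieldType.name()); + *     setRanges.accept(buildRanges(bounds, maxRewriteFilters)); + * } + * }</pre> + *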
<p>
+ * This method should be implemented by the concrete aggregator, which has access to the fields required to build the ranges. + */ + protected abstract void prepare() throws IOException; + + /** + * Prepares the optimization for a specific segment when the segment effectively matches all docs + * + * @param leaf the leaf reader context for the segment + */ + abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; + + /** + * Attempts to build aggregation results for a segment + * + * @param values the point values (index structure for numeric values) for a segment + * @param incrementDocCount a consumer to increment the document count for a range bucket. The first parameter is the bucket ordinal, the second is the document count + * @param ranges the predefined ranges to match documents against + * @return debug information collected while traversing the point tree + */ + abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize( + PointValues values, + BiConsumer<Long, Long> incrementDocCount, + Ranges ranges + ) throws IOException; + + /** + * Checks whether the top level query matches all documents on the segment + * + *
<p>
This method creates a weight from the search context's query and checks whether the weight's + * document count matches the total number of documents in the leaf reader context. + * + * @param ctx the search context + * @param leafCtx the leaf reader context for the segment + * @return {@code true} if the segment matches all documents, {@code false} otherwise + */ + public static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException { + Weight weight = ctx.query().rewrite(ctx.searcher()).createWeight(ctx.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f); + return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs(); + } +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java new file mode 100644 index 0000000000000..c780732a5ddce --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java @@ -0,0 +1,157 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.bucket.filterrewrite; + +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.opensearch.common.Rounding; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.bucket.histogram.LongBounds; +import org.opensearch.search.aggregations.support.ValuesSourceConfig; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.OptionalLong; +import java.util.function.BiConsumer; +import java.util.function.Function; + +import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; + +/** + * For date histogram aggregation + */ +public abstract class DateHistogramAggregatorBridge extends AggregatorBridge { + + int maxRewriteFilters; + + protected boolean canOptimize(ValuesSourceConfig config) { + if (config.script() == null && config.missing() == null) { + MappedFieldType fieldType = config.fieldType(); + if (fieldType instanceof DateFieldMapper.DateFieldType) { + if (fieldType.isSearchable()) { + this.fieldType = fieldType; + return true; + } + } + } + return false; + } + + protected void buildRanges(SearchContext context) throws IOException { + long[] bounds = Helper.getDateHistoAggBounds(context, fieldType.name()); + this.maxRewriteFilters = context.maxAggRewriteFilters(); + setRanges.accept(buildRanges(bounds, maxRewriteFilters)); + } + + @Override + final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException { + long[] bounds = Helper.getSegmentBounds(leaf, fieldType.name()); + return buildRanges(bounds, maxRewriteFilters); + } + + private Ranges buildRanges(long[] bounds, int maxRewriteFilters) { + bounds = processHardBounds(bounds); + if (bounds == null) { + return null; + } + assert bounds[0] <= bounds[1] : "Low bound should be less than high bound"; + + final Rounding rounding = getRounding(bounds[0], bounds[1]); + final OptionalLong intervalOpt = Rounding.getInterval(rounding); + if (intervalOpt.isEmpty()) { + return null; + } + final 
long interval = intervalOpt.getAsLong(); + + // process the after key of composite agg + bounds = processAfterKey(bounds, interval); + + return Helper.createRangesFromAgg( + (DateFieldMapper.DateFieldType) fieldType, + interval, + getRoundingPrepared(), + bounds[0], + bounds[1], + maxRewriteFilters + ); + } + + protected abstract Rounding getRounding(final long low, final long high); + + protected abstract Rounding.Prepared getRoundingPrepared(); + + protected long[] processAfterKey(long[] bounds, long interval) { + return bounds; + } + + protected long[] processHardBounds(long[] bounds) { + return processHardBounds(bounds, null); + } + + protected long[] processHardBounds(long[] bounds, LongBounds hardBounds) { + if (bounds != null) { + // Update min/max limit if user specified any hard bounds + if (hardBounds != null) { + if (hardBounds.getMin() > bounds[0]) { + bounds[0] = hardBounds.getMin(); + } + if (hardBounds.getMax() - 1 < bounds[1]) { + bounds[1] = hardBounds.getMax() - 1; // hard bounds max is exclusive + } + if (bounds[0] > bounds[1]) { + return null; + } + } + } + return bounds; + } + + private DateFieldMapper.DateFieldType getFieldType() { + assert fieldType instanceof DateFieldMapper.DateFieldType; + return (DateFieldMapper.DateFieldType) fieldType; + } + + protected int getSize() { + return Integer.MAX_VALUE; + } + + @Override + final FilterRewriteOptimizationContext.DebugInfo tryOptimize( + PointValues values, + BiConsumer<Long, Long> incrementDocCount, + Ranges ranges + ) throws IOException { + int size = getSize(); + + DateFieldMapper.DateFieldType fieldType = getFieldType(); + BiConsumer<Integer, Integer> incrementFunc = (activeIndex, docCount) -> { + long rangeStart = LongPoint.decodeDimension(ranges.lowers[activeIndex], 0); + rangeStart = fieldType.convertNanosToMillis(rangeStart); + long bucketOrd = getBucketOrd(bucketOrdProducer().apply(rangeStart)); + incrementDocCount.accept(bucketOrd, (long) docCount); + }; + + return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); + } + + private static long getBucketOrd(long bucketOrd) { + if (bucketOrd < 0) { // already seen + bucketOrd = -1 - bucketOrd; + } + + return bucketOrd; + } + + /** + * Provides a function to produce bucket ordinals from the lower bound of the range + */ + protected abstract Function<Long, Long> bucketOrdProducer(); +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java new file mode 100644 index 0000000000000..fc1bcd83f2c1b --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java @@ -0,0 +1,95 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.search.aggregations.bucket.filterrewrite; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumericPointEncoder; +import org.opensearch.search.aggregations.bucket.range.RangeAggregator; +import org.opensearch.search.aggregations.support.ValuesSource; +import org.opensearch.search.aggregations.support.ValuesSourceConfig; + +import java.io.IOException; +import java.util.function.BiConsumer; +import java.util.function.Function; + +import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; + +/** + * For range aggregation + */ +public abstract class RangeAggregatorBridge extends AggregatorBridge { + + protected boolean canOptimize(ValuesSourceConfig config, RangeAggregator.Range[] ranges) { + if (config.fieldType() == null) return false; + MappedFieldType fieldType = config.fieldType(); + assert fieldType != null; + if (fieldType.isSearchable() == false || !(fieldType instanceof NumericPointEncoder)) return false; + + if (config.script() == null && config.missing() == null) { + if (config.getValuesSource() instanceof ValuesSource.Numeric.FieldData) { + // ranges are already sorted by from and then to + // we want ranges not overlapping with each other + double prevTo = ranges[0].getTo(); + for (int i = 1; i < ranges.length; i++) { + if (prevTo > ranges[i].getFrom()) { + return false; + } + prevTo = ranges[i].getTo(); + } + this.fieldType = config.fieldType(); + return true; + } + } + return false; + } + + protected void buildRanges(RangeAggregator.Range[] ranges) { + assert fieldType instanceof NumericPointEncoder; + NumericPointEncoder numericPointEncoder = (NumericPointEncoder) fieldType; + byte[][] lowers = new byte[ranges.length][]; + byte[][] uppers = new byte[ranges.length][]; + for (int i = 0; i < ranges.length; i++) { + double rangeMin = ranges[i].getFrom(); + double rangeMax = ranges[i].getTo(); + byte[] lower = numericPointEncoder.encodePoint(rangeMin); + byte[] upper = numericPointEncoder.encodePoint(rangeMax); + lowers[i] = lower; + uppers[i] = upper; + } + + setRanges.accept(new Ranges(lowers, uppers)); + } + + @Override + final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) { + throw new UnsupportedOperationException("Range aggregation should not build ranges at segment level"); + } + + @Override + final FilterRewriteOptimizationContext.DebugInfo tryOptimize( + PointValues values, + BiConsumer<Long, Long> incrementDocCount, + Ranges ranges + ) throws IOException { + int size = Integer.MAX_VALUE; + BiConsumer<Integer, Integer> incrementFunc = (activeIndex, docCount) -> { + long bucketOrd = bucketOrdProducer().apply(activeIndex); + incrementDocCount.accept(bucketOrd, (long) docCount); + }; + + return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); + } + + /** + * Provides a function to produce bucket ordinals from the index of the corresponding range in the range array + */ + protected abstract Function<Object, Long> bucketOrdProducer(); +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 2ba2b06514de1..574bf7ea4538c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -68,6 +68,7 @@
import java.util.function.BiConsumer; import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.opensearch.search.aggregations.bucket.filterrewrite.AggregatorBridge.segmentMatchAll; /** * Aggregate all docs that match given ranges. @@ -298,12 +299,9 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = fastFilterContext.tryFastFilterAggregation( - ctx, - this::incrementBucketDocCount, - (activeIndex) -> subBucketOrdinal(0, (int) activeIndex) - ); - if (optimized) throw new CollectionTerminatedException(); + if (segmentMatchAll(context, ctx) && filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false)) { + throw new CollectionTerminatedException(); + } final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); return new LeafBucketCollectorBase(sub, values) { diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 7e796b684e869..630326967aae1 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -32,6 +32,9 @@ package org.opensearch.search.aggregations.bucket.range; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -39,9 +42,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; @@ -54,6 +61,7 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberType; +import org.opensearch.index.mapper.ParseContext.Document; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.CardinalityUpperBound; @@ -65,6 +73,7 @@ import java.io.IOException; import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -457,6 +466,29 @@ public void testFloatType() throws IOException { ); } + public void testTopLevelRangeQuery() throws IOException { + NumberFieldType fieldType = new NumberFieldType(NumberType.INTEGER.typeName(), NumberType.INTEGER); + String fieldName = fieldType.numberType().typeName(); + Query query = IntPoint.newRangeQuery(fieldName, 5, 20); + + testRewriteOptimizationCase( + fieldType, + new double[][] { { 0.0, 
10.0 }, { 10.0, 20.0 } }, + query, + new Number[] { 0.1, 4.0, 9, 11, 12, 19 }, + range -> { + List<? extends InternalRange.Bucket> ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(3, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + false + ); + } + public void testUnsignedLongType() throws IOException { testRewriteOptimizationCase( new NumberFieldType(NumberType.UNSIGNED_LONG.typeName(), NumberType.UNSIGNED_LONG), @@ -493,6 +525,76 @@ public void testUnsignedLongType() throws IOException { ); } + + /** + * Expect no optimization as the top level query excludes documents. + */ + public void testTopLevelFilterTermQuery() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List<Document> docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route2", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List<? extends InternalRange.Bucket> ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(1, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, false); + } + + /** + * Expect optimization as the top level query is an effective match-all.
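+ * All three documents carry the keyword field "route" with value "route1", so the filter clause matches every document + * in the segment; segmentMatchAll() should then see a Weight count equal to the segment's numDocs, and the rewrite is + * expected to apply (optimized is asserted {@code true} below).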
*/ + public void testTopLevelEffectiveMatchAll() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List<Document> docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List<? extends InternalRange.Bucket> ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(2, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, true); + } + private void testCase( Query query, CheckedConsumer<RandomIndexWriter, IOException> buildIndex, @@ -556,11 +658,34 @@ private void testRewriteOptimizationCase( ) throws IOException { NumberType numberType = fieldType.numberType(); String fieldName = numberType.typeName(); + List<Document> docs = new ArrayList<>(); + + for (Number dataPoint : dataPoints) { + Document doc = new Document(); + List<Field> fieldList = numberType.createFields(fieldName, dataPoint, true, true, false); + for (Field fld : fieldList) + doc.add(fld); + docs.add(doc); + } + + testRewriteOptimizationCase(fieldType, ranges, query, docs, verify, optimized); + } + + private void testRewriteOptimizationCase( + NumberFieldType fieldType, + double[][] ranges, + Query query, + List<Document> documents, + Consumer<InternalRange<? extends InternalRange.Bucket, ? extends InternalRange<?, ?>>> verify, + boolean optimized + ) throws IOException { + NumberType numberType = fieldType.numberType(); + String fieldName = numberType.typeName(); try (Directory directory = newDirectory()) { try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) { - for (Number dataPoint : dataPoints) { - indexWriter.addDocument(numberType.createFields(fieldName, dataPoint, true, true, false)); + for (Document doc : documents) { + indexWriter.addDocument(doc); + } }