opensearch-project · finnegancarroll · Aug 15, 2024
@@ -16,6 +16,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ### Fixed
 
+- Fix range aggregation optimization ignoring top level queries ([#15194](https://github.com/opensearch-project/OpenSearch/pull/15194))
+
 ### Security
 
 [Unreleased 2.x]: https://github.com/opensearch-project/OpenSearch/compare/57cd81da11e5cb831029719f0394e40aff68ced2...2.16
@@ -61,3 +61,94 @@ setup:
   - match: { aggregations.histo.buckets.8.doc_count: 1 }
   - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" }
   - match: { aggregations.histo.buckets.12.doc_count: 1 }
+
+---
+"Date histogram aggregation w/ filter query test":
+  - skip:
+      version: " - 2.99.99"
+      reason: Backport fix to 2.16
+
+  - do:
+      bulk:
+        refresh: true
+        index: dhisto-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}'
+
+  - do:
+      search:
+        index: dhisto-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            weekHisto:
+              date_histogram:
+                field: date
+                calendar_interval: week
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.weekHisto.buckets.0.doc_count: 2 }
+  - match: { aggregations.weekHisto.buckets.1.doc_count: 1 }
+
+---
+"Date histogram aggregation w/ shared field range test":
+  - do:
+      bulk:
+        refresh: true
+        index: dhisto-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"date": "2024-10-31"}'
+          - '{"index": {}}'
+          - '{"date": "2024-11-11"}'
+          - '{"index": {}}'
+          - '{"date": "2024-11-28"}'
+          - '{"index": {}}'
+          - '{"date": "2024-12-25"}'
+          - '{"index": {}}'
+          - '{"date": "2025-01-01"}'
+          - '{"index": {}}'
+          - '{"date": "2025-02-14"}'
+
+  - do:
+      search:
+        index: dhisto-agg-w-query
+        body:
+          profile: true
+          query:
+            range:
+              date:
+                gte: "2024-01-01"
+                lt: "2025-01-01"
+          aggregations:
+            monthHisto:
+              date_histogram:
+                field: date
+                calendar_interval: month
+          _source: false
+
+  - match: { hits.total.value: 4 }
+  - match: { aggregations.monthHisto.buckets.0.doc_count: 1 }
+  - match: { aggregations.monthHisto.buckets.1.doc_count: 2 }
+  - match: { aggregations.monthHisto.buckets.2.doc_count: 1 }
+  - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
+  - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 }
+  - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }
@@ -673,3 +673,82 @@ setup:
   - match: { aggregations.my_range.buckets.3.from: 1.5 }
   - is_false:  aggregations.my_range.buckets.3.to
   - match: { aggregations.my_range.buckets.3.doc_count: 2 }
+
+---
+"Filter query w/ aggregation test":
+  - skip:
+      version: " - 2.99.99"
+      reason: Backport fix to 2.16
+
+  - do:
+      bulk:
+        refresh: true
+        index: range-agg-w-query
+        body:
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": -10, "date": "2024-10-29"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": -5, "date": "2024-10-30"}'
+          - '{"index": {}}'
+          - '{"routing": "route1", "v": 10, "date": "2024-10-31"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "v": 15, "date": "2024-11-01"}'
+          - '{"index": {}}'
+          - '{"routing": "route2", "v": 20, "date": "2024-11-02"}'
+
+  - do:
+      search:
+        index: range-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            NegPosAgg:
+              range:
+                field: v
+                keyed: true
+                ranges:
+                  - to: 0
+                    key: "0"
+                  - from: 0
+                    key: "1"
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 }
+  - match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 }
+
+  - do:
+      search:
+        index: range-agg-w-query
+        body:
+          query:
+            bool:
+              must:
+                match_all: {}
+              filter:
+                - terms:
+                    routing:
+                      - "route1"
+          aggregations:
+            HalloweenAgg:
+              date_range:
+                field: date
+                format: "yyyy-MM-dd"
+                keyed: true
+                ranges:
+                  - to: "2024-11-01"
+                    key: "to-october"
+                  - from: "2024-11-01"
+                    key: "from-september"
+          _source: false
+
+  - match: { hits.total.value: 3 }
+  - match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 }
+  - match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 }
@@ -0,0 +1,102 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.aggregations.bucket.filterrewrite;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PointValues;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Weight;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.function.BiConsumer;
+import java.util.function.Consumer;
+
+/**
+ * This interface provides a bridge between an aggregator and the optimization context, allowing
+ * the aggregator to provide data and optimize the aggregation process.
+ *
+ * <p>The main purpose of this interface is to encapsulate the aggregator-specific optimization
+ * logic and provide access to the data in Aggregator that is required for optimization, while keeping the optimization
+ * business logic separate from the aggregator implementation.
+ *
+ * <p>To use this interface to optimize an aggregator, you should subclass this interface in this package
+ * and put any specific optimization business logic in it. Then implement this subclass in the aggregator
+ * to provide data that is needed for doing the optimization
+ *
+ * @opensearch.internal
+ */
+public abstract class AggregatorBridge {
+
+    /**
+     * The field type associated with this aggregator bridge.
+     */
+    MappedFieldType fieldType;
+
+    Consumer<Ranges> setRanges;
+
+    void setRangesConsumer(Consumer<Ranges> setRanges) {
+        this.setRanges = setRanges;
+    }
+
+    /**
+     * Checks whether the aggregator can be optimized.
+     * <p>
+     * This method is supposed to be implemented in a specific aggregator to take in fields from there
+     *
+     * @return {@code true} if the aggregator can be optimized, {@code false} otherwise.
+     * The result will be saved in the optimization context.
+     */
+    protected abstract boolean canOptimize();
+
+    /**
+     * Prepares the optimization at shard level after checking aggregator is optimizable.
+     * <p>
+     * For example, figure out what are the ranges from the aggregation to do the optimization later
+     * <p>
+     * This method is supposed to be implemented in a specific aggregator to take in fields from there
+     */
+    protected abstract void prepare() throws IOException;
+
+    /**
+     * Prepares the optimization for a specific segment when the segment is functionally matching all docs
+     *
+     * @param leaf the leaf reader context for the segment
+     */
+    abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException;
+
+    /**
+     * Attempts to build aggregation results for a segment
+     *
+     * @param values            the point values (index structure for numeric values) for a segment
+     * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket
+     * @param ranges
+     */
+    abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize(
+        PointValues values,
+        BiConsumer<Long, Long> incrementDocCount,
+        Ranges ranges
+    ) throws IOException;
+
+    /**
+     * Checks whether the top level query matches all documents on the segment
+     *
+     * <p>This method creates a weight from the search context's query and checks whether the weight's
+     * document count matches the total number of documents in the leaf reader context.
+     *
+     * @param ctx      the search context
+     * @param leafCtx  the leaf reader context for the segment
+     * @return {@code true} if the segment matches all documents, {@code false} otherwise
+     */
+    public static boolean segmentMatchAll(SearchContext ctx, LeafReaderContext leafCtx) throws IOException {
+        Weight weight = ctx.query().rewrite(ctx.searcher()).createWeight(ctx.searcher(), ScoreMode.COMPLETE_NO_SCORES, 1f);
+        return weight != null && weight.count(leafCtx) == leafCtx.reader().numDocs();
+    }
+}