From f9fdce6e9ddba0a32165dce1802360795de3ee1e Mon Sep 17 00:00:00 2001 From: expani Date: Mon, 29 Jul 2024 16:19:42 +0530 Subject: [PATCH 1/8] Avoid deep copy and other allocation improvements Signed-off-by: expani --- .../bucket/terms/MultiTermsAggregator.java | 148 +++++++++++------- 1 file changed, 89 insertions(+), 59 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index 59f48bd7fbaba..d97d59db2ee42 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -33,6 +33,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.InternalOrder; import org.opensearch.search.aggregations.LeafBucketCollector; +import org.opensearch.search.aggregations.bucket.BucketsAggregator; import org.opensearch.search.aggregations.bucket.DeferableBucketAggregator; import org.opensearch.search.aggregations.bucket.LocalBucketCountThresholds; import org.opensearch.search.aggregations.support.AggregationPath; @@ -215,19 +216,11 @@ public InternalAggregation buildEmptyAggregation() { @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx); + MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx, bucketOrds, this, sub); return new LeafBucketCollector() { @Override public void collect(int doc, long owningBucketOrd) throws IOException { - for (BytesRef compositeKey : collector.apply(doc)) { - long bucketOrd = bucketOrds.add(owningBucketOrd, compositeKey); - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - collectExistingBucket(sub, doc, bucketOrd); - } else { - collectBucket(sub, doc, bucketOrd); - } - } + collector.apply(doc, owningBucketOrd); } }; } @@ -268,12 +261,10 @@ private void collectZeroDocEntriesIfNeeded(long owningBucketOrd) throws IOExcept } // we need to fill-in the blanks for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) { - MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx); + MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx, bucketOrds, null, null); // brute force for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) { - for (BytesRef compositeKey : collector.apply(docId)) { - bucketOrds.add(owningBucketOrd, compositeKey); - } + collector.apply(docId, owningBucketOrd); } } } @@ -287,7 +278,8 @@ interface MultiTermsValuesSourceCollector { * Collect a list values of multi_terms on each doc. * Each terms could have multi_values, so the result is the cartesian product of each term's values. */ - List apply(int doc) throws IOException; + void apply(int doc, long owningBucketOrd) throws IOException; + } @FunctionalInterface @@ -361,51 +353,17 @@ public MultiTermsValuesSource(List valuesSources) { this.valuesSources = valuesSources; } - public MultiTermsValuesSourceCollector getValues(LeafReaderContext ctx) throws IOException { + public MultiTermsValuesSourceCollector getValues( + LeafReaderContext ctx, + BytesKeyedBucketOrds bucketOrds, + BucketsAggregator aggregator, + LeafBucketCollector sub + ) throws IOException { List collectors = new ArrayList<>(); for (InternalValuesSource valuesSource : valuesSources) { collectors.add(valuesSource.apply(ctx)); } - return new MultiTermsValuesSourceCollector() { - @Override - public List apply(int doc) throws IOException { - List>> collectedValues = new ArrayList<>(); - for (InternalValuesSourceCollector collector : collectors) { - collectedValues.add(collector.apply(doc)); - } - List result = new ArrayList<>(); - scratch.seek(0); - scratch.writeVInt(collectors.size()); // number of fields per composite key - cartesianProduct(result, scratch, collectedValues, 0); - return result; - } - - /** - * Cartesian product using depth first search. - * - *

- * Composite keys are encoded to a {@link BytesRef} in a format compatible with {@link StreamOutput::writeGenericValue}, - * but reuses the encoding of the shared prefixes from the previous levels to avoid wasteful work. - */ - private void cartesianProduct( - List compositeKeys, - BytesStreamOutput scratch, - List>> collectedValues, - int index - ) throws IOException { - if (collectedValues.size() == index) { - compositeKeys.add(BytesRef.deepCopyOf(scratch.bytes().toBytesRef())); - return; - } - - long position = scratch.position(); - for (TermValue value : collectedValues.get(index)) { - value.writeTo(scratch); // encode the value - cartesianProduct(compositeKeys, scratch, collectedValues, index + 1); // dfs - scratch.seek(position); // backtrack - } - } - }; + return new MultiValuesSourceCollectorImpl(collectors, scratch, bucketOrds, aggregator, sub); } @Override @@ -414,6 +372,74 @@ public void close() { } } + static class MultiValuesSourceCollectorImpl implements MultiTermsValuesSourceCollector { + + private final List collectors; + private final BytesStreamOutput scratch; + private final BytesKeyedBucketOrds bucketOrds; + private final BucketsAggregator aggregator; + private final LeafBucketCollector sub; + + private final boolean collectViaAggregator; + + public MultiValuesSourceCollectorImpl( + List collectors, + BytesStreamOutput scratch, + BytesKeyedBucketOrds bucketOrds, + BucketsAggregator aggregator, + LeafBucketCollector sub + ) { + this.collectors = collectors; + this.scratch = scratch; + this.bucketOrds = bucketOrds; + this.aggregator = aggregator; + this.sub = sub; + this.collectViaAggregator = aggregator != null && sub != null; + } + + @Override + public void apply(int doc, long owningBucketOrd) throws IOException { + List>> collectedValues = new ArrayList<>(); + for (InternalValuesSourceCollector collector : collectors) { + collectedValues.add(collector.apply(doc)); + } + scratch.seek(0); + scratch.writeVInt(collectors.size()); // number of fields per composite key + cartesianProductRecursive(collectedValues, 0, owningBucketOrd, doc); + } + + /** + * Cartesian product using depth first search. + */ + private void cartesianProductRecursive(List>> collectedValues, int index, long owningBucketOrd, int doc) + throws IOException { + if (collectedValues.size() == index) { + // Avoid performing a deep copy of the composite key + long bucketOrd = bucketOrds.add(owningBucketOrd, scratch.bytes().toBytesRef()); + if (collectViaAggregator) { + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + aggregator.collectExistingBucket(sub, doc, bucketOrd); + } else { + aggregator.collectBucket(sub, doc, bucketOrd); + } + } + return; + } + + long position = scratch.position(); + List> values = collectedValues.get(index); + int numIterations = values.size(); + for (int i = 0; i < numIterations; i++) { + TermValue value = values.get(i); + value.writeTo(scratch); // encode the value + cartesianProductRecursive(collectedValues, index + 1, owningBucketOrd, doc); // dfs + scratch.seek(position); // backtrack + } + } + + } + /** * Factory for construct {@link InternalValuesSource}. * @@ -441,9 +467,13 @@ static InternalValuesSource bytesValuesSource(ValuesSource valuesSource, Include if (i > 0 && bytes.equals(previous)) { continue; } - BytesRef copy = BytesRef.deepCopyOf(bytes); - termValues.add(TermValue.of(copy)); - previous = copy; + if (valuesCount > 1) { + BytesRef copy = BytesRef.deepCopyOf(bytes); + termValues.add(TermValue.of(copy)); + previous = copy; + } else { + termValues.add(TermValue.of(bytes)); + } } return termValues; }; From 5324fe49f25f4a7c18dbc5fd872fde23886a7431 Mon Sep 17 00:00:00 2001 From: expani Date: Thu, 1 Aug 2024 03:21:31 +0530 Subject: [PATCH 2/8] Refactoring based on PR Comments and added JavaDocs Signed-off-by: expani --- .../bucket/terms/MultiTermsAggregator.java | 125 ++++++++---------- 1 file changed, 55 insertions(+), 70 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index d97d59db2ee42..aa17f840e99af 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -261,8 +261,8 @@ private void collectZeroDocEntriesIfNeeded(long owningBucketOrd) throws IOExcept } // we need to fill-in the blanks for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) { - MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx, bucketOrds, null, null); // brute force + MultiTermsValuesSourceCollector collector = multiTermsValue.getValues(ctx, bucketOrds, null, null); for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) { collector.apply(docId, owningBucketOrd); } @@ -363,81 +363,65 @@ public MultiTermsValuesSourceCollector getValues( for (InternalValuesSource valuesSource : valuesSources) { collectors.add(valuesSource.apply(ctx)); } - return new MultiValuesSourceCollectorImpl(collectors, scratch, bucketOrds, aggregator, sub); - } - - @Override - public void close() { - scratch.close(); - } - } - - static class MultiValuesSourceCollectorImpl implements MultiTermsValuesSourceCollector { - - private final List collectors; - private final BytesStreamOutput scratch; - private final BytesKeyedBucketOrds bucketOrds; - private final BucketsAggregator aggregator; - private final LeafBucketCollector sub; - - private final boolean collectViaAggregator; - - public MultiValuesSourceCollectorImpl( - List collectors, - BytesStreamOutput scratch, - BytesKeyedBucketOrds bucketOrds, - BucketsAggregator aggregator, - LeafBucketCollector sub - ) { - this.collectors = collectors; - this.scratch = scratch; - this.bucketOrds = bucketOrds; - this.aggregator = aggregator; - this.sub = sub; - this.collectViaAggregator = aggregator != null && sub != null; - } + boolean collectBucketOrds = aggregator != null && sub != null; + return new MultiTermsValuesSourceCollector() { + @Override + public void apply(int doc, long owningBucketOrd) throws IOException { + // TODO A new list creation can be avoided for every doc. + List>> collectedValues = new ArrayList<>(); + for (InternalValuesSourceCollector collector : collectors) { + collectedValues.add(collector.apply(doc)); + } + scratch.seek(0); + scratch.writeVInt(collectors.size()); // number of fields per composite key + generateAndCollectCompositeKeys(collectedValues, 0, owningBucketOrd, doc); + } - @Override - public void apply(int doc, long owningBucketOrd) throws IOException { - List>> collectedValues = new ArrayList<>(); - for (InternalValuesSourceCollector collector : collectors) { - collectedValues.add(collector.apply(doc)); - } - scratch.seek(0); - scratch.writeVInt(collectors.size()); // number of fields per composite key - cartesianProductRecursive(collectedValues, 0, owningBucketOrd, doc); - } + /** + * This generates and collects all Composite keys in their buckets by performing a cartesian product
+ * of all the values of all fields for the given doc recursively. + * @param collectedValues : Values of all fields present in the aggregation for the @doc + * @param index : Points to the field being added to generate the composite key + */ + private void generateAndCollectCompositeKeys( + List>> collectedValues, + int index, + long owningBucketOrd, + int doc + ) throws IOException { + if (collectedValues.size() == index) { + // Avoid performing a deep copy of the composite key by inlining + long bucketOrd = bucketOrds.add(owningBucketOrd, scratch.bytes().toBytesRef()); + if (collectBucketOrds) { + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + aggregator.collectExistingBucket(sub, doc, bucketOrd); + } else { + aggregator.collectBucket(sub, doc, bucketOrd); + } + } + return; + } - /** - * Cartesian product using depth first search. - */ - private void cartesianProductRecursive(List>> collectedValues, int index, long owningBucketOrd, int doc) - throws IOException { - if (collectedValues.size() == index) { - // Avoid performing a deep copy of the composite key - long bucketOrd = bucketOrds.add(owningBucketOrd, scratch.bytes().toBytesRef()); - if (collectViaAggregator) { - if (bucketOrd < 0) { - bucketOrd = -1 - bucketOrd; - aggregator.collectExistingBucket(sub, doc, bucketOrd); - } else { - aggregator.collectBucket(sub, doc, bucketOrd); + long position = scratch.position(); + List> values = collectedValues.get(index); + int numIterations = values.size(); + // For each loop is not done to reduce the allocations done for Iterator objects + // once for every field in every doc. + for (int i = 0; i < numIterations; i++) { + TermValue value = values.get(i); + value.writeTo(scratch); // encode the value + generateAndCollectCompositeKeys(collectedValues, index + 1, owningBucketOrd, doc); // dfs + scratch.seek(position); // backtrack } } - return; - } - - long position = scratch.position(); - List> values = collectedValues.get(index); - int numIterations = values.size(); - for (int i = 0; i < numIterations; i++) { - TermValue value = values.get(i); - value.writeTo(scratch); // encode the value - cartesianProductRecursive(collectedValues, index + 1, owningBucketOrd, doc); // dfs - scratch.seek(position); // backtrack - } + }; } + @Override + public void close() { + scratch.close(); + } } /** @@ -467,6 +451,7 @@ static InternalValuesSource bytesValuesSource(ValuesSource valuesSource, Include if (i > 0 && bytes.equals(previous)) { continue; } + // Performing a deep copy is not required for field containing only one value. if (valuesCount > 1) { BytesRef copy = BytesRef.deepCopyOf(bytes); termValues.add(TermValue.of(copy)); From f2a6adb543266854f0917c4caf345512cc6700a5 Mon Sep 17 00:00:00 2001 From: expani Date: Thu, 1 Aug 2024 14:35:57 +0530 Subject: [PATCH 3/8] Added more comments Signed-off-by: expani --- .../aggregations/bucket/terms/MultiTermsAggregator.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index aa17f840e99af..eda6340c7639c 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -365,6 +365,13 @@ public MultiTermsValuesSourceCollector getValues( } boolean collectBucketOrds = aggregator != null && sub != null; return new MultiTermsValuesSourceCollector() { + + /** + * This method does the following :
+ *

  • Fetches the values of every field present in the doc List>> via @{@link InternalValuesSourceCollector}
  • + *
  • Generates Composite keys from the fetched values for all fields present in the aggregation.
  • + *
  • Adds every composite key to the @{@link BytesKeyedBucketOrds} and Optionally collects them via @{@link BucketsAggregator#collectBucket(LeafBucketCollector, int, long)}
  • + */ @Override public void apply(int doc, long owningBucketOrd) throws IOException { // TODO A new list creation can be avoided for every doc. @@ -379,7 +386,7 @@ public void apply(int doc, long owningBucketOrd) throws IOException { /** * This generates and collects all Composite keys in their buckets by performing a cartesian product
    - * of all the values of all fields for the given doc recursively. + * of all the values in all the fields ( used in agg ) for the given doc recursively. * @param collectedValues : Values of all fields present in the aggregation for the @doc * @param index : Points to the field being added to generate the composite key */ From 93e355e0e4d2484ae77222d551955b0cf0cb009f Mon Sep 17 00:00:00 2001 From: expani Date: Thu, 1 Aug 2024 23:25:36 +0530 Subject: [PATCH 4/8] Added character for Triggering Jenkins build Signed-off-by: expani --- .../search/aggregations/bucket/terms/MultiTermsAggregator.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index eda6340c7639c..75d5c0f2b9f24 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -397,7 +397,7 @@ private void generateAndCollectCompositeKeys( int doc ) throws IOException { if (collectedValues.size() == index) { - // Avoid performing a deep copy of the composite key by inlining + // Avoid performing a deep copy of the composite key by inlining. long bucketOrd = bucketOrds.add(owningBucketOrd, scratch.bytes().toBytesRef()); if (collectBucketOrds) { if (bucketOrd < 0) { From 313f2d496064ae8245d91e9065bf070acfe8ae4c Mon Sep 17 00:00:00 2001 From: expani Date: Tue, 6 Aug 2024 21:33:34 +0530 Subject: [PATCH 5/8] Changes to cover collectZeroDocEntries method Signed-off-by: expani --- .../aggregations/bucket/terms/MultiTermsAggregatorTests.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java index d550c4c354c0f..2281b86b82256 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java @@ -673,7 +673,10 @@ public void testDatesFieldFormat() throws IOException { } public void testIpAndKeyword() throws IOException { - testAggregation(new MatchAllDocsQuery(), fieldConfigs(asList(KEYWORD_FIELD, IP_FIELD)), NONE_DECORATOR, iw -> { + testAggregation(new MatchAllDocsQuery(), fieldConfigs(asList(KEYWORD_FIELD, IP_FIELD)), multiTermsAggregationBuilder -> { + multiTermsAggregationBuilder.minDocCount(0); + multiTermsAggregationBuilder.size(100); + }, iw -> { iw.addDocument( asList( new SortedDocValuesField(KEYWORD_FIELD, new BytesRef("a")), From 0ed2ac8ca0449198361440f4b7321a138035a1ff Mon Sep 17 00:00:00 2001 From: expani Date: Mon, 2 Sep 2024 15:54:24 +0530 Subject: [PATCH 6/8] Updated comment based on change in method's functionality Signed-off-by: expani --- .../aggregations/bucket/terms/MultiTermsAggregator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index 75d5c0f2b9f24..07edf487af670 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -275,8 +275,8 @@ private void collectZeroDocEntriesIfNeeded(long owningBucketOrd) throws IOExcept @FunctionalInterface interface MultiTermsValuesSourceCollector { /** - * Collect a list values of multi_terms on each doc. - * Each terms could have multi_values, so the result is the cartesian product of each term's values. + * Generates the cartesian product of all fields used in aggregation and + * collects them in buckets using the composite key of their field values. */ void apply(int doc, long owningBucketOrd) throws IOException; From 23ff522c538a58bc587121ab32f7a4ee3444c0ea Mon Sep 17 00:00:00 2001 From: expani Date: Tue, 10 Sep 2024 19:22:44 +0530 Subject: [PATCH 7/8] Added test to cover branches in collectZeroDocEntriesIfRequired Signed-off-by: expani --- .../terms/MultiTermsAggregatorTests.java | 71 ++++++++++++++----- 1 file changed, 55 insertions(+), 16 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java index 2281b86b82256..bb46c5607a4a7 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java @@ -126,6 +126,19 @@ public class MultiTermsAggregatorTests extends AggregatorTestCase { private static final Consumer NONE_DECORATOR = null; + private static final Consumer IP_AND_KEYWORD_DESC_ORDER_VERIFY = h -> { + MatcherAssert.assertThat(h.getBuckets(), hasSize(3)); + MatcherAssert.assertThat(h.getBuckets().get(0).getKey(), contains(equalTo("a"), equalTo("192.168.0.0"))); + MatcherAssert.assertThat(h.getBuckets().get(0).getKeyAsString(), equalTo("a|192.168.0.0")); + MatcherAssert.assertThat(h.getBuckets().get(0).getDocCount(), equalTo(2L)); + MatcherAssert.assertThat(h.getBuckets().get(1).getKey(), contains(equalTo("b"), equalTo("192.168.0.1"))); + MatcherAssert.assertThat(h.getBuckets().get(1).getKeyAsString(), equalTo("b|192.168.0.1")); + MatcherAssert.assertThat(h.getBuckets().get(1).getDocCount(), equalTo(1L)); + MatcherAssert.assertThat(h.getBuckets().get(2).getKey(), contains(equalTo("c"), equalTo("192.168.0.2"))); + MatcherAssert.assertThat(h.getBuckets().get(2).getKeyAsString(), equalTo("c|192.168.0.2")); + MatcherAssert.assertThat(h.getBuckets().get(2).getDocCount(), equalTo(1L)); + }; + @Override protected List getSupportedValuesSourceTypes() { return Collections.unmodifiableList( @@ -672,11 +685,48 @@ public void testDatesFieldFormat() throws IOException { ); } - public void testIpAndKeyword() throws IOException { - testAggregation(new MatchAllDocsQuery(), fieldConfigs(asList(KEYWORD_FIELD, IP_FIELD)), multiTermsAggregationBuilder -> { + public void testIpAndKeywordDefaultDescOrder() throws IOException { + ipAndKeywordTest(NONE_DECORATOR, IP_AND_KEYWORD_DESC_ORDER_VERIFY); + } + + public void testIpAndKeywordWithBucketCountSameAsSize() throws IOException { + ipAndKeywordTest(multiTermsAggregationBuilder -> { multiTermsAggregationBuilder.minDocCount(0); - multiTermsAggregationBuilder.size(100); - }, iw -> { + multiTermsAggregationBuilder.size(3); + multiTermsAggregationBuilder.order(BucketOrder.compound(BucketOrder.count(false))); + }, IP_AND_KEYWORD_DESC_ORDER_VERIFY); + } + + public void testIpAndKeywordWithBucketCountGreaterThanSize() throws IOException { + ipAndKeywordTest(multiTermsAggregationBuilder -> { + multiTermsAggregationBuilder.minDocCount(0); + multiTermsAggregationBuilder.size(10); + multiTermsAggregationBuilder.order(BucketOrder.compound(BucketOrder.count(false))); + }, IP_AND_KEYWORD_DESC_ORDER_VERIFY); + } + + public void testIpAndKeywordAscOrder() throws IOException { + ipAndKeywordTest(multiTermsAggregationBuilder -> { + multiTermsAggregationBuilder.minDocCount(0); + multiTermsAggregationBuilder.size(3); + multiTermsAggregationBuilder.order(BucketOrder.compound(BucketOrder.count(true))); + }, h -> { + MatcherAssert.assertThat(h.getBuckets(), hasSize(3)); + MatcherAssert.assertThat(h.getBuckets().get(0).getKey(), contains(equalTo("b"), equalTo("192.168.0.1"))); + MatcherAssert.assertThat(h.getBuckets().get(0).getKeyAsString(), equalTo("b|192.168.0.1")); + MatcherAssert.assertThat(h.getBuckets().get(0).getDocCount(), equalTo(1L)); + MatcherAssert.assertThat(h.getBuckets().get(1).getKey(), contains(equalTo("c"), equalTo("192.168.0.2"))); + MatcherAssert.assertThat(h.getBuckets().get(1).getKeyAsString(), equalTo("c|192.168.0.2")); + MatcherAssert.assertThat(h.getBuckets().get(1).getDocCount(), equalTo(1L)); + MatcherAssert.assertThat(h.getBuckets().get(2).getKey(), contains(equalTo("a"), equalTo("192.168.0.0"))); + MatcherAssert.assertThat(h.getBuckets().get(2).getKeyAsString(), equalTo("a|192.168.0.0")); + MatcherAssert.assertThat(h.getBuckets().get(2).getDocCount(), equalTo(2L)); + }); + } + + private void ipAndKeywordTest(Consumer builderDecorator, Consumer verify) + throws IOException { + testAggregation(new MatchAllDocsQuery(), fieldConfigs(asList(KEYWORD_FIELD, IP_FIELD)), builderDecorator, iw -> { iw.addDocument( asList( new SortedDocValuesField(KEYWORD_FIELD, new BytesRef("a")), @@ -701,18 +751,7 @@ public void testIpAndKeyword() throws IOException { new SortedDocValuesField(IP_FIELD, new BytesRef(InetAddressPoint.encode(InetAddresses.forString("192.168.0.0")))) ) ); - }, h -> { - MatcherAssert.assertThat(h.getBuckets(), hasSize(3)); - MatcherAssert.assertThat(h.getBuckets().get(0).getKey(), contains(equalTo("a"), equalTo("192.168.0.0"))); - MatcherAssert.assertThat(h.getBuckets().get(0).getKeyAsString(), equalTo("a|192.168.0.0")); - MatcherAssert.assertThat(h.getBuckets().get(0).getDocCount(), equalTo(2L)); - MatcherAssert.assertThat(h.getBuckets().get(1).getKey(), contains(equalTo("b"), equalTo("192.168.0.1"))); - MatcherAssert.assertThat(h.getBuckets().get(1).getKeyAsString(), equalTo("b|192.168.0.1")); - MatcherAssert.assertThat(h.getBuckets().get(1).getDocCount(), equalTo(1L)); - MatcherAssert.assertThat(h.getBuckets().get(2).getKey(), contains(equalTo("c"), equalTo("192.168.0.2"))); - MatcherAssert.assertThat(h.getBuckets().get(2).getKeyAsString(), equalTo("c|192.168.0.2")); - MatcherAssert.assertThat(h.getBuckets().get(2).getDocCount(), equalTo(1L)); - }); + }, verify); } public void testEmpty() throws IOException { From 734e927702a55a1a5203a56ad0659205a703690a Mon Sep 17 00:00:00 2001 From: expani Date: Fri, 4 Oct 2024 19:20:59 +0530 Subject: [PATCH 8/8] Rebased and resolved changelog conflict Signed-off-by: expani --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cbf19fb0d35be..056545f395508 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add changes to block calls in cat shards, indices and segments based on dynamic limit settings ([#15986](https://github.com/opensearch-project/OpenSearch/pull/15986)) - New `phone` & `phone-search` analyzer + tokenizer ([#15915](https://github.com/opensearch-project/OpenSearch/pull/15915)) - Add _list/shards API as paginated alternate to _cat/shards ([#14641](https://github.com/opensearch-project/OpenSearch/pull/14641)) +- Latency and Memory allocation improvements to Multi Term Aggregation queries ([#14993](https://github.com/opensearch-project/OpenSearch/pull/14993)) ### Dependencies - Bump `com.azure:azure-identity` from 1.13.0 to 1.13.2 ([#15578](https://github.com/opensearch-project/OpenSearch/pull/15578))