From 78400b8d05c08a3e443e926648ab98102c9e32a7 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 26 Nov 2024 07:53:40 +0100 Subject: [PATCH 001/139] InternalMultiBucketAggregation.InternalBucket does not implement Writeable anymore (#117310) This allows making some Bucket implementations leaner, in particular the terms and multi-terms aggregations. --- .../adjacency/InternalAdjacencyMatrix.java | 2 +- .../bucket/timeseries/InternalTimeSeries.java | 2 +- .../InternalMultiBucketAggregation.java | 6 ++- .../bucket/composite/InternalComposite.java | 4 +- .../bucket/filter/InternalFilters.java | 2 +- .../bucket/geogrid/InternalGeoGridBucket.java | 2 +- .../histogram/AbstractHistogramBucket.java | 2 +- .../bucket/prefix/InternalIpPrefix.java | 2 +- .../bucket/range/InternalBinaryRange.java | 2 +- .../bucket/range/InternalRange.java | 2 +- .../bucket/terms/AbstractInternalTerms.java | 48 ++++++++++--------- .../bucket/terms/DoubleTerms.java | 6 +-- .../GlobalOrdinalsStringTermsAggregator.java | 1 - .../bucket/terms/InternalMappedTerms.java | 10 +++- .../bucket/terms/InternalRareTerms.java | 6 ++- .../terms/InternalSignificantTerms.java | 2 +- .../bucket/terms/InternalTerms.java | 37 +++----------- .../aggregations/bucket/terms/LongTerms.java | 6 +-- .../bucket/terms/StringTerms.java | 6 +-- .../bucket/terms/UnmappedTerms.java | 5 ++ .../pipeline/BucketHelpersTests.java | 9 ---- .../multiterms/InternalMultiTerms.java | 39 +++++++-------- .../InternalCategorizationAggregation.java | 2 +- .../aggs/changepoint/ChangePointBucket.java | 2 +- 24 files changed, 94 insertions(+), 111 deletions(-) diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/InternalAdjacencyMatrix.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/InternalAdjacencyMatrix.java index 824f009bc7d8e..999f790ee8117 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/InternalAdjacencyMatrix.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/adjacency/InternalAdjacencyMatrix.java @@ -33,7 +33,7 @@ public class InternalAdjacencyMatrix extends InternalMultiBucketAggregation implements AdjacencyMatrix { - public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements AdjacencyMatrix.Bucket { + public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucketWritable implements AdjacencyMatrix.Bucket { private final String key; private final long docCount; diff --git a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java index d7590f2126325..c4669b1c25224 100644 --- a/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java +++ b/modules/aggregations/src/main/java/org/elasticsearch/aggregations/bucket/timeseries/InternalTimeSeries.java @@ -34,7 +34,7 @@ public class InternalTimeSeries extends InternalMultiBucketAggregation reducePipelineBuckets(AggregationReduceContext reduceContext, Pi return reducedBuckets; } - public abstract static class InternalBucket implements Bucket, Writeable { + public abstract static class InternalBucket implements Bucket { public Object getProperty(String containingAggName, List
path) { return aggregation.getProperty(path.subList(1, path.size())); } } + + /** An {@link InternalBucket} that implements the {@link Writeable} interface. Most implementations might want + * to use this one except when specific logic is needed to write into the stream. */ + public abstract static class InternalBucketWritable extends InternalBucket implements Writeable {} } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java index faa953e77edd8..1492e97e6a5a5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java @@ -320,7 +320,9 @@ public int hashCode() { return Objects.hash(super.hashCode(), size, buckets, afterKey, Arrays.hashCode(reverseMuls), Arrays.hashCode(missingOrders)); } - public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements CompositeAggregation.Bucket { + public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucketWritable + implements + CompositeAggregation.Bucket { private final CompositeKey key; private final long docCount; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/InternalFilters.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/InternalFilters.java index c05759582346a..19cd0df9c7122 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/InternalFilters.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/InternalFilters.java @@ -30,7 +30,7 @@ import java.util.Objects; public class InternalFilters extends InternalMultiBucketAggregation implements Filters { - public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucket implements Filters.Bucket { + public static class InternalBucket extends InternalMultiBucketAggregation.InternalBucketWritable implements Filters.Bucket { private final String key; private long docCount; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java index 9e3c96da2e70b..60de4c3974c92 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java @@ -19,7 +19,7 @@ import java.io.IOException; import java.util.Objects; -public abstract class InternalGeoGridBucket extends InternalMultiBucketAggregation.InternalBucket +public abstract class InternalGeoGridBucket extends InternalMultiBucketAggregation.InternalBucketWritable implements GeoGrid.Bucket, Comparable { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBucket.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBucket.java index 16a83ed04e524..7806d8cd8efe2 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBucket.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/AbstractHistogramBucket.java @@ -16,7 +16,7 @@ /** * A bucket in the histogram
where documents fall in */ -public abstract class AbstractHistogramBucket extends InternalMultiBucketAggregation.InternalBucket { +public abstract class AbstractHistogramBucket extends InternalMultiBucketAggregation.InternalBucketWritable { protected final long docCount; protected final InternalAggregations aggregations; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/InternalIpPrefix.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/InternalIpPrefix.java index 5b456b3246b64..36a8fccc77e99 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/InternalIpPrefix.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/prefix/InternalIpPrefix.java @@ -33,7 +33,7 @@ public class InternalIpPrefix extends InternalMultiBucketAggregation { - public static class Bucket extends InternalMultiBucketAggregation.InternalBucket + public static class Bucket extends InternalMultiBucketAggregation.InternalBucketWritable implements IpPrefix.Bucket, KeyComparable { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java index 9571dfebc6069..34a2ebea88440 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/range/InternalBinaryRange.java @@ -36,7 +36,7 @@ public final class InternalBinaryRange extends InternalMultiBucketAggregation buckets, AggregationReduceContext context) { @@ -104,7 +104,7 @@ private B reduceBucket(List buckets, AggregationReduceContext context) { for (B bucket : buckets) { docCount += bucket.getDocCount(); if (docCountError != -1) { - if (bucket.getShowDocCountError() == false || bucket.getDocCountError() == -1) { + if (getShowDocCountError() == false || bucket.getDocCountError() == -1) { docCountError = -1; } else { docCountError += bucket.getDocCountError(); @@ -257,6 +257,7 @@ public void accept(InternalAggregation aggregation) { } otherDocCount[0] += terms.getSumOfOtherDocCounts(); final long thisAggDocCountError = getDocCountError(terms); + setDocCountError(thisAggDocCountError); if (sumDocCountError != -1) { if (thisAggDocCountError == -1) { sumDocCountError = -1; @@ -264,16 +265,17 @@ public void accept(InternalAggregation aggregation) { sumDocCountError += thisAggDocCountError; } } - setDocCountError(thisAggDocCountError); - for (B bucket : terms.getBuckets()) { - // If there is already a doc count error for this bucket - // subtract this aggs doc count error from it to make the - // new value for the bucket. This then means that when the - // final error for the bucket is calculated below we account - // for the existing error calculated in a previous reduce. - // Note that if the error is unbounded (-1) this will be fixed - // later in this method. - bucket.updateDocCountError(-thisAggDocCountError); + if (getShowDocCountError()) { + for (B bucket : terms.getBuckets()) { + // If there is already a doc count error for this bucket + // subtract this aggs doc count error from it to make the + // new value for the bucket. This then means that when the + // final error for the bucket is calculated below we account + // for the existing error calculated in a previous reduce. + // Note that if the error is unbounded (-1) this will be fixed + // later in this method. 
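+ // For example, with two aggs whose errors are 3 and 5, a bucket seen only in the first agg has 3 subtracted here and 3 + 5 added back below, so it ends up charged the full error (5) of the agg it was missing from.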
+ bucket.updateDocCountError(-thisAggDocCountError); + } } if (terms.getBuckets().isEmpty() == false) { bucketsList.add(terms.getBuckets()); @@ -319,17 +321,17 @@ public InternalAggregation get() { result.add(bucket.reduced(AbstractInternalTerms.this::reduceBucket, reduceContext)); }); } - for (B r : result) { - if (sumDocCountError == -1) { - r.setDocCountError(-1); - } else { - r.updateDocCountError(sumDocCountError); + if (getShowDocCountError()) { + for (B r : result) { + if (sumDocCountError == -1) { + r.setDocCountError(-1); + } else { + r.updateDocCountError(sumDocCountError); + } } } - long docCountError; - if (sumDocCountError == -1) { - docCountError = -1; - } else { + long docCountError = -1; + if (sumDocCountError != -1) { docCountError = size == 1 ? 0 : sumDocCountError; } return create(name, result, reduceContext.isFinalReduce() ? getOrder() : thisReduceOrder, docCountError, otherDocCount[0]); @@ -349,7 +351,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { b -> createBucket( samplingContext.scaleUp(b.getDocCount()), InternalAggregations.finalizeSampling(b.getAggregations(), samplingContext), - b.getShowDocCountError() ? samplingContext.scaleUp(b.getDocCountError()) : 0, + getShowDocCountError() ? samplingContext.scaleUp(b.getDocCountError()) : 0, b ) ) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java index 9789a9edc58f7..5c28c25de6e87 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTerms.java @@ -164,8 +164,8 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) prototype.term, prototype.docCount, aggregations, - prototype.showDocCountError, - prototype.docCountError, + showTermDocCountError, + prototype.getDocCountError(), prototype.format ); } @@ -216,6 +216,6 @@ public void close() { @Override protected Bucket createBucket(long docCount, InternalAggregations aggs, long docCountError, DoubleTerms.Bucket prototype) { - return new Bucket(prototype.term, docCount, aggs, prototype.showDocCountError, docCountError, format); + return new Bucket(prototype.term, docCount, aggs, showTermDocCountError, docCountError, format); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index db9da6ed67207..5a79155d1d4f5 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -880,7 +880,6 @@ StringTerms.Bucket convertTempBucketToRealBucket(OrdBucket temp, GlobalOrdLookup BytesRef term = BytesRef.deepCopyOf(lookupGlobalOrd.apply(temp.globalOrd)); StringTerms.Bucket result = new StringTerms.Bucket(term, temp.docCount, null, showTermDocCountError, 0, format); result.bucketOrd = temp.bucketOrd; - result.docCountError = 0; return result; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java index 5b9403840dfff..d7087a121b4f4 
100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedTerms.java @@ -87,7 +87,10 @@ protected final void writeTermTypeInfoTo(StreamOutput out) throws IOException { writeSize(shardSize, out); out.writeBoolean(showTermDocCountError); out.writeVLong(otherDocCount); - out.writeCollection(buckets); + out.writeVInt(buckets.size()); + for (var bucket : buckets) { + bucket.writeTo(out, showTermDocCountError); + } } @Override @@ -95,6 +98,11 @@ protected void setDocCountError(long docCountError) { this.docCountError = docCountError; } + @Override + protected boolean getShowDocCountError() { + return showTermDocCountError; + } + @Override protected int getShardSize() { return shardSize; diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java index 64cebee880141..7859319f4dd0d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java @@ -10,6 +10,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.BucketOrder; @@ -29,10 +30,11 @@ public abstract class InternalRareTerms, B ext implements RareTerms { - public abstract static class Bucket> extends InternalMultiBucketAggregation.InternalBucket + public abstract static class Bucket> extends InternalMultiBucketAggregation.InternalBucketWritable implements RareTerms.Bucket, - KeyComparable { + KeyComparable, + Writeable { /** * Reads a bucket. Should be a constructor reference. 
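* (for example, a reference to the bucket's {@code (StreamInput, DocValueFormat)} stream constructor)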
*/ diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java index 3f579947248bb..6c0eb465d1f80 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java @@ -45,7 +45,7 @@ public abstract class InternalSignificantTerms> extends InternalMultiBucketAggregation.InternalBucket + public abstract static class Bucket> extends InternalMultiBucketAggregation.InternalBucketWritable implements SignificantTerms.Bucket { /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java index b94b1f5ea40b1..739f0b923eaab 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalTerms.java @@ -41,9 +41,8 @@ public interface Reader> { long bucketOrd; protected long docCount; - protected long docCountError; + private long docCountError; protected InternalAggregations aggregations; - protected final boolean showDocCountError; protected final DocValueFormat format; protected Bucket( @@ -53,29 +52,23 @@ protected Bucket( long docCountError, DocValueFormat formatter ) { - this.showDocCountError = showDocCountError; this.format = formatter; this.docCount = docCount; this.aggregations = aggregations; - this.docCountError = docCountError; + this.docCountError = showDocCountError ? docCountError : -1; } /** * Read from a stream. */ protected Bucket(StreamInput in, DocValueFormat formatter, boolean showDocCountError) throws IOException { - this.showDocCountError = showDocCountError; this.format = formatter; docCount = in.readVLong(); - docCountError = -1; - if (showDocCountError) { - docCountError = in.readLong(); - } + docCountError = showDocCountError ? in.readLong() : -1; aggregations = InternalAggregations.readFrom(in); } - @Override - public final void writeTo(StreamOutput out) throws IOException { + final void writeTo(StreamOutput out, boolean showDocCountError) throws IOException { out.writeVLong(getDocCount()); if (showDocCountError) { out.writeLong(docCountError); @@ -105,9 +98,6 @@ public void setBucketOrd(long bucketOrd) { @Override public long getDocCountError() { - if (showDocCountError == false) { - throw new IllegalStateException("show_terms_doc_count_error is false"); - } return docCountError; } @@ -121,11 +111,6 @@ protected void updateDocCountError(long docCountErrorDiff) { this.docCountError += docCountErrorDiff; } - @Override - protected boolean getShowDocCountError() { - return showDocCountError; - } - @Override public InternalAggregations getAggregations() { return aggregations; @@ -155,23 +140,15 @@ public boolean equals(Object obj) { return false; } Bucket that = (Bucket) obj; - if (showDocCountError && docCountError != that.docCountError) { - /* - * docCountError doesn't matter if not showing it and - * serialization sets it to -1 no matter what it was - * before. 
- */ - return false; - } - return Objects.equals(docCount, that.docCount) - && Objects.equals(showDocCountError, that.showDocCountError) + return Objects.equals(docCountError, that.docCountError) + && Objects.equals(docCount, that.docCount) && Objects.equals(format, that.format) && Objects.equals(aggregations, that.aggregations); } @Override public int hashCode() { - return Objects.hash(getClass(), docCount, format, showDocCountError, showDocCountError ? docCountError : -1, aggregations); + return Objects.hash(getClass(), docCount, format, docCountError, aggregations); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java index f536b7f958ca2..6c2444379c8eb 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTerms.java @@ -178,8 +178,8 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) prototype.term, prototype.docCount, aggregations, - prototype.showDocCountError, - prototype.docCountError, + showTermDocCountError, + prototype.getDocCountError(), prototype.format ); } @@ -260,7 +260,7 @@ public InternalAggregation get() { @Override protected Bucket createBucket(long docCount, InternalAggregations aggs, long docCountError, LongTerms.Bucket prototype) { - return new Bucket(prototype.term, docCount, aggs, prototype.showDocCountError, docCountError, format); + return new Bucket(prototype.term, docCount, aggs, showTermDocCountError, docCountError, format); } /** diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java index 5faf6e0aaaedf..2370827230c47 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTerms.java @@ -184,15 +184,15 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype) prototype.termBytes, prototype.docCount, aggregations, - prototype.showDocCountError, - prototype.docCountError, + showTermDocCountError, + prototype.getDocCountError(), prototype.format ); } @Override protected Bucket createBucket(long docCount, InternalAggregations aggs, long docCountError, StringTerms.Bucket prototype) { - return new Bucket(prototype.termBytes, docCount, aggs, prototype.showDocCountError, docCountError, format); + return new Bucket(prototype.termBytes, docCount, aggs, showTermDocCountError, docCountError, format); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java index 8047d1f06990f..e82a2b7fe9235 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedTerms.java @@ -114,6 +114,11 @@ public final XContentBuilder doXContentBody(XContentBuilder builder, Params para return doXContentCommon(builder, params, false, 0L, 0, Collections.emptyList()); } + @Override + protected boolean getShowDocCountError() { + return false; + } + @Override protected void setDocCountError(long docCountError) {} diff --git 
a/server/src/test/java/org/elasticsearch/search/aggregations/pipeline/BucketHelpersTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/pipeline/BucketHelpersTests.java index b2f79c02baf8d..626adc9a7c41c 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/pipeline/BucketHelpersTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/pipeline/BucketHelpersTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.search.aggregations.pipeline; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; @@ -56,10 +55,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws }; InternalMultiBucketAggregation.InternalBucket bucket = new InternalMultiBucketAggregation.InternalBucket() { - @Override - public void writeTo(StreamOutput out) throws IOException { - - } @Override public Object getKey() { @@ -131,10 +126,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws }; InternalMultiBucketAggregation.InternalBucket bucket = new InternalMultiBucketAggregation.InternalBucket() { - @Override - public void writeTo(StreamOutput out) throws IOException { - - } @Override public Object getKey() { diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java index c6bfb5b1b2778..0d42a2856a10e 100644 --- a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java +++ b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/InternalMultiTerms.java @@ -42,8 +42,7 @@ public static class Bucket extends AbstractInternalTerms.AbstractTermsBucket formats; protected List terms; protected List keyConverters; @@ -60,8 +59,7 @@ public Bucket( this.terms = terms; this.docCount = docCount; this.aggregations = aggregations; - this.showDocCountError = showDocCountError; - this.docCountError = docCountError; + this.docCountError = showDocCountError ? 
docCountError : -1; this.formats = formats; this.keyConverters = keyConverters; } @@ -71,7 +69,6 @@ protected Bucket(StreamInput in, List formats, List formats, List { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/changepoint/ChangePointBucket.java index 39bdb69d4da40..aed0c40043cae 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/changepoint/ChangePointBucket.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/changepoint/ChangePointBucket.java @@ -18,7 +18,7 @@ import java.io.IOException; import java.util.Objects; -public class ChangePointBucket extends InternalMultiBucketAggregation.InternalBucket implements ToXContent { +public class ChangePointBucket extends InternalMultiBucketAggregation.InternalBucketWritable implements ToXContent { private final Object key; private final long docCount; private final InternalAggregations aggregations; From ed33bea30cd898936e43e24a7927290409f30b18 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 26 Nov 2024 08:02:12 +0100 Subject: [PATCH 002/139] Adjust SyntheticSourceLicenseService (#116647) Allow gold and platinum licenses to use synthetic source for a limited time. If the start time of a license is before the cutoff date, then gold and platinum licenses will not fall back to stored source when synthetic source is used. Co-authored-by: Nikolaj Volgushev --- .../xpack/logsdb/LogsDBPlugin.java | 13 +- .../SyntheticSourceIndexSettingsProvider.java | 32 +++- .../logsdb/SyntheticSourceLicenseService.java | 83 ++++++++- .../logsdb/LegacyLicenceIntegrationTests.java | 146 +++++++++++++++ ...dexSettingsProviderLegacyLicenseTests.java | 129 +++++++++++++ ...heticSourceIndexSettingsProviderTests.java | 13 +- .../SyntheticSourceLicenseServiceTests.java | 173 ++++++++++++++++-- 7 files changed, 562 insertions(+), 27 deletions(-) create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/LegacyLicenceIntegrationTests.java create mode 100644 x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java index 04d12fd51bae7..904b00e6d0450 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBPlugin.java @@ -13,6 +13,8 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettingProvider; +import org.elasticsearch.license.LicenseService; +import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.plugins.ActionPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xpack.core.XPackPlugin; @@ -46,7 +48,8 @@ public LogsDBPlugin(Settings settings) { @Override public Collection createComponents(PluginServices services) { - licenseService.setLicenseState(XPackPlugin.getSharedLicenseState()); + licenseService.setLicenseService(getLicenseService()); + licenseService.setLicenseState(getLicenseState()); var clusterSettings = services.clusterService().getClusterSettings(); // The `cluster.logsdb.enabled` setting is registered by this plugin, but its value may be updated by other
plugins // before this plugin registers its settings update consumer below. This means we might miss updates that occurred earlier. @@ -88,4 +91,12 @@ public List> getSettings() { actions.add(new ActionPlugin.ActionHandler<>(XPackInfoFeatureAction.LOGSDB, LogsDBInfoTransportAction.class)); return actions; } + + protected XPackLicenseState getLicenseState() { + return XPackPlugin.getSharedLicenseState(); + } + + protected LicenseService getLicenseService() { + return XPackPlugin.getSharedLicenseService(); + } } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java index 1f38ecda19515..462bad4b19551 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProvider.java @@ -81,8 +81,13 @@ public Settings getAdditionalIndexSettings( // This index name is used when validating component and index templates, we should skip this check in that case. // (See MetadataIndexTemplateService#validateIndexTemplateV2(...) method) boolean isTemplateValidation = "validate-index-name".equals(indexName); + boolean legacyLicensedUsageOfSyntheticSourceAllowed = isLegacyLicensedUsageOfSyntheticSourceAllowed( + templateIndexMode, + indexName, + dataStreamName + ); if (newIndexHasSyntheticSourceUsage(indexName, templateIndexMode, indexTemplateAndCreateRequestSettings, combinedTemplateMappings) - && syntheticSourceLicenseService.fallbackToStoredSource(isTemplateValidation)) { + && syntheticSourceLicenseService.fallbackToStoredSource(isTemplateValidation, legacyLicensedUsageOfSyntheticSourceAllowed)) { LOGGER.debug("creation of index [{}] with synthetic source without it being allowed", indexName); return Settings.builder() .put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), SourceFieldMapper.Mode.STORED.toString()) @@ -167,4 +172,29 @@ private IndexMetadata buildIndexMetadataForMapperService( tmpIndexMetadata.settings(finalResolvedSettings); return tmpIndexMetadata.build(); } + + /** + * The GA-ed use cases in which synthetic source usage is allowed with gold or platinum license. 
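+ * These cover time series (TSDB) indices, the profiling indices and data streams, and the metrics-apm. data streams matched below.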
+ */ + boolean isLegacyLicensedUsageOfSyntheticSourceAllowed(IndexMode templateIndexMode, String indexName, String dataStreamName) { + if (templateIndexMode == IndexMode.TIME_SERIES) { + return true; + } + + // To allow the following patterns: profiling-metrics and profiling-events + if (dataStreamName != null && dataStreamName.startsWith("profiling-")) { + return true; + } + // To allow the following patterns: .profiling-sq-executables, .profiling-sq-leafframes and .profiling-stacktraces + if (indexName.startsWith(".profiling-")) { + return true; + } + // To allow the following patterns: metrics-apm.transaction.*, metrics-apm.service_transaction.*, metrics-apm.service_summary.*, + // metrics-apm.service_destination.*, "metrics-apm.internal-* and metrics-apm.app.* + if (dataStreamName != null && dataStreamName.startsWith("metrics-apm.")) { + return true; + } + + return false; + } } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java index 55d4bfe05abe3..1b3513f15a86a 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java @@ -7,18 +7,30 @@ package org.elasticsearch.xpack.logsdb; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseService; import org.elasticsearch.license.LicensedFeature; import org.elasticsearch.license.XPackLicenseState; +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneOffset; + /** * Determines based on license and fallback setting whether synthetic source usages should fallback to stored source. */ final class SyntheticSourceLicenseService { - private static final String MAPPINGS_FEATURE_FAMILY = "mappings"; + static final String MAPPINGS_FEATURE_FAMILY = "mappings"; + // You can only override this property if you received explicit approval from Elastic. + private static final String CUTOFF_DATE_SYS_PROP_NAME = + "es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override"; + private static final Logger LOGGER = LogManager.getLogger(SyntheticSourceLicenseService.class); + static final long DEFAULT_CUTOFF_DATE = LocalDateTime.of(2024, 12, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); /** * A setting that determines whether source mode should always be stored source. Regardless of licence. 
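To illustrate the decision matrix this service now implements, a minimal sketch of a caller (hypothetical usage; the actual wiring is shown in SyntheticSourceIndexSettingsProvider above): an enterprise license always allows synthetic source; a gold or platinum license allows it only for the legacy GA-ed use cases and only while the license start date is on or before the cutoff date; everything else falls back to stored source.
    boolean fallback = licenseService.fallbackToStoredSource(
        false, // not template validation, so feature usage is tracked
        isLegacyLicensedUsageOfSyntheticSourceAllowed(templateIndexMode, indexName, dataStreamName)
    );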
@@ -30,31 +42,71 @@ final class SyntheticSourceLicenseService { Setting.Property.Dynamic ); - private static final LicensedFeature.Momentary SYNTHETIC_SOURCE_FEATURE = LicensedFeature.momentary( + static final LicensedFeature.Momentary SYNTHETIC_SOURCE_FEATURE = LicensedFeature.momentary( MAPPINGS_FEATURE_FAMILY, "synthetic-source", License.OperationMode.ENTERPRISE ); + static final LicensedFeature.Momentary SYNTHETIC_SOURCE_FEATURE_LEGACY = LicensedFeature.momentary( + MAPPINGS_FEATURE_FAMILY, + "synthetic-source-legacy", + License.OperationMode.GOLD + ); + + private final long cutoffDate; + private LicenseService licenseService; private XPackLicenseState licenseState; private volatile boolean syntheticSourceFallback; SyntheticSourceLicenseService(Settings settings) { - syntheticSourceFallback = FALLBACK_SETTING.get(settings); + this(settings, System.getProperty(CUTOFF_DATE_SYS_PROP_NAME)); + } + + SyntheticSourceLicenseService(Settings settings, String cutoffDate) { + this.syntheticSourceFallback = FALLBACK_SETTING.get(settings); + this.cutoffDate = getCutoffDate(cutoffDate); } /** * @return whether synthetic source mode should fallback to stored source. */ - public boolean fallbackToStoredSource(boolean isTemplateValidation) { + public boolean fallbackToStoredSource(boolean isTemplateValidation, boolean legacyLicensedUsageOfSyntheticSourceAllowed) { if (syntheticSourceFallback) { return true; } + var licenseStateSnapshot = licenseState.copyCurrentLicenseState(); + if (checkFeature(SYNTHETIC_SOURCE_FEATURE, licenseStateSnapshot, isTemplateValidation)) { + return false; + } + + var license = licenseService.getLicense(); + if (license == null) { + return true; + } + + boolean beforeCutoffDate = license.startDate() <= cutoffDate; + if (legacyLicensedUsageOfSyntheticSourceAllowed + && beforeCutoffDate + && checkFeature(SYNTHETIC_SOURCE_FEATURE_LEGACY, licenseStateSnapshot, isTemplateValidation)) { + // platinum license will allow synthetic source with gold legacy licensed feature too. 
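+ // Note that the cutoff comparison above uses the license start date rather than the current time, so a qualifying legacy license issued before the cutoff date keeps allowing synthetic source after that date passes.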
+ LOGGER.debug("legacy license [{}] is allowed to use synthetic source", licenseStateSnapshot.getOperationMode().description()); + return false; + } + + return true; + } + + private static boolean checkFeature( + LicensedFeature.Momentary licensedFeature, + XPackLicenseState licenseStateSnapshot, + boolean isTemplateValidation + ) { if (isTemplateValidation) { - return SYNTHETIC_SOURCE_FEATURE.checkWithoutTracking(licenseState) == false; + return licensedFeature.checkWithoutTracking(licenseStateSnapshot); } else { - return SYNTHETIC_SOURCE_FEATURE.check(licenseState) == false; + return licensedFeature.check(licenseStateSnapshot); } } @@ -62,7 +114,26 @@ void setSyntheticSourceFallback(boolean syntheticSourceFallback) { this.syntheticSourceFallback = syntheticSourceFallback; } + void setLicenseService(LicenseService licenseService) { + this.licenseService = licenseService; + } + void setLicenseState(XPackLicenseState licenseState) { this.licenseState = licenseState; } + + private static long getCutoffDate(String cutoffDateAsString) { + if (cutoffDateAsString != null) { + long cutoffDate = LocalDateTime.parse(cutoffDateAsString).toInstant(ZoneOffset.UTC).toEpochMilli(); + LOGGER.warn("Configuring [{}] is only allowed with explicit approval from Elastic.", CUTOFF_DATE_SYS_PROP_NAME); + LOGGER.info( + "Configuring [{}] to [{}]", + CUTOFF_DATE_SYS_PROP_NAME, + LocalDateTime.ofInstant(Instant.ofEpochSecond(cutoffDate), ZoneOffset.UTC) + ); + return cutoffDate; + } else { + return DEFAULT_CUTOFF_DATE; + } + } } diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/LegacyLicenceIntegrationTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/LegacyLicenceIntegrationTests.java new file mode 100644 index 0000000000000..890bc464a2579 --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/LegacyLicenceIntegrationTests.java @@ -0,0 +1,146 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb; + +import org.elasticsearch.action.admin.indices.settings.get.GetSettingsRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.mapper.SourceFieldMapper; +import org.elasticsearch.license.AbstractLicensesIntegrationTestCase; +import org.elasticsearch.license.GetFeatureUsageRequest; +import org.elasticsearch.license.GetFeatureUsageResponse; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseService; +import org.elasticsearch.license.LicensedFeature; +import org.elasticsearch.license.TransportGetFeatureUsageAction; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin; +import org.hamcrest.Matcher; +import org.junit.Before; + +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.Collection; +import java.util.List; + +import static org.elasticsearch.test.ESIntegTestCase.Scope.TEST; +import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseServiceTests.createEnterpriseLicense; +import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseServiceTests.createGoldOrPlatinumLicense; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; + +@ESIntegTestCase.ClusterScope(scope = TEST, numDataNodes = 1, numClientNodes = 0, supportsDedicatedMasters = false) +public class LegacyLicenceIntegrationTests extends AbstractLicensesIntegrationTestCase { + + @Override + protected Collection> nodePlugins() { + return List.of(P.class); + } + + @Before + public void setup() throws Exception { + wipeAllLicenses(); + ensureGreen(); + License license = createGoldOrPlatinumLicense(); + putLicense(license); + ensureGreen(); + } + + public void testSyntheticSourceUsageDisallowed() { + createIndexWithSyntheticSourceAndAssertExpectedType("test", "STORED"); + + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY, nullValue()); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE, nullValue()); + } + + public void testSyntheticSourceUsageWithLegacyLicense() { + createIndexWithSyntheticSourceAndAssertExpectedType(".profiling-stacktraces", "synthetic"); + + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY, not(nullValue())); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE, nullValue()); + } + + public void testSyntheticSourceUsageWithLegacyLicensePastCutoff() throws Exception { + long startPastCutoff = LocalDateTime.of(2025, 11, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + putLicense(createGoldOrPlatinumLicense(startPastCutoff)); + ensureGreen(); + + createIndexWithSyntheticSourceAndAssertExpectedType(".profiling-stacktraces", "STORED"); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY, nullValue()); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE, nullValue()); + } + + public void testSyntheticSourceUsageWithEnterpriseLicensePastCutoff() throws Exception { + long startPastCutoff = LocalDateTime.of(2025, 11, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + putLicense(createEnterpriseLicense(startPastCutoff)); + ensureGreen(); + + createIndexWithSyntheticSourceAndAssertExpectedType(".profiling-traces", "synthetic"); + // 
also supports non-exceptional indices + createIndexWithSyntheticSourceAndAssertExpectedType("test", "synthetic"); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY, nullValue()); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE, not(nullValue())); + } + + public void testSyntheticSourceUsageTracksBothLegacyAndRegularFeature() throws Exception { + createIndexWithSyntheticSourceAndAssertExpectedType(".profiling-traces", "synthetic"); + + putLicense(createEnterpriseLicense()); + ensureGreen(); + + createIndexWithSyntheticSourceAndAssertExpectedType(".profiling-traces-v2", "synthetic"); + + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY, not(nullValue())); + assertFeatureUsage(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE, not(nullValue())); + } + + private void createIndexWithSyntheticSourceAndAssertExpectedType(String indexName, String expectedType) { + var settings = Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build(); + createIndex(indexName, settings); + var response = admin().indices().getSettings(new GetSettingsRequest().indices(indexName)).actionGet(); + assertThat( + response.getIndexToSettings().get(indexName).get(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey()), + equalTo(expectedType) + ); + } + + private List getFeatureUsageInfo() { + return client().execute(TransportGetFeatureUsageAction.TYPE, new GetFeatureUsageRequest()).actionGet().getFeatures(); + } + + private void assertFeatureUsage(LicensedFeature.Momentary syntheticSourceFeature, Matcher matcher) { + GetFeatureUsageResponse.FeatureUsageInfo featureUsage = getFeatureUsageInfo().stream() + .filter(f -> f.getFamily().equals(SyntheticSourceLicenseService.MAPPINGS_FEATURE_FAMILY)) + .filter(f -> f.getName().equals(syntheticSourceFeature.getName())) + .findAny() + .orElse(null); + assertThat(featureUsage, matcher); + } + + public static class P extends LocalStateCompositeXPackPlugin { + + public P(final Settings settings, final Path configPath) { + super(settings, configPath); + plugins.add(new LogsDBPlugin(settings) { + @Override + protected XPackLicenseState getLicenseState() { + return P.this.getLicenseState(); + } + + @Override + protected LicenseService getLicenseService() { + return P.this.getLicenseService(); + } + }); + } + + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java new file mode 100644 index 0000000000000..939d7d892a48d --- /dev/null +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb; + +import org.elasticsearch.cluster.metadata.DataStream; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.MapperTestUtils; +import org.elasticsearch.index.mapper.SourceFieldMapper; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseService; +import org.elasticsearch.license.XPackLicenseState; +import org.elasticsearch.license.internal.XPackLicenseStatus; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.List; + +import static org.elasticsearch.xpack.logsdb.SyntheticSourceIndexSettingsProviderTests.getLogsdbIndexModeSettingsProvider; +import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseServiceTests.createGoldOrPlatinumLicense; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class SyntheticSourceIndexSettingsProviderLegacyLicenseTests extends ESTestCase { + + private SyntheticSourceIndexSettingsProvider provider; + + @Before + public void setup() throws Exception { + long time = LocalDateTime.of(2024, 11, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + License license = createGoldOrPlatinumLicense(); + var licenseState = new XPackLicenseState(() -> time, new XPackLicenseStatus(license.operationMode(), true, null)); + + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + var mockLicenseService = mock(LicenseService.class); + when(mockLicenseService.getLicense()).thenReturn(license); + + SyntheticSourceLicenseService syntheticSourceLicenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + syntheticSourceLicenseService.setLicenseState(licenseState); + syntheticSourceLicenseService.setLicenseService(mockLicenseService); + + provider = new SyntheticSourceIndexSettingsProvider( + syntheticSourceLicenseService, + im -> MapperTestUtils.newMapperService(xContentRegistry(), createTempDir(), im.getSettings(), im.getIndex().getName()), + getLogsdbIndexModeSettingsProvider(false), + IndexVersion::current + ); + } + + public void testGetAdditionalIndexSettingsDefault() { + Settings settings = Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "SYNTHETIC").build(); + String dataStreamName = "metrics-my-app"; + String indexName = DataStream.getDefaultBackingIndexName(dataStreamName, 0); + var result = provider.getAdditionalIndexSettings(indexName, dataStreamName, null, null, null, settings, List.of()); + assertThat(result.size(), equalTo(1)); + assertThat(result.get(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey()), equalTo("STORED")); + } + + public void testGetAdditionalIndexSettingsApm() throws IOException { + Settings settings = Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "SYNTHETIC").build(); + String dataStreamName = "metrics-apm.app.test"; + String indexName = DataStream.getDefaultBackingIndexName(dataStreamName, 0); + var result = provider.getAdditionalIndexSettings(indexName, dataStreamName, null, null, null, settings, List.of()); + assertThat(result.size(), equalTo(0)); + } + + public void testGetAdditionalIndexSettingsProfiling() throws IOException { + Settings settings = 
Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "SYNTHETIC").build(); + for (String dataStreamName : new String[] { "profiling-metrics", "profiling-events" }) { + String indexName = DataStream.getDefaultBackingIndexName(dataStreamName, 0); + var result = provider.getAdditionalIndexSettings(indexName, dataStreamName, null, null, null, settings, List.of()); + assertThat(result.size(), equalTo(0)); + } + + for (String indexName : new String[] { ".profiling-sq-executables", ".profiling-sq-leafframes", ".profiling-stacktraces" }) { + var result = provider.getAdditionalIndexSettings(indexName, null, null, null, null, settings, List.of()); + assertThat(result.size(), equalTo(0)); + } + } + + public void testGetAdditionalIndexSettingsTsdb() throws IOException { + Settings settings = Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "SYNTHETIC").build(); + String dataStreamName = "metrics-my-app"; + String indexName = DataStream.getDefaultBackingIndexName(dataStreamName, 0); + var result = provider.getAdditionalIndexSettings(indexName, dataStreamName, IndexMode.TIME_SERIES, null, null, settings, List.of()); + assertThat(result.size(), equalTo(0)); + } + + public void testGetAdditionalIndexSettingsTsdbAfterCutoffDate() throws Exception { + long start = LocalDateTime.of(2024, 12, 20, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + License license = createGoldOrPlatinumLicense(start); + long time = LocalDateTime.of(2024, 12, 31, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + var licenseState = new XPackLicenseState(() -> time, new XPackLicenseStatus(license.operationMode(), true, null)); + + var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + licenseService.setLicenseState(licenseState); + var mockLicenseService = mock(LicenseService.class); + when(mockLicenseService.getLicense()).thenReturn(license); + + SyntheticSourceLicenseService syntheticSourceLicenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + syntheticSourceLicenseService.setLicenseState(licenseState); + syntheticSourceLicenseService.setLicenseService(mockLicenseService); + + provider = new SyntheticSourceIndexSettingsProvider( + syntheticSourceLicenseService, + im -> MapperTestUtils.newMapperService(xContentRegistry(), createTempDir(), im.getSettings(), im.getIndex().getName()), + getLogsdbIndexModeSettingsProvider(false), + IndexVersion::current + ); + + Settings settings = Settings.builder().put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "SYNTHETIC").build(); + String dataStreamName = "metrics-my-app"; + String indexName = DataStream.getDefaultBackingIndexName(dataStreamName, 0); + var result = provider.getAdditionalIndexSettings(indexName, dataStreamName, IndexMode.TIME_SERIES, null, null, settings, List.of()); + assertThat(result.size(), equalTo(1)); + assertThat(result.get(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey()), equalTo("STORED")); + } +} diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderTests.java index d6cdb9f761b31..df1fb8f2d958c 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderTests.java @@ -18,6 +18,8 @@ import 
org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.MapperTestUtils; import org.elasticsearch.index.mapper.SourceFieldMapper; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseService; import org.elasticsearch.license.MockLicenseState; import org.elasticsearch.test.ESTestCase; import org.junit.Before; @@ -28,6 +30,7 @@ import java.util.concurrent.atomic.AtomicInteger; import static org.elasticsearch.common.settings.Settings.builder; +import static org.elasticsearch.xpack.logsdb.SyntheticSourceLicenseServiceTests.createEnterpriseLicense; import static org.hamcrest.Matchers.equalTo; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -39,18 +42,22 @@ public class SyntheticSourceIndexSettingsProviderTests extends ESTestCase { private SyntheticSourceIndexSettingsProvider provider; private final AtomicInteger newMapperServiceCounter = new AtomicInteger(); - private static LogsdbIndexModeSettingsProvider getLogsdbIndexModeSettingsProvider(boolean enabled) { + static LogsdbIndexModeSettingsProvider getLogsdbIndexModeSettingsProvider(boolean enabled) { return new LogsdbIndexModeSettingsProvider(Settings.builder().put("cluster.logsdb.enabled", enabled).build()); } @Before - public void setup() { - MockLicenseState licenseState = mock(MockLicenseState.class); + public void setup() throws Exception { + MockLicenseState licenseState = MockLicenseState.createMock(); when(licenseState.isAllowed(any())).thenReturn(true); var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); licenseService.setLicenseState(licenseState); + var mockLicenseService = mock(LicenseService.class); + License license = createEnterpriseLicense(); + when(mockLicenseService.getLicense()).thenReturn(license); syntheticSourceLicenseService = new SyntheticSourceLicenseService(Settings.EMPTY); syntheticSourceLicenseService.setLicenseState(licenseState); + syntheticSourceLicenseService.setLicenseService(mockLicenseService); provider = new SyntheticSourceIndexSettingsProvider(syntheticSourceLicenseService, im -> { newMapperServiceCounter.incrementAndGet(); diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java index 430ee75eb3561..90a13b16c028e 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseServiceTests.java @@ -8,54 +8,195 @@ package org.elasticsearch.xpack.logsdb; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.license.License; +import org.elasticsearch.license.LicenseService; import org.elasticsearch.license.MockLicenseState; +import org.elasticsearch.license.TestUtils; import org.elasticsearch.test.ESTestCase; +import org.junit.Before; import org.mockito.Mockito; +import java.time.LocalDateTime; +import java.time.ZoneOffset; +import java.util.UUID; + +import static org.elasticsearch.license.TestUtils.dateMath; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.same; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; public class SyntheticSourceLicenseServiceTests extends ESTestCase { + private LicenseService mockLicenseService; + private SyntheticSourceLicenseService licenseService; + + @Before + 
public void setup() throws Exception { + mockLicenseService = mock(LicenseService.class); + License license = createEnterpriseLicense(); + when(mockLicenseService.getLicense()).thenReturn(license); + licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + } + public void testLicenseAllowsSyntheticSource() { - MockLicenseState licenseState = mock(MockLicenseState.class); - when(licenseState.isAllowed(any())).thenReturn(true); - var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(true); licenseService.setLicenseState(licenseState); - assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource(false)); + licenseService.setLicenseService(mockLicenseService); + assertFalse( + "synthetic source is allowed, so not fallback to stored source", + licenseService.fallbackToStoredSource(false, randomBoolean()) + ); Mockito.verify(licenseState, Mockito.times(1)).featureUsed(any()); } public void testLicenseAllowsSyntheticSourceTemplateValidation() { - MockLicenseState licenseState = mock(MockLicenseState.class); - when(licenseState.isAllowed(any())).thenReturn(true); - var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(true); licenseService.setLicenseState(licenseState); - assertFalse("synthetic source is allowed, so not fallback to stored source", licenseService.fallbackToStoredSource(true)); + licenseService.setLicenseService(mockLicenseService); + assertFalse( + "synthetic source is allowed, so not fallback to stored source", + licenseService.fallbackToStoredSource(true, randomBoolean()) + ); Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); } public void testDefaultDisallow() { - MockLicenseState licenseState = mock(MockLicenseState.class); - when(licenseState.isAllowed(any())).thenReturn(false); - var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(false); licenseService.setLicenseState(licenseState); - assertTrue("synthetic source is not allowed, so fallback to stored source", licenseService.fallbackToStoredSource(false)); + licenseService.setLicenseService(mockLicenseService); + assertTrue( + "synthetic source is not allowed, so fallback to stored source", + licenseService.fallbackToStoredSource(false, randomBoolean()) + ); Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); } public void testFallback() { - MockLicenseState licenseState = mock(MockLicenseState.class); - when(licenseState.isAllowed(any())).thenReturn(true); - var licenseService = new SyntheticSourceLicenseService(Settings.EMPTY); + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(true); licenseService.setLicenseState(licenseState); + licenseService.setLicenseService(mockLicenseService); licenseService.setSyntheticSourceFallback(true); assertTrue( "synthetic source is allowed, but fallback has been enabled, so fallback to stored source", - 
licenseService.fallbackToStoredSource(false) + licenseService.fallbackToStoredSource(false, randomBoolean()) ); Mockito.verifyNoInteractions(licenseState); + Mockito.verifyNoInteractions(mockLicenseService); + } + + public void testGoldOrPlatinumLicense() throws Exception { + mockLicenseService = mock(LicenseService.class); + License license = createGoldOrPlatinumLicense(); + when(mockLicenseService.getLicense()).thenReturn(license); + + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.getOperationMode()).thenReturn(license.operationMode()); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY))).thenReturn(true); + licenseService.setLicenseState(licenseState); + licenseService.setLicenseService(mockLicenseService); + assertFalse( + "legacy licensed usage is allowed, so not fallback to stored source", + licenseService.fallbackToStoredSource(false, true) + ); + Mockito.verify(licenseState, Mockito.times(1)).featureUsed(any()); } + public void testGoldOrPlatinumLicenseLegacyLicenseNotAllowed() throws Exception { + mockLicenseService = mock(LicenseService.class); + License license = createGoldOrPlatinumLicense(); + when(mockLicenseService.getLicense()).thenReturn(license); + + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.getOperationMode()).thenReturn(license.operationMode()); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(false); + licenseService.setLicenseState(licenseState); + licenseService.setLicenseService(mockLicenseService); + assertTrue( + "legacy licensed usage is not allowed, so fallback to stored source", + licenseService.fallbackToStoredSource(false, false) + ); + Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); + Mockito.verify(licenseState, Mockito.times(1)).isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE)); + } + + public void testGoldOrPlatinumLicenseBeyondCutoffDate() throws Exception { + long start = LocalDateTime.of(2025, 1, 1, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + License license = createGoldOrPlatinumLicense(start); + mockLicenseService = mock(LicenseService.class); + when(mockLicenseService.getLicense()).thenReturn(license); + + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.getOperationMode()).thenReturn(license.operationMode()); + when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE))).thenReturn(false); + licenseService.setLicenseState(licenseState); + licenseService.setLicenseService(mockLicenseService); + assertTrue("beyond cutoff date, so fallback to stored source", licenseService.fallbackToStoredSource(false, true)); + Mockito.verify(licenseState, Mockito.never()).featureUsed(any()); + Mockito.verify(licenseState, Mockito.times(1)).isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE)); + } + + public void testGoldOrPlatinumLicenseCustomCutoffDate() throws Exception { + licenseService = new SyntheticSourceLicenseService(Settings.EMPTY, "2025-01-02T00:00"); + + long start = LocalDateTime.of(2025, 1, 1, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + License license = createGoldOrPlatinumLicense(start); + mockLicenseService = mock(LicenseService.class); + when(mockLicenseService.getLicense()).thenReturn(license); + + MockLicenseState licenseState = MockLicenseState.createMock(); + when(licenseState.getOperationMode()).thenReturn(license.operationMode()); + 
when(licenseState.isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY))).thenReturn(true); + licenseService.setLicenseState(licenseState); + licenseService.setLicenseService(mockLicenseService); + assertFalse("custom cutoff date, so fallback to stored source", licenseService.fallbackToStoredSource(false, true)); + Mockito.verify(licenseState, Mockito.times(1)).featureUsed(any()); + Mockito.verify(licenseState, Mockito.times(1)).isAllowed(same(SyntheticSourceLicenseService.SYNTHETIC_SOURCE_FEATURE_LEGACY)); + } + + static License createEnterpriseLicense() throws Exception { + long start = LocalDateTime.of(2024, 11, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + return createEnterpriseLicense(start); + } + + static License createEnterpriseLicense(long start) throws Exception { + String uid = UUID.randomUUID().toString(); + long currentTime = System.currentTimeMillis(); + final License.Builder builder = License.builder() + .uid(uid) + .version(License.VERSION_CURRENT) + .expiryDate(dateMath("now+2d", currentTime)) + .startDate(start) + .issueDate(currentTime) + .type("enterprise") + .issuedTo("customer") + .issuer("elasticsearch") + .maxResourceUnits(10); + return TestUtils.generateSignedLicense(builder); + } + + static License createGoldOrPlatinumLicense() throws Exception { + long start = LocalDateTime.of(2024, 11, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + return createGoldOrPlatinumLicense(start); + } + + static License createGoldOrPlatinumLicense(long start) throws Exception { + String uid = UUID.randomUUID().toString(); + long currentTime = System.currentTimeMillis(); + final License.Builder builder = License.builder() + .uid(uid) + .version(License.VERSION_CURRENT) + .expiryDate(dateMath("now+100d", currentTime)) + .startDate(start) + .issueDate(currentTime) + .type(randomBoolean() ? "gold" : "platinum") + .issuedTo("customer") + .issuer("elasticsearch") + .maxNodes(5); + return TestUtils.generateSignedLicense(builder); + } } From b13e0d25c0ef52bf6236a981bee4823b12934a57 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 26 Nov 2024 09:06:02 +0000 Subject: [PATCH 003/139] Support dynamic credentials in `S3HttpFixture` (#117458) Rephrase the authorization check in `S3HttpFixture` in terms of a predicate provided by the caller so that there's no need for a separate subclass that handles session tokens, and so that it can support auto-generated credentials more naturally. Also adapts `Ec2ImdsHttpFixture` to dynamically generate credentials this way. Also extracts the STS fixture in `S3HttpFixtureWithSTS` into a separate service, similarly to #117324, and adapts this new fixture to dynamically generate credentials too. 
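As a sketch of the resulting API (illustrative only, not part of this change's
diffs; `currentAccessKey` is a made-up variable):

    // Fixed credentials, equivalent to the old accessKey constructor argument:
    new S3HttpFixture(true, "bucket", "base_path", S3HttpFixture.fixedAccessKey("s3_test_access_key"));

    // Credentials a test can rotate while the fixture is running:
    new S3HttpFixture(true, "bucket", "base_path", S3HttpFixture.mutableAccessKey(() -> currentAccessKey));

    // Credentials registered dynamically by the IMDS/STS fixtures:
    var dynamicS3Credentials = new DynamicS3Credentials();
    new S3HttpFixture(true, "bucket", "base_path", dynamicS3Credentials::isAuthorized);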
Relates ES-9984 --- modules/repository-s3/build.gradle | 1 + .../RepositoryS3RestReloadCredentialsIT.java | 15 +- .../s3/RepositoryS3ClientYamlTestSuiteIT.java | 25 +- .../RepositoryS3EcsClientYamlTestSuiteIT.java | 25 +- .../RepositoryS3StsClientYamlTestSuiteIT.java | 27 +- settings.gradle | 1 + test/fixtures/aws-sts-fixture/build.gradle | 19 ++ .../fixture/aws/sts/AwsStsHttpFixture.java | 64 +++++ .../fixture/aws/sts/AwsStsHttpHandler.java} | 77 +++-- .../aws/sts/AwsStsHttpHandlerTests.java | 268 ++++++++++++++++++ .../fixture/aws/imds/Ec2ImdsHttpFixture.java | 13 +- .../fixture/aws/imds/Ec2ImdsHttpHandler.java | 12 +- .../aws/imds/Ec2ImdsHttpHandlerTests.java | 15 +- .../java/fixture/s3/DynamicS3Credentials.java | 39 +++ .../main/java/fixture/s3/S3HttpFixture.java | 40 ++- .../s3/S3HttpFixtureWithSessionToken.java | 42 --- ...earchableSnapshotsCredentialsReloadIT.java | 23 +- 17 files changed, 552 insertions(+), 154 deletions(-) create mode 100644 test/fixtures/aws-sts-fixture/build.gradle create mode 100644 test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpFixture.java rename test/fixtures/{s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSTS.java => aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java} (66%) create mode 100644 test/fixtures/aws-sts-fixture/src/test/java/fixture/aws/sts/AwsStsHttpHandlerTests.java create mode 100644 test/fixtures/s3-fixture/src/main/java/fixture/s3/DynamicS3Credentials.java delete mode 100644 test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSessionToken.java diff --git a/modules/repository-s3/build.gradle b/modules/repository-s3/build.gradle index 9a7f0a5994d73..ed1777891f40d 100644 --- a/modules/repository-s3/build.gradle +++ b/modules/repository-s3/build.gradle @@ -46,6 +46,7 @@ dependencies { yamlRestTestImplementation project(":test:framework") yamlRestTestImplementation project(':test:fixtures:s3-fixture') yamlRestTestImplementation project(':test:fixtures:ec2-imds-fixture') + yamlRestTestImplementation project(':test:fixtures:aws-sts-fixture') yamlRestTestImplementation project(':test:fixtures:minio-fixture') internalClusterTestImplementation project(':test:fixtures:minio-fixture') diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java index 2f3e995b52468..430c0a1994967 100644 --- a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java @@ -35,7 +35,14 @@ public class RepositoryS3RestReloadCredentialsIT extends ESRestTestCase { private static final String BUCKET = "RepositoryS3RestReloadCredentialsIT-bucket-" + HASHED_SEED; private static final String BASE_PATH = "RepositoryS3RestReloadCredentialsIT-base-path-" + HASHED_SEED; - public static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, "ignored"); + private static volatile String repositoryAccessKey; + + public static final S3HttpFixture s3Fixture = new S3HttpFixture( + true, + BUCKET, + BASE_PATH, + S3HttpFixture.mutableAccessKey(() -> repositoryAccessKey) + ); private static final MutableSettingsProvider keystoreSettings = new MutableSettingsProvider(); @@ -68,7 +75,7 @@ public void testReloadCredentialsFromKeystore() throws 
IOException { // Set up initial credentials final var accessKey1 = randomIdentifier(); - s3Fixture.setAccessKey(accessKey1); + repositoryAccessKey = accessKey1; keystoreSettings.put("s3.client.default.access_key", accessKey1); keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); cluster.updateStoredSecureSettings(); @@ -79,14 +86,14 @@ public void testReloadCredentialsFromKeystore() throws IOException { // Rotate credentials in blob store final var accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); - s3Fixture.setAccessKey(accessKey2); + repositoryAccessKey = accessKey2; // Ensure that initial credentials now invalid final var accessDeniedException2 = expectThrows(ResponseException.class, () -> client().performRequest(verifyRequest)); assertThat(accessDeniedException2.getResponse().getStatusLine().getStatusCode(), equalTo(500)); assertThat( accessDeniedException2.getMessage(), - allOf(containsString("Bad access key"), containsString("Status Code: 403"), containsString("Error Code: AccessDenied")) + allOf(containsString("Access denied"), containsString("Status Code: 403"), containsString("Error Code: AccessDenied")) ); // Set up refreshed credentials diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java index 64cb3c3fd3a69..a3b154b4bdfed 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java @@ -10,8 +10,8 @@ package org.elasticsearch.repositories.s3; import fixture.aws.imds.Ec2ImdsHttpFixture; +import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; -import fixture.s3.S3HttpFixtureWithSessionToken; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; @@ -34,27 +34,30 @@ public class RepositoryS3ClientYamlTestSuiteIT extends AbstractRepositoryS3Clien private static final String HASHED_SEED = Integer.toString(Murmur3HashFunction.hash(System.getProperty("tests.seed"))); private static final String TEMPORARY_SESSION_TOKEN = "session_token-" + HASHED_SEED; - private static final String IMDS_ACCESS_KEY = "imds-access-key-" + HASHED_SEED; - private static final String IMDS_SESSION_TOKEN = "imds-session-token-" + HASHED_SEED; private static final S3HttpFixture s3Fixture = new S3HttpFixture(); - private static final S3HttpFixtureWithSessionToken s3HttpFixtureWithSessionToken = new S3HttpFixtureWithSessionToken( + private static final S3HttpFixture s3HttpFixtureWithSessionToken = new S3HttpFixture( + true, "session_token_bucket", "session_token_base_path_integration_tests", - System.getProperty("s3TemporaryAccessKey"), - TEMPORARY_SESSION_TOKEN + S3HttpFixture.fixedAccessKeyAndToken(System.getProperty("s3TemporaryAccessKey"), TEMPORARY_SESSION_TOKEN) ); - private static final S3HttpFixtureWithSessionToken s3HttpFixtureWithImdsSessionToken = new S3HttpFixtureWithSessionToken( + private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); + + private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( + dynamicS3Credentials::addValidCredentials, + Set.of() + ); + + private static final S3HttpFixture 
s3HttpFixtureWithImdsSessionToken = new S3HttpFixture( + true, "ec2_bucket", "ec2_base_path", - IMDS_ACCESS_KEY, - IMDS_SESSION_TOKEN + dynamicS3Credentials::isAuthorized ); - private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture(IMDS_ACCESS_KEY, IMDS_SESSION_TOKEN, Set.of()); - public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") .keystore("s3.client.integration_test_permanent.access_key", System.getProperty("s3PermanentAccessKey")) diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java index a522c9b17145b..bbd003f506ead 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java @@ -10,12 +10,12 @@ package org.elasticsearch.repositories.s3; import fixture.aws.imds.Ec2ImdsHttpFixture; -import fixture.s3.S3HttpFixtureWithSessionToken; +import fixture.s3.DynamicS3Credentials; +import fixture.s3.S3HttpFixture; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; -import org.elasticsearch.cluster.routing.Murmur3HashFunction; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; import org.junit.ClassRule; @@ -26,23 +26,20 @@ public class RepositoryS3EcsClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { - private static final String HASHED_SEED = Integer.toString(Murmur3HashFunction.hash(System.getProperty("tests.seed"))); - private static final String ECS_ACCESS_KEY = "ecs-access-key-" + HASHED_SEED; - private static final String ECS_SESSION_TOKEN = "ecs-session-token-" + HASHED_SEED; - - private static final S3HttpFixtureWithSessionToken s3Fixture = new S3HttpFixtureWithSessionToken( - "ecs_bucket", - "ecs_base_path", - ECS_ACCESS_KEY, - ECS_SESSION_TOKEN - ); + private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( - ECS_ACCESS_KEY, - ECS_SESSION_TOKEN, + dynamicS3Credentials::addValidCredentials, Set.of("/ecs_credentials_endpoint") ); + private static final S3HttpFixture s3Fixture = new S3HttpFixture( + true, + "ecs_bucket", + "ecs_base_path", + dynamicS3Credentials::isAuthorized + ); + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") .setting("s3.client.integration_test_ecs.endpoint", s3Fixture::getAddress) diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java index 24f03a6ae7624..7c4d719485113 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java @@ -9,8 +9,9 @@ package org.elasticsearch.repositories.s3; +import fixture.aws.sts.AwsStsHttpFixture; +import 
fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; -import fixture.s3.S3HttpFixtureWithSTS; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; @@ -24,13 +25,27 @@ public class RepositoryS3StsClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { - public static final S3HttpFixture s3Fixture = new S3HttpFixture(); - private static final S3HttpFixtureWithSTS s3Sts = new S3HttpFixtureWithSTS(); + private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); + + private static final S3HttpFixture s3HttpFixture = new S3HttpFixture( + true, + "sts_bucket", + "sts_base_path", + dynamicS3Credentials::isAuthorized + ); + + private static final AwsStsHttpFixture stsHttpFixture = new AwsStsHttpFixture(dynamicS3Credentials::addValidCredentials, """ + Atza|IQEBLjAsAhRFiXuWpUXuRvQ9PZL3GMFcYevydwIUFAHZwXZXXXXXXXXJnrulxKDHwy87oGKPznh0D6bEQZTSCzyoCtL_8S07pLpr0zMbn6w1lfVZKNTBdDans\ + FBmtGnIsIapjI6xKR02Yc_2bQ8LZbUXSGm6Ry6_BG7PrtLZtj_dfCTj92xNGed-CrKqjG7nPBjNIL016GGvuS5gSvPRUxWES3VYfm1wl7WTI7jn-Pcb6M-buCgHhFO\ + zTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ"""); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .setting("s3.client.integration_test_sts.endpoint", s3Sts::getAddress) - .systemProperty("com.amazonaws.sdk.stsMetadataServiceEndpointOverride", () -> s3Sts.getAddress() + "/assume-role-with-web-identity") + .setting("s3.client.integration_test_sts.endpoint", s3HttpFixture::getAddress) + .systemProperty( + "com.amazonaws.sdk.stsMetadataServiceEndpointOverride", + () -> stsHttpFixture.getAddress() + "/assume-role-with-web-identity" + ) .configFile("repository-s3/aws-web-identity-token-file", Resource.fromClasspath("aws-web-identity-token-file")) .environment("AWS_WEB_IDENTITY_TOKEN_FILE", System.getProperty("awsWebIdentityTokenExternalLocation")) // // The AWS STS SDK requires the role and session names to be set. We can verify that they are sent to S3S in the @@ -40,7 +55,7 @@ public class RepositoryS3StsClientYamlTestSuiteIT extends AbstractRepositoryS3Cl .build(); @ClassRule - public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(s3Sts).around(cluster); + public static TestRule ruleChain = RuleChain.outerRule(s3HttpFixture).around(stsHttpFixture).around(cluster); @ParametersFactory public static Iterable parameters() throws Exception { diff --git a/settings.gradle b/settings.gradle index 7bf03263031f1..4722fc311480a 100644 --- a/settings.gradle +++ b/settings.gradle @@ -86,6 +86,7 @@ List projects = [ 'distribution:tools:ansi-console', 'server', 'test:framework', + 'test:fixtures:aws-sts-fixture', 'test:fixtures:azure-fixture', 'test:fixtures:ec2-imds-fixture', 'test:fixtures:gcs-fixture', diff --git a/test/fixtures/aws-sts-fixture/build.gradle b/test/fixtures/aws-sts-fixture/build.gradle new file mode 100644 index 0000000000000..57f0f8fe25493 --- /dev/null +++ b/test/fixtures/aws-sts-fixture/build.gradle @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +apply plugin: 'elasticsearch.java' + +description = 'Fixture for emulating the Security Token Service (STS) running in AWS' + +dependencies { + api project(':server') + api("junit:junit:${versions.junit}") { + transitive = false + } + api project(':test:framework') +} diff --git a/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpFixture.java b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpFixture.java new file mode 100644 index 0000000000000..13ba7eaf8ba67 --- /dev/null +++ b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpFixture.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ +package fixture.aws.sts; + +import com.sun.net.httpserver.HttpHandler; +import com.sun.net.httpserver.HttpServer; + +import org.junit.rules.ExternalResource; + +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.UnknownHostException; +import java.util.Objects; +import java.util.function.BiConsumer; + +public class AwsStsHttpFixture extends ExternalResource { + + private HttpServer server; + + private final BiConsumer newCredentialsConsumer; + private final String webIdentityToken; + + public AwsStsHttpFixture(BiConsumer newCredentialsConsumer, String webIdentityToken) { + this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); + this.webIdentityToken = Objects.requireNonNull(webIdentityToken); + } + + protected HttpHandler createHandler() { + return new AwsStsHttpHandler(newCredentialsConsumer, webIdentityToken); + } + + public String getAddress() { + return "http://" + server.getAddress().getHostString() + ":" + server.getAddress().getPort(); + } + + public void stop(int delay) { + server.stop(delay); + } + + protected void before() throws Throwable { + server = HttpServer.create(resolveAddress(), 0); + server.createContext("/", Objects.requireNonNull(createHandler())); + server.start(); + } + + @Override + protected void after() { + stop(0); + } + + private static InetSocketAddress resolveAddress() { + try { + return new InetSocketAddress(InetAddress.getByName("localhost"), 0); + } catch (UnknownHostException e) { + throw new RuntimeException(e); + } + } +} diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSTS.java b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java similarity index 66% rename from test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSTS.java rename to test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java index 54e0be1e321a2..84541f5e15211 100644 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSTS.java +++ 
b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java @@ -6,12 +6,16 @@ * your election, the "Elastic License 2.0", the "GNU Affero General Public * License v3.0 only", or the "Server Side Public License, v 1". */ -package fixture.s3; +package fixture.aws.sts; +import com.sun.net.httpserver.HttpExchange; import com.sun.net.httpserver.HttpHandler; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.rest.RestStatus; +import java.io.IOException; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; import java.time.ZonedDateTime; @@ -19,53 +23,39 @@ import java.util.Arrays; import java.util.Locale; import java.util.Map; +import java.util.Objects; +import java.util.function.BiConsumer; import java.util.stream.Collectors; -public class S3HttpFixtureWithSTS extends S3HttpFixture { +import static org.elasticsearch.test.ESTestCase.randomIdentifier; - private static final String ROLE_ARN = "arn:aws:iam::123456789012:role/FederatedWebIdentityRole"; - private static final String ROLE_NAME = "sts-fixture-test"; - private final String sessionToken; - private final String webIdentityToken; +/** + * Minimal HTTP handler that emulates the AWS STS server + */ +@SuppressForbidden(reason = "this test uses a HttpServer to emulate the AWS STS endpoint") +public class AwsStsHttpHandler implements HttpHandler { - public S3HttpFixtureWithSTS() { - this(true); - } + static final String ROLE_ARN = "arn:aws:iam::123456789012:role/FederatedWebIdentityRole"; + static final String ROLE_NAME = "sts-fixture-test"; - public S3HttpFixtureWithSTS(boolean enabled) { - this( - enabled, - "sts_bucket", - "sts_base_path", - "sts_access_key", - "sts_session_token", - "Atza|IQEBLjAsAhRFiXuWpUXuRvQ9PZL3GMFcYevydwIUFAHZwXZXXXXXXXXJnrulxKDHwy87oGKPznh0D6bEQZTSCzyoCtL_8S07pLpr0zMbn6w1lfVZKNTBdDansFBmtGnIsIapjI6xKR02Yc_2bQ8LZbUXSGm6Ry6_BG7PrtLZtj_dfCTj92xNGed-CrKqjG7nPBjNIL016GGvuS5gSvPRUxWES3VYfm1wl7WTI7jn-Pcb6M-buCgHhFOzTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ" - ); - } + private final BiConsumer newCredentialsConsumer; + private final String webIdentityToken; - public S3HttpFixtureWithSTS( - boolean enabled, - String bucket, - String basePath, - String accessKey, - String sessionToken, - String webIdentityToken - ) { - super(enabled, bucket, basePath, accessKey); - this.sessionToken = sessionToken; - this.webIdentityToken = webIdentityToken; + public AwsStsHttpHandler(BiConsumer newCredentialsConsumer, String webIdentityToken) { + this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); + this.webIdentityToken = Objects.requireNonNull(webIdentityToken); } @Override - protected HttpHandler createHandler() { - final HttpHandler delegate = super.createHandler(); + public void handle(final HttpExchange exchange) throws IOException { + // https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html + + try (exchange) { + final var requestMethod = exchange.getRequestMethod(); + final var path = exchange.getRequestURI().getPath(); + + if ("POST".equals(requestMethod) && "/assume-role-with-web-identity/".equals(path)) { - return exchange -> { - // https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRoleWithWebIdentity.html - // It's run as a separate service, but we emulate it under the `assume-role-with-web-identity` endpoint - // of the S3 serve for the simplicity sake - if 
("POST".equals(exchange.getRequestMethod()) - && exchange.getRequestURI().getPath().startsWith("/assume-role-with-web-identity")) { String body = new String(exchange.getRequestBody().readAllBytes(), StandardCharsets.UTF_8); Map params = Arrays.stream(body.split("&")) .map(e -> e.split("=")) @@ -82,6 +72,9 @@ protected HttpHandler createHandler() { exchange.close(); return; } + final var accessKey = randomIdentifier(); + final var sessionToken = randomIdentifier(); + newCredentialsConsumer.accept(accessKey, sessionToken); final byte[] response = String.format( Locale.ROOT, """ @@ -95,7 +88,7 @@ protected HttpHandler createHandler() { %s - secret_access_key + %s %s %s @@ -109,6 +102,7 @@ protected HttpHandler createHandler() { ROLE_ARN, ROLE_NAME, sessionToken, + randomIdentifier(), ZonedDateTime.now().plusDays(1L).format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssZ")), accessKey ).getBytes(StandardCharsets.UTF_8); @@ -118,7 +112,8 @@ protected HttpHandler createHandler() { exchange.close(); return; } - delegate.handle(exchange); - }; + + ExceptionsHelper.maybeDieOnAnotherThread(new AssertionError("not supported: " + requestMethod + " " + path)); + } } } diff --git a/test/fixtures/aws-sts-fixture/src/test/java/fixture/aws/sts/AwsStsHttpHandlerTests.java b/test/fixtures/aws-sts-fixture/src/test/java/fixture/aws/sts/AwsStsHttpHandlerTests.java new file mode 100644 index 0000000000000..4094ce18e7aef --- /dev/null +++ b/test/fixtures/aws-sts-fixture/src/test/java/fixture/aws/sts/AwsStsHttpHandlerTests.java @@ -0,0 +1,268 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package fixture.aws.sts; + +import com.sun.net.httpserver.Headers; +import com.sun.net.httpserver.HttpContext; +import com.sun.net.httpserver.HttpExchange; +import com.sun.net.httpserver.HttpPrincipal; + +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.containsString; + +public class AwsStsHttpHandlerTests extends ESTestCase { + + public void testGenerateCredentials() { + final Map generatedCredentials = new HashMap<>(); + + final var webIdentityToken = randomUnicodeOfLength(10); + final var handler = new AwsStsHttpHandler(generatedCredentials::put, webIdentityToken); + + final var response = handleRequest( + handler, + Map.of( + "Action", + "AssumeRoleWithWebIdentity", + "RoleSessionName", + AwsStsHttpHandler.ROLE_NAME, + "RoleArn", + AwsStsHttpHandler.ROLE_ARN, + "WebIdentityToken", + webIdentityToken + ) + ); + assertEquals(RestStatus.OK, response.status()); + + assertThat(generatedCredentials, aMapWithSize(1)); + final var accessKey = generatedCredentials.keySet().iterator().next(); + final var sessionToken = generatedCredentials.values().iterator().next(); + + final var responseBody = response.body().utf8ToString(); + assertThat(responseBody, containsString("" + accessKey + "")); + assertThat(responseBody, containsString("" + sessionToken + "")); + } + + public void testInvalidAction() { + final var handler = new AwsStsHttpHandler((key, token) -> fail(), randomUnicodeOfLength(10)); + final var response = handleRequest(handler, Map.of("Action", "Unsupported")); + assertEquals(RestStatus.BAD_REQUEST, response.status()); + } + + public void testInvalidRole() { + final var webIdentityToken = randomUnicodeOfLength(10); + final var handler = new AwsStsHttpHandler((key, token) -> fail(), webIdentityToken); + final var response = handleRequest( + handler, + Map.of( + "Action", + "AssumeRoleWithWebIdentity", + "RoleSessionName", + randomValueOtherThan(AwsStsHttpHandler.ROLE_NAME, ESTestCase::randomIdentifier), + "RoleArn", + AwsStsHttpHandler.ROLE_ARN, + "WebIdentityToken", + webIdentityToken + ) + ); + assertEquals(RestStatus.UNAUTHORIZED, response.status()); + } + + public void testInvalidToken() { + final var webIdentityToken = randomUnicodeOfLength(10); + final var handler = new AwsStsHttpHandler((key, token) -> fail(), webIdentityToken); + final var response = handleRequest( + handler, + Map.of( + "Action", + "AssumeRoleWithWebIdentity", + "RoleSessionName", + AwsStsHttpHandler.ROLE_NAME, + "RoleArn", + AwsStsHttpHandler.ROLE_ARN, + "WebIdentityToken", + randomValueOtherThan(webIdentityToken, () -> randomUnicodeOfLength(10)) + ) + ); + assertEquals(RestStatus.UNAUTHORIZED, response.status()); + } + + public void testInvalidARN() { + final var webIdentityToken = randomUnicodeOfLength(10); + final var handler = new AwsStsHttpHandler((key, token) -> fail(), webIdentityToken); + final var response = handleRequest( + handler, + Map.of( + "Action", + "AssumeRoleWithWebIdentity", + 
"RoleSessionName", + AwsStsHttpHandler.ROLE_NAME, + "RoleArn", + randomValueOtherThan(AwsStsHttpHandler.ROLE_ARN, ESTestCase::randomIdentifier), + "WebIdentityToken", + webIdentityToken + ) + ); + assertEquals(RestStatus.UNAUTHORIZED, response.status()); + } + + private record TestHttpResponse(RestStatus status, BytesReference body) {} + + private static TestHttpResponse handleRequest(AwsStsHttpHandler handler, Map body) { + final var httpExchange = new TestHttpExchange( + "POST", + "/assume-role-with-web-identity/", + new BytesArray( + body.entrySet() + .stream() + .map(e -> e.getKey() + "=" + URLEncoder.encode(e.getValue(), StandardCharsets.UTF_8)) + .collect(Collectors.joining("&")) + ), + TestHttpExchange.EMPTY_HEADERS + ); + try { + handler.handle(httpExchange); + } catch (IOException e) { + fail(e); + } + assertNotEquals(0, httpExchange.getResponseCode()); + return new TestHttpResponse(RestStatus.fromCode(httpExchange.getResponseCode()), httpExchange.getResponseBodyContents()); + } + + private static class TestHttpExchange extends HttpExchange { + + private static final Headers EMPTY_HEADERS = new Headers(); + + private final String method; + private final URI uri; + private final BytesReference requestBody; + private final Headers requestHeaders; + + private final Headers responseHeaders = new Headers(); + private final BytesStreamOutput responseBody = new BytesStreamOutput(); + private int responseCode; + + TestHttpExchange(String method, String uri, BytesReference requestBody, Headers requestHeaders) { + this.method = method; + this.uri = URI.create(uri); + this.requestBody = requestBody; + this.requestHeaders = requestHeaders; + } + + @Override + public Headers getRequestHeaders() { + return requestHeaders; + } + + @Override + public Headers getResponseHeaders() { + return responseHeaders; + } + + @Override + public URI getRequestURI() { + return uri; + } + + @Override + public String getRequestMethod() { + return method; + } + + @Override + public HttpContext getHttpContext() { + return null; + } + + @Override + public void close() {} + + @Override + public InputStream getRequestBody() { + try { + return requestBody.streamInput(); + } catch (IOException e) { + throw new AssertionError(e); + } + } + + @Override + public OutputStream getResponseBody() { + return responseBody; + } + + @Override + public void sendResponseHeaders(int rCode, long responseLength) { + this.responseCode = rCode; + } + + @Override + public InetSocketAddress getRemoteAddress() { + return null; + } + + @Override + public int getResponseCode() { + return responseCode; + } + + public BytesReference getResponseBodyContents() { + return responseBody.bytes(); + } + + @Override + public InetSocketAddress getLocalAddress() { + return null; + } + + @Override + public String getProtocol() { + return "HTTP/1.1"; + } + + @Override + public Object getAttribute(String name) { + return null; + } + + @Override + public void setAttribute(String name, Object value) { + fail("setAttribute not implemented"); + } + + @Override + public void setStreams(InputStream i, OutputStream o) { + fail("setStreams not implemented"); + } + + @Override + public HttpPrincipal getPrincipal() { + fail("getPrincipal not implemented"); + throw new UnsupportedOperationException("getPrincipal not implemented"); + } + } + +} diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java index 68f46d778018c..13d36c6fc4812 
100644 --- a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java @@ -18,23 +18,22 @@ import java.net.UnknownHostException; import java.util.Objects; import java.util.Set; +import java.util.function.BiConsumer; public class Ec2ImdsHttpFixture extends ExternalResource { private HttpServer server; - private final String accessKey; - private final String sessionToken; + private final BiConsumer newCredentialsConsumer; private final Set alternativeCredentialsEndpoints; - public Ec2ImdsHttpFixture(String accessKey, String sessionToken, Set alternativeCredentialsEndpoints) { - this.accessKey = accessKey; - this.sessionToken = sessionToken; - this.alternativeCredentialsEndpoints = alternativeCredentialsEndpoints; + public Ec2ImdsHttpFixture(BiConsumer newCredentialsConsumer, Set alternativeCredentialsEndpoints) { + this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); + this.alternativeCredentialsEndpoints = Objects.requireNonNull(alternativeCredentialsEndpoints); } protected HttpHandler createHandler() { - return new Ec2ImdsHttpHandler(accessKey, sessionToken, alternativeCredentialsEndpoints); + return new Ec2ImdsHttpHandler(newCredentialsConsumer, alternativeCredentialsEndpoints); } public String getAddress() { diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java index 04e5e83bddfa9..a92f1bdc5f9ae 100644 --- a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java @@ -25,6 +25,7 @@ import java.util.Collection; import java.util.Objects; import java.util.Set; +import java.util.function.BiConsumer; import static org.elasticsearch.test.ESTestCase.randomIdentifier; @@ -36,13 +37,11 @@ public class Ec2ImdsHttpHandler implements HttpHandler { private static final String IMDS_SECURITY_CREDENTIALS_PATH = "/latest/meta-data/iam/security-credentials/"; - private final String accessKey; - private final String sessionToken; + private final BiConsumer newCredentialsConsumer; private final Set validCredentialsEndpoints = ConcurrentCollections.newConcurrentSet(); - public Ec2ImdsHttpHandler(String accessKey, String sessionToken, Collection alternativeCredentialsEndpoints) { - this.accessKey = Objects.requireNonNull(accessKey); - this.sessionToken = Objects.requireNonNull(sessionToken); + public Ec2ImdsHttpHandler(BiConsumer newCredentialsConsumer, Collection alternativeCredentialsEndpoints) { + this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); this.validCredentialsEndpoints.addAll(alternativeCredentialsEndpoints); } @@ -70,6 +69,9 @@ public void handle(final HttpExchange exchange) throws IOException { exchange.getResponseBody().write(response); return; } else if (validCredentialsEndpoints.contains(path)) { + final String accessKey = randomIdentifier(); + final String sessionToken = randomIdentifier(); + newCredentialsConsumer.accept(accessKey, sessionToken); final byte[] response = Strings.format( """ { diff --git a/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java b/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java index 5d5cbfae3fa60..369b0ef449b2f 100644 --- 
a/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java +++ b/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java @@ -28,15 +28,18 @@ import java.io.OutputStream; import java.net.InetSocketAddress; import java.net.URI; +import java.util.HashMap; +import java.util.Map; import java.util.Set; +import static org.hamcrest.Matchers.aMapWithSize; + public class Ec2ImdsHttpHandlerTests extends ESTestCase { public void testImdsV1() throws IOException { - final var accessKey = randomIdentifier(); - final var sessionToken = randomIdentifier(); + final Map generatedCredentials = new HashMap<>(); - final var handler = new Ec2ImdsHttpHandler(accessKey, sessionToken, Set.of()); + final var handler = new Ec2ImdsHttpHandler(generatedCredentials::put, Set.of()); final var roleResponse = handleRequest(handler, "GET", "/latest/meta-data/iam/security-credentials/"); assertEquals(RestStatus.OK, roleResponse.status()); @@ -46,6 +49,10 @@ public void testImdsV1() throws IOException { final var credentialsResponse = handleRequest(handler, "GET", "/latest/meta-data/iam/security-credentials/" + profileName); assertEquals(RestStatus.OK, credentialsResponse.status()); + assertThat(generatedCredentials, aMapWithSize(1)); + final var accessKey = generatedCredentials.keySet().iterator().next(); + final var sessionToken = generatedCredentials.values().iterator().next(); + final var responseMap = XContentHelper.convertToMap(XContentType.JSON.xContent(), credentialsResponse.body().streamInput(), false); assertEquals(Set.of("AccessKeyId", "Expiration", "RoleArn", "SecretAccessKey", "Token"), responseMap.keySet()); assertEquals(accessKey, responseMap.get("AccessKeyId")); @@ -55,7 +62,7 @@ public void testImdsV1() throws IOException { public void testImdsV2Disabled() { assertEquals( RestStatus.METHOD_NOT_ALLOWED, - handleRequest(new Ec2ImdsHttpHandler(randomIdentifier(), randomIdentifier(), Set.of()), "PUT", "/latest/api/token").status() + handleRequest(new Ec2ImdsHttpHandler((accessKey, sessionToken) -> fail(), Set.of()), "PUT", "/latest/api/token").status() ); } diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/DynamicS3Credentials.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/DynamicS3Credentials.java new file mode 100644 index 0000000000000..4e8f267ad3543 --- /dev/null +++ b/test/fixtures/s3-fixture/src/main/java/fixture/s3/DynamicS3Credentials.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package fixture.s3; + +import org.elasticsearch.common.util.concurrent.ConcurrentCollections; + +import java.util.Map; +import java.util.Objects; +import java.util.Set; + +/** + * Allows dynamic creation of access-key/session-token credentials for accessing AWS services such as S3. Typically there's one service + * (e.g. IMDS or STS) which creates credentials dynamically and registers them here using {@link #addValidCredentials}, and then the + * {@link S3HttpFixture} uses {@link #isAuthorized} to validate the credentials it receives corresponds with some previously-generated + * credentials. 
+ */ +public class DynamicS3Credentials { + private final Map> validCredentialsMap = ConcurrentCollections.newConcurrentMap(); + + public boolean isAuthorized(String authorizationHeader, String sessionTokenHeader) { + return authorizationHeader != null + && sessionTokenHeader != null + && validCredentialsMap.getOrDefault(sessionTokenHeader, Set.of()).stream().anyMatch(authorizationHeader::contains); + } + + public void addValidCredentials(String accessKey, String sessionToken) { + validCredentialsMap.computeIfAbsent( + Objects.requireNonNull(sessionToken, "sessionToken"), + t -> ConcurrentCollections.newConcurrentSet() + ).add(Objects.requireNonNull(accessKey, "accessKey")); + } +} diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java index 421478a53e6bc..36f8fedcb3335 100644 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java +++ b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java @@ -21,6 +21,8 @@ import java.net.InetSocketAddress; import java.net.UnknownHostException; import java.util.Objects; +import java.util.function.BiPredicate; +import java.util.function.Supplier; public class S3HttpFixture extends ExternalResource { @@ -29,21 +31,21 @@ public class S3HttpFixture extends ExternalResource { private final boolean enabled; private final String bucket; private final String basePath; - protected volatile String accessKey; + private final BiPredicate authorizationPredicate; public S3HttpFixture() { this(true); } public S3HttpFixture(boolean enabled) { - this(enabled, "bucket", "base_path_integration_tests", "s3_test_access_key"); + this(enabled, "bucket", "base_path_integration_tests", fixedAccessKey("s3_test_access_key")); } - public S3HttpFixture(boolean enabled, String bucket, String basePath, String accessKey) { + public S3HttpFixture(boolean enabled, String bucket, String basePath, BiPredicate authorizationPredicate) { this.enabled = enabled; this.bucket = bucket; this.basePath = basePath; - this.accessKey = accessKey; + this.authorizationPredicate = authorizationPredicate; } protected HttpHandler createHandler() { @@ -51,9 +53,11 @@ protected HttpHandler createHandler() { @Override public void handle(final HttpExchange exchange) throws IOException { try { - final String authorization = exchange.getRequestHeaders().getFirst("Authorization"); - if (authorization == null || authorization.contains(accessKey) == false) { - sendError(exchange, RestStatus.FORBIDDEN, "AccessDenied", "Bad access key"); + if (authorizationPredicate.test( + exchange.getRequestHeaders().getFirst("Authorization"), + exchange.getRequestHeaders().getFirst("x-amz-security-token") + ) == false) { + sendError(exchange, RestStatus.FORBIDDEN, "AccessDenied", "Access denied by " + authorizationPredicate); return; } super.handle(exchange); @@ -76,7 +80,7 @@ public void stop(int delay) { protected void before() throws Throwable { if (enabled) { - InetSocketAddress inetSocketAddress = resolveAddress("localhost", 0); + InetSocketAddress inetSocketAddress = resolveAddress(); this.server = HttpServer.create(inetSocketAddress, 0); HttpHandler handler = createHandler(); this.server.createContext("/", Objects.requireNonNull(handler)); @@ -91,15 +95,27 @@ protected void after() { } } - private static InetSocketAddress resolveAddress(String address, int port) { + private static InetSocketAddress resolveAddress() { try { - return new InetSocketAddress(InetAddress.getByName(address), port); + 
return new InetSocketAddress(InetAddress.getByName("localhost"), 0); } catch (UnknownHostException e) { throw new RuntimeException(e); } } - public void setAccessKey(String accessKey) { - this.accessKey = accessKey; + public static BiPredicate fixedAccessKey(String accessKey) { + return mutableAccessKey(() -> accessKey); + } + + public static BiPredicate mutableAccessKey(Supplier accessKeySupplier) { + return (authorizationHeader, sessionTokenHeader) -> authorizationHeader != null + && authorizationHeader.contains(accessKeySupplier.get()); + } + + public static BiPredicate fixedAccessKeyAndToken(String accessKey, String sessionToken) { + Objects.requireNonNull(sessionToken); + final var accessKeyPredicate = fixedAccessKey(accessKey); + return (authorizationHeader, sessionTokenHeader) -> accessKeyPredicate.test(authorizationHeader, sessionTokenHeader) + && sessionToken.equals(sessionTokenHeader); } } diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSessionToken.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSessionToken.java deleted file mode 100644 index 001cc34d9b20d..0000000000000 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixtureWithSessionToken.java +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ -package fixture.s3; - -import com.sun.net.httpserver.HttpHandler; - -import org.elasticsearch.rest.RestStatus; - -import static fixture.s3.S3HttpHandler.sendError; - -public class S3HttpFixtureWithSessionToken extends S3HttpFixture { - - protected final String sessionToken; - - public S3HttpFixtureWithSessionToken(String bucket, String basePath, String accessKey, String sessionToken) { - super(true, bucket, basePath, accessKey); - this.sessionToken = sessionToken; - } - - @Override - protected HttpHandler createHandler() { - final HttpHandler delegate = super.createHandler(); - return exchange -> { - final String securityToken = exchange.getRequestHeaders().getFirst("x-amz-security-token"); - if (securityToken == null) { - sendError(exchange, RestStatus.FORBIDDEN, "AccessDenied", "No session token"); - return; - } - if (securityToken.equals(sessionToken) == false) { - sendError(exchange, RestStatus.FORBIDDEN, "AccessDenied", "Bad session token"); - return; - } - delegate.handle(exchange); - }; - } -} diff --git a/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java b/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java index 3049fe830e728..989e5468c4fb3 100644 --- a/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java +++ b/x-pack/plugin/searchable-snapshots/qa/s3/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/s3/S3SearchableSnapshotsCredentialsReloadIT.java @@ -44,7 +44,14 @@ public class S3SearchableSnapshotsCredentialsReloadIT extends 
ESRestTestCase { private static final String BUCKET = "S3SearchableSnapshotsCredentialsReloadIT-bucket"; private static final String BASE_PATH = "S3SearchableSnapshotsCredentialsReloadIT-base-path"; - public static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, "ignored"); + private static volatile String repositoryAccessKey; + + public static final S3HttpFixture s3Fixture = new S3HttpFixture( + true, + BUCKET, + BASE_PATH, + S3HttpFixture.mutableAccessKey(() -> repositoryAccessKey) + ); private static final MutableSettingsProvider keystoreSettings = new MutableSettingsProvider(); @@ -78,7 +85,7 @@ public void testReloadCredentialsFromKeystore() throws IOException { // Set up initial credentials final String accessKey1 = randomIdentifier(); - s3Fixture.setAccessKey(accessKey1); + repositoryAccessKey = accessKey1; keystoreSettings.put("s3.client.default.access_key", accessKey1); keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); cluster.updateStoredSecureSettings(); @@ -92,7 +99,7 @@ public void testReloadCredentialsFromKeystore() throws IOException { // Rotate credentials in blob store logger.info("--> rotate credentials"); final String accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); - s3Fixture.setAccessKey(accessKey2); + repositoryAccessKey = accessKey2; // Ensure searchable snapshot now does not work due to invalid credentials logger.info("--> expect failure"); @@ -118,7 +125,7 @@ public void testReloadCredentialsFromAlternativeClient() throws IOException { final String accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); final String alternativeClient = randomValueOtherThan("default", ESTestCase::randomIdentifier); - s3Fixture.setAccessKey(accessKey1); + repositoryAccessKey = accessKey1; keystoreSettings.put("s3.client.default.access_key", accessKey1); keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); keystoreSettings.put("s3.client." 
+ alternativeClient + ".access_key", accessKey2); @@ -133,7 +140,7 @@ public void testReloadCredentialsFromAlternativeClient() throws IOException { // Rotate credentials in blob store logger.info("--> rotate credentials"); - s3Fixture.setAccessKey(accessKey2); + repositoryAccessKey = accessKey2; // Ensure searchable snapshot now does not work due to invalid credentials logger.info("--> expect failure"); @@ -157,7 +164,7 @@ public void testReloadCredentialsFromMetadata() throws IOException { final String accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); testHarness.putRepository(b -> b.put("access_key", accessKey1).put("secret_key", randomIdentifier())); - s3Fixture.setAccessKey(accessKey1); + repositoryAccessKey = accessKey1; testHarness.createFrozenSearchableSnapshotIndex(); @@ -166,7 +173,7 @@ public void testReloadCredentialsFromMetadata() throws IOException { // Rotate credentials in blob store logger.info("--> rotate credentials"); - s3Fixture.setAccessKey(accessKey2); + repositoryAccessKey = accessKey2; // Ensure searchable snapshot now does not work due to invalid credentials logger.info("--> expect failure"); @@ -269,7 +276,7 @@ void ensureSearchFailure() throws IOException { assertThat( expectThrows(ResponseException.class, () -> client().performRequest(searchRequest)).getMessage(), allOf( - containsString("Bad access key"), + containsString("Access denied"), containsString("Status Code: 403"), containsString("Error Code: AccessDenied"), containsString("failed to read data from cache") From a860d3ab33cf12bba782924c3fd87c586fe887ad Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Tue, 26 Nov 2024 10:48:35 +0100 Subject: [PATCH 004/139] [DOCS] Trivial: remove tech preview badge (#117461) --- docs/reference/intro.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 2908c55789bab..e0100b1c5640b 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -85,7 +85,7 @@ You can deploy {es} in various ways. **Hosted options** * {cloud}/ec-getting-started-trial.html[*Elastic Cloud Hosted*]: {es} is available as part of the hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. -* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]: Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless*]: Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. 
**Advanced options** From 5b929d7f415094e1e58609e86ff977b46d71c016 Mon Sep 17 00:00:00 2001 From: Tim Grein Date: Tue, 26 Nov 2024 12:01:10 +0100 Subject: [PATCH 005/139] Small wording fix in ESIntegTestCase (#117341) --- .../src/main/java/org/elasticsearch/test/ESIntegTestCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index d7c5c598ce978..af92eae8c8a19 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -281,7 +281,7 @@ public abstract class ESIntegTestCase extends ESTestCase { /** * Annotation for third-party integration tests. *

- * These are tests the require a third-party service in order to run. They
+ * These are tests, which require a third-party service in order to run. They
 * may require the user to manually configure an external process (such as rabbitmq),
 * or may additionally require some external configuration (e.g. AWS credentials)
 * via the {@code tests.config} system property.

From 5e028220c91af4a37d6a0abcc9d5b9359ba0eaf3 Mon Sep 17 00:00:00 2001
From: Jedr Blaszyk
Date: Tue, 26 Nov 2024 12:06:52 +0100
Subject: [PATCH 006/139] [Docs] Update incremental sync note (#117545)

---
 docs/reference/connector/docs/connectors-content-syncs.asciidoc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/reference/connector/docs/connectors-content-syncs.asciidoc b/docs/reference/connector/docs/connectors-content-syncs.asciidoc
index f1745382677a2..0a2eb54047170 100644
--- a/docs/reference/connector/docs/connectors-content-syncs.asciidoc
+++ b/docs/reference/connector/docs/connectors-content-syncs.asciidoc
@@ -52,7 +52,7 @@ However, a fast, accessible third-party data source that stores huge amounts of

 [NOTE]
 ====
-Incremental syncs for the SharePoint Online connector use specific logic.
+Incremental syncs for <> and <> connectors use specific logic.
 All other connectors use the same shared connector framework logic for incremental syncs.
 ====

From 5a749a30d6bed5aaff8f057e6c14f53a75713acd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mariusz=20J=C3=B3zala?= <377355+jozala@users.noreply.github.com>
Date: Tue, 26 Nov 2024 12:42:41 +0100
Subject: [PATCH 007/139] Changelog for default container image change to UBI (#117482)

The image has been changed in #116739

---
 docs/changelog/116739.yaml | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 docs/changelog/116739.yaml

diff --git a/docs/changelog/116739.yaml b/docs/changelog/116739.yaml
new file mode 100644
index 0000000000000..ea3b1253a9008
--- /dev/null
+++ b/docs/changelog/116739.yaml
@@ -0,0 +1,5 @@
+pr: 116739
+summary: Change default Docker image to be based on UBI minimal instead of Ubuntu
+area: Infra/Core
+type: enhancement
+issues: []

From d7797eed31237104a369b54b16d3dcf56fe56fbc Mon Sep 17 00:00:00 2001
From: Ievgen Degtiarenko
Date: Tue, 26 Nov 2024 12:50:47 +0100
Subject: [PATCH 008/139] Add a way to log hot threads in plain text (#111053)

This adds a way to log the current threads in plain text. That way the
output does not need to be decoded, and logs can be searched by stack
trace (for example, to check whether an issue is recurring). Note that
this produces a multi-line log entry.
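A usage sketch (the level and prefix are made-up examples; `logger` is assumed
to be a log4j Logger):

    // Captures the current threads once (no sampling) and emits one log entry
    // per thread, each prefixed with the given string:
    HotThreads.logLocalCurrentThreads(logger, Level.DEBUG, "slow cluster state application");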
--- .../action/admin/HotThreadsIT.java | 23 +++++++ .../elasticsearch/monitor/jvm/HotThreads.java | 60 ++++++++++++++----- .../monitor/jvm/HotThreadsTests.java | 2 +- 3 files changed, 68 insertions(+), 17 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java index 8c80cee58f46c..76a6717ab1d09 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/admin/HotThreadsIT.java @@ -22,6 +22,7 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.monitor.jvm.HotThreads; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.MockLog; import org.elasticsearch.test.junit.annotations.TestLogging; import org.hamcrest.Matcher; @@ -31,6 +32,7 @@ import static org.elasticsearch.index.query.QueryBuilders.boolQuery; import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.index.query.QueryBuilders.termQuery; +import static org.elasticsearch.test.MockLog.assertThatLogger; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.hamcrest.CoreMatchers.equalTo; @@ -211,4 +213,25 @@ public void testLogLocalHotThreads() { ) ); } + + @TestLogging(reason = "testing logging at various levels", value = "org.elasticsearch.action.admin.HotThreadsIT:TRACE") + public void testLogLocalCurrentThreadsInPlainText() { + final var level = randomFrom(Level.TRACE, Level.DEBUG, Level.INFO, Level.WARN, Level.ERROR); + assertThatLogger( + () -> HotThreads.logLocalCurrentThreads(logger, level, getTestName()), + HotThreadsIT.class, + new MockLog.SeenEventExpectation( + "Should log hot threads header in plain text", + HotThreadsIT.class.getCanonicalName(), + level, + "testLogLocalCurrentThreadsInPlainText: Hot threads at" + ), + new MockLog.SeenEventExpectation( + "Should log hot threads cpu usage in plain text", + HotThreadsIT.class.getCanonicalName(), + level, + "cpu usage by thread" + ) + ); + } } diff --git a/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java b/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java index b14ef171ccd1d..8c903fdc634d3 100644 --- a/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java +++ b/server/src/main/java/org/elasticsearch/monitor/jvm/HotThreads.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.OutputStreamWriter; +import java.io.StringWriter; import java.io.Writer; import java.lang.management.ManagementFactory; import java.lang.management.ThreadInfo; @@ -105,6 +106,33 @@ public static void logLocalHotThreads(Logger logger, Level level, String prefix, } } + /** + * Capture and log the current threads on the local node. Unlike hot threads this does not sample and captures current state only. + * Useful for capturing stack traces for unexpectedly-slow operations in production. The resulting message might be large, so it is + * split per thread and logged as multiple entries. + * + * @param logger The logger to use for the logging + * @param level The log level to use for the logging. + * @param prefix The prefix to emit on each chunk of the logging. 
+ */ + public static void logLocalCurrentThreads(Logger logger, Level level, String prefix) { + if (logger.isEnabled(level) == false) { + return; + } + + try (var writer = new StringWriter()) { + new HotThreads().busiestThreads(500).threadElementsSnapshotCount(1).detect(writer, () -> { + logger.log(level, "{}: {}", prefix, writer.toString()); + writer.getBuffer().setLength(0); + }); + } catch (Exception e) { + logger.error( + () -> org.elasticsearch.common.Strings.format("failed to write local current threads with prefix [%s]", prefix), + e + ); + } + } + public enum ReportType { CPU("cpu"), @@ -192,11 +220,12 @@ public HotThreads sortOrder(SortOrder order) { } public void detect(Writer writer) throws Exception { + detect(writer, () -> {}); + } + + public void detect(Writer writer, Runnable onNextThread) throws Exception { synchronized (mutex) { - innerDetect(ManagementFactory.getThreadMXBean(), SunThreadInfo.INSTANCE, Thread.currentThread().getId(), (interval) -> { - Thread.sleep(interval); - return null; - }, writer); + innerDetect(ManagementFactory.getThreadMXBean(), SunThreadInfo.INSTANCE, Thread.currentThread().getId(), writer, onNextThread); } } @@ -245,13 +274,15 @@ Map getAllValidThreadInfos(ThreadMXBean threadBean, ThreadInfo[][] captureThreadStacks(ThreadMXBean threadBean, long[] threadIds) throws InterruptedException { ThreadInfo[][] result = new ThreadInfo[threadElementsSnapshotCount][]; - for (int j = 0; j < threadElementsSnapshotCount; j++) { - // NOTE, javadoc of getThreadInfo says: If a thread of the given ID is not alive or does not exist, - // null will be set in the corresponding element in the returned array. A thread is alive if it has - // been started and has not yet died. + + // NOTE, javadoc of getThreadInfo says: If a thread of the given ID is not alive or does not exist, + // null will be set in the corresponding element in the returned array. A thread is alive if it has + // been started and has not yet died. 
+ for (int j = 0; j < threadElementsSnapshotCount - 1; j++) { result[j] = threadBean.getThreadInfo(threadIds, Integer.MAX_VALUE); Thread.sleep(threadElementsSnapshotDelay.millis()); } + result[threadElementsSnapshotCount - 1] = threadBean.getThreadInfo(threadIds, Integer.MAX_VALUE); return result; } @@ -267,13 +298,8 @@ private double getTimeSharePercentage(long time) { return (((double) time) / interval.nanos()) * 100; } - void innerDetect( - ThreadMXBean threadBean, - SunThreadInfo sunThreadInfo, - long currentThreadId, - SleepFunction threadSleep, - Writer writer - ) throws Exception { + void innerDetect(ThreadMXBean threadBean, SunThreadInfo sunThreadInfo, long currentThreadId, Writer writer, Runnable onNextThread) + throws Exception { if (threadBean.isThreadCpuTimeSupported() == false) { throw new ElasticsearchException("thread CPU time is not supported on this JDK"); } @@ -297,10 +323,11 @@ void innerDetect( .append(", ignoreIdleThreads=") .append(Boolean.toString(ignoreIdleThreads)) .append(":\n"); + onNextThread.run(); // Capture before and after thread state with timings Map previousThreadInfos = getAllValidThreadInfos(threadBean, sunThreadInfo, currentThreadId); - threadSleep.apply(interval.millis()); + Thread.sleep(interval.millis()); Map latestThreadInfos = getAllValidThreadInfos(threadBean, sunThreadInfo, currentThreadId); latestThreadInfos.forEach((threadId, accumulator) -> accumulator.subtractPrevious(previousThreadInfos.get(threadId))); @@ -430,6 +457,7 @@ void innerDetect( } } } + onNextThread.run(); } } diff --git a/server/src/test/java/org/elasticsearch/monitor/jvm/HotThreadsTests.java b/server/src/test/java/org/elasticsearch/monitor/jvm/HotThreadsTests.java index 93c40185f62ac..37eb69c0ca409 100644 --- a/server/src/test/java/org/elasticsearch/monitor/jvm/HotThreadsTests.java +++ b/server/src/test/java/org/elasticsearch/monitor/jvm/HotThreadsTests.java @@ -947,7 +947,7 @@ private static String innerDetect( long currentThreadId ) throws Exception { try (var writer = new StringWriter()) { - hotThreads.innerDetect(mockedMthreadMXBeanBean, sunThreadInfo, currentThreadId, (interval) -> null, writer); + hotThreads.innerDetect(mockedMthreadMXBeanBean, sunThreadInfo, currentThreadId, writer, () -> {}); return writer.toString(); } } From a245e709ba5a94ad7a476a84d43f0b04bd361fc4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 26 Nov 2024 23:02:11 +1100 Subject: [PATCH 009/139] Mute org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT testConstantKeywordField #117531 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 37f36e9a19340..b3c34505e6561 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -240,6 +240,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117524 - class: org.elasticsearch.repositories.s3.RepositoryS3EcsClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117525 +- class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT + method: testConstantKeywordField + issue: https://github.com/elastic/elasticsearch/issues/117531 # Examples: # From 5e16bc3fa615d76a5f188e0b722691da2981e633 Mon Sep 17 00:00:00 2001 From: Alexey Ivanov Date: Tue, 26 Nov 2024 12:49:33 +0000 Subject: [PATCH 010/139] [CI] FileSettingsServiceIT testErrorCanRecoverOnRestart failing (#116895) (#117511) Fixes flaky test FileSettingsServiceIT.testErrorCanRecoverOnRestart Fixes #116895 --- 
 .../reservedstate/service/FileSettingsServiceIT.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java
index 90326abb381d0..85f0e2cf7e3ff 100644
--- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/FileSettingsServiceIT.java
@@ -398,7 +398,7 @@ public void testErrorCanRecoverOnRestart() throws Exception {
 
         FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
 
-        assertTrue(masterFileSettingsService.watching());
+        assertBusy(() -> assertTrue(masterFileSettingsService.watching()));
         assertFalse(dataFileSettingsService.watching());
 
         writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());
@@ -434,7 +434,7 @@ public void testNewErrorOnRestartReprocessing() throws Exception {
 
         FileSettingsService masterFileSettingsService = internalCluster().getInstance(FileSettingsService.class, masterNode);
 
-        assertTrue(masterFileSettingsService.watching());
+        assertBusy(() -> assertTrue(masterFileSettingsService.watching()));
         assertFalse(dataFileSettingsService.watching());
 
         writeJSONFile(masterNode, testErrorJSON, logger, versionCounter.incrementAndGet());

From 1495c550ad05af55acec47ca1445b5faeb86d4e8 Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Wed, 27 Nov 2024 00:54:46 +1100
Subject: [PATCH 011/139] Mute org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} #116777

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index b3c34505e6561..49898308e411b 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -243,6 +243,9 @@ tests:
 - class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT
   method: testConstantKeywordField
   issue: https://github.com/elastic/elasticsearch/issues/117531
+- class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT
+  method: test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set}
+  issue: https://github.com/elastic/elasticsearch/issues/116777
 
 # Examples:
 #

From 2bc1b4f6062c33a259b4aa0df9a7118bbfc4dc2e Mon Sep 17 00:00:00 2001
From: David Turner
Date: Tue, 26 Nov 2024 13:58:54 +0000
Subject: [PATCH 012/139] Make `PutStoredScriptRequest` immutable (#117556)

There is no need for this request to be mutable: we always know all the
values at creation time. Also adjusts the `toString()` impl to use the
`source` field, since this is the only spot where we use the `content`, so
with this change we can follow up with a 9.x-only change to remove it.
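For illustration, a sketch of constructing the now-immutable request with every value supplied up front (the id, timeouts and script body are made up; the constructor shape matches the change below):

    import org.elasticsearch.common.bytes.BytesArray;
    import org.elasticsearch.common.bytes.BytesReference;
    import org.elasticsearch.core.TimeValue;
    import org.elasticsearch.script.StoredScriptSource;
    import org.elasticsearch.xcontent.XContentType;

    BytesReference content = new BytesArray("""
        {"script": {"lang": "painless", "source": "1"}}""");
    PutStoredScriptRequest request = new PutStoredScriptRequest(
        TimeValue.timeValueSeconds(30),  // master node timeout
        TimeValue.timeValueSeconds(30),  // ack timeout
        "my-script",                     // id
        null,                            // context (optional)
        content,
        XContentType.JSON,
        StoredScriptSource.parse(content, XContentType.JSON)
    );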
--- .../script/mustache/SearchTemplateIT.java | 11 +-- .../elasticsearch/script/StoredScriptsIT.java | 26 ++----- .../storedscripts/PutStoredScriptRequest.java | 78 ++++++------------- .../PutStoredScriptRequestTests.java | 12 ++- .../StoredScriptIntegTestUtils.java | 22 ++++-- .../integration/DlsFlsRequestCacheTests.java | 17 +--- 6 files changed, 60 insertions(+), 106 deletions(-) diff --git a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java index defd20b64762b..cc0b0122e9cce 100644 --- a/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java +++ b/modules/lang-mustache/src/internalClusterTest/java/org/elasticsearch/script/mustache/SearchTemplateIT.java @@ -13,12 +13,10 @@ import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptResponse; -import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.action.bulk.BulkRequestBuilder; import org.elasticsearch.action.search.SearchRequest; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.ScriptType; @@ -39,6 +37,7 @@ import java.util.Map; import java.util.concurrent.ExecutionException; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.newPutStoredScriptTestRequest; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; @@ -467,12 +466,6 @@ public static void assertHitCount(SearchTemplateRequestBuilder requestBuilder, l } private void putJsonStoredScript(String id, String jsonContent) { - assertAcked( - safeExecute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id(id) - .content(new BytesArray(jsonContent), XContentType.JSON) - ) - ); + assertAcked(safeExecute(TransportPutStoredScriptAction.TYPE, newPutStoredScriptTestRequest(id, jsonContent))); } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java index e9efab5934e52..76ea5b99a2a6b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/script/StoredScriptsIT.java @@ -11,16 +11,13 @@ import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptAction; import org.elasticsearch.action.admin.cluster.storedscripts.GetStoredScriptRequest; -import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.TransportDeleteStoredScriptAction; import 
org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESIntegTestCase; -import org.elasticsearch.xcontent.XContentType; import java.util.Arrays; import java.util.Collection; @@ -28,6 +25,7 @@ import java.util.Map; import java.util.function.Function; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.newPutStoredScriptTestRequest; import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.putJsonStoredScript; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -73,14 +71,9 @@ public void testBasics() { safeAwaitAndUnwrapFailure( IllegalArgumentException.class, AcknowledgedResponse.class, - l -> client().execute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("id#") - .content(new BytesArray(Strings.format(""" - {"script": {"lang": "%s", "source": "1"} } - """, LANG)), XContentType.JSON), - l - ) + l -> client().execute(TransportPutStoredScriptAction.TYPE, newPutStoredScriptTestRequest("id#", Strings.format(""" + {"script": {"lang": "%s", "source": "1"} } + """, LANG)), l) ).getMessage() ); } @@ -91,14 +84,9 @@ public void testMaxScriptSize() { safeAwaitAndUnwrapFailure( IllegalArgumentException.class, AcknowledgedResponse.class, - l -> client().execute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("foobar") - .content(new BytesArray(Strings.format(""" - {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ - """, LANG)), XContentType.JSON), - l - ) + l -> client().execute(TransportPutStoredScriptAction.TYPE, newPutStoredScriptTestRequest("foobar", Strings.format(""" + {"script": { "lang": "%s", "source":"0123456789abcdef"} }\ + """, LANG)), l) ).getMessage() ); } diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java index 35e46d3f2a4da..8e453cd5bac3a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java @@ -11,10 +11,12 @@ import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.script.StoredScriptSource; import org.elasticsearch.xcontent.ToXContentFragment; @@ -28,11 +30,15 @@ public class PutStoredScriptRequest extends AcknowledgedRequest implements ToXContentFragment { - private String id; - private String context; - private BytesReference content; - private XContentType xContentType; - private StoredScriptSource source; + @Nullable + private final String id; + + 
@Nullable + private final String context; + + private final BytesReference content; + private final XContentType xContentType; + private final StoredScriptSource source; public PutStoredScriptRequest(StreamInput in) throws IOException { super(in); @@ -43,15 +49,11 @@ public PutStoredScriptRequest(StreamInput in) throws IOException { source = new StoredScriptSource(in); } - public PutStoredScriptRequest(TimeValue masterNodeTimeout, TimeValue ackTimeout) { - super(masterNodeTimeout, ackTimeout); - } - public PutStoredScriptRequest( TimeValue masterNodeTimeout, TimeValue ackTimeout, - String id, - String context, + @Nullable String id, + @Nullable String context, BytesReference content, XContentType xContentType, StoredScriptSource source @@ -59,9 +61,9 @@ public PutStoredScriptRequest( super(masterNodeTimeout, ackTimeout); this.id = id; this.context = context; - this.content = content; + this.content = Objects.requireNonNull(content); this.xContentType = Objects.requireNonNull(xContentType); - this.source = source; + this.source = Objects.requireNonNull(source); } @Override @@ -74,10 +76,6 @@ public ActionRequestValidationException validate() { validationException = addValidationError("id cannot contain '#' for stored script", validationException); } - if (content == null) { - validationException = addValidationError("must specify code for stored script", validationException); - } - return validationException; } @@ -85,20 +83,10 @@ public String id() { return id; } - public PutStoredScriptRequest id(String id) { - this.id = id; - return this; - } - public String context() { return context; } - public PutStoredScriptRequest context(String context) { - this.context = context; - return this; - } - public BytesReference content() { return content; } @@ -111,16 +99,6 @@ public StoredScriptSource source() { return source; } - /** - * Set the script source and the content type of the bytes. - */ - public PutStoredScriptRequest content(BytesReference content, XContentType xContentType) { - this.content = content; - this.xContentType = Objects.requireNonNull(xContentType); - this.source = StoredScriptSource.parse(content, xContentType); - return this; - } - @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); @@ -133,28 +111,16 @@ public void writeTo(StreamOutput out) throws IOException { @Override public String toString() { - String source = "_na_"; - - try { - source = XContentHelper.convertToJson(content, false, xContentType); - } catch (Exception e) { - // ignore - } - - return "put stored script {id [" - + id - + "]" - + (context != null ? ", context [" + context + "]" : "") - + ", content [" - + source - + "]}"; + return Strings.format( + "put stored script {id [%s]%s, content [%s]}", + id, + context != null ? 
", context [" + context + "]" : "", + source + ); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.field("script"); - source.toXContent(builder, params); - - return builder; + return builder.field("script", source, params); } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestTests.java index ffdd588764699..023e7693f8a47 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequestTests.java @@ -57,9 +57,15 @@ public void testToXContent() throws IOException { BytesReference expectedRequestBody = BytesReference.bytes(builder); - PutStoredScriptRequest request = new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT); - request.id("test1"); - request.content(expectedRequestBody, xContentType); + PutStoredScriptRequest request = new PutStoredScriptRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + "test1", + null, + expectedRequestBody, + xContentType, + StoredScriptSource.parse(expectedRequestBody, xContentType) + ); XContentBuilder requestBuilder = XContentBuilder.builder(xContentType.xContent()); requestBuilder.startObject(); diff --git a/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java b/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java index 5f979d75ec382..0a090af431dae 100644 --- a/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/StoredScriptIntegTestUtils.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.script.StoredScriptSource; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.xcontent.XContentType; @@ -25,11 +26,22 @@ public static void putJsonStoredScript(String id, String jsonContent) { } public static void putJsonStoredScript(String id, BytesReference jsonContent) { - assertAcked( - ESIntegTestCase.safeExecute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id(id).content(jsonContent, XContentType.JSON) - ) + assertAcked(ESIntegTestCase.safeExecute(TransportPutStoredScriptAction.TYPE, newPutStoredScriptTestRequest(id, jsonContent))); + } + + public static PutStoredScriptRequest newPutStoredScriptTestRequest(String id, String jsonContent) { + return newPutStoredScriptTestRequest(id, new BytesArray(jsonContent)); + } + + public static PutStoredScriptRequest newPutStoredScriptTestRequest(String id, BytesReference jsonContent) { + return new PutStoredScriptRequest( + TEST_REQUEST_TIMEOUT, + TEST_REQUEST_TIMEOUT, + id, + null, + jsonContent, + XContentType.JSON, + StoredScriptSource.parse(jsonContent, XContentType.JSON) ); } } diff --git a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java index a5f827c2a4b53..82a10f21debfb 100644 --- 
a/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java +++ b/x-pack/plugin/security/src/internalClusterTest/java/org/elasticsearch/integration/DlsFlsRequestCacheTests.java @@ -8,13 +8,11 @@ package org.elasticsearch.integration; import org.elasticsearch.ElasticsearchSecurityException; -import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.support.broadcast.BroadcastResponse; import org.elasticsearch.client.internal.Client; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.SecureString; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; @@ -24,7 +22,6 @@ import org.elasticsearch.search.SearchHit; import org.elasticsearch.test.SecuritySingleNodeTestCase; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.core.XPackSettings; import org.elasticsearch.xpack.core.security.action.apikey.CreateApiKeyAction; import org.elasticsearch.xpack.core.security.action.apikey.CreateApiKeyRequest; @@ -43,6 +40,7 @@ import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; +import static org.elasticsearch.action.admin.cluster.storedscripts.StoredScriptIntegTestUtils.newPutStoredScriptTestRequest; import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.IMMEDIATE; import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.NONE; import static org.elasticsearch.action.support.WriteRequest.RefreshPolicy.WAIT_UNTIL; @@ -350,17 +348,8 @@ public void testRequestCacheWithTemplateRoleQuery() { private void prepareIndices() { final Client client = client(); - assertAcked( - safeExecute( - TransportPutStoredScriptAction.TYPE, - new PutStoredScriptRequest(TEST_REQUEST_TIMEOUT, TEST_REQUEST_TIMEOUT).id("my-script") - .content( - new BytesArray(""" - {"script":{"source":"{\\"match\\":{\\"username\\":\\"{{_user.username}}\\"}}","lang":"mustache"}}"""), - XContentType.JSON - ) - ) - ); + assertAcked(safeExecute(TransportPutStoredScriptAction.TYPE, newPutStoredScriptTestRequest("my-script", """ + {"script":{"source":"{\\"match\\":{\\"username\\":\\"{{_user.username}}\\"}}","lang":"mustache"}}"""))); assertAcked(indicesAdmin().prepareCreate(DLS_INDEX).addAlias(new Alias("dls-alias")).get()); client.prepareIndex(DLS_INDEX).setId("101").setSource("number", 101, "letter", "A").get(); From e9f899ee6913fe00dc8ef7a4254c76e8dca31b47 Mon Sep 17 00:00:00 2001 From: Pooya Salehi Date: Tue, 26 Nov 2024 16:44:15 +0100 Subject: [PATCH 013/139] Add current node weight as an APM metric (#117557) As discussed previously, the current node weight (calculated the same way that we calculate for the desired balance computations) might also be useful to have as a metric. The difference is that the current node weight is calculated based on the current cluster state rather than the internal state of the BalancedShardsAllocator (i.e. Balancer and ModelNode). To share all the weight calculation logic I had to move out the weight function and a few related utilities. NodeAllocationStatsProvider is still shared by both the AllocationStatsService and the desired balance metric collection. 
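For illustration, a sketch of how the extracted weight function can be applied to a single node's current state (all numbers are made up; in the real code the four balance factors come from cluster settings and the averages from the static helpers on WeightFunction):

    import org.elasticsearch.cluster.routing.allocation.allocator.WeightFunction;

    // Factors: shard, index, write-load and disk-usage balance (illustrative values).
    WeightFunction weightFunction = new WeightFunction(0.45f, 0.55f, 0.0f, 0.0f);

    float currentNodeWeight = weightFunction.nodeWeight(
        12,        // shards currently allocated to this node
        10.0f,     // avgShardPerNode(metadata, routingNodes)
        3.0,       // this node's forecasted write load
        2.5,       // avgWriteLoadPerNode(writeLoadForecaster, metadata, routingNodes)
        2_000_000, // this node's current disk usage in bytes
        1_500_000  // avgDiskUsageInBytesPerNode(clusterInfo, metadata, routingNodes)
    );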
Relates ES-10080 --- .../DesiredBalanceReconcilerMetricsIT.java | 10 ++ .../elasticsearch/cluster/ClusterModule.java | 2 +- .../allocation/AllocationStatsService.java | 23 ++- .../NodeAllocationStatsProvider.java | 61 ++++++- .../allocator/BalancedShardsAllocator.java | 136 ++------------- .../allocation/allocator/DesiredBalance.java | 2 +- .../allocator/DesiredBalanceMetrics.java | 26 ++- .../allocator/DesiredBalanceReconciler.java | 11 +- .../allocation/allocator/WeightFunction.java | 157 ++++++++++++++++++ .../AllocationStatsServiceTests.java | 6 +- .../BalancedShardsAllocatorTests.java | 2 +- .../cluster/ESAllocationTestCase.java | 10 +- 12 files changed, 297 insertions(+), 149 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerMetricsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerMetricsIT.java index b3ec4a5331180..355427c4e059b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerMetricsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconcilerMetricsIT.java @@ -117,6 +117,15 @@ public void testDesiredBalanceMetrics() { assertThat((String) nodeStat.attributes().get("node_id"), is(in(nodeIds))); assertThat((String) nodeStat.attributes().get("node_name"), is(in(nodeNames))); } + final var currentNodeWeightsMetrics = telemetryPlugin.getDoubleGaugeMeasurement( + DesiredBalanceMetrics.CURRENT_NODE_WEIGHT_METRIC_NAME + ); + assertThat(currentNodeWeightsMetrics.size(), equalTo(2)); + for (var nodeStat : currentNodeWeightsMetrics) { + assertTrue(nodeStat.isDouble()); + assertThat((String) nodeStat.attributes().get("node_id"), is(in(nodeIds))); + assertThat((String) nodeStat.attributes().get("node_name"), is(in(nodeNames))); + } final var currentNodeShardCountMetrics = telemetryPlugin.getLongGaugeMeasurement( DesiredBalanceMetrics.CURRENT_NODE_SHARD_COUNT_METRIC_NAME ); @@ -196,6 +205,7 @@ private static void assertMetricsAreBeingPublished(String nodeName, boolean shou testTelemetryPlugin.getLongGaugeMeasurement(DesiredBalanceMetrics.DESIRED_BALANCE_NODE_SHARD_COUNT_METRIC_NAME), matcher ); + assertThat(testTelemetryPlugin.getDoubleGaugeMeasurement(DesiredBalanceMetrics.CURRENT_NODE_WEIGHT_METRIC_NAME), matcher); assertThat(testTelemetryPlugin.getDoubleGaugeMeasurement(DesiredBalanceMetrics.CURRENT_NODE_WRITE_LOAD_METRIC_NAME), matcher); assertThat(testTelemetryPlugin.getLongGaugeMeasurement(DesiredBalanceMetrics.CURRENT_NODE_DISK_USAGE_METRIC_NAME), matcher); assertThat(testTelemetryPlugin.getLongGaugeMeasurement(DesiredBalanceMetrics.CURRENT_NODE_SHARD_COUNT_METRIC_NAME), matcher); diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 046f4b6b0b251..c2da33f8f4135 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -139,7 +139,7 @@ public ClusterModule( this.clusterPlugins = clusterPlugins; this.deciderList = createAllocationDeciders(settings, clusterService.getClusterSettings(), clusterPlugins); this.allocationDeciders = new AllocationDeciders(deciderList); - var 
nodeAllocationStatsProvider = new NodeAllocationStatsProvider(writeLoadForecaster); + var nodeAllocationStatsProvider = new NodeAllocationStatsProvider(writeLoadForecaster, clusterService.getClusterSettings()); this.shardsAllocator = createShardsAllocator( settings, clusterService.getClusterSettings(), diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsService.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsService.java index 0c82faaaeaa45..b98e9050d2b4a 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsService.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsService.java @@ -17,6 +17,7 @@ import java.util.Map; import java.util.function.Supplier; +import java.util.stream.Collectors; public class AllocationStatsService { private final ClusterService clusterService; @@ -39,6 +40,26 @@ public AllocationStatsService( } public Map stats() { - return nodeAllocationStatsProvider.stats(clusterService.state(), clusterInfoService.getClusterInfo(), desiredBalanceSupplier.get()); + var state = clusterService.state(); + var stats = nodeAllocationStatsProvider.stats( + state.metadata(), + state.getRoutingNodes(), + clusterInfoService.getClusterInfo(), + desiredBalanceSupplier.get() + ); + return stats.entrySet() + .stream() + .collect( + Collectors.toMap( + Map.Entry::getKey, + e -> new NodeAllocationStats( + e.getValue().shards(), + e.getValue().undesiredShards(), + e.getValue().forecastedIngestLoad(), + e.getValue().forecastedDiskUsage(), + e.getValue().currentDiskUsage() + ) + ) + ); } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/NodeAllocationStatsProvider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/NodeAllocationStatsProvider.java index 157b409be14d3..8368f5916ef91 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/NodeAllocationStatsProvider.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/NodeAllocationStatsProvider.java @@ -10,11 +10,15 @@ package org.elasticsearch.cluster.routing.allocation; import org.elasticsearch.cluster.ClusterInfo; -import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalance; +import org.elasticsearch.cluster.routing.allocation.allocator.WeightFunction; +import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.Nullable; @@ -23,17 +27,47 @@ public class NodeAllocationStatsProvider { private final WriteLoadForecaster writeLoadForecaster; - public NodeAllocationStatsProvider(WriteLoadForecaster writeLoadForecaster) { + private volatile float indexBalanceFactor; + private volatile float shardBalanceFactor; + private volatile float writeLoadBalanceFactor; + private volatile float diskUsageBalanceFactor; + + public record NodeAllocationAndClusterBalanceStats( + int shards, + int undesiredShards, + double forecastedIngestLoad, + long forecastedDiskUsage, + long currentDiskUsage, + float 
currentNodeWeight + ) {} + + public NodeAllocationStatsProvider(WriteLoadForecaster writeLoadForecaster, ClusterSettings clusterSettings) { this.writeLoadForecaster = writeLoadForecaster; + clusterSettings.initializeAndWatch(BalancedShardsAllocator.SHARD_BALANCE_FACTOR_SETTING, value -> this.shardBalanceFactor = value); + clusterSettings.initializeAndWatch(BalancedShardsAllocator.INDEX_BALANCE_FACTOR_SETTING, value -> this.indexBalanceFactor = value); + clusterSettings.initializeAndWatch( + BalancedShardsAllocator.WRITE_LOAD_BALANCE_FACTOR_SETTING, + value -> this.writeLoadBalanceFactor = value + ); + clusterSettings.initializeAndWatch( + BalancedShardsAllocator.DISK_USAGE_BALANCE_FACTOR_SETTING, + value -> this.diskUsageBalanceFactor = value + ); } - public Map stats( - ClusterState clusterState, + public Map stats( + Metadata metadata, + RoutingNodes routingNodes, ClusterInfo clusterInfo, @Nullable DesiredBalance desiredBalance ) { - var stats = Maps.newMapWithExpectedSize(clusterState.getRoutingNodes().size()); - for (RoutingNode node : clusterState.getRoutingNodes()) { + var weightFunction = new WeightFunction(shardBalanceFactor, indexBalanceFactor, writeLoadBalanceFactor, diskUsageBalanceFactor); + var avgShardsPerNode = WeightFunction.avgShardPerNode(metadata, routingNodes); + var avgWriteLoadPerNode = WeightFunction.avgWriteLoadPerNode(writeLoadForecaster, metadata, routingNodes); + var avgDiskUsageInBytesPerNode = WeightFunction.avgDiskUsageInBytesPerNode(clusterInfo, metadata, routingNodes); + + var stats = Maps.newMapWithExpectedSize(routingNodes.size()); + for (RoutingNode node : routingNodes) { int shards = 0; int undesiredShards = 0; double forecastedWriteLoad = 0.0; @@ -44,7 +78,7 @@ public Map stats( continue; } shards++; - IndexMetadata indexMetadata = clusterState.metadata().getIndexSafe(shardRouting.index()); + IndexMetadata indexMetadata = metadata.getIndexSafe(shardRouting.index()); if (isDesiredAllocation(desiredBalance, shardRouting) == false) { undesiredShards++; } @@ -54,14 +88,23 @@ public Map stats( currentDiskUsage += shardSize; } + float currentNodeWeight = weightFunction.nodeWeight( + shards, + avgShardsPerNode, + forecastedWriteLoad, + avgWriteLoadPerNode, + currentDiskUsage, + avgDiskUsageInBytesPerNode + ); stats.put( node.nodeId(), - new NodeAllocationStats( + new NodeAllocationAndClusterBalanceStats( shards, desiredBalance != null ? 
undesiredShards : -1, forecastedWriteLoad, forecastedDiskUsage, - currentDiskUsage + currentDiskUsage, + currentNodeWeight ) ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java index 5b8fb0c7e9203..8dd1f14564ce9 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocator.java @@ -168,14 +168,17 @@ private void collectAndRecordNodeWeightStats(Balancer balancer, WeightFunction w Map nodeLevelWeights = new HashMap<>(); for (var entry : balancer.nodes.entrySet()) { var node = entry.getValue(); + var nodeWeight = weightFunction.nodeWeight( + node.numShards(), + balancer.avgShardsPerNode(), + node.writeLoad(), + balancer.avgWriteLoadPerNode(), + node.diskUsageInBytes(), + balancer.avgDiskUsageInBytesPerNode() + ); nodeLevelWeights.put( node.routingNode.node(), - new DesiredBalanceMetrics.NodeWeightStats( - node.numShards(), - node.diskUsageInBytes(), - node.writeLoad(), - weightFunction.nodeWeight(balancer, node) - ) + new DesiredBalanceMetrics.NodeWeightStats(node.numShards(), node.diskUsageInBytes(), node.writeLoad(), nodeWeight) ); } allocation.routingNodes().setBalanceWeightStatsPerNode(nodeLevelWeights); @@ -252,65 +255,6 @@ public float getShardBalance() { return shardBalanceFactor; } - /** - * This class is the primary weight function used to create balanced over nodes and shards in the cluster. - * Currently this function has 3 properties: - *

- * <ul>
- * <li>index balance - balance property over shards per index</li>
- * <li>shard balance - balance property over shards per cluster</li>
- * </ul>
- * <p>
- * Each of these properties are expressed as factor such that the properties factor defines the relative
- * importance of the property for the weight function. For example if the weight function should calculate
- * the weights only based on a global (shard) balance the index balance can be set to {@code 0.0} and will
- * in turn have no effect on the distribution.
- * </p>
- * The weight per index is calculated based on the following formula:
- * <ul>
- * <li>
- * weight<sub>index</sub>(node, index) = indexBalance * (node.numShards(index) - avgShardsPerNode(index))
- * </li>
- * <li>
- * weight<sub>node</sub>(node, index) = shardBalance * (node.numShards() - avgShardsPerNode)
- * </li>
- * </ul>
- * weight(node, index) = weightindex(node, index) + weightnode(node, index) - */ - private static class WeightFunction { - - private final float theta0; - private final float theta1; - private final float theta2; - private final float theta3; - - WeightFunction(float shardBalance, float indexBalance, float writeLoadBalance, float diskUsageBalance) { - float sum = shardBalance + indexBalance + writeLoadBalance + diskUsageBalance; - if (sum <= 0.0f) { - throw new IllegalArgumentException("Balance factors must sum to a value > 0 but was: " + sum); - } - theta0 = shardBalance / sum; - theta1 = indexBalance / sum; - theta2 = writeLoadBalance / sum; - theta3 = diskUsageBalance / sum; - } - - float weight(Balancer balancer, ModelNode node, String index) { - final float weightIndex = node.numShards(index) - balancer.avgShardsPerNode(index); - return nodeWeight(balancer, node) + theta1 * weightIndex; - } - - float nodeWeight(Balancer balancer, ModelNode node) { - final float weightShard = node.numShards() - balancer.avgShardsPerNode(); - final float ingestLoad = (float) (node.writeLoad() - balancer.avgWriteLoadPerNode()); - final float diskUsage = (float) (node.diskUsageInBytes() - balancer.avgDiskUsageInBytesPerNode()); - return theta0 * weightShard + theta2 * ingestLoad + theta3 * diskUsage; - } - - float minWeightDelta(Balancer balancer, String index) { - return theta0 * 1 + theta1 * 1 + theta2 * balancer.getShardWriteLoad(index) + theta3 * balancer.maxShardSizeBytes(index); - } - } - /** * A {@link Balancer} */ @@ -335,63 +279,13 @@ private Balancer(WriteLoadForecaster writeLoadForecaster, RoutingAllocation allo this.metadata = allocation.metadata(); this.weight = weight; this.threshold = threshold; - avgShardsPerNode = ((float) metadata.getTotalNumberOfShards()) / routingNodes.size(); - avgWriteLoadPerNode = getTotalWriteLoad(writeLoadForecaster, metadata) / routingNodes.size(); - avgDiskUsageInBytesPerNode = ((double) getTotalDiskUsageInBytes(allocation.clusterInfo(), metadata) / routingNodes.size()); + avgShardsPerNode = WeightFunction.avgShardPerNode(metadata, routingNodes); + avgWriteLoadPerNode = WeightFunction.avgWriteLoadPerNode(writeLoadForecaster, metadata, routingNodes); + avgDiskUsageInBytesPerNode = WeightFunction.avgDiskUsageInBytesPerNode(allocation.clusterInfo(), metadata, routingNodes); nodes = Collections.unmodifiableMap(buildModelFromAssigned()); sorter = newNodeSorter(); } - private static double getTotalWriteLoad(WriteLoadForecaster writeLoadForecaster, Metadata metadata) { - double writeLoad = 0.0; - for (IndexMetadata indexMetadata : metadata.indices().values()) { - writeLoad += getIndexWriteLoad(writeLoadForecaster, indexMetadata); - } - return writeLoad; - } - - private static double getIndexWriteLoad(WriteLoadForecaster writeLoadForecaster, IndexMetadata indexMetadata) { - var shardWriteLoad = writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0); - return shardWriteLoad * numberOfCopies(indexMetadata); - } - - private static long getTotalDiskUsageInBytes(ClusterInfo clusterInfo, Metadata metadata) { - long totalDiskUsageInBytes = 0; - for (IndexMetadata indexMetadata : metadata.indices().values()) { - totalDiskUsageInBytes += getIndexDiskUsageInBytes(clusterInfo, indexMetadata); - } - return totalDiskUsageInBytes; - } - - // Visible for testing - static long getIndexDiskUsageInBytes(ClusterInfo clusterInfo, IndexMetadata indexMetadata) { - if (indexMetadata.ignoreDiskWatermarks()) { - // disk watermarks are ignored for partial searchable snapshots - 
// and is equivalent to indexMetadata.isPartialSearchableSnapshot() - return 0; - } - final long forecastedShardSize = indexMetadata.getForecastedShardSizeInBytes().orElse(-1L); - long totalSizeInBytes = 0; - int shardCount = 0; - for (int shard = 0; shard < indexMetadata.getNumberOfShards(); shard++) { - final ShardId shardId = new ShardId(indexMetadata.getIndex(), shard); - final long primaryShardSize = Math.max(forecastedShardSize, clusterInfo.getShardSize(shardId, true, -1L)); - if (primaryShardSize != -1L) { - totalSizeInBytes += primaryShardSize; - shardCount++; - } - final long replicaShardSize = Math.max(forecastedShardSize, clusterInfo.getShardSize(shardId, false, -1L)); - if (replicaShardSize != -1L) { - totalSizeInBytes += replicaShardSize * indexMetadata.getNumberOfReplicas(); - shardCount += indexMetadata.getNumberOfReplicas(); - } - } - if (shardCount == numberOfCopies(indexMetadata)) { - return totalSizeInBytes; - } - return shardCount == 0 ? 0 : (totalSizeInBytes / shardCount) * numberOfCopies(indexMetadata); - } - private static long getShardDiskUsageInBytes(ShardRouting shardRouting, IndexMetadata indexMetadata, ClusterInfo clusterInfo) { if (indexMetadata.ignoreDiskWatermarks()) { // disk watermarks are ignored for partial searchable snapshots @@ -401,10 +295,6 @@ private static long getShardDiskUsageInBytes(ShardRouting shardRouting, IndexMet return Math.max(indexMetadata.getForecastedShardSizeInBytes().orElse(0L), clusterInfo.getShardSize(shardRouting, 0L)); } - private static int numberOfCopies(IndexMetadata indexMetadata) { - return indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas()); - } - private float getShardWriteLoad(String index) { return (float) writeLoadForecaster.getForecastedWriteLoad(metadata.index(index)).orElse(0.0); } @@ -1433,7 +1323,7 @@ public float weight(ModelNode node) { } public float minWeightDelta() { - return function.minWeightDelta(balancer, index); + return function.minWeightDelta(balancer.getShardWriteLoad(index), balancer.maxShardSizeBytes(index)); } @Override diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java index 9de95804b49b2..6ad44fdf3a9c0 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java @@ -21,7 +21,7 @@ * * @param assignments a set of the (persistent) node IDs to which each {@link ShardId} should be allocated * @param weightsPerNode The node weights calculated based on - * {@link org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.WeightFunction#nodeWeight} + * {@link org.elasticsearch.cluster.routing.allocation.allocator.WeightFunction#nodeWeight} */ public record DesiredBalance( long lastConvergedIndex, diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java index cf8840dc95724..9f6487bdc8abd 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java @@ -10,7 +10,7 @@ package org.elasticsearch.cluster.routing.allocation.allocator; import 
org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.allocation.NodeAllocationStats; +import org.elasticsearch.cluster.routing.allocation.NodeAllocationStatsProvider.NodeAllocationAndClusterBalanceStats; import org.elasticsearch.telemetry.metric.DoubleWithAttributes; import org.elasticsearch.telemetry.metric.LongWithAttributes; import org.elasticsearch.telemetry.metric.MeterRegistry; @@ -41,6 +41,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w public static final String DESIRED_BALANCE_NODE_DISK_USAGE_METRIC_NAME = "es.allocator.desired_balance.allocations.node_disk_usage_bytes.current"; + public static final String CURRENT_NODE_WEIGHT_METRIC_NAME = "es.allocator.allocations.node.weight.current"; public static final String CURRENT_NODE_SHARD_COUNT_METRIC_NAME = "es.allocator.allocations.node.shard_count.current"; public static final String CURRENT_NODE_WRITE_LOAD_METRIC_NAME = "es.allocator.allocations.node.write_load.current"; public static final String CURRENT_NODE_DISK_USAGE_METRIC_NAME = "es.allocator.allocations.node.disk_usage_bytes.current"; @@ -68,12 +69,13 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w private volatile long undesiredAllocations; private final AtomicReference> weightStatsPerNodeRef = new AtomicReference<>(Map.of()); - private final AtomicReference> allocationStatsPerNodeRef = new AtomicReference<>(Map.of()); + private final AtomicReference> allocationStatsPerNodeRef = + new AtomicReference<>(Map.of()); public void updateMetrics( AllocationStats allocationStats, Map weightStatsPerNode, - Map nodeAllocationStats + Map nodeAllocationStats ) { assert allocationStats != null : "allocation stats cannot be null"; assert weightStatsPerNode != null : "node balance weight stats cannot be null"; @@ -124,6 +126,12 @@ public DesiredBalanceMetrics(MeterRegistry meterRegistry) { "bytes", this::getDesiredBalanceNodeDiskUsageMetrics ); + meterRegistry.registerDoublesGauge( + CURRENT_NODE_WEIGHT_METRIC_NAME, + "The weight of nodes based on the current allocation state", + "unit", + this::getCurrentNodeWeightMetrics + ); meterRegistry.registerLongsGauge( DESIRED_BALANCE_NODE_SHARD_COUNT_METRIC_NAME, "Shard count of nodes in the computed desired balance", @@ -291,6 +299,18 @@ private List getCurrentNodeUndesiredShardCountMetrics() { return values; } + private List getCurrentNodeWeightMetrics() { + if (nodeIsMaster == false) { + return List.of(); + } + var stats = allocationStatsPerNodeRef.get(); + List doubles = new ArrayList<>(stats.size()); + for (var node : stats.keySet()) { + doubles.add(new DoubleWithAttributes(stats.get(node).currentNodeWeight(), getNodeAttributes(node))); + } + return doubles; + } + private Map getNodeAttributes(DiscoveryNode node) { return Map.of("node_id", node.getId(), "node_name", node.getName()); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 5ad29debc8f20..2ee905634f760 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -20,8 +20,8 @@ import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.UnassignedInfo; import 
org.elasticsearch.cluster.routing.UnassignedInfo.AllocationStatus; -import org.elasticsearch.cluster.routing.allocation.NodeAllocationStats; import org.elasticsearch.cluster.routing.allocation.NodeAllocationStatsProvider; +import org.elasticsearch.cluster.routing.allocation.NodeAllocationStatsProvider.NodeAllocationAndClusterBalanceStats; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.allocator.DesiredBalanceMetrics.AllocationStats; import org.elasticsearch.cluster.routing.allocation.decider.Decision; @@ -159,8 +159,13 @@ void run() { } private void updateDesireBalanceMetrics(AllocationStats allocationStats) { - var stats = nodeAllocationStatsProvider.stats(allocation.getClusterState(), allocation.clusterInfo(), desiredBalance); - Map nodeAllocationStats = new HashMap<>(stats.size()); + var stats = nodeAllocationStatsProvider.stats( + allocation.metadata(), + allocation.routingNodes(), + allocation.clusterInfo(), + desiredBalance + ); + Map nodeAllocationStats = new HashMap<>(stats.size()); for (var entry : stats.entrySet()) { var node = allocation.nodes().get(entry.getKey()); if (node != null) { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java new file mode 100644 index 0000000000000..7203a92b147f6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/WeightFunction.java @@ -0,0 +1,157 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.cluster.routing.allocation.allocator; + +import org.elasticsearch.cluster.ClusterInfo; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.routing.RoutingNodes; +import org.elasticsearch.cluster.routing.allocation.WriteLoadForecaster; +import org.elasticsearch.index.shard.ShardId; + +/** + * This class is the primary weight function used to create balanced over nodes and shards in the cluster. + * Currently this function has 3 properties: + *
+ * <ul>
+ * <li>index balance - balance property over shards per index</li>
+ * <li>shard balance - balance property over shards per cluster</li>
+ * </ul>
+ * <p>
+ * Each of these properties are expressed as factor such that the properties factor defines the relative
+ * importance of the property for the weight function. For example if the weight function should calculate
+ * the weights only based on a global (shard) balance the index balance can be set to {@code 0.0} and will
+ * in turn have no effect on the distribution.
+ * </p>
+ * The weight per index is calculated based on the following formula:
+ * <ul>
+ * <li>
+ * weight<sub>index</sub>(node, index) = indexBalance * (node.numShards(index) - avgShardsPerNode(index))
+ * </li>
+ * <li>
+ * weight<sub>node</sub>(node, index) = shardBalance * (node.numShards() - avgShardsPerNode)
+ * </li>
+ * </ul>
+ * weight(node, index) = weightindex(node, index) + weightnode(node, index) + */ +public class WeightFunction { + + private final float theta0; + private final float theta1; + private final float theta2; + private final float theta3; + + public WeightFunction(float shardBalance, float indexBalance, float writeLoadBalance, float diskUsageBalance) { + float sum = shardBalance + indexBalance + writeLoadBalance + diskUsageBalance; + if (sum <= 0.0f) { + throw new IllegalArgumentException("Balance factors must sum to a value > 0 but was: " + sum); + } + theta0 = shardBalance / sum; + theta1 = indexBalance / sum; + theta2 = writeLoadBalance / sum; + theta3 = diskUsageBalance / sum; + } + + float weight(BalancedShardsAllocator.Balancer balancer, BalancedShardsAllocator.ModelNode node, String index) { + final float weightIndex = node.numShards(index) - balancer.avgShardsPerNode(index); + final float nodeWeight = nodeWeight( + node.numShards(), + balancer.avgShardsPerNode(), + node.writeLoad(), + balancer.avgWriteLoadPerNode(), + node.diskUsageInBytes(), + balancer.avgDiskUsageInBytesPerNode() + ); + return nodeWeight + theta1 * weightIndex; + } + + public float nodeWeight( + int nodeNumShards, + float avgShardsPerNode, + double nodeWriteLoad, + double avgWriteLoadPerNode, + double diskUsageInBytes, + double avgDiskUsageInBytesPerNode + ) { + final float weightShard = nodeNumShards - avgShardsPerNode; + final float ingestLoad = (float) (nodeWriteLoad - avgWriteLoadPerNode); + final float diskUsage = (float) (diskUsageInBytes - avgDiskUsageInBytesPerNode); + return theta0 * weightShard + theta2 * ingestLoad + theta3 * diskUsage; + } + + float minWeightDelta(float shardWriteLoad, float shardSizeBytes) { + return theta0 * 1 + theta1 * 1 + theta2 * shardWriteLoad + theta3 * shardSizeBytes; + } + + public static float avgShardPerNode(Metadata metadata, RoutingNodes routingNodes) { + return ((float) metadata.getTotalNumberOfShards()) / routingNodes.size(); + } + + public static double avgWriteLoadPerNode(WriteLoadForecaster writeLoadForecaster, Metadata metadata, RoutingNodes routingNodes) { + return getTotalWriteLoad(writeLoadForecaster, metadata) / routingNodes.size(); + } + + public static double avgDiskUsageInBytesPerNode(ClusterInfo clusterInfo, Metadata metadata, RoutingNodes routingNodes) { + return ((double) getTotalDiskUsageInBytes(clusterInfo, metadata) / routingNodes.size()); + } + + private static double getTotalWriteLoad(WriteLoadForecaster writeLoadForecaster, Metadata metadata) { + double writeLoad = 0.0; + for (IndexMetadata indexMetadata : metadata.indices().values()) { + writeLoad += getIndexWriteLoad(writeLoadForecaster, indexMetadata); + } + return writeLoad; + } + + private static double getIndexWriteLoad(WriteLoadForecaster writeLoadForecaster, IndexMetadata indexMetadata) { + var shardWriteLoad = writeLoadForecaster.getForecastedWriteLoad(indexMetadata).orElse(0.0); + return shardWriteLoad * numberOfCopies(indexMetadata); + } + + private static int numberOfCopies(IndexMetadata indexMetadata) { + return indexMetadata.getNumberOfShards() * (1 + indexMetadata.getNumberOfReplicas()); + } + + private static long getTotalDiskUsageInBytes(ClusterInfo clusterInfo, Metadata metadata) { + long totalDiskUsageInBytes = 0; + for (IndexMetadata indexMetadata : metadata.indices().values()) { + totalDiskUsageInBytes += getIndexDiskUsageInBytes(clusterInfo, indexMetadata); + } + return totalDiskUsageInBytes; + } + + // Visible for testing + static long getIndexDiskUsageInBytes(ClusterInfo 
clusterInfo, IndexMetadata indexMetadata) { + if (indexMetadata.ignoreDiskWatermarks()) { + // disk watermarks are ignored for partial searchable snapshots + // and is equivalent to indexMetadata.isPartialSearchableSnapshot() + return 0; + } + final long forecastedShardSize = indexMetadata.getForecastedShardSizeInBytes().orElse(-1L); + long totalSizeInBytes = 0; + int shardCount = 0; + for (int shard = 0; shard < indexMetadata.getNumberOfShards(); shard++) { + final ShardId shardId = new ShardId(indexMetadata.getIndex(), shard); + final long primaryShardSize = Math.max(forecastedShardSize, clusterInfo.getShardSize(shardId, true, -1L)); + if (primaryShardSize != -1L) { + totalSizeInBytes += primaryShardSize; + shardCount++; + } + final long replicaShardSize = Math.max(forecastedShardSize, clusterInfo.getShardSize(shardId, false, -1L)); + if (replicaShardSize != -1L) { + totalSizeInBytes += replicaShardSize * indexMetadata.getNumberOfReplicas(); + shardCount += indexMetadata.getNumberOfReplicas(); + } + } + if (shardCount == numberOfCopies(indexMetadata)) { + return totalSizeInBytes; + } + return shardCount == 0 ? 0 : (totalSizeInBytes / shardCount) * numberOfCopies(indexMetadata); + } +} diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java index 0efa576a0cddc..35f1780464659 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/AllocationStatsServiceTests.java @@ -84,7 +84,7 @@ public void testShardStats() { clusterService, () -> clusterInfo, createShardAllocator(), - new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER) + new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER, ClusterSettings.createBuiltInClusterSettings()) ); assertThat( service.stats(), @@ -125,7 +125,7 @@ public void testRelocatingShardIsOnlyCountedOnceOnTargetNode() { clusterService, EmptyClusterInfoService.INSTANCE, createShardAllocator(), - new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER) + new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER, ClusterSettings.createBuiltInClusterSettings()) ); assertThat( service.stats(), @@ -182,7 +182,7 @@ public DesiredBalance getDesiredBalance() { ); } }, - new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER) + new NodeAllocationStatsProvider(TEST_WRITE_LOAD_FORECASTER, ClusterSettings.createBuiltInClusterSettings()) ); assertThat( service.stats(), diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java index 98c3451329f52..412329e51a485 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/BalancedShardsAllocatorTests.java @@ -59,8 +59,8 @@ import static java.util.stream.Collectors.toSet; import static org.elasticsearch.cluster.routing.ShardRoutingState.RELOCATING; import static org.elasticsearch.cluster.routing.TestShardRouting.shardRoutingBuilder; -import static org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.Balancer.getIndexDiskUsageInBytes; import static 
org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator.DISK_USAGE_BALANCE_FACTOR_SETTING; +import static org.elasticsearch.cluster.routing.allocation.allocator.WeightFunction.getIndexDiskUsageInBytes; import static org.elasticsearch.cluster.routing.allocation.decider.DiskThresholdDecider.SETTING_IGNORE_DISK_WATERMARKS; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; diff --git a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java index a041efc9ad3f1..75cd6da44724d 100644 --- a/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/cluster/ESAllocationTestCase.java @@ -19,12 +19,12 @@ import org.elasticsearch.cluster.node.DiscoveryNodeUtils; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingNodesHelper; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.UnassignedInfo; import org.elasticsearch.cluster.routing.allocation.AllocationService; import org.elasticsearch.cluster.routing.allocation.FailedShard; -import org.elasticsearch.cluster.routing.allocation.NodeAllocationStats; import org.elasticsearch.cluster.routing.allocation.NodeAllocationStatsProvider; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.cluster.routing.allocation.WriteLoadForecaster; @@ -438,11 +438,13 @@ public void allocateUnassigned( } protected static final NodeAllocationStatsProvider EMPTY_NODE_ALLOCATION_STATS = new NodeAllocationStatsProvider( - WriteLoadForecaster.DEFAULT + WriteLoadForecaster.DEFAULT, + createBuiltInClusterSettings() ) { @Override - public Map stats( - ClusterState clusterState, + public Map stats( + Metadata metadata, + RoutingNodes routingNodes, ClusterInfo clusterInfo, @Nullable DesiredBalance desiredBalance ) { From bfe1aad78044d7adc864ad647e88462f8cdce150 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Tue, 26 Nov 2024 16:47:25 +0100 Subject: [PATCH 014/139] Cleanup BucketsAggregator#rewriteBuckets (#114574) The array is initialized with the flag clearOnResize set to true so we don't need to set the values to 0 again. 
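For reference, a minimal sketch of the affected lines in rewriteBuckets,
with the surrounding error handling elided (names as in the diff below):

    // BigArrays#newLongArray(size, clearOnResize = true) hands back zero-filled
    // storage, so the explicit fill below never changed any values:
    docCounts = bigArrays().newLongArray(newNumBuckets, true); // already zeroed
    docCounts.fill(0, newNumBuckets, 0); // redundant no-op, removed by this change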
---
 .../search/aggregations/bucket/BucketsAggregator.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
index ea667b821a7dd..665dd49e3381d 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
@@ -105,7 +105,6 @@ public final void rewriteBuckets(long newNumBuckets, LongUnaryOperator mergeMap)
         try {
             docCounts = bigArrays().newLongArray(newNumBuckets, true);
             success = true;
-            docCounts.fill(0, newNumBuckets, 0);
             for (long i = 0; i < oldDocCounts.size(); i++) {
                 long docCount = oldDocCounts.get(i);

From 505c54eb94c71b694d44b8cf424be7ab5894e2e5 Mon Sep 17 00:00:00 2001
From: Nikolaj Volgushev
Date: Tue, 26 Nov 2024 16:59:54 +0100
Subject: [PATCH 015/139] Use feature flags in OperatorPrivilegesIT (#117491)

Release runs fail for this suite because some of the actions listed are
still behind a feature flag.

Closes: https://github.com/elastic/elasticsearch/issues/102992
---
 muted-tests.yml                                           | 3 ---
 .../elasticsearch/xpack/security/operator/Constants.java | 8 +++++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml
index 49898308e411b..1f092de410f8e 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -226,9 +226,6 @@ tests:
 - class: org.elasticsearch.xpack.inference.InferenceRestIT
   method: test {p0=inference/30_semantic_text_inference/Calculates embeddings using the default ELSER 2 endpoint}
   issue: https://github.com/elastic/elasticsearch/issues/117349
-- class: org.elasticsearch.xpack.security.operator.OperatorPrivilegesIT
-  method: testEveryActionIsEitherOperatorOnlyOrNonOperator
-  issue: https://github.com/elastic/elasticsearch/issues/102992
 - class: org.elasticsearch.xpack.test.rest.XPackRestIT
   method: test {p0=transform/transforms_reset/Test reset running transform}
   issue: https://github.com/elastic/elasticsearch/issues/117473
diff --git a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java
index bfff63442281d..8df10037affdb 100644
--- a/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java
+++ b/x-pack/plugin/security/qa/operator-privileges-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/operator/Constants.java
@@ -7,6 +7,8 @@
 
 package org.elasticsearch.xpack.security.operator;
 
+import org.elasticsearch.cluster.metadata.DataStream;
+
 import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
@@ -508,9 +510,9 @@ public class Constants {
         "indices:admin/data_stream/lifecycle/get",
         "indices:admin/data_stream/lifecycle/put",
         "indices:admin/data_stream/lifecycle/explain",
-        "indices:admin/data_stream/options/delete",
-        "indices:admin/data_stream/options/get",
-        "indices:admin/data_stream/options/put",
+        DataStream.isFailureStoreFeatureFlagEnabled() ? "indices:admin/data_stream/options/delete" : null,
+        DataStream.isFailureStoreFeatureFlagEnabled() ? "indices:admin/data_stream/options/get" : null,
+        DataStream.isFailureStoreFeatureFlagEnabled() ? "indices:admin/data_stream/options/put" : null,
         "indices:admin/delete",
         "indices:admin/flush",
         "indices:admin/flush[s]",

From f57c43cdf5ce8188cc66042b1a8adee420e91825 Mon Sep 17 00:00:00 2001
From: Oleksandr Kolomiiets
Date: Tue, 26 Nov 2024 08:09:30 -0800
Subject: [PATCH 016/139] Include a link to downsampling a TSDS using DSL document (#117510)

---
 docs/reference/data-streams/tsds.asciidoc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/reference/data-streams/tsds.asciidoc b/docs/reference/data-streams/tsds.asciidoc
index 461c0a1272e96..d0d6d4a455c63 100644
--- a/docs/reference/data-streams/tsds.asciidoc
+++ b/docs/reference/data-streams/tsds.asciidoc
@@ -339,4 +339,5 @@ include::tsds-index-settings.asciidoc[]
 include::downsampling.asciidoc[]
 include::downsampling-ilm.asciidoc[]
 include::downsampling-manual.asciidoc[]
+include::downsampling-dsl.asciidoc[]
 include::tsds-reindex.asciidoc[]

From b22d185b7fca8147ec1cfcd993d7c803ce5a240e Mon Sep 17 00:00:00 2001
From: Luigi Dell'Aquila
Date: Tue, 26 Nov 2024 17:46:40 +0100
Subject: [PATCH 017/139] ES|QL: fix stats by constant expression with alias (#117551)

---
 docs/changelog/117551.yaml                    |   5 +
 .../src/main/resources/stats.csv-spec         |  12 ++
 .../xpack/esql/action/EsqlCapabilities.java   |   7 +-
 .../xpack/esql/session/EsqlSession.java       |   2 +-
 .../session/IndexResolverFieldNamesTests.java | 108 ++++++++++++++++++
 5 files changed, 132 insertions(+), 2 deletions(-)
 create mode 100644 docs/changelog/117551.yaml

diff --git a/docs/changelog/117551.yaml b/docs/changelog/117551.yaml
new file mode 100644
index 0000000000000..081dd9203d82a
--- /dev/null
+++ b/docs/changelog/117551.yaml
@@ -0,0 +1,5 @@
+pr: 117551
+summary: Fix stats by constant expression with alias
+area: ES|QL
+type: bug
+issues: []
diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
index 5562028a5935f..f95506ff1982f 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec
@@ -2778,6 +2778,18 @@ m:integer | y+1:integer
 11 | 12
 ;
 
+statsByConstantExpressionWithAliasAndSort
+required_capability: fix_stats_by_foldable_expression_2
+FROM employees
+| EVAL y = "a"
+| STATS count = COUNT() BY x = y
+| SORT x
+;
+
+count:long | x:keyword
+100 | a
+;
+
 filterIsAlwaysTrue
 required_capability: per_agg_filtering
 FROM employees
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
index 08fa7f0a9b213..3eaeceaa86564 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java
@@ -531,7 +531,12 @@ public enum Cap {
     /**
      * support for aggregations on semantic_text
     */
-    SEMANTIC_TEXT_AGGREGATIONS(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);
+    SEMANTIC_TEXT_AGGREGATIONS(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG),
+
+    /**
+     * Fix for https://github.com/elastic/elasticsearch/issues/114714, again
+     */
+    FIX_STATS_BY_FOLDABLE_EXPRESSION_2,;
 
     private final boolean enabled;
 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
index 25bb6d80d0dd0..8f65914d1c30d 100644
---
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -511,7 +511,7 @@ static Set fieldNames(LogicalPlan parsed, Set enrichPolicyMatchF // remove any already discovered UnresolvedAttributes that are in fact aliases defined later down in the tree // for example "from test | eval x = salary | stats max = max(x) by gender" // remove the UnresolvedAttribute "x", since that is an Alias defined in "eval" - AttributeSet planRefs = Expressions.references(p.expressions()); + AttributeSet planRefs = p.references(); p.forEachExpressionDown(Alias.class, alias -> { // do not remove the UnresolvedAttribute that has the same name as its alias, ie "rename id = id" // or the UnresolvedAttributes that are used in Functions that have aliases "STATS id = MAX(id)" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java index 5425f770c49e8..0fe89b24dfc6a 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/IndexResolverFieldNamesTests.java @@ -353,6 +353,114 @@ public void testDocsStats() { | SORT languages""", Set.of("emp_no", "emp_no.*", "languages", "languages.*")); } + public void testEvalStats() { + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY y""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY y + | SORT y""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y + | SORT x""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | STATS count = COUNT(*) BY first_name + | SORT first_name""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y + | SORT x, first_name""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL first_name = "a" + | STATS count = COUNT(*) BY first_name + | SORT first_name""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY first_name = to_upper(y) + | SORT first_name""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = to_upper(first_name), z = "z" + | STATS count = COUNT(*) BY first_name = to_lower(y), z + | SORT first_name""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y, z = first_name + | SORT x, z""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y, first_name + | SORT x, first_name""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(first_name) BY x = y + | SORT x + | DROP first_name""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y + | MV_EXPAND x""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY first_name, y + | MV_EXPAND first_name""", Set.of("first_name", "first_name.*")); + + 
assertFieldNames(""" + FROM employees + | MV_EXPAND first_name + | EVAL y = "a" + | STATS count = COUNT(*) BY first_name, y + | SORT y""", Set.of("first_name", "first_name.*")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | MV_EXPAND y + | STATS count = COUNT(*) BY x = y + | SORT x""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY x = y + | STATS count = COUNT(count) by x + | SORT x""", Set.of("_index")); + + assertFieldNames(""" + FROM employees + | EVAL y = "a" + | STATS count = COUNT(*) BY first_name, y + | STATS count = COUNT(count) by x = y + | SORT x""", Set.of("first_name", "first_name.*")); + } + public void testSortWithLimitOne_DropHeight() { assertFieldNames("from employees | sort languages | limit 1 | drop height*", ALL_FIELDS); } From 1866299fa46e387238d28fe4e0d26c713926d47e Mon Sep 17 00:00:00 2001 From: Mikhail Berezovskiy Date: Tue, 26 Nov 2024 12:23:19 -0500 Subject: [PATCH 018/139] Remove HTTP content copies (#117303) --- .../forbidden/es-server-signatures.txt | 2 - docs/changelog/117303.yaml | 5 +++ .../netty4/Netty4TrashingAllocatorIT.java | 2 +- .../system/indices/SystemIndicesQA.java | 7 +-- .../elasticsearch/action/ActionListener.java | 8 ++++ .../common/bytes/BytesReference.java | 23 ---------- .../org/elasticsearch/http/HttpTracer.java | 2 +- .../org/elasticsearch/rest/RestRequest.java | 43 +++++-------------- .../elasticsearch/rest/RestRequestFilter.java | 4 +- .../cluster/RestPutStoredScriptAction.java | 7 ++- .../rest/action/document/RestBulkAction.java | 2 +- .../rest/action/document/RestIndexAction.java | 2 +- .../action/ingest/RestPutPipelineAction.java | 14 ++++-- .../ingest/RestSimulateIngestAction.java | 3 +- .../ingest/RestSimulatePipelineAction.java | 10 +++-- .../action/search/RestMultiSearchAction.java | 6 +-- .../common/bytes/BytesArrayTests.java | 5 --- .../elasticsearch/rest/RestRequestTests.java | 4 +- .../EnterpriseSearchBaseRestHandler.java | 2 +- .../action/RestPostAnalyticsEventAction.java | 42 +++++++++--------- .../rules/action/RestPutQueryRuleAction.java | 2 +- .../action/RestPutQueryRulesetAction.java | 2 +- .../rest/RestPutInferenceModelAction.java | 13 +++--- .../rest/RestUpdateInferenceModelAction.java | 10 ++++- .../logstash/rest/RestPutPipelineAction.java | 2 +- .../xpack/ml/rest/job/RestPostDataAction.java | 10 ++++- .../rest/action/RestMonitoringBulkAction.java | 6 ++- .../xpack/security/audit/AuditUtil.java | 2 +- .../rest/action/SecurityBaseRestHandler.java | 2 +- .../action/user/RestHasPrivilegesAction.java | 4 +- .../rest/RestFindStructureAction.java | 16 +++---- .../rest/action/RestPutWatchAction.java | 24 +++++++---- 32 files changed, 141 insertions(+), 145 deletions(-) create mode 100644 docs/changelog/117303.yaml diff --git a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt index 68b97050ea012..a9da7995c2b36 100644 --- a/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt +++ b/build-tools-internal/src/main/resources/forbidden/es-server-signatures.txt @@ -167,5 +167,3 @@ org.elasticsearch.cluster.SnapshotDeletionsInProgress$Entry#(java.lang.Str @defaultMessage Use a Thread constructor with a name, anonymous threads are more difficult to debug java.lang.Thread#(java.lang.Runnable) java.lang.Thread#(java.lang.ThreadGroup, java.lang.Runnable) - 
-org.elasticsearch.common.bytes.BytesReference#copyBytes(org.elasticsearch.common.bytes.BytesReference) @ This method is a subject for removal. Copying bytes is prone to performance regressions and unnecessary allocations. diff --git a/docs/changelog/117303.yaml b/docs/changelog/117303.yaml new file mode 100644 index 0000000000000..71d134f2cd077 --- /dev/null +++ b/docs/changelog/117303.yaml @@ -0,0 +1,5 @@ +pr: 117303 +summary: Remove HTTP content copies +area: Network +type: enhancement +issues: [] diff --git a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java index 18c91068ff4f9..f3a10ce228117 100644 --- a/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java +++ b/modules/transport-netty4/src/internalClusterTest/java/org/elasticsearch/http/netty4/Netty4TrashingAllocatorIT.java @@ -89,7 +89,7 @@ public List routes() { @Override protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { - var content = request.releasableContent(); + var content = request.content(); var iter = content.iterator(); return (chan) -> { request.getHttpRequest().release(); diff --git a/qa/system-indices/src/main/java/org/elasticsearch/system/indices/SystemIndicesQA.java b/qa/system-indices/src/main/java/org/elasticsearch/system/indices/SystemIndicesQA.java index 6e15e40efa69a..46c6d1b9228d6 100644 --- a/qa/system-indices/src/main/java/org/elasticsearch/system/indices/SystemIndicesQA.java +++ b/qa/system-indices/src/main/java/org/elasticsearch/system/indices/SystemIndicesQA.java @@ -10,6 +10,7 @@ package org.elasticsearch.system.indices; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.client.internal.node.NodeClient; @@ -177,12 +178,12 @@ public List routes() { @Override protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + var content = request.requiredContent(); IndexRequest indexRequest = new IndexRequest(".net-new-system-index-primary"); - indexRequest.source(request.requiredContent(), request.getXContentType()); + indexRequest.source(content, request.getXContentType()); indexRequest.id(request.param("id")); indexRequest.setRefreshPolicy(request.param("refresh")); - - return channel -> client.index(indexRequest, new RestToXContentListener<>(channel)); + return channel -> client.index(indexRequest, ActionListener.withRef(new RestToXContentListener<>(channel), content)); } @Override diff --git a/server/src/main/java/org/elasticsearch/action/ActionListener.java b/server/src/main/java/org/elasticsearch/action/ActionListener.java index 890c3251e4f9a..a158669d936fe 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionListener.java +++ b/server/src/main/java/org/elasticsearch/action/ActionListener.java @@ -475,4 +475,12 @@ static void runWithResource( ActionListener.run(ActionListener.runBefore(listener, resource::close), l -> action.accept(l, resource)); } + /** + * Increments ref count and returns a listener that will decrement ref count on listener completion. 
+ */ + static ActionListener withRef(ActionListener listener, RefCounted ref) { + ref.mustIncRef(); + return releaseAfter(listener, ref::decRef); + } + } diff --git a/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java b/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java index 51e6512072e41..ddcfc1ea7eed8 100644 --- a/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java +++ b/server/src/main/java/org/elasticsearch/common/bytes/BytesReference.java @@ -74,29 +74,6 @@ static ByteBuffer[] toByteBuffers(BytesReference reference) { } } - /** - * Allocates new buffer and copy bytes from given BytesReference. - * - * @deprecated copying bytes is a right place for performance regression and unnecessary allocations. - * This method exists to serve very few places that struggle to handle reference counted buffers. - */ - @Deprecated(forRemoval = true) - static BytesReference copyBytes(BytesReference bytesReference) { - byte[] arr = new byte[bytesReference.length()]; - int offset = 0; - final BytesRefIterator iterator = bytesReference.iterator(); - try { - BytesRef slice; - while ((slice = iterator.next()) != null) { - System.arraycopy(slice.bytes, slice.offset, arr, offset, slice.length); - offset += slice.length; - } - return new BytesArray(arr); - } catch (IOException e) { - throw new AssertionError(e); - } - } - /** * Returns BytesReference composed of the provided ByteBuffers. */ diff --git a/server/src/main/java/org/elasticsearch/http/HttpTracer.java b/server/src/main/java/org/elasticsearch/http/HttpTracer.java index d6daf11c0539a..3d8360e6ee3fa 100644 --- a/server/src/main/java/org/elasticsearch/http/HttpTracer.java +++ b/server/src/main/java/org/elasticsearch/http/HttpTracer.java @@ -94,7 +94,7 @@ HttpTracer maybeLogRequest(RestRequest restRequest, @Nullable Exception e) { private void logFullContent(RestRequest restRequest) { try (var stream = HttpBodyTracer.getBodyOutputStream(restRequest.getRequestId(), HttpBodyTracer.Type.REQUEST)) { - restRequest.releasableContent().writeTo(stream); + restRequest.content().writeTo(stream); } catch (Exception e2) { assert false : e2; // no real IO here } diff --git a/server/src/main/java/org/elasticsearch/rest/RestRequest.java b/server/src/main/java/org/elasticsearch/rest/RestRequest.java index 17d85a8eabb1c..a04bdcb32f2b4 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestRequest.java +++ b/server/src/main/java/org/elasticsearch/rest/RestRequest.java @@ -23,7 +23,6 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; import org.elasticsearch.core.Tuple; import org.elasticsearch.http.HttpBody; @@ -303,22 +302,13 @@ public boolean isFullContent() { return httpRequest.body().isFull(); } - /** - * Returns a copy of HTTP content. The copy is GC-managed and does not require reference counting. - * Please use {@link #releasableContent()} to avoid content copy. - */ - @SuppressForbidden(reason = "temporarily support content copy while migrating RestHandlers to ref counted pooled buffers") - public BytesReference content() { - return BytesReference.copyBytes(releasableContent()); - } - /** * Returns a direct reference to the network buffer containing the request body. 
The HTTP layers will release their references to this * buffer as soon as they have finished the synchronous steps of processing the request on the network thread, which will by default * release the buffer back to the pool where it may be re-used for another request. If you need to keep the buffer alive past the end of * these synchronous steps, acquire your own reference to this buffer and release it once it's no longer needed. */ - public ReleasableBytesReference releasableContent() { + public ReleasableBytesReference content() { this.contentConsumed = true; var bytes = httpRequest.body().asFull().bytes(); if (bytes.hasReferences() == false) { @@ -338,32 +328,19 @@ public HttpBody.Stream contentStream() { return httpRequest.body().asStream(); } - private void ensureContent() { + /** + * Returns reference to the network buffer of HTTP content or throw an exception if the body or content type is missing. + * See {@link #content()}. + */ + public ReleasableBytesReference requiredContent() { if (hasContent() == false) { throw new ElasticsearchParseException("request body is required"); } else if (xContentType.get() == null) { throwValidationException("unknown content type"); } - } - - /** - * @return copy of the request body or throw an exception if the body or content type is missing. - * See {@link #content()}. Please use {@link #requiredReleasableContent()} to avoid content copy. - */ - public final BytesReference requiredContent() { - ensureContent(); return content(); } - /** - * Returns reference to the network buffer of HTTP content or throw an exception if the body or content type is missing. - * See {@link #releasableContent()}. It's a recommended method to handle HTTP content without copying it. - */ - public ReleasableBytesReference requiredReleasableContent() { - ensureContent(); - return releasableContent(); - } - private static void throwValidationException(String msg) { ValidationException unknownContentType = new ValidationException(); unknownContentType.addValidationError(msg); @@ -596,7 +573,7 @@ public final boolean hasContentOrSourceParam() { * if you need to handle the absence request content gracefully. 
*/ public final XContentParser contentOrSourceParamParser() throws IOException { - Tuple tuple = contentOrSourceParam(); + Tuple tuple = contentOrSourceParam(); return XContentHelper.createParserNotCompressed(parserConfig, tuple.v2(), tuple.v1().xContent().type()); } @@ -607,7 +584,7 @@ public final XContentParser contentOrSourceParamParser() throws IOException { */ public final void withContentOrSourceParamParserOrNull(CheckedConsumer withParser) throws IOException { if (hasContentOrSourceParam()) { - Tuple tuple = contentOrSourceParam(); + Tuple tuple = contentOrSourceParam(); try (XContentParser parser = XContentHelper.createParserNotCompressed(parserConfig, tuple.v2(), tuple.v1())) { withParser.accept(parser); } @@ -620,7 +597,7 @@ public final void withContentOrSourceParamParserOrNull(CheckedConsumer contentOrSourceParam() { + public final Tuple contentOrSourceParam() { if (hasContentOrSourceParam() == false) { throw new ElasticsearchParseException("request body or source parameter is required"); } else if (hasContent()) { @@ -636,7 +613,7 @@ public final Tuple contentOrSourceParam() { if (xContentType == null) { throwValidationException("Unknown value for source_content_type [" + typeParam + "]"); } - return new Tuple<>(xContentType, bytes); + return new Tuple<>(xContentType, ReleasableBytesReference.wrap(bytes)); } public ParsedMediaType getParsedAccept() { diff --git a/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java b/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java index 57b4d2990c8e0..7c90d9168e6c8 100644 --- a/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java +++ b/server/src/main/java/org/elasticsearch/rest/RestRequestFilter.java @@ -45,10 +45,10 @@ public boolean hasContent() { } @Override - public ReleasableBytesReference releasableContent() { + public ReleasableBytesReference content() { if (filteredBytes == null) { Tuple> result = XContentHelper.convertToMap( - restRequest.requiredReleasableContent(), + restRequest.requiredContent(), true, restRequest.getXContentType() ); diff --git a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutStoredScriptAction.java b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutStoredScriptAction.java index 4451117fa4792..a698dc3f30577 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutStoredScriptAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/admin/cluster/RestPutStoredScriptAction.java @@ -8,6 +8,7 @@ */ package org.elasticsearch.rest.action.admin.cluster; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.storedscripts.PutStoredScriptRequest; import org.elasticsearch.action.admin.cluster.storedscripts.TransportPutStoredScriptAction; import org.elasticsearch.client.internal.node.NodeClient; @@ -57,6 +58,10 @@ public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client request.getXContentType(), StoredScriptSource.parse(content, xContentType) ); - return channel -> client.execute(TransportPutStoredScriptAction.TYPE, putRequest, new RestToXContentListener<>(channel)); + return channel -> client.execute( + TransportPutStoredScriptAction.TYPE, + putRequest, + ActionListener.withRef(new RestToXContentListener<>(channel), content) + ); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java index 
9428ef5390b2f..dea7b7138d0d0 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestBulkAction.java @@ -103,7 +103,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC boolean defaultRequireDataStream = request.paramAsBoolean(DocWriteRequest.REQUIRE_DATA_STREAM, false); bulkRequest.timeout(request.paramAsTime("timeout", BulkShardRequest.DEFAULT_TIMEOUT)); bulkRequest.setRefreshPolicy(request.param("refresh")); - ReleasableBytesReference content = request.requiredReleasableContent(); + ReleasableBytesReference content = request.requiredContent(); try { bulkRequest.add( diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java index d81ac03492d59..d40c6225cc7b4 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestIndexAction.java @@ -106,7 +106,7 @@ public RestChannelConsumer prepareRequest(RestRequest request, final NodeClient @Override public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException { - ReleasableBytesReference source = request.requiredReleasableContent(); + ReleasableBytesReference source = request.requiredContent(); IndexRequest indexRequest = new IndexRequest(request.param("index")); indexRequest.id(request.param("id")); indexRequest.routing(request.param("routing")); diff --git a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestPutPipelineAction.java b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestPutPipelineAction.java index 269d9b08ab66b..c6b3daa38d663 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestPutPipelineAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestPutPipelineAction.java @@ -9,10 +9,11 @@ package org.elasticsearch.rest.action.ingest; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ingest.PutPipelineRequest; import org.elasticsearch.action.ingest.PutPipelineTransportAction; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.core.Tuple; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; @@ -56,15 +57,20 @@ public RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient cl } } - Tuple sourceTuple = restRequest.contentOrSourceParam(); + Tuple sourceTuple = restRequest.contentOrSourceParam(); + var content = sourceTuple.v2(); final var request = new PutPipelineRequest( getMasterNodeTimeout(restRequest), getAckTimeout(restRequest), restRequest.param("id"), - sourceTuple.v2(), + content, sourceTuple.v1(), ifVersion ); - return channel -> client.execute(PutPipelineTransportAction.TYPE, request, new RestToXContentListener<>(channel)); + return channel -> client.execute( + PutPipelineTransportAction.TYPE, + request, + ActionListener.withRef(new RestToXContentListener<>(channel), content) + ); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestAction.java b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestAction.java index c825a8198e6e4..978b6d1c3a92d 100644 --- 
a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestAction.java @@ -16,6 +16,7 @@ import org.elasticsearch.action.bulk.SimulateBulkRequest; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Tuple; import org.elasticsearch.ingest.ConfigurationUtils; @@ -72,7 +73,7 @@ public RestChannelConsumer prepareRequest(final RestRequest request, final NodeC String defaultIndex = request.param("index"); FetchSourceContext defaultFetchSourceContext = FetchSourceContext.parseFromRestRequest(request); String defaultPipeline = request.param("pipeline"); - Tuple sourceTuple = request.contentOrSourceParam(); + Tuple sourceTuple = request.contentOrSourceParam(); Map sourceMap = XContentHelper.convertToMap(sourceTuple.v2(), false, sourceTuple.v1()).v2(); Map> pipelineSubstitutions = (Map>) sourceMap.remove( "pipeline_substitutions" diff --git a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulatePipelineAction.java b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulatePipelineAction.java index f85b89f774477..faf977b54885d 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulatePipelineAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/ingest/RestSimulatePipelineAction.java @@ -9,9 +9,10 @@ package org.elasticsearch.rest.action.ingest; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ingest.SimulatePipelineRequest; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.core.Tuple; import org.elasticsearch.rest.BaseRestHandler; import org.elasticsearch.rest.RestRequest; @@ -46,10 +47,13 @@ public String getName() { @Override public RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) throws IOException { - Tuple sourceTuple = restRequest.contentOrSourceParam(); + Tuple sourceTuple = restRequest.contentOrSourceParam(); + var content = sourceTuple.v2(); SimulatePipelineRequest request = new SimulatePipelineRequest(sourceTuple.v2(), sourceTuple.v1(), restRequest.getRestApiVersion()); request.setId(restRequest.param("id")); request.setVerbose(restRequest.paramAsBoolean("verbose", false)); - return channel -> client.admin().cluster().simulatePipeline(request, new RestToXContentListener<>(channel)); + return channel -> client.admin() + .cluster() + .simulatePipeline(request, ActionListener.withRef(new RestToXContentListener<>(channel), content)); } } diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java index aeb182978e1eb..89775b4ca8e15 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java @@ -17,7 +17,7 @@ import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.Strings; import org.elasticsearch.common.TriFunction; -import org.elasticsearch.common.bytes.BytesReference; +import 
org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.features.NodeFeature; @@ -184,9 +184,9 @@ public static void parseMultiLineRequest( boolean ccsMinimizeRoundtrips = request.paramAsBoolean("ccs_minimize_roundtrips", true); String routing = request.param("routing"); - final Tuple sourceTuple = request.contentOrSourceParam(); + final Tuple sourceTuple = request.contentOrSourceParam(); final XContent xContent = sourceTuple.v1().xContent(); - final BytesReference data = sourceTuple.v2(); + final ReleasableBytesReference data = sourceTuple.v2(); MultiSearchRequest.readMultiLineFormat( xContent, request.contentParserConfig(), diff --git a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java index 3fd8535cd5c27..e067be6b1b0da 100644 --- a/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java +++ b/server/src/test/java/org/elasticsearch/common/bytes/BytesArrayTests.java @@ -108,9 +108,4 @@ public void testGetDoubleLE() { assertThat(e.getMessage(), equalTo("Index 9 out of bounds for length 9")); } - public void testCopyBytes() { - var data = randomByteArrayOfLength(between(1024, 1024 * 1024 * 50)); - var copy = BytesReference.copyBytes(new BytesArray(data)); - assertArrayEquals(data, BytesReference.toBytes(copy)); - } } diff --git a/server/src/test/java/org/elasticsearch/rest/RestRequestTests.java b/server/src/test/java/org/elasticsearch/rest/RestRequestTests.java index 8a0ca5ba6c8a5..b391b77503400 100644 --- a/server/src/test/java/org/elasticsearch/rest/RestRequestTests.java +++ b/server/src/test/java/org/elasticsearch/rest/RestRequestTests.java @@ -12,7 +12,7 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.http.HttpBody; import org.elasticsearch.http.HttpChannel; @@ -321,7 +321,7 @@ public String uri() { } @Override - public BytesReference content() { + public ReleasableBytesReference content() { return restRequest.content(); } } diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java index 214f9150dfcc5..aa200f7ae9acb 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/EnterpriseSearchBaseRestHandler.java @@ -32,7 +32,7 @@ protected final BaseRestHandler.RestChannelConsumer prepareRequest(RestRequest r // We need to consume parameters and content from the REST request in order to bypass unrecognized param errors // and return a license error. 
request.params().keySet().forEach(key -> request.param(key, "")); - request.releasableContent(); + request.content(); return channel -> channel.sendResponse( new RestResponse(channel, LicenseUtils.newComplianceException(this.licenseState, this.product)) ); diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/analytics/action/RestPostAnalyticsEventAction.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/analytics/action/RestPostAnalyticsEventAction.java index 34292c4669333..5706e5e384053 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/analytics/action/RestPostAnalyticsEventAction.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/analytics/action/RestPostAnalyticsEventAction.java @@ -7,8 +7,9 @@ package org.elasticsearch.xpack.application.analytics.action; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.core.Tuple; import org.elasticsearch.license.XPackLicenseState; @@ -48,11 +49,26 @@ public List routes() { @Override protected RestChannelConsumer innerPrepareRequest(RestRequest restRequest, NodeClient client) { - PostAnalyticsEventAction.Request request = buidRequest(restRequest); + Tuple sourceTuple = restRequest.contentOrSourceParam(); + + var content = sourceTuple.v2(); + PostAnalyticsEventAction.RequestBuilder builder = PostAnalyticsEventAction.Request.builder( + restRequest.param("collection_name"), + restRequest.param("event_type"), + sourceTuple.v1(), + content + ); + + builder.debug(restRequest.paramAsBoolean("debug", false)); + + final Map> headers = restRequest.getHeaders(); + builder.headers(headers); + builder.clientAddress(getClientAddress(restRequest, headers)); + return channel -> client.execute( PostAnalyticsEventAction.INSTANCE, - request, - new RestToXContentListener<>(channel, r -> RestStatus.ACCEPTED) + builder.request(), + ActionListener.withRef(new RestToXContentListener<>(channel, r -> RestStatus.ACCEPTED), content) ); } @@ -71,22 +87,4 @@ private static InetAddress getClientAddress(RestRequest restRequest, Map sourceTuple = restRequest.contentOrSourceParam(); - - PostAnalyticsEventAction.RequestBuilder builder = PostAnalyticsEventAction.Request.builder( - restRequest.param("collection_name"), - restRequest.param("event_type"), - sourceTuple.v1(), - sourceTuple.v2() - ); - - builder.debug(restRequest.paramAsBoolean("debug", false)); - - final Map> headers = restRequest.getHeaders(); - builder.headers(headers); - builder.clientAddress(getClientAddress(restRequest, headers)); - - return builder.request(); - } } diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRuleAction.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRuleAction.java index 4addd97465bf2..1660502d77920 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRuleAction.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRuleAction.java @@ -43,7 +43,7 @@ protected RestChannelConsumer innerPrepareRequest(RestRequest restRequest, NodeC PutQueryRuleAction.Request request = new 
PutQueryRuleAction.Request( restRequest.param("ruleset_id"), restRequest.param("rule_id"), - restRequest.content(), + restRequest.requiredContent(), restRequest.getXContentType() ); return channel -> client.execute( diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRulesetAction.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRulesetAction.java index a43ac70327e77..db20e66845f35 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRulesetAction.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/action/RestPutQueryRulesetAction.java @@ -42,7 +42,7 @@ public List routes() { protected RestChannelConsumer innerPrepareRequest(RestRequest restRequest, NodeClient client) throws IOException { PutQueryRulesetAction.Request request = new PutQueryRulesetAction.Request( restRequest.param("ruleset_id"), - restRequest.content(), + restRequest.requiredContent(), restRequest.getXContentType() ); return channel -> client.execute( diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestPutInferenceModelAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestPutInferenceModelAction.java index 0523160ee19c2..655e11996d522 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestPutInferenceModelAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestPutInferenceModelAction.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.inference.rest; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.BaseRestHandler; @@ -48,12 +49,12 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient taskType = TaskType.ANY; // task type must be defined in the body } - var request = new PutInferenceModelAction.Request( - taskType, - inferenceEntityId, - restRequest.requiredContent(), - restRequest.getXContentType() + var content = restRequest.requiredContent(); + var request = new PutInferenceModelAction.Request(taskType, inferenceEntityId, content, restRequest.getXContentType()); + return channel -> client.execute( + PutInferenceModelAction.INSTANCE, + request, + ActionListener.withRef(new RestToXContentListener<>(channel), content) ); - return channel -> client.execute(PutInferenceModelAction.INSTANCE, request, new RestToXContentListener<>(channel)); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUpdateInferenceModelAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUpdateInferenceModelAction.java index 9405a6752538c..120731a4f8e66 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUpdateInferenceModelAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/rest/RestUpdateInferenceModelAction.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.inference.rest; import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.BaseRestHandler; @@ -50,13 +51,18 @@ 
protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient throw new ElasticsearchStatusException("Inference ID must be provided in the path", RestStatus.BAD_REQUEST); } + var content = restRequest.requiredContent(); var request = new UpdateInferenceModelAction.Request( inferenceEntityId, - restRequest.requiredContent(), + content, restRequest.getXContentType(), taskType, RestUtils.getMasterNodeTimeout(restRequest) ); - return channel -> client.execute(UpdateInferenceModelAction.INSTANCE, request, new RestToXContentListener<>(channel)); + return channel -> client.execute( + UpdateInferenceModelAction.INSTANCE, + request, + ActionListener.withRef(new RestToXContentListener<>(channel), content) + ); } } diff --git a/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java b/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java index 2ea56b147bf9c..a9992e168bc66 100644 --- a/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java +++ b/x-pack/plugin/logstash/src/main/java/org/elasticsearch/xpack/logstash/rest/RestPutPipelineAction.java @@ -49,7 +49,7 @@ protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient cli } return restChannel -> { - final String content = request.releasableContent().utf8ToString(); + final String content = request.content().utf8ToString(); client.execute( PutPipelineAction.INSTANCE, new PutPipelineRequest(id, content, request.getXContentType()), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java index 48c6abde3010a..0fcad773100ff 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/rest/job/RestPostDataAction.java @@ -6,6 +6,7 @@ */ package org.elasticsearch.xpack.ml.rest.job; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; @@ -51,9 +52,14 @@ protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient PostDataAction.Request request = new PostDataAction.Request(restRequest.param(Job.ID.getPreferredName())); request.setResetStart(restRequest.param(PostDataAction.Request.RESET_START.getPreferredName(), DEFAULT_RESET_START)); request.setResetEnd(restRequest.param(PostDataAction.Request.RESET_END.getPreferredName(), DEFAULT_RESET_END)); - request.setContent(restRequest.content(), restRequest.getXContentType()); + var content = restRequest.content(); + request.setContent(content, restRequest.getXContentType()); - return channel -> client.execute(PostDataAction.INSTANCE, request, new RestToXContentListener<>(channel, r -> RestStatus.ACCEPTED)); + return channel -> client.execute( + PostDataAction.INSTANCE, + request, + ActionListener.withRef(new RestToXContentListener<>(channel, r -> RestStatus.ACCEPTED), content) + ); } @Override diff --git a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/rest/action/RestMonitoringBulkAction.java b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/rest/action/RestMonitoringBulkAction.java index b69b958a27ce6..762cbffacb082 100644 --- 
a/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/rest/action/RestMonitoringBulkAction.java +++ b/x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/rest/action/RestMonitoringBulkAction.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.monitoring.rest.action; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.Strings; import org.elasticsearch.rest.BaseRestHandler; @@ -93,8 +94,9 @@ public RestChannelConsumer prepareRequest(RestRequest request, NodeClient client final long intervalMillis = parseTimeValue(intervalAsString, INTERVAL).getMillis(); final MonitoringBulkRequestBuilder requestBuilder = new MonitoringBulkRequestBuilder(client); - requestBuilder.add(system, request.content(), request.getXContentType(), timestamp, intervalMillis); - return channel -> requestBuilder.execute(getRestBuilderListener(channel)); + var content = request.content(); + requestBuilder.add(system, content, request.getXContentType(), timestamp, intervalMillis); + return channel -> requestBuilder.execute(ActionListener.withRef(getRestBuilderListener(channel), content)); } @Override diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java index 429b632cdac18..58516b1d8324d 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/audit/AuditUtil.java @@ -27,7 +27,7 @@ public class AuditUtil { public static String restRequestContent(RestRequest request) { if (request.hasContent()) { - var content = request.releasableContent(); + var content = request.content(); try { return XContentHelper.convertToJson(content, false, false, request.getXContentType()); } catch (IOException ioe) { diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java index df21f5d4eeb0b..d5d11ea42e345 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/SecurityBaseRestHandler.java @@ -75,7 +75,7 @@ protected final RestChannelConsumer prepareRequest(RestRequest request, NodeClie return innerPrepareRequest(request, client); } else { request.params().keySet().forEach(key -> request.param(key, "")); - request.releasableContent(); // mark content consumed + request.content(); // mark content consumed return channel -> channel.sendResponse(new RestResponse(channel, failedFeature)); } } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/user/RestHasPrivilegesAction.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/user/RestHasPrivilegesAction.java index f2233a7e19fd0..8029ed3ba45e4 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/user/RestHasPrivilegesAction.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/rest/action/user/RestHasPrivilegesAction.java @@ -8,7 +8,7 @@ import 
org.elasticsearch.ElasticsearchSecurityException; import org.elasticsearch.client.internal.node.NodeClient; -import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Tuple; import org.elasticsearch.license.XPackLicenseState; @@ -77,7 +77,7 @@ public RestChannelConsumer innerPrepareRequest(RestRequest request, NodeClient c * Consume the body immediately. This ensures that if there is a body and we later reject the request (e.g., because security is not * enabled) that the REST infrastructure will not reject the request for not having consumed the body. */ - final Tuple content = request.contentOrSourceParam(); + final Tuple content = request.contentOrSourceParam(); final String username = getUsername(request); if (username == null) { return restChannel -> { throw new ElasticsearchSecurityException("there is no authenticated user"); }; diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java index 5078572dee5fd..f47a25409b821 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/rest/RestFindStructureAction.java @@ -6,7 +6,7 @@ */ package org.elasticsearch.xpack.textstructure.rest; -import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.rest.BaseRestHandler; @@ -50,14 +50,14 @@ public String getName() { protected RestChannelConsumer prepareRequest(RestRequest restRequest, NodeClient client) { FindStructureAction.Request request = new FindStructureAction.Request(); RestFindStructureArgumentsParser.parse(restRequest, request); + var content = restRequest.requiredContent(); + request.setSample(content); - if (restRequest.hasContent()) { - request.setSample(restRequest.content()); - } else { - throw new ElasticsearchParseException("request body is required"); - } - - return channel -> client.execute(FindStructureAction.INSTANCE, request, new RestToXContentListener<>(channel)); + return channel -> client.execute( + FindStructureAction.INSTANCE, + request, + ActionListener.withRef(new RestToXContentListener<>(channel), content) + ); } @Override diff --git a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/rest/action/RestPutWatchAction.java b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/rest/action/RestPutWatchAction.java index 9dba72b1f64c3..0ed27a4073653 100644 --- a/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/rest/action/RestPutWatchAction.java +++ b/x-pack/plugin/watcher/src/main/java/org/elasticsearch/xpack/watcher/rest/action/RestPutWatchAction.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.watcher.rest.action; +import org.elasticsearch.action.ActionListener; import org.elasticsearch.client.internal.node.NodeClient; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.protocol.xpack.watcher.PutWatchRequest; @@ -42,19 +43,24 @@ public String getName() { @Override protected RestChannelConsumer prepareRequest(final RestRequest request, NodeClient 
client) { - PutWatchRequest putWatchRequest = new PutWatchRequest(request.param("id"), request.content(), request.getXContentType()); + var content = request.content(); + PutWatchRequest putWatchRequest = new PutWatchRequest(request.param("id"), content, request.getXContentType()); putWatchRequest.setVersion(request.paramAsLong("version", Versions.MATCH_ANY)); putWatchRequest.setIfSeqNo(request.paramAsLong("if_seq_no", putWatchRequest.getIfSeqNo())); putWatchRequest.setIfPrimaryTerm(request.paramAsLong("if_primary_term", putWatchRequest.getIfPrimaryTerm())); putWatchRequest.setActive(request.paramAsBoolean("active", putWatchRequest.isActive())); - return channel -> client.execute(PutWatchAction.INSTANCE, putWatchRequest, new RestBuilderListener<>(channel) { - @Override - public RestResponse buildResponse(PutWatchResponse response, XContentBuilder builder) throws Exception { - response.toXContent(builder, request); - RestStatus status = response.isCreated() ? CREATED : OK; - return new RestResponse(status, builder); - } - }); + return channel -> client.execute( + PutWatchAction.INSTANCE, + putWatchRequest, + ActionListener.withRef(new RestBuilderListener<>(channel) { + @Override + public RestResponse buildResponse(PutWatchResponse response, XContentBuilder builder) throws Exception { + response.toXContent(builder, request); + RestStatus status = response.isCreated() ? CREATED : OK; + return new RestResponse(status, builder); + } + }, content) + ); } private static final Set FILTERED_FIELDS = Set.of( From f05c9b07f801e49e1a95f7665485464dcda862ee Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Tue, 26 Nov 2024 13:45:13 -0500 Subject: [PATCH 019/139] ESQL Add some tests for sorting the date nanos union type (#117567) --- .../src/main/resources/union_types.csv-spec | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec index af987b13acc82..bf6e2f8ae0893 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/union_types.csv-spec @@ -626,6 +626,65 @@ sample_data_ts_nanos | 2023-10-23T12:27:28.948123456Z | 172.21.2.113 | 27648 sample_data_ts_nanos | 2023-10-23T12:15:03.360123456Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 ; +multiIndex sort millis and nanos as nanos +required_capability: to_date_nanos +required_capability: union_types +required_capability: metadata_fields +required_capability: union_types_remove_fields + +FROM sample_data, sample_data_ts_nanos METADATA _index +| EVAL ts = TO_DATE_NANOS(@timestamp) +| KEEP _index, ts, client_ip, event_duration, message +| SORT ts DESC +; + +_index:keyword | ts:date_nanos | client_ip:ip | event_duration:long | message:keyword +sample_data_ts_nanos | 2023-10-23T13:55:01.543123456Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:55:01.543000000Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_nanos | 2023-10-23T13:53:55.832123456Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:53:55.832000000Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_nanos | 2023-10-23T13:52:55.015123456Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:52:55.015000000Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_nanos | 2023-10-23T13:51:54.732123456Z | 172.21.3.15 | 725448 | Connection error 
+sample_data | 2023-10-23T13:51:54.732000000Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_nanos | 2023-10-23T13:33:34.937123456Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T13:33:34.937000000Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_nanos | 2023-10-23T12:27:28.948123456Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:27:28.948000000Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_nanos | 2023-10-23T12:15:03.360123456Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T12:15:03.360000000Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + +multiIndex sort millis and nanos as millis +required_capability: to_date_nanos +required_capability: union_types +required_capability: metadata_fields +required_capability: union_types_remove_fields + +FROM sample_data, sample_data_ts_nanos METADATA _index +| EVAL ts = TO_DATETIME(@timestamp) +| KEEP _index, ts, client_ip, event_duration, message +| SORT ts DESC, _index DESC +; + +_index:keyword | ts:datetime | client_ip:ip | event_duration:long | message:keyword +sample_data_ts_nanos | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +sample_data_ts_nanos | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +sample_data_ts_nanos | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +sample_data_ts_nanos | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +sample_data_ts_nanos | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +sample_data_ts_nanos | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +sample_data_ts_nanos | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +sample_data | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +; + + multiIndexTsNanosRenameToNanosWithFiltering required_capability: to_date_nanos required_capability: date_nanos_binary_comparison From 094a81510c65e9ddd294137c369a716f707c1482 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 26 Nov 2024 20:05:26 +0000 Subject: [PATCH 020/139] Add `@UpdateForV9` annotations to `PutStoredScriptRequest` (#117582) We can remove some fields from `PutStoredScriptRequest` once the v9.0 transport protocol can deviate from the v8.last one. This commit adds reminder annotations to do this. 
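
For illustration only (the field name and owner shown here are hypothetical, not copied from the diff), such a reminder annotation takes roughly this shape:

    @UpdateForV9(owner = UpdateForV9.Owner.DISTRIBUTED_COORDINATION) // remove once the v9 transport protocol may diverge from v8.last
    private final String legacyWireField; // hypothetical field kept only for v8 wire compatibility

The annotation is purely a marker: it carries no runtime behaviour, it just makes the pending cleanup discoverable when the codebase is swept for v9 follow-ups.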
Relates #117566 --- .../storedscripts/PutStoredScriptRequest.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java index 8e453cd5bac3a..c3bdfc5a594c0 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/storedscripts/PutStoredScriptRequest.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.script.StoredScriptSource; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -36,8 +37,20 @@ public class PutStoredScriptRequest extends AcknowledgedRequest Date: Tue, 26 Nov 2024 20:05:45 +0000 Subject: [PATCH 021/139] Add `@UpdateForV10` annotation to `allow_insecure_settings` (#117571) This hasn't really been necessary since reloadable secure settings landed in 7.0. It's been deprecated for a long time and the last known user has agreed to stop using it in v9. This commit adds a reminder to drop this functionality entirely in v10. --- .../java/org/elasticsearch/common/settings/SecureSetting.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java b/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java index 3d4f0d2d9dbf7..64fe57b3ea373 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java +++ b/server/src/main/java/org/elasticsearch/common/settings/SecureSetting.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.util.ArrayUtils; import org.elasticsearch.core.Booleans; +import org.elasticsearch.core.UpdateForV10; import java.io.InputStream; import java.security.GeneralSecurityException; @@ -26,6 +27,7 @@ public abstract class SecureSetting extends Setting { /** Determines whether legacy settings with sensitive values should be allowed. 
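 * Controlled by the {@code es.allow_insecure_settings} system property, which is deprecated (see the annotation added below). 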
*/ + @UpdateForV10(owner = UpdateForV10.Owner.DISTRIBUTED_COORDINATION) // this should no longer be in use, even in v9, so can go away in v10 private static final boolean ALLOW_INSECURE_SETTINGS = Booleans.parseBoolean(System.getProperty("es.allow_insecure_settings", "false")); private static final Set ALLOWED_PROPERTIES = EnumSet.of( From 2e9ef4059fd049f45e67325a1ebfb79ef2d78561 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:28:36 +1100 Subject: [PATCH 022/139] Mute org.elasticsearch.reservedstate.service.FileSettingsServiceTests testStopWorksInMiddleOfProcessing #117591 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 1f092de410f8e..a54520fa66adf 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -243,6 +243,9 @@ tests: - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} issue: https://github.com/elastic/elasticsearch/issues/116777 +- class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests + method: testStopWorksInMiddleOfProcessing + issue: https://github.com/elastic/elasticsearch/issues/117591 # Examples: # From 82be243b648f9fe61705f8caa31f931ad0c95d9c Mon Sep 17 00:00:00 2001 From: Mark Vieira Date: Tue, 26 Nov 2024 14:54:31 -0800 Subject: [PATCH 023/139] Refactor preview feature task to better support composite builds (#117594) --- .../src/main/groovy/elasticsearch.ide.gradle | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle index 9237c3ae8918c..895cca2af7967 100644 --- a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle +++ b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle @@ -142,13 +142,18 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') { description = 'Enables preview features on native library module' dependsOn tasks.named("enableExternalConfiguration") - doLast { - ['main', 'test'].each { sourceSet -> - modifyXml(".idea/modules/libs/native/elasticsearch.libs.native.${sourceSet}.iml") { xml -> - xml.component.find { it.'@name' == 'NewModuleRootManager' }?.'@LANGUAGE_LEVEL' = 'JDK_21_PREVIEW' + ext { + enablePreview = { moduleFile, languageLevel -> + modifyXml(moduleFile) { xml -> + xml.component.find { it.'@name' == 'NewModuleRootManager' }?.'@LANGUAGE_LEVEL' = languageLevel } } } + + doLast { + enablePreview('.idea/modules/libs/native/elasticsearch.libs.native.main.iml', 'JDK_21_PREVIEW') + enablePreview('.idea/modules/libs/native/elasticsearch.libs.native.test.iml', 'JDK_21_PREVIEW') + } } tasks.register('buildDependencyArtifacts') { From 433a00c0ee70ee285987f7ee9125be791bb22b86 Mon Sep 17 00:00:00 2001 From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com> Date: Tue, 26 Nov 2024 18:00:19 -0500 Subject: [PATCH 024/139] [ML] Fix for Deberta tokenizer when input sequence exceeds 512 tokens (#117595) * Add test and fix * Update docs/changelog/117595.yaml * Remove test which wasn't working --- docs/changelog/117595.yaml | 5 +++ .../nlp/tokenizers/NlpTokenizer.java | 23 ++++++++++++++ .../nlp/TextSimilarityProcessorTests.java | 31 +++++++++++++++++++ .../tokenizers/DebertaV2TokenizerTests.java | 4 +-- 4 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 
docs/changelog/117595.yaml diff --git a/docs/changelog/117595.yaml b/docs/changelog/117595.yaml new file mode 100644 index 0000000000000..9360c372ac97e --- /dev/null +++ b/docs/changelog/117595.yaml @@ -0,0 +1,5 @@ +pr: 117595 +summary: Fix for Deberta tokenizer when input sequence exceeds 512 tokens +area: Machine Learning +type: bug +issues: [] diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java index 0b4a5b651d8d4..930dbee304790 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java @@ -331,6 +331,29 @@ public List tokenize(String seq1, String seq2, Tokeni tokenIdsSeq2 = tokenIdsSeq2.subList(0, maxSequenceLength() - extraTokens - tokenIdsSeq1.size()); tokenPositionMapSeq2 = tokenPositionMapSeq2.subList(0, maxSequenceLength() - extraTokens - tokenIdsSeq1.size()); } + case BALANCED -> { + isTruncated = true; + int firstSequenceLength = 0; + + if (tokenIdsSeq2.size() > (maxSequenceLength() - getNumExtraTokensForSeqPair()) / 2) { + firstSequenceLength = min(tokenIdsSeq1.size(), (maxSequenceLength() - getNumExtraTokensForSeqPair()) / 2); + } else { + firstSequenceLength = min( + tokenIdsSeq1.size(), + maxSequenceLength() - tokenIdsSeq2.size() - getNumExtraTokensForSeqPair() + ); + } + int secondSequenceLength = min( + tokenIdsSeq2.size(), + maxSequenceLength() - firstSequenceLength - getNumExtraTokensForSeqPair() + ); + + tokenIdsSeq1 = tokenIdsSeq1.subList(0, firstSequenceLength); + tokenPositionMapSeq1 = tokenPositionMapSeq1.subList(0, firstSequenceLength); + + tokenIdsSeq2 = tokenIdsSeq2.subList(0, secondSequenceLength); + tokenPositionMapSeq2 = tokenPositionMapSeq2.subList(0, secondSequenceLength); + } case NONE -> throw ExceptionsHelper.badRequestException( "Input too large. 
The tokenized input length [{}] exceeds the maximum sequence length [{}]", numTokens, diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextSimilarityProcessorTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextSimilarityProcessorTests.java index 3590793b81abd..7460e17055a00 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextSimilarityProcessorTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/TextSimilarityProcessorTests.java @@ -10,11 +10,13 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.ml.inference.results.TextSimilarityInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextSimilarityConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.VocabularyConfig; import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizationResult; import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizer; +import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.DebertaV2Tokenizer; import org.elasticsearch.xpack.ml.inference.nlp.tokenizers.TokenizationResult; import org.elasticsearch.xpack.ml.inference.pytorch.results.PyTorchInferenceResult; @@ -22,6 +24,8 @@ import java.util.List; import static org.elasticsearch.xpack.ml.inference.nlp.tokenizers.BertTokenizerTests.TEST_CASED_VOCAB; +import static org.elasticsearch.xpack.ml.inference.nlp.tokenizers.DebertaV2TokenizerTests.TEST_CASE_SCORES; +import static org.elasticsearch.xpack.ml.inference.nlp.tokenizers.DebertaV2TokenizerTests.TEST_CASE_VOCAB; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -62,6 +66,33 @@ public void testProcessor() throws IOException { assertThat(result.predictedValue(), closeTo(42, 1e-6)); } + public void testBalancedTruncationWithLongInput() throws IOException { + String question = "Is Elasticsearch scalable?"; + StringBuilder longInputBuilder = new StringBuilder(); + for (int i = 0; i < 1000; i++) { + longInputBuilder.append(TEST_CASE_VOCAB.get(randomIntBetween(0, TEST_CASE_VOCAB.size() - 1))).append(i).append(" "); + } + String longInput = longInputBuilder.toString().trim(); + + DebertaV2Tokenization tokenization = new DebertaV2Tokenization(false, true, null, Tokenization.Truncate.BALANCED, -1); + DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder(TEST_CASE_VOCAB, TEST_CASE_SCORES, tokenization).build(); + TextSimilarityConfig textSimilarityConfig = new TextSimilarityConfig( + question, + new VocabularyConfig(""), + tokenization, + "result", + TextSimilarityConfig.SpanScoreFunction.MAX + ); + TextSimilarityProcessor processor = new TextSimilarityProcessor(tokenizer); + TokenizationResult tokenizationResult = processor.getRequestBuilder(textSimilarityConfig) + .buildRequest(List.of(longInput), "1", Tokenization.Truncate.BALANCED, -1, null) + .tokenization(); + + // Assert that the tokenization result is as expected + assertThat(tokenizationResult.anyTruncated(), is(true)); + assertThat(tokenizationResult.getTokenization(0).tokenIds().length, equalTo(512)); + } + public void testResultFunctions() { BertTokenization tokenization = new BertTokenization(false, 
true, 384, Tokenization.Truncate.NONE, 128); BertTokenizer tokenizer = BertTokenizer.builder(TEST_CASED_VOCAB, tokenization).build(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java index a8461de8630ae..fc070ec25dc68 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java @@ -23,7 +23,7 @@ public class DebertaV2TokenizerTests extends ESTestCase { - private static final List TEST_CASE_VOCAB = List.of( + public static final List TEST_CASE_VOCAB = List.of( DebertaV2Tokenizer.CLASS_TOKEN, DebertaV2Tokenizer.PAD_TOKEN, DebertaV2Tokenizer.SEPARATOR_TOKEN, @@ -48,7 +48,7 @@ public class DebertaV2TokenizerTests extends ESTestCase { "<0xAD>", "▁" ); - private static final List TEST_CASE_SCORES = List.of( + public static final List TEST_CASE_SCORES = List.of( 0.0, 0.0, 0.0, From edd9d96fdf7141840a6051ec99883e4769a13b29 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 27 Nov 2024 11:08:13 +1100 Subject: [PATCH 025/139] Add a blank line between java and javax imports (#117602) This PR updates java and javax imports layout in editconfig to be consistent with spotless --- .editorconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.editorconfig b/.editorconfig index cf4f74744d2b4..774fd201ef8d5 100644 --- a/.editorconfig +++ b/.editorconfig @@ -209,7 +209,7 @@ indent_size = 4 max_line_length = 140 ij_java_class_count_to_use_import_on_demand = 999 ij_java_names_count_to_use_import_on_demand = 999 -ij_java_imports_layout = *,|,com.**,|,org.**,|,java.**,javax.**,|,$* +ij_java_imports_layout = *,|,com.**,|,org.**,|,java.**,|,javax.**,|,$* [*.json] indent_size = 2 From c5d155ec2b7f60ca68a75be784e1eae90e5ddf2f Mon Sep 17 00:00:00 2001 From: Mark Vieira Date: Tue, 26 Nov 2024 16:17:40 -0800 Subject: [PATCH 026/139] Increase test cluster node startup timeout (#117603) --- .../elasticsearch/gradle/testclusters/ElasticsearchNode.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java b/build-tools/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java index 90162591cfcef..4cb67e249b0b0 100644 --- a/build-tools/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java +++ b/build-tools/src/main/java/org/elasticsearch/gradle/testclusters/ElasticsearchNode.java @@ -98,7 +98,7 @@ public class ElasticsearchNode implements TestClusterConfiguration { private static final int ES_DESTROY_TIMEOUT = 20; private static final TimeUnit ES_DESTROY_TIMEOUT_UNIT = TimeUnit.SECONDS; - private static final int NODE_UP_TIMEOUT = 2; + private static final int NODE_UP_TIMEOUT = 3; private static final TimeUnit NODE_UP_TIMEOUT_UNIT = TimeUnit.MINUTES; private static final int ADDITIONAL_CONFIG_TIMEOUT = 15; private static final TimeUnit ADDITIONAL_CONFIG_TIMEOUT_UNIT = TimeUnit.SECONDS; From e7a9dcb180f9f12ccdf876eaa427b86ca873715d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 27 Nov 2024 16:48:12 +1100 Subject: [PATCH 027/139] Mute org.elasticsearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT 
org.elasticsearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT #117596 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index a54520fa66adf..c97e46375c597 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -246,6 +246,8 @@ tests: - class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests method: testStopWorksInMiddleOfProcessing issue: https://github.com/elastic/elasticsearch/issues/117591 +- class: org.elasticsearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT + issue: https://github.com/elastic/elasticsearch/issues/117596 # Examples: # From 1988bf10880749cef8a3d554c098eea4d8e4870b Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 27 Nov 2024 07:38:33 +0100 Subject: [PATCH 028/139] Add has_custom_cutoff_date to logsdb usage. (#117550) Indicates whether es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override system property has been configured. A follow up from #116647 --- .../org/elasticsearch/TransportVersions.java | 2 + .../application/LogsDBFeatureSetUsage.java | 23 ++++++++-- .../logsdb/qa/with-custom-cutoff/build.gradle | 19 ++++++++ .../xpack/logsdb/LogsdbWithBasicRestIT.java | 45 +++++++++++++++++++ .../logsdb/LogsDBUsageTransportAction.java | 8 +++- .../logsdb/SyntheticSourceLicenseService.java | 5 +-- 6 files changed, 94 insertions(+), 8 deletions(-) create mode 100644 x-pack/plugin/logsdb/qa/with-custom-cutoff/build.gradle create mode 100644 x-pack/plugin/logsdb/qa/with-custom-cutoff/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbWithBasicRestIT.java diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 6567f48d6c232..dda7d7e5d4c4c 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -204,9 +204,11 @@ static TransportVersion def(int id) { public static final TransportVersion FAST_REFRESH_RCO_2 = def(8_795_00_0); public static final TransportVersion ESQL_ENRICH_RUNTIME_WARNINGS = def(8_796_00_0); public static final TransportVersion INGEST_PIPELINE_CONFIGURATION_AS_MAP = def(8_797_00_0); + public static final TransportVersion LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE_FIX_8_17 = def(8_797_00_1); public static final TransportVersion INDEXING_PRESSURE_THROTTLING_STATS = def(8_798_00_0); public static final TransportVersion REINDEX_DATA_STREAMS = def(8_799_00_0); public static final TransportVersion ESQL_REMOVE_NODE_LEVEL_PLAN = def(8_800_00_0); + public static final TransportVersion LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE = def(8_801_00_0); /* * STOP! READ THIS FIRST! 
No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/application/LogsDBFeatureSetUsage.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/application/LogsDBFeatureSetUsage.java index 2758ef73a98da..b32e95c5fc9d8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/application/LogsDBFeatureSetUsage.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/application/LogsDBFeatureSetUsage.java @@ -22,6 +22,7 @@ public final class LogsDBFeatureSetUsage extends XPackFeatureUsage { private final int indicesWithSyntheticSource; private final long numDocs; private final long sizeInBytes; + private final boolean hasCustomCutoffDate; public LogsDBFeatureSetUsage(StreamInput input) throws IOException { super(input); @@ -34,6 +35,13 @@ public LogsDBFeatureSetUsage(StreamInput input) throws IOException { numDocs = 0; sizeInBytes = 0; } + var transportVersion = input.getTransportVersion(); + if (transportVersion.isPatchFrom(TransportVersions.LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE_FIX_8_17) + || transportVersion.onOrAfter(TransportVersions.LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE)) { + hasCustomCutoffDate = input.readBoolean(); + } else { + hasCustomCutoffDate = false; + } } @Override @@ -45,6 +53,11 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVLong(numDocs); out.writeVLong(sizeInBytes); } + var transportVersion = out.getTransportVersion(); + if (transportVersion.isPatchFrom(TransportVersions.LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE_FIX_8_17) + || transportVersion.onOrAfter(TransportVersions.LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE)) { + out.writeBoolean(hasCustomCutoffDate); + } } public LogsDBFeatureSetUsage( @@ -53,13 +66,15 @@ public LogsDBFeatureSetUsage( int indicesCount, int indicesWithSyntheticSource, long numDocs, - long sizeInBytes + long sizeInBytes, + boolean hasCustomCutoffDate ) { super(XPackField.LOGSDB, available, enabled); this.indicesCount = indicesCount; this.indicesWithSyntheticSource = indicesWithSyntheticSource; this.numDocs = numDocs; this.sizeInBytes = sizeInBytes; + this.hasCustomCutoffDate = hasCustomCutoffDate; } @Override @@ -74,11 +89,12 @@ protected void innerXContent(XContentBuilder builder, Params params) throws IOEx builder.field("indices_with_synthetic_source", indicesWithSyntheticSource); builder.field("num_docs", numDocs); builder.field("size_in_bytes", sizeInBytes); + builder.field("has_custom_cutoff_date", hasCustomCutoffDate); } @Override public int hashCode() { - return Objects.hash(available, enabled, indicesCount, indicesWithSyntheticSource, numDocs, sizeInBytes); + return Objects.hash(available, enabled, indicesCount, indicesWithSyntheticSource, numDocs, sizeInBytes, hasCustomCutoffDate); } @Override @@ -95,6 +111,7 @@ public boolean equals(Object obj) { && Objects.equals(indicesCount, other.indicesCount) && Objects.equals(indicesWithSyntheticSource, other.indicesWithSyntheticSource) && Objects.equals(numDocs, other.numDocs) - && Objects.equals(sizeInBytes, other.sizeInBytes); + && Objects.equals(sizeInBytes, other.sizeInBytes) + && Objects.equals(hasCustomCutoffDate, other.hasCustomCutoffDate); } } diff --git a/x-pack/plugin/logsdb/qa/with-custom-cutoff/build.gradle b/x-pack/plugin/logsdb/qa/with-custom-cutoff/build.gradle new file mode 100644 index 0000000000000..9729ac9c29cef --- /dev/null +++ b/x-pack/plugin/logsdb/qa/with-custom-cutoff/build.gradle @@ -0,0 +1,19 @@ +/* + 
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +apply plugin: 'elasticsearch.internal-java-rest-test' + +dependencies { + javaRestTestImplementation(testArtifact(project(xpackModule('core')))) +} + +tasks.named("javaRestTest").configure { + // This test cluster is using a BASIC license and FIPS 140 mode is not supported in BASIC + buildParams.withFipsEnabledOnly(it) + + usesDefaultDistribution() +} diff --git a/x-pack/plugin/logsdb/qa/with-custom-cutoff/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbWithBasicRestIT.java b/x-pack/plugin/logsdb/qa/with-custom-cutoff/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbWithBasicRestIT.java new file mode 100644 index 0000000000000..3266e2e6e4757 --- /dev/null +++ b/x-pack/plugin/logsdb/qa/with-custom-cutoff/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbWithBasicRestIT.java @@ -0,0 +1,45 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.logsdb; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.hamcrest.Matchers; +import org.junit.ClassRule; + +import java.io.IOException; +import java.util.Map; + +public class LogsdbWithBasicRestIT extends ESRestTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) + .systemProperty("es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override", "2027-12-31T23:59") + .setting("xpack.security.enabled", "false") + .setting("cluster.logsdb.enabled", "true") + .build(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + public void testCustomCutoffDateUsage() throws IOException { + var response = getAsMap("/_xpack/usage"); + Map usage = (Map) response.get("logsdb"); + assertThat(usage, Matchers.hasEntry("available", true)); + assertThat(usage, Matchers.hasEntry("enabled", true)); + assertThat(usage, Matchers.hasEntry("indices_count", 0)); + assertThat(usage, Matchers.hasEntry("indices_with_synthetic_source", 0)); + assertThat(usage, Matchers.hasEntry("num_docs", 0)); + assertThat(usage, Matchers.hasEntry("size_in_bytes", 0)); + assertThat(usage, Matchers.hasEntry("has_custom_cutoff_date", true)); + } +} diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBUsageTransportAction.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBUsageTransportAction.java index 62e1eef3e0e97..f4fa2a29d79a0 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBUsageTransportAction.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/LogsDBUsageTransportAction.java @@ -77,6 +77,7 @@ protected void masterOperation( } } final boolean enabled = LogsDBPlugin.CLUSTER_LOGSDB_ENABLED.get(clusterService.getSettings()); + final boolean hasCustomCutoffDate = System.getProperty(SyntheticSourceLicenseService.CUTOFF_DATE_SYS_PROP_NAME) != null; if 
(featureService.clusterHasFeature(state, XPackFeatures.LOGSDB_TELMETRY_STATS)) { final DiscoveryNode[] nodes = state.nodes().getDataNodes().values().toArray(DiscoveryNode[]::new); final var statsRequest = new IndexModeStatsActionType.StatsRequest(nodes); @@ -91,13 +92,16 @@ protected void masterOperation( finalNumIndices, finalNumIndicesWithSyntheticSources, indexStats.numDocs(), - indexStats.numBytes() + indexStats.numBytes(), + hasCustomCutoffDate ) ); })); } else { listener.onResponse( - new XPackUsageFeatureResponse(new LogsDBFeatureSetUsage(true, enabled, numIndices, numIndicesWithSyntheticSources, 0L, 0L)) + new XPackUsageFeatureResponse( + new LogsDBFeatureSetUsage(true, enabled, numIndices, numIndicesWithSyntheticSources, 0L, 0L, hasCustomCutoffDate) + ) ); } } diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java index 1b3513f15a86a..71de2f7909835 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java @@ -27,8 +27,7 @@ final class SyntheticSourceLicenseService { static final String MAPPINGS_FEATURE_FAMILY = "mappings"; // You can only override this property if you received explicit approval from Elastic. - private static final String CUTOFF_DATE_SYS_PROP_NAME = - "es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override"; + static final String CUTOFF_DATE_SYS_PROP_NAME = "es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override"; private static final Logger LOGGER = LogManager.getLogger(SyntheticSourceLicenseService.class); static final long DEFAULT_CUTOFF_DATE = LocalDateTime.of(2024, 12, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); @@ -129,7 +128,7 @@ private static long getCutoffDate(String cutoffDateAsString) { LOGGER.info( "Configuring [{}] to [{}]", CUTOFF_DATE_SYS_PROP_NAME, - LocalDateTime.ofInstant(Instant.ofEpochSecond(cutoffDate), ZoneOffset.UTC) + LocalDateTime.ofInstant(Instant.ofEpochMilli(cutoffDate), ZoneOffset.UTC) ); return cutoffDate; } else { From ef8ffc5ada043b1f71052cdd919b5ee419472c1a Mon Sep 17 00:00:00 2001 From: "elastic-renovate-prod[bot]" <174716857+elastic-renovate-prod[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 08:13:43 +0100 Subject: [PATCH 029/139] Update docker.elastic.co/wolfi/chainguard-base:latest Docker digest to 32f06b1 (#117564) Co-authored-by: elastic-renovate-prod[bot] <174716857+elastic-renovate-prod[bot]@users.noreply.github.com> --- .../main/java/org/elasticsearch/gradle/internal/DockerBase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java index 71e968557cefe..0fb75b59b6096 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/DockerBase.java @@ -22,7 +22,7 @@ public enum DockerBase { // Chainguard based wolfi image with latest jdk // This is usually updated via renovatebot // spotless:off - WOLFI("docker.elastic.co/wolfi/chainguard-base:latest@sha256:55b297da5151d2a2997e8ab9729fe1304e4869389d7090ab7031cc29530f69f8", + 
WOLFI("docker.elastic.co/wolfi/chainguard-base:latest@sha256:32f06b169bb4b0f257fbb10e8c8379f06d3ee1355c89b3327cb623781a29590e", "-wolfi", "apk" ), From 6130fbb0ea012b29f94a62df1d39abfcda247555 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 27 Nov 2024 08:17:42 +0000 Subject: [PATCH 030/139] Implement lifecycle on `SimulatePipelineRequest` (#117585) Rather than releasing the REST request body after computing the response, we can link the lifecycles of the REST and transport requests and release the REST request body sooner. Not that we expect these bodies to be particularly large in this case, but still it's a better pattern to follow. --- .../ingest/geoip/GeoIpDownloaderIT.java | 4 +-- .../elasticsearch/ingest/IngestClientIT.java | 3 +- .../ingest/SimulatePipelineRequest.java | 31 ++++++++++++++++--- .../SimulatePipelineRequestBuilder.java | 3 +- .../ingest/RestSimulatePipelineAction.java | 8 ++--- .../ingest/SimulatePipelineRequestTests.java | 9 ++---- .../ingest/IngestPipelineTestUtils.java | 16 ++++++++++ .../xpack/enrich/EnrichProcessorIT.java | 11 +++---- .../license/MachineLearningLicensingIT.java | 18 +++-------- .../TransportPreviewTransformAction.java | 6 +++- 10 files changed, 68 insertions(+), 41 deletions(-) diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java index f8c8d2bd359f3..dd177fed5732a 100644 --- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java @@ -41,7 +41,6 @@ import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; import org.junit.After; @@ -67,6 +66,7 @@ import java.util.zip.GZIPInputStream; import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty; +import static org.elasticsearch.ingest.IngestPipelineTestUtils.jsonSimulatePipelineRequest; import static org.elasticsearch.ingest.geoip.GeoIpTestUtils.copyDefaultDatabases; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; @@ -494,7 +494,7 @@ private SimulateDocumentBaseResult simulatePipeline() throws IOException { builder.endObject(); bytes = BytesReference.bytes(builder); } - SimulatePipelineRequest simulateRequest = new SimulatePipelineRequest(bytes, XContentType.JSON); + SimulatePipelineRequest simulateRequest = jsonSimulatePipelineRequest(bytes); simulateRequest.setId("_id"); // Avoid executing on a coordinating only node, because databases are not available there and geoip processor won't do any lookups. 
// (some test seeds repeatedly hit such nodes causing failures) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestClientIT.java b/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestClientIT.java index c25ce822f8755..81a39dbe1f9f7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestClientIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/ingest/IngestClientIT.java @@ -37,6 +37,7 @@ import java.util.List; import java.util.Map; +import static org.elasticsearch.ingest.IngestPipelineTestUtils.jsonSimulatePipelineRequest; import static org.elasticsearch.ingest.IngestPipelineTestUtils.putJsonPipelineRequest; import static org.elasticsearch.test.NodeRoles.nonIngestNode; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; @@ -97,7 +98,7 @@ public void testSimulate() throws Exception { if (randomBoolean()) { response = clusterAdmin().prepareSimulatePipeline(bytes, XContentType.JSON).setId("_id").get(); } else { - SimulatePipelineRequest request = new SimulatePipelineRequest(bytes, XContentType.JSON); + SimulatePipelineRequest request = jsonSimulatePipelineRequest(bytes); request.setId("_id"); response = clusterAdmin().simulatePipeline(request).get(); } diff --git a/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequest.java b/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequest.java index 9cfc441490859..d6a2d81fdb7d3 100644 --- a/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequest.java +++ b/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequest.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.logging.DeprecationLogger; @@ -41,19 +42,20 @@ public class SimulatePipelineRequest extends ActionRequest implements ToXContent private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(SimulatePipelineRequest.class); private String id; private boolean verbose; - private final BytesReference source; + private final ReleasableBytesReference source; private final XContentType xContentType; private RestApiVersion restApiVersion; /** * Creates a new request with the given source and its content type */ - public SimulatePipelineRequest(BytesReference source, XContentType xContentType) { + public SimulatePipelineRequest(ReleasableBytesReference source, XContentType xContentType) { this(source, xContentType, RestApiVersion.current()); } - public SimulatePipelineRequest(BytesReference source, XContentType xContentType, RestApiVersion restApiVersion) { + public SimulatePipelineRequest(ReleasableBytesReference source, XContentType xContentType, RestApiVersion restApiVersion) { this.source = Objects.requireNonNull(source); + assert source.hasReferences(); this.xContentType = Objects.requireNonNull(xContentType); this.restApiVersion = restApiVersion; } @@ -62,7 +64,7 @@ public SimulatePipelineRequest(BytesReference source, XContentType xContentType, super(in); id = in.readOptionalString(); verbose = in.readBoolean(); - source = in.readBytesReference(); + source = in.readReleasableBytesReference(); xContentType = in.readEnum(XContentType.class); } @@ 
-88,6 +90,7 @@ public void setVerbose(boolean verbose) { } public BytesReference getSource() { + assert source.hasReferences(); return source; } @@ -250,4 +253,24 @@ private static List parseDocs(Map config, RestAp public RestApiVersion getRestApiVersion() { return restApiVersion; } + + @Override + public final void incRef() { + source.incRef(); + } + + @Override + public final boolean tryIncRef() { + return source.tryIncRef(); + } + + @Override + public final boolean decRef() { + return source.decRef(); + } + + @Override + public final boolean hasReferences() { + return source.hasReferences(); + } } diff --git a/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequestBuilder.java b/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequestBuilder.java index 05e30685c6a9b..931b86d15e24b 100644 --- a/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequestBuilder.java +++ b/server/src/main/java/org/elasticsearch/action/ingest/SimulatePipelineRequestBuilder.java @@ -12,6 +12,7 @@ import org.elasticsearch.action.ActionRequestBuilder; import org.elasticsearch.client.internal.ElasticsearchClient; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.xcontent.XContentType; public class SimulatePipelineRequestBuilder extends ActionRequestBuilder { @@ -20,7 +21,7 @@ public class SimulatePipelineRequestBuilder extends ActionRequestBuilder sourceTuple = restRequest.contentOrSourceParam(); - var content = sourceTuple.v2(); - SimulatePipelineRequest request = new SimulatePipelineRequest(sourceTuple.v2(), sourceTuple.v1(), restRequest.getRestApiVersion()); + final var request = new SimulatePipelineRequest(sourceTuple.v2(), sourceTuple.v1(), restRequest.getRestApiVersion()); request.setId(restRequest.param("id")); request.setVerbose(restRequest.paramAsBoolean("verbose", false)); - return channel -> client.admin() - .cluster() - .simulatePipeline(request, ActionListener.withRef(new RestToXContentListener<>(channel), content)); + return channel -> client.admin().cluster().simulatePipeline(request, new RestToXContentListener<>(channel)); } } diff --git a/server/src/test/java/org/elasticsearch/action/ingest/SimulatePipelineRequestTests.java b/server/src/test/java/org/elasticsearch/action/ingest/SimulatePipelineRequestTests.java index 58ff9ec421889..983c2e7d65032 100644 --- a/server/src/test/java/org/elasticsearch/action/ingest/SimulatePipelineRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/ingest/SimulatePipelineRequestTests.java @@ -16,14 +16,14 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; -import java.nio.charset.StandardCharsets; +import static org.elasticsearch.ingest.IngestPipelineTestUtils.jsonSimulatePipelineRequest; import static org.hamcrest.CoreMatchers.equalTo; public class SimulatePipelineRequestTests extends ESTestCase { public void testSerialization() throws IOException { - SimulatePipelineRequest request = new SimulatePipelineRequest(new BytesArray(""), XContentType.JSON); + SimulatePipelineRequest request = jsonSimulatePipelineRequest(new BytesArray("")); // Sometimes we set an id if (randomBoolean()) { request.setId(randomAlphaOfLengthBetween(1, 10)); @@ -44,10 +44,7 @@ public void testSerialization() throws IOException { } public void testSerializationWithXContent() throws IOException { - SimulatePipelineRequest request = new SimulatePipelineRequest( - new 
BytesArray("{}".getBytes(StandardCharsets.UTF_8)), - XContentType.JSON - ); + SimulatePipelineRequest request = jsonSimulatePipelineRequest("{}"); assertEquals(XContentType.JSON, request.getXContentType()); BytesStreamOutput output = new BytesStreamOutput(); diff --git a/test/framework/src/main/java/org/elasticsearch/ingest/IngestPipelineTestUtils.java b/test/framework/src/main/java/org/elasticsearch/ingest/IngestPipelineTestUtils.java index 8fd3c61d4c9da..9888b1eb661ff 100644 --- a/test/framework/src/main/java/org/elasticsearch/ingest/IngestPipelineTestUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/ingest/IngestPipelineTestUtils.java @@ -14,11 +14,13 @@ import org.elasticsearch.action.ingest.DeletePipelineTransportAction; import org.elasticsearch.action.ingest.PutPipelineRequest; import org.elasticsearch.action.ingest.PutPipelineTransportAction; +import org.elasticsearch.action.ingest.SimulatePipelineRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.client.internal.ElasticsearchClient; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; import org.elasticsearch.test.ESTestCase; @@ -124,4 +126,18 @@ public void onFailure(Exception e) { ); } } + + /** + * Construct a new {@link SimulatePipelineRequest} whose content is the given JSON document, represented as a {@link String}. + */ + public static SimulatePipelineRequest jsonSimulatePipelineRequest(String jsonString) { + return jsonSimulatePipelineRequest(new BytesArray(jsonString)); + } + + /** + * Construct a new {@link SimulatePipelineRequest} whose content is the given JSON document, represented as a {@link BytesReference}. 
+ */ + public static SimulatePipelineRequest jsonSimulatePipelineRequest(BytesReference jsonBytes) { + return new SimulatePipelineRequest(ReleasableBytesReference.wrap(jsonBytes), XContentType.JSON); + } } diff --git a/x-pack/plugin/enrich/src/internalClusterTest/java/org/elasticsearch/xpack/enrich/EnrichProcessorIT.java b/x-pack/plugin/enrich/src/internalClusterTest/java/org/elasticsearch/xpack/enrich/EnrichProcessorIT.java index d646aed11d7d9..5fc16034465d4 100644 --- a/x-pack/plugin/enrich/src/internalClusterTest/java/org/elasticsearch/xpack/enrich/EnrichProcessorIT.java +++ b/x-pack/plugin/enrich/src/internalClusterTest/java/org/elasticsearch/xpack/enrich/EnrichProcessorIT.java @@ -9,9 +9,7 @@ import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.ingest.SimulateDocumentBaseResult; -import org.elasticsearch.action.ingest.SimulatePipelineRequest; import org.elasticsearch.action.support.WriteRequest; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.ingest.common.IngestCommonPlugin; import org.elasticsearch.plugins.Plugin; @@ -27,6 +25,7 @@ import java.util.Collection; import java.util.List; +import static org.elasticsearch.ingest.IngestPipelineTestUtils.jsonSimulatePipelineRequest; import static org.elasticsearch.xpack.enrich.AbstractEnrichTestCase.createSourceIndices; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.nullValue; @@ -90,7 +89,7 @@ public void testEnrichCacheValuesCannotBeCorrupted() { var executePolicyRequest = new ExecuteEnrichPolicyAction.Request(TEST_REQUEST_TIMEOUT, policyName); client().execute(ExecuteEnrichPolicyAction.INSTANCE, executePolicyRequest).actionGet(); - var simulatePipelineRequest = new SimulatePipelineRequest(new BytesArray(""" + var simulatePipelineRequest = jsonSimulatePipelineRequest(""" { "pipeline": { "processors": [ @@ -119,7 +118,7 @@ public void testEnrichCacheValuesCannotBeCorrupted() { } ] } - """), XContentType.JSON); + """); var response = clusterAdmin().simulatePipeline(simulatePipelineRequest).actionGet(); var result = (SimulateDocumentBaseResult) response.getResults().get(0); assertThat(result.getFailure(), nullValue()); @@ -132,7 +131,7 @@ public void testEnrichCacheValuesCannotBeCorrupted() { assertThat(statsResponse.getCacheStats().get(0).misses(), equalTo(1L)); assertThat(statsResponse.getCacheStats().get(0).hits(), equalTo(0L)); - simulatePipelineRequest = new SimulatePipelineRequest(new BytesArray(""" + simulatePipelineRequest = jsonSimulatePipelineRequest(""" { "pipeline": { "processors": [ @@ -155,7 +154,7 @@ public void testEnrichCacheValuesCannotBeCorrupted() { } ] } - """), XContentType.JSON); + """); response = clusterAdmin().simulatePipeline(simulatePipelineRequest).actionGet(); result = (SimulateDocumentBaseResult) response.getResults().get(0); assertThat(result.getFailure(), nullValue()); diff --git a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/license/MachineLearningLicensingIT.java b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/license/MachineLearningLicensingIT.java index 08d09f70cb46b..479fb20650b18 100644 --- a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/license/MachineLearningLicensingIT.java +++ b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/license/MachineLearningLicensingIT.java @@ -11,14 +11,12 @@ import org.elasticsearch.action.index.IndexRequest; import 
org.elasticsearch.action.ingest.SimulateDocumentBaseResult; import org.elasticsearch.action.ingest.SimulatePipelineAction; -import org.elasticsearch.action.ingest.SimulatePipelineRequest; import org.elasticsearch.action.ingest.SimulatePipelineResponse; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; @@ -61,13 +59,13 @@ import org.elasticsearch.xpack.ml.support.BaseMlIntegTestCase; import org.junit.Before; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; +import static org.elasticsearch.ingest.IngestPipelineTestUtils.jsonSimulatePipelineRequest; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.hasItem; @@ -541,11 +539,7 @@ public void testMachineLearningCreateInferenceProcessorRestricted() { }}] }""", pipeline); PlainActionFuture simulatePipelineListener = new PlainActionFuture<>(); - client().execute( - SimulatePipelineAction.INSTANCE, - new SimulatePipelineRequest(new BytesArray(simulateSource.getBytes(StandardCharsets.UTF_8)), XContentType.JSON), - simulatePipelineListener - ); + client().execute(SimulatePipelineAction.INSTANCE, jsonSimulatePipelineRequest(simulateSource), simulatePipelineListener); assertThat(simulatePipelineListener.actionGet().getResults(), is(not(empty()))); @@ -575,7 +569,7 @@ public void testMachineLearningCreateInferenceProcessorRestricted() { // Simulating the pipeline should fail SimulateDocumentBaseResult simulateResponse = (SimulateDocumentBaseResult) client().execute( SimulatePipelineAction.INSTANCE, - new SimulatePipelineRequest(new BytesArray(simulateSource.getBytes(StandardCharsets.UTF_8)), XContentType.JSON) + jsonSimulatePipelineRequest(simulateSource) ).actionGet().getResults().get(0); assertThat(simulateResponse.getFailure(), is(not(nullValue()))); assertThat((simulateResponse.getFailure()).getCause(), is(instanceOf(ElasticsearchSecurityException.class))); @@ -588,11 +582,7 @@ public void testMachineLearningCreateInferenceProcessorRestricted() { putJsonPipeline("test_infer_license_pipeline", pipeline); PlainActionFuture simulatePipelineListenerNewLicense = new PlainActionFuture<>(); - client().execute( - SimulatePipelineAction.INSTANCE, - new SimulatePipelineRequest(new BytesArray(simulateSource.getBytes(StandardCharsets.UTF_8)), XContentType.JSON), - simulatePipelineListenerNewLicense - ); + client().execute(SimulatePipelineAction.INSTANCE, jsonSimulatePipelineRequest(simulateSource), simulatePipelineListenerNewLicense); assertThat(simulatePipelineListenerNewLicense.actionGet().getResults(), is(not(empty()))); diff --git a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPreviewTransformAction.java b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPreviewTransformAction.java index 36237d2705205..60f00da195974 100644 --- a/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPreviewTransformAction.java +++ 
b/x-pack/plugin/transform/src/main/java/org/elasticsearch/xpack/transform/action/TransportPreviewTransformAction.java @@ -21,6 +21,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.bytes.ReleasableBytesReference; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; @@ -282,7 +283,10 @@ private void getPreview( builder.startObject(); builder.field("docs", results); builder.endObject(); - var pipelineRequest = new SimulatePipelineRequest(BytesReference.bytes(builder), XContentType.JSON); + var pipelineRequest = new SimulatePipelineRequest( + ReleasableBytesReference.wrap(BytesReference.bytes(builder)), + XContentType.JSON + ); pipelineRequest.setId(pipeline); parentTaskClient.execute(SimulatePipelineAction.INSTANCE, pipelineRequest, pipelineResponseActionListener); } From c11e3c22991d39a95b71e992024d80d8eb677419 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 27 Nov 2024 08:18:54 +0000 Subject: [PATCH 031/139] Log shard `completed snapshot` message at `TRACE` (#117569) This message is on the happy path, no need to log it at `DEBUG`. Relates ES-8773 --- .../org/elasticsearch/snapshots/SnapshotShardsService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java index 7b2066f243771..234c0239a68ce 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotShardsService.java @@ -425,9 +425,9 @@ public void onResponse(ShardSnapshotResult shardSnapshotResult) { final ShardGeneration newGeneration = shardSnapshotResult.getGeneration(); assert newGeneration != null; assert newGeneration.equals(snapshotStatus.generation()); - if (logger.isDebugEnabled()) { + if (logger.isTraceEnabled()) { final IndexShardSnapshotStatus.Copy lastSnapshotStatus = snapshotStatus.asCopy(); - logger.debug( + logger.trace( "[{}][{}] completed snapshot to [{}] with status [{}] at generation [{}]", shardId, snapshot, From 04dd9c22dae13e7a5ab67e8c3ea4b8228784f21a Mon Sep 17 00:00:00 2001 From: Iraklis Psaroudakis Date: Wed, 27 Nov 2024 12:10:22 +0200 Subject: [PATCH 032/139] Make fast refresh ineffective for search routing (#117455) Re-introduction of ES PR #114619. Now, fast refresh indices route searches/gets to search shards in stateless. Thus, this PR removes unnecessary code and simplifies some things. 
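
Concretely, the fast-refresh special case disappears from shard selection: gets and searches now filter shard routings with a plain searchable check, roughly (a sketch of the resulting selection; the full change is in the diffs below):

    iterator.getShardRoutings().stream().filter(ShardRouting::isSearchable).toList()

and OperationRouting.canSearchShard is removed together with its FAST_REFRESH_RCO_2 transport-version gate.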
Relates ES-9563 --- ...ansportUnpromotableShardRefreshAction.java | 15 --------- .../action/get/TransportGetAction.java | 12 +++---- .../get/TransportShardMultiGetAction.java | 13 +++----- .../cluster/routing/IndexRoutingTable.java | 2 +- .../cluster/routing/OperationRouting.java | 19 +---------- .../cluster/routing/ShardRouting.java | 3 +- .../routing/IndexRoutingTableTests.java | 33 +++---------------- 7 files changed, 15 insertions(+), 82 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java index 4458c008babcd..6c24ec2d17604 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/refresh/TransportUnpromotableShardRefreshAction.java @@ -24,9 +24,6 @@ import java.util.List; -import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO_2; -import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; - public class TransportUnpromotableShardRefreshAction extends TransportBroadcastUnpromotableAction< UnpromotableShardRefreshRequest, ActionResponse.Empty> { @@ -76,18 +73,6 @@ protected void unpromotableShardOperation( return; } - // During an upgrade to FAST_REFRESH_RCO_2, we expect search shards to be first upgraded before the primary is upgraded. Thus, - // when the primary is upgraded, and starts to deliver unpromotable refreshes, we expect the search shards to be upgraded already. - // Note that the fast refresh setting is final. - // TODO: remove assertion (ES-9563) - assert INDEX_FAST_REFRESH_SETTING.get(shard.indexSettings().getSettings()) == false - || transportService.getLocalNodeConnection().getTransportVersion().onOrAfter(FAST_REFRESH_RCO_2) - : "attempted to refresh a fast refresh search shard " - + shard - + " on transport version " - + transportService.getLocalNodeConnection().getTransportVersion() - + " (before FAST_REFRESH_RCO_2)"; - ActionListener.run(responseListener, listener -> { shard.waitForPrimaryTermAndGeneration( request.getPrimaryTerm(), diff --git a/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java b/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java index fb4b3907d2bfd..a2c7c8664e81a 100644 --- a/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java +++ b/server/src/main/java/org/elasticsearch/action/get/TransportGetAction.java @@ -28,9 +28,9 @@ import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.OperationRouting; import org.elasticsearch.cluster.routing.PlainShardIterator; import org.elasticsearch.cluster.routing.ShardIterator; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.core.TimeValue; @@ -109,10 +109,7 @@ protected ShardIterator shards(ClusterState state, InternalRequest request) { if (iterator == null) { return null; } - return new PlainShardIterator( - iterator.shardId(), - iterator.getShardRoutings().stream().filter(shardRouting -> OperationRouting.canSearchShard(shardRouting, state)).toList() - ); + return new 
PlainShardIterator(iterator.shardId(), iterator.getShardRoutings().stream().filter(ShardRouting::isSearchable).toList()); } @Override @@ -129,9 +126,8 @@ protected void asyncShardOperation(GetRequest request, ShardId shardId, ActionLi handleGetOnUnpromotableShard(request, indexShard, listener); return; } - // TODO: adapt assertion to assert only that it is not stateless (ES-9563) - assert DiscoveryNode.isStateless(clusterService.getSettings()) == false || indexShard.indexSettings().isFastRefresh() - : "in Stateless a promotable to primary shard can receive a TransportGetAction only if an index has the fast refresh setting"; + assert DiscoveryNode.isStateless(clusterService.getSettings()) == false + : "in Stateless a promotable to primary shard should not receive a TransportGetAction"; if (request.realtime()) { // we are not tied to a refresh cycle here anyway asyncGet(request, shardId, listener); } else { diff --git a/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java b/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java index 93e1b18ec64c6..0fa770df8e4ef 100644 --- a/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java +++ b/server/src/main/java/org/elasticsearch/action/get/TransportShardMultiGetAction.java @@ -28,9 +28,9 @@ import org.elasticsearch.cluster.ClusterStateObserver; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.routing.OperationRouting; import org.elasticsearch.cluster.routing.PlainShardIterator; import org.elasticsearch.cluster.routing.ShardIterator; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.core.TimeValue; @@ -113,10 +113,7 @@ protected ShardIterator shards(ClusterState state, InternalRequest request) { if (iterator == null) { return null; } - return new PlainShardIterator( - iterator.shardId(), - iterator.getShardRoutings().stream().filter(shardRouting -> OperationRouting.canSearchShard(shardRouting, state)).toList() - ); + return new PlainShardIterator(iterator.shardId(), iterator.getShardRoutings().stream().filter(ShardRouting::isSearchable).toList()); } @Override @@ -128,10 +125,8 @@ protected void asyncShardOperation(MultiGetShardRequest request, ShardId shardId handleMultiGetOnUnpromotableShard(request, indexShard, listener); return; } - // TODO: adapt assertion to assert only that it is not stateless (ES-9563) - assert DiscoveryNode.isStateless(clusterService.getSettings()) == false || indexShard.indexSettings().isFastRefresh() - : "in Stateless a promotable to primary shard can receive a TransportShardMultiGetAction only if an index has " - + "the fast refresh setting"; + assert DiscoveryNode.isStateless(clusterService.getSettings()) == false + : "in Stateless a promotable to primary shard should not receive a TransportShardMultiGetAction"; if (request.realtime()) { // we are not tied to a refresh cycle here anyway asyncShardMultiGet(request, shardId, listener); } else { diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java index 7cb0e457e36c7..bcacf21fcedbf 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java +++ 
b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRoutingTable.java @@ -241,7 +241,7 @@ public boolean readyForSearch(ClusterState clusterState) { boolean found = false; for (int idx = 0; idx < shardRoutingTable.size(); idx++) { ShardRouting shardRouting = shardRoutingTable.shard(idx); - if (shardRouting.active() && OperationRouting.canSearchShard(shardRouting, clusterState)) { + if (shardRouting.active() && shardRouting.isSearchable()) { found = true; break; } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java index 13fc874f52e9f..5e2dbf1c5df5d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/OperationRouting.java @@ -32,9 +32,6 @@ import java.util.Set; import java.util.stream.Collectors; -import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO_2; -import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; - public class OperationRouting { public static final Setting<Boolean> USE_ADAPTIVE_REPLICA_SELECTION_SETTING = Setting.boolSetting( @@ -151,7 +148,7 @@ private static List<ShardRouting> statefulShardsThatHandleSearches(ShardIterator } private static List<ShardRouting> statelessShardsThatHandleSearches(ClusterState clusterState, ShardIterator iterator) { - return iterator.getShardRoutings().stream().filter(shardRouting -> canSearchShard(shardRouting, clusterState)).toList(); + return iterator.getShardRoutings().stream().filter(ShardRouting::isSearchable).toList(); } public static ShardIterator getShards(ClusterState clusterState, ShardId shardId) { @@ -304,18 +301,4 @@ public ShardId shardId(ClusterState clusterState, String index, String id, @Null IndexMetadata indexMetadata = indexMetadata(clusterState, index); return new ShardId(indexMetadata.getIndex(), IndexRouting.fromIndexMetadata(indexMetadata).getShard(id, routing)); } - - public static boolean canSearchShard(ShardRouting shardRouting, ClusterState clusterState) { - // TODO: remove if and always return isSearchable (ES-9563) - if (INDEX_FAST_REFRESH_SETTING.get(clusterState.metadata().index(shardRouting.index()).getSettings())) { - // Until all the cluster is upgraded, we send searches/gets to the primary (even if it has been upgraded) to execute locally. - if (clusterState.getMinTransportVersion().onOrAfter(FAST_REFRESH_RCO_2)) { - return shardRouting.isSearchable(); - } else { - return shardRouting.isPromotableToPrimary(); - } - } else { - return shardRouting.isSearchable(); - } - } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java index 319786b558ddd..157d28e61057c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/ShardRouting.java @@ -935,8 +935,7 @@ public boolean isPromotableToPrimary() { } /** - * Determine if role searchable. Consumers should prefer {@link OperationRouting#canSearchShard(ShardRouting, ClusterState)} to - * determine if a shard can be searched and {@link IndexRoutingTable#readyForSearch(ClusterState)} to determine if an index + * Determine if role searchable. Consumers should prefer {@link IndexRoutingTable#readyForSearch(ClusterState)} to determine if an index * is ready to be searched.
*/ public boolean isSearchable() { diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java index e5786b1b3449e..912326162e5c4 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTableTests.java @@ -9,7 +9,6 @@ package org.elasticsearch.cluster.routing; -import org.elasticsearch.TransportVersion; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.settings.Settings; @@ -20,7 +19,6 @@ import java.util.List; -import static org.elasticsearch.TransportVersions.FAST_REFRESH_RCO_2; import static org.elasticsearch.index.IndexSettings.INDEX_FAST_REFRESH_SETTING; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -29,21 +27,10 @@ public class IndexRoutingTableTests extends ESTestCase { public void testReadyForSearch() { - innerReadyForSearch(false, false); - innerReadyForSearch(false, true); - innerReadyForSearch(true, false); - innerReadyForSearch(true, true); - } - - // TODO: remove if (fastRefresh && beforeFastRefreshRCO) branches (ES-9563) - private void innerReadyForSearch(boolean fastRefresh, boolean beforeFastRefreshRCO) { Index index = new Index(randomIdentifier(), UUIDs.randomBase64UUID()); ClusterState clusterState = mock(ClusterState.class, Mockito.RETURNS_DEEP_STUBS); when(clusterState.metadata().index(any(Index.class)).getSettings()).thenReturn( - Settings.builder().put(INDEX_FAST_REFRESH_SETTING.getKey(), fastRefresh).build() - ); - when(clusterState.getMinTransportVersion()).thenReturn( - beforeFastRefreshRCO ? 
TransportVersion.fromId(FAST_REFRESH_RCO_2.id() - 1_00_0) : TransportVersion.current() + Settings.builder().put(INDEX_FAST_REFRESH_SETTING.getKey(), randomBoolean()).build() ); // 2 primaries that are search and index ShardId p1 = new ShardId(index, 0); @@ -63,11 +50,7 @@ private void innerReadyForSearch(boolean fastRefresh, boolean beforeFastRefreshR shardTable1 = new IndexShardRoutingTable(p1, List.of(getShard(p1, true, ShardRoutingState.STARTED, ShardRouting.Role.INDEX_ONLY))); shardTable2 = new IndexShardRoutingTable(p2, List.of(getShard(p2, true, ShardRoutingState.STARTED, ShardRouting.Role.INDEX_ONLY))); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh && beforeFastRefreshRCO) { - assertTrue(indexRoutingTable.readyForSearch(clusterState)); - } else { - assertFalse(indexRoutingTable.readyForSearch(clusterState)); - } + assertFalse(indexRoutingTable.readyForSearch(clusterState)); // 2 unassigned primaries that are index only shardTable1 = new IndexShardRoutingTable( @@ -99,11 +82,7 @@ private void innerReadyForSearch(boolean fastRefresh, boolean beforeFastRefreshR ) ); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh && beforeFastRefreshRCO) { - assertTrue(indexRoutingTable.readyForSearch(clusterState)); - } else { - assertFalse(indexRoutingTable.readyForSearch(clusterState)); - } + assertFalse(indexRoutingTable.readyForSearch(clusterState)); // 2 primaries that are index only with some replicas that are all available shardTable1 = new IndexShardRoutingTable( @@ -143,11 +122,7 @@ private void innerReadyForSearch(boolean fastRefresh, boolean beforeFastRefreshR ) ); indexRoutingTable = new IndexRoutingTable(index, new IndexShardRoutingTable[] { shardTable1, shardTable2 }); - if (fastRefresh && beforeFastRefreshRCO) { - assertFalse(indexRoutingTable.readyForSearch(clusterState)); - } else { - assertTrue(indexRoutingTable.readyForSearch(clusterState)); - } + assertTrue(indexRoutingTable.readyForSearch(clusterState)); // 2 primaries that are index only with at least 1 replica per primary that is available shardTable1 = new IndexShardRoutingTable( From d7737e73065dd30da18c409616d242ee7f30ff3e Mon Sep 17 00:00:00 2001 From: Shamil Date: Wed, 27 Nov 2024 13:17:34 +0300 Subject: [PATCH 033/139] [ML] Remove ChunkingOptions parameter (#117235) --- docs/changelog/117235.yaml | 5 +++++ .../inference/ChunkingOptions.java | 19 ------------------- .../inference/InferenceService.java | 6 ------ .../TestDenseInferenceServiceExtension.java | 2 -- .../mock/TestRerankingServiceExtension.java | 2 -- .../TestSparseInferenceServiceExtension.java | 2 -- ...stStreamingCompletionServiceExtension.java | 2 -- .../ShardBulkInferenceActionFilter.java | 12 +----------- .../inference/services/SenderService.java | 5 +---- .../AlibabaCloudSearchService.java | 2 -- .../amazonbedrock/AmazonBedrockService.java | 2 -- .../services/anthropic/AnthropicService.java | 2 -- .../azureaistudio/AzureAiStudioService.java | 2 -- .../azureopenai/AzureOpenAiService.java | 2 -- .../services/cohere/CohereService.java | 2 -- .../elastic/ElasticInferenceService.java | 2 -- .../ElasticsearchInternalService.java | 5 +---- .../googleaistudio/GoogleAiStudioService.java | 2 -- .../googlevertexai/GoogleVertexAiService.java | 2 -- .../huggingface/HuggingFaceService.java | 2 -- .../elser/HuggingFaceElserService.java | 2 -- .../ibmwatsonx/IbmWatsonxService.java | 2 -- 
.../services/mistral/MistralService.java | 2 -- .../services/openai/OpenAiService.java | 2 -- .../ShardBulkInferenceActionFilterTests.java | 4 ++-- .../services/SenderServiceTests.java | 2 -- .../AlibabaCloudSearchServiceTests.java | 13 +------------ .../AmazonBedrockServiceTests.java | 2 -- .../AzureAiStudioServiceTests.java | 2 -- .../azureopenai/AzureOpenAiServiceTests.java | 2 -- .../services/cohere/CohereServiceTests.java | 3 --- .../elastic/ElasticInferenceServiceTests.java | 2 -- .../ElasticsearchInternalServiceTests.java | 8 -------- .../GoogleAiStudioServiceTests.java | 12 +----------- .../HuggingFaceElserServiceTests.java | 2 -- .../huggingface/HuggingFaceServiceTests.java | 3 --- .../ibmwatsonx/IbmWatsonxServiceTests.java | 12 +----------- .../services/mistral/MistralServiceTests.java | 2 -- .../services/openai/OpenAiServiceTests.java | 2 -- 39 files changed, 13 insertions(+), 146 deletions(-) create mode 100644 docs/changelog/117235.yaml delete mode 100644 server/src/main/java/org/elasticsearch/inference/ChunkingOptions.java diff --git a/docs/changelog/117235.yaml b/docs/changelog/117235.yaml new file mode 100644 index 0000000000000..dbf0b4cc18388 --- /dev/null +++ b/docs/changelog/117235.yaml @@ -0,0 +1,5 @@ +pr: 117235 +summary: "Remove `ChunkingOptions` parameter" +area: Machine Learning +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/inference/ChunkingOptions.java b/server/src/main/java/org/elasticsearch/inference/ChunkingOptions.java deleted file mode 100644 index 5953e2cb44ebf..0000000000000 --- a/server/src/main/java/org/elasticsearch/inference/ChunkingOptions.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.inference; - -import org.elasticsearch.core.Nullable; - -public record ChunkingOptions(@Nullable Integer windowSize, @Nullable Integer span) { - - public boolean settingsArePresent() { - return windowSize != null || span != null; - } -} diff --git a/server/src/main/java/org/elasticsearch/inference/InferenceService.java b/server/src/main/java/org/elasticsearch/inference/InferenceService.java index c6e09f61befa0..4497254aad1f0 100644 --- a/server/src/main/java/org/elasticsearch/inference/InferenceService.java +++ b/server/src/main/java/org/elasticsearch/inference/InferenceService.java @@ -112,16 +112,11 @@ void infer( ); /** - * Chunk long text according to {@code chunkingOptions} or the - * model defaults if {@code chunkingOptions} contains unset - * values.
- * * @param model The model * @param query Inference query, mainly for re-ranking * @param input Inference input * @param taskSettings Settings in the request to override the model's defaults * @param inputType For search, ingest etc - * @param chunkingOptions The window and span options to apply * @param timeout The timeout for the request * @param listener Chunked Inference result listener */ @@ -131,7 +126,6 @@ void chunkedInfer( List<String> input, Map<String, Object> taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener<List<ChunkedInferenceServiceResults>> listener ); diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java index 2ddc4f6c3e2f6..ae11a02d312e2 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java @@ -18,7 +18,6 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceExtension; @@ -140,7 +139,6 @@ public void chunkedInfer( List<String> input, Map<String, Object> taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener<List<ChunkedInferenceServiceResults>> listener ) { diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java index 2075c1b1924bf..9320571572f0a 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestRerankingServiceExtension.java @@ -17,7 +17,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceExtension; @@ -128,7 +127,6 @@ public void chunkedInfer( List<String> input, Map<String, Object> taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener<List<ChunkedInferenceServiceResults>> listener ) { diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java index 3d6f0ce6eba05..fe0223cce0323 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java +++ 
b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java @@ -17,7 +17,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceExtension; @@ -131,7 +130,6 @@ public void chunkedInfer( List input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java index 595b92a6be66b..6d7983bc8cb53 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestStreamingCompletionServiceExtension.java @@ -19,7 +19,6 @@ import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceExtension; @@ -160,7 +159,6 @@ public void chunkedInfer( List input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index dd59230e575c4..d178e927aa65d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -30,7 +30,6 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceRegistry; import org.elasticsearch.inference.InputType; @@ -337,16 +336,7 @@ private void onFinish() { } }; inferenceProvider.service() - .chunkedInfer( - inferenceProvider.model(), - null, - inputs, - Map.of(), - InputType.INGEST, - new ChunkingOptions(null, null), - TimeValue.MAX_VALUE, - completionListener - ); + .chunkedInfer(inferenceProvider.model(), null, inputs, Map.of(), InputType.INGEST, TimeValue.MAX_VALUE, completionListener); } private FieldInferenceResponseAccumulator ensureResponseAccumulatorSlot(int id) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java index b8a99227cf517..8e2dac1ef9db2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java @@ -12,7 +12,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceService; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.InputType; @@ -76,13 +75,12 @@ public void chunkedInfer( List input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { init(); // a non-null query is not supported and is dropped by all providers - doChunkedInfer(model, new DocumentsOnlyInput(input), taskSettings, inputType, chunkingOptions, timeout, listener); + doChunkedInfer(model, new DocumentsOnlyInput(input), taskSettings, inputType, timeout, listener); } protected abstract void doInfer( @@ -99,7 +97,6 @@ protected abstract void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java index 6d77663f49ece..d7ac7caed7efc 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -289,7 +288,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java index a69b9d2c70405..48b3c3df03e11 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java @@ -17,7 +17,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import 
org.elasticsearch.inference.InferenceServiceConfiguration; @@ -114,7 +113,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java index eba7353f2b12e..b3d503de8e3eb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/anthropic/AnthropicService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -220,7 +219,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java index a2f8dc409585e..bba331fc0b5df 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioService.java @@ -16,7 +16,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -107,7 +106,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java index 2f3a935cdf010..16c94dfa9ad94 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -261,7 +260,6 @@ protected void doChunkedInfer( DocumentsOnlyInput 
inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java index cc67470686a02..b3d8b3b6efce3 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/cohere/CohereService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -260,7 +259,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java index e7ce5903163d4..1f08c06edaa91 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceService.java @@ -16,7 +16,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -109,7 +108,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index 6d124906d65bd..2ec3a9d629434 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -19,7 +19,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceResults; @@ -676,11 +675,10 @@ public void chunkedInfer( List input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { - chunkedInfer(model, 
null, input, taskSettings, inputType, chunkingOptions, timeout, listener); + chunkedInfer(model, null, input, taskSettings, inputType, timeout, listener); } @Override @@ -690,7 +688,6 @@ public void chunkedInfer( List input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java index 1c01ebbe2c0e4..57a8a66a3f3a6 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -315,7 +314,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java index 204593464a4ad..857d475499aae 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/googlevertexai/GoogleVertexAiService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -213,7 +212,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java index eede14a975234..51cca72f26054 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import 
org.elasticsearch.inference.InferenceServiceConfiguration; @@ -116,7 +115,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java index a2e22e24172cf..75920efa251f2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java @@ -16,7 +16,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -88,7 +87,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java index 592900d117b39..ea263fb77a2da 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -283,7 +282,6 @@ protected void doChunkedInfer( DocumentsOnlyInput input, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java index 2e810c357f8bd..fe0edb851902b 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/MistralService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -95,7 +94,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map 
taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java index 81ab87a461696..20ff1c617d21f 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java @@ -15,7 +15,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptySettingsConfiguration; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -264,7 +263,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java index 770e6e3cb9cf4..2416aeb62ff33 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilterTests.java @@ -291,7 +291,7 @@ private static ShardBulkInferenceActionFilter createFilter(ThreadPool threadPool StaticModel model = (StaticModel) invocationOnMock.getArguments()[0]; List inputs = (List) invocationOnMock.getArguments()[2]; ActionListener> listener = (ActionListener< - List>) invocationOnMock.getArguments()[7]; + List>) invocationOnMock.getArguments()[6]; Runnable runnable = () -> { List results = new ArrayList<>(); for (String input : inputs) { @@ -310,7 +310,7 @@ private static ShardBulkInferenceActionFilter createFilter(ThreadPool threadPool } return null; }; - doAnswer(chunkedInferAnswer).when(inferenceService).chunkedInfer(any(), any(), any(), any(), any(), any(), any(), any()); + doAnswer(chunkedInferAnswer).when(inferenceService).chunkedInfer(any(), any(), any(), any(), any(), any(), any()); Answer modelAnswer = invocationOnMock -> { String inferenceId = (String) invocationOnMock.getArguments()[0]; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java index d8402c28cec87..47a96bf78dda1 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/SenderServiceTests.java @@ -12,7 +12,6 @@ import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySettingsConfiguration; import 
org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -126,7 +125,6 @@ protected void doChunkedInfer( DocumentsOnlyInput inputs, Map taskSettings, InputType inputType, - ChunkingOptions chunkingOptions, TimeValue timeout, ActionListener> listener ) { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java index b6d29ccab9a49..a154ded395822 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/alibabacloudsearch/AlibabaCloudSearchServiceTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -401,7 +400,6 @@ public void testChunkedInfer_InvalidTaskType() throws IOException { List.of("foo", "bar"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); @@ -420,16 +418,7 @@ private void testChunkedInfer(TaskType taskType, ChunkingSettings chunkingSettin var model = createModelForTaskType(taskType, chunkingSettings); PlainActionFuture> listener = new PlainActionFuture<>(); - service.chunkedInfer( - model, - null, - input, - new HashMap<>(), - InputType.INGEST, - new ChunkingOptions(null, null), - InferenceAction.Request.DEFAULT_TIMEOUT, - listener - ); + service.chunkedInfer(model, null, input, new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, listener); var results = listener.actionGet(TIMEOUT); assertThat(results, instanceOf(List.class)); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java index e583e50075ee7..35b5642b7a60c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java @@ -20,7 +20,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -1559,7 +1558,6 @@ private void testChunkedInfer(AmazonBedrockEmbeddingsModel model) throws IOExcep List.of("abc", "xyz"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java index 76ea7a5bde5ca..8636ba8890e87 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java @@ -20,7 +20,6 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -1194,7 +1193,6 @@ private void testChunkedInfer(AzureAiStudioEmbeddingsModel model) throws IOExcep List.of("foo", "bar"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java index dc1970e26a3f8..b0c590e237a44 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java @@ -20,7 +20,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -1343,7 +1342,6 @@ private void testChunkedInfer(AzureOpenAiEmbeddingsModel model) throws IOExcepti List.of("foo", "bar"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java index 30f3b344a268c..259a32aa6254d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -1451,7 +1450,6 @@ private void testChunkedInfer(CohereEmbeddingsModel model) throws IOException { List.of("foo", "bar"), new HashMap<>(), InputType.UNSPECIFIED, - new 
ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); @@ -1543,7 +1541,6 @@ public void testChunkedInfer_BatchesCalls_Bytes() throws IOException { List.of("foo", "bar"), new HashMap<>(), InputType.UNSPECIFIED, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index 3767ac496d183..d3101099d06c7 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -17,7 +17,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.EmptySecretSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -461,7 +460,6 @@ public void testChunkedInfer_PassesThrough() throws IOException { List.of("input text"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index 9a4d0dda82238..306509ea60cfc 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -25,7 +25,6 @@ import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceResults; @@ -902,7 +901,6 @@ private void testChunkInfer_e5(ChunkingSettings chunkingSettings) throws Interru List.of("foo", "bar"), Map.of(), InputType.SEARCH, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, latchedListener ); @@ -973,7 +971,6 @@ private void testChunkInfer_Sparse(ChunkingSettings chunkingSettings) throws Int List.of("foo", "bar"), Map.of(), InputType.SEARCH, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, latchedListener ); @@ -1044,7 +1041,6 @@ private void testChunkInfer_Elser(ChunkingSettings chunkingSettings) throws Inte List.of("foo", "bar"), Map.of(), InputType.SEARCH, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, latchedListener ); @@ -1090,7 +1086,6 @@ public void testChunkInferSetsTokenization() { List.of("foo", "bar"), Map.of(), InputType.SEARCH, - null, InferenceAction.Request.DEFAULT_TIMEOUT, ActionListener.wrap(r -> fail("unexpected result"), e -> fail(e.getMessage())) ); @@ -1102,7 
+1097,6 @@ public void testChunkInferSetsTokenization() { List.of("foo", "bar"), Map.of(), InputType.SEARCH, - new ChunkingOptions(256, null), InferenceAction.Request.DEFAULT_TIMEOUT, ActionListener.wrap(r -> fail("unexpected result"), e -> fail(e.getMessage())) ); @@ -1155,7 +1149,6 @@ public void testChunkInfer_FailsBatch() throws InterruptedException { List.of("foo", "bar", "baz"), Map.of(), InputType.SEARCH, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, latchedListener ); @@ -1228,7 +1221,6 @@ public void testChunkingLargeDocument() throws InterruptedException { List.of(input), Map.of(), InputType.SEARCH, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, latchedListener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java index bc8020d8d88fe..375c583cce13a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -870,16 +869,7 @@ private void testChunkedInfer(String modelId, String apiKey, GoogleAiStudioEmbed webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); PlainActionFuture> listener = new PlainActionFuture<>(); - service.chunkedInfer( - model, - null, - input, - new HashMap<>(), - InputType.INGEST, - new ChunkingOptions(null, null), - InferenceAction.Request.DEFAULT_TIMEOUT, - listener - ); + service.chunkedInfer(model, null, input, new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, listener); var results = listener.actionGet(TIMEOUT); assertThat(results, hasSize(2)); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java index df82f1ed393bf..8f0e481213cdf 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java @@ -15,7 +15,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InputType; import org.elasticsearch.test.ESTestCase; @@ -98,7 +97,6 @@ public void testChunkedInfer_CallsInfer_Elser_ConvertsFloatResponse() throws IOE List.of("abc"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff 
--git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java index 0ff4bd805ea36..022cbecd1ea6a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -782,7 +781,6 @@ public void testChunkedInfer_CallsInfer_TextEmbedding_ConvertsFloatResponse() th List.of("abc"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); @@ -838,7 +836,6 @@ public void testChunkedInfer() throws IOException { List.of("abc"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index 1261e3834437b..5aa826f1d80fe 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; @@ -686,16 +685,7 @@ private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws getUrl(webServer) ); PlainActionFuture<List<ChunkedInferenceServiceResults>> listener = new PlainActionFuture<>(); - service.chunkedInfer( - model, - null, - input, - new HashMap<>(), - InputType.INGEST, - new ChunkingOptions(null, null), - InferenceAction.Request.DEFAULT_TIMEOUT, - listener - ); + service.chunkedInfer(model, null, input, new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, listener); var results = listener.actionGet(TIMEOUT); assertThat(results, hasSize(2)); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java index 71e9eac9a6635..73bf03fd43ec5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/MistralServiceTests.java @@ -19,7 +19,6 @@ import org.elasticsearch.core.Nullable; import
org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -673,7 +672,6 @@ public void testChunkedInfer(MistralEmbeddingsModel model) throws IOException { List.of("abc", "def"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 509a1f8a3d010..76b5d6fee2c59 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -20,7 +20,6 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; import org.elasticsearch.inference.ChunkingSettings; import org.elasticsearch.inference.InferenceServiceConfiguration; import org.elasticsearch.inference.InferenceServiceResults; @@ -1558,7 +1557,6 @@ private void testChunkedInfer(OpenAiEmbeddingsModel model) throws IOException { List.of("foo", "bar"), new HashMap<>(), InputType.INGEST, - new ChunkingOptions(null, null), InferenceAction.Request.DEFAULT_TIMEOUT, listener ); From 9799d0082b5ca39f598dd71beda2c7823f88444b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Wed, 27 Nov 2024 11:31:02 +0100 Subject: [PATCH 034/139] [Entitlements] Add support for instrumenting constructors (#117332) --- .../impl/InstrumentationServiceImpl.java | 9 +- .../impl/InstrumenterImpl.java | 11 +- .../impl/InstrumentationServiceImplTests.java | 56 ++++++++++ .../impl/InstrumenterTests.java | 103 ++++++++++++++++-- .../bridge/EntitlementChecker.java | 14 +++ .../EntitlementInitialization.java | 4 - .../api/ElasticsearchEntitlementChecker.java | 34 ++++++ .../runtime/policy/FlagEntitlementType.java | 3 +- .../runtime/policy/PolicyManager.java | 2 +- .../test/entitlements/EntitlementsIT.java | 7 ++ .../entitlements/EntitlementsCheckPlugin.java | 3 +- ...estEntitlementsCheckClassLoaderAction.java | 54 +++++++++ .../bootstrap/Elasticsearch.java | 4 +- 13 files changed, 281 insertions(+), 23 deletions(-) create mode 100644 qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/RestEntitlementsCheckClassLoaderAction.java diff --git a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java index a3bbb611f3e68..16bd04e60c5e3 100644 --- a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java +++ b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java @@ -91,15 +91,18 @@ static MethodKey parseCheckerMethodSignature(String checkerMethodName, Type[] ch String.format( Locale.ROOT, "Checker 
method %s has incorrect name format. " - + "It should be either check$$methodName (instance) or check$package_ClassName$methodName (static)", + + "It should be either check$$methodName (instance), check$package_ClassName$methodName (static) or " + + "check$package_ClassName$ (ctor)", checkerMethodName ) ); } - // No "className" (check$$methodName) -> method is static, and we'll get the class from the actual typed argument + // No "className" (check$$methodName) -> method is instance, and we'll get the class from the actual typed argument final boolean targetMethodIsStatic = classNameStartIndex + 1 != classNameEndIndex; - final String targetMethodName = checkerMethodName.substring(classNameEndIndex + 1); + // No "methodName" (check$package_ClassName$) -> method is ctor + final boolean targetMethodIsCtor = classNameEndIndex + 1 == checkerMethodName.length(); + final String targetMethodName = targetMethodIsCtor ? "<init>" : checkerMethodName.substring(classNameEndIndex + 1); final String targetClassName; final List targetParameterTypes; diff --git a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java index dc20b16400f3d..4d762dc997383 100644 --- a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java +++ b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java @@ -154,11 +154,12 @@ public MethodVisitor visitMethod(int access, String name, String descriptor, Str var mv = super.visitMethod(access, name, descriptor, signature, exceptions); if (isAnnotationPresent == false) { boolean isStatic = (access & ACC_STATIC) != 0; + boolean isCtor = "<init>".equals(name); var key = new MethodKey(className, name, Stream.of(Type.getArgumentTypes(descriptor)).map(Type::getInternalName).toList()); var instrumentationMethod = instrumentationMethods.get(key); if (instrumentationMethod != null) { // LOGGER.debug("Will instrument method {}", key); - return new EntitlementMethodVisitor(Opcodes.ASM9, mv, isStatic, descriptor, instrumentationMethod); + return new EntitlementMethodVisitor(Opcodes.ASM9, mv, isStatic, isCtor, descriptor, instrumentationMethod); } else { // LOGGER.trace("Will not instrument method {}", key); } @@ -187,6 +188,7 @@ private void addClassAnnotationIfNeeded() { class EntitlementMethodVisitor extends MethodVisitor { private final boolean instrumentedMethodIsStatic; + private final boolean instrumentedMethodIsCtor; private final String instrumentedMethodDescriptor; private final CheckerMethod instrumentationMethod; private boolean hasCallerSensitiveAnnotation = false; @@ -195,11 +197,13 @@ class EntitlementMethodVisitor extends MethodVisitor { int api, MethodVisitor methodVisitor, boolean instrumentedMethodIsStatic, + boolean instrumentedMethodIsCtor, String instrumentedMethodDescriptor, CheckerMethod instrumentationMethod ) { super(api, methodVisitor); this.instrumentedMethodIsStatic = instrumentedMethodIsStatic; + this.instrumentedMethodIsCtor = instrumentedMethodIsCtor; this.instrumentedMethodDescriptor = instrumentedMethodDescriptor; this.instrumentationMethod = instrumentationMethod; } @@ -260,14 +264,15 @@ private void pushCallerClass() { private void forwardIncomingArguments() { int localVarIndex = 0; - if (instrumentedMethodIsStatic == false) { + if (instrumentedMethodIsCtor) { + 
localVarIndex++; + } else if (instrumentedMethodIsStatic == false) { mv.visitVarInsn(Opcodes.ALOAD, localVarIndex++); } for (Type type : Type.getArgumentTypes(instrumentedMethodDescriptor)) { mv.visitVarInsn(type.getOpcode(Opcodes.ILOAD), localVarIndex); localVarIndex += type.getSize(); } - } private void invokeInstrumentationMethod() { diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java index c0ff5d59d3c72..5eee0bf27d1df 100644 --- a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java @@ -45,6 +45,12 @@ interface TestCheckerOverloads { void check$org_example_TestTargetClass$staticMethodWithOverload(Class clazz, int x, String y); } + interface TestCheckerCtors { + void check$org_example_TestTargetClass$(Class clazz); + + void check$org_example_TestTargetClass$(Class clazz, int x, String y); + } + public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundException { Map methodsMap = instrumentationService.lookupMethodsToInstrument(TestChecker.class.getName()); @@ -142,6 +148,38 @@ public void testInstrumentationTargetLookupWithOverloads() throws IOException, C ); } + public void testInstrumentationTargetLookupWithCtors() throws IOException, ClassNotFoundException { + Map methodsMap = instrumentationService.lookupMethodsToInstrument(TestCheckerCtors.class.getName()); + + assertThat(methodsMap, aMapWithSize(2)); + assertThat( + methodsMap, + hasEntry( + equalTo(new MethodKey("org/example/TestTargetClass", "<init>", List.of("I", "java/lang/String"))), + equalTo( + new CheckerMethod( + "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerCtors", + "check$org_example_TestTargetClass$", + List.of("Ljava/lang/Class;", "I", "Ljava/lang/String;") + ) + ) + ) + ); + assertThat( + methodsMap, + hasEntry( + equalTo(new MethodKey("org/example/TestTargetClass", "<init>", List.of())), + equalTo( + new CheckerMethod( + "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerCtors", + "check$org_example_TestTargetClass$", + List.of("Ljava/lang/Class;") + ) + ) + ) + ); + } + public void testParseCheckerMethodSignatureStaticMethod() { var methodKey = InstrumentationServiceImpl.parseCheckerMethodSignature( "check$org_example_TestClass$staticMethod", @@ -169,6 +207,24 @@ public void testParseCheckerMethodSignatureStaticMethodInnerClass() { assertThat(methodKey, equalTo(new MethodKey("org/example/TestClass$InnerClass", "staticMethod", List.of()))); } + public void testParseCheckerMethodSignatureCtor() { + var methodKey = InstrumentationServiceImpl.parseCheckerMethodSignature( + "check$org_example_TestClass$", + new Type[] { Type.getType(Class.class) } + ); + + assertThat(methodKey, equalTo(new MethodKey("org/example/TestClass", "<init>", List.of()))); + } + + public void testParseCheckerMethodSignatureCtorWithArgs() { + var methodKey = InstrumentationServiceImpl.parseCheckerMethodSignature( + "check$org_example_TestClass$", + new Type[] { Type.getType(Class.class), Type.getType("I"), Type.getType(String.class) } + ); + + assertThat(methodKey, equalTo(new MethodKey("org/example/TestClass", "<init>", 
List.of("I", "java/lang/String")))); + } + public void testParseCheckerMethodSignatureIncorrectName() { var exception = assertThrows( IllegalArgumentException.class, diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java index e3f5539999be5..40f0162d2eaa2 100644 --- a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java @@ -23,12 +23,15 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.net.URL; +import java.net.URLStreamHandlerFactory; import java.util.Arrays; +import java.util.List; import java.util.Map; import static org.elasticsearch.entitlement.instrumentation.impl.ASMUtils.bytecode2text; import static org.elasticsearch.entitlement.instrumentation.impl.InstrumenterImpl.getClassFileInfo; -import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.startsWith; import static org.objectweb.asm.Opcodes.INVOKESTATIC; @@ -72,6 +75,11 @@ public interface Testable { * They must not throw {@link TestException}. */ public static class ClassToInstrument implements Testable { + + public ClassToInstrument() {} + + public ClassToInstrument(int arg) {} + public static void systemExit(int status) { assertEquals(123, status); } @@ -91,12 +99,20 @@ public static void someStaticMethod(int arg, String anotherArg) {} static final class TestException extends RuntimeException {} + /** + * Interface to test specific, "synthetic" cases (e.g. overloaded methods, overloaded constructors, etc.) 
that + * may be not present/may be difficult to find or not clear in the production EntitlementChecker interface + */ public interface MockEntitlementChecker extends EntitlementChecker { void checkSomeStaticMethod(Class clazz, int arg); void checkSomeStaticMethod(Class clazz, int arg, String anotherArg); void checkSomeInstanceMethod(Class clazz, Testable that, int arg, String anotherArg); + + void checkCtor(Class clazz); + + void checkCtor(Class clazz, int arg); } /** @@ -118,6 +134,9 @@ public static class TestEntitlementChecker implements MockEntitlementChecker { int checkSomeStaticMethodIntStringCallCount = 0; int checkSomeInstanceMethodCallCount = 0; + int checkCtorCallCount = 0; + int checkCtorIntCallCount = 0; + @Override public void check$java_lang_System$exit(Class callerClass, int status) { checkSystemExitCallCount++; @@ -126,6 +145,27 @@ public static class TestEntitlementChecker implements MockEntitlementChecker { throwIfActive(); } + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls) {} + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent) {} + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory) {} + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent) {} + + @Override + public void check$java_net_URLClassLoader$( + Class callerClass, + String name, + URL[] urls, + ClassLoader parent, + URLStreamHandlerFactory factory + ) {} + private void throwIfActive() { if (isActive) { throw new TestException(); @@ -161,6 +201,21 @@ public void checkSomeInstanceMethod(Class callerClass, Testable that, int arg assertEquals("def", anotherArg); throwIfActive(); } + + @Override + public void checkCtor(Class callerClass) { + checkCtorCallCount++; + assertSame(InstrumenterTests.class, callerClass); + throwIfActive(); + } + + @Override + public void checkCtor(Class callerClass, int arg) { + checkCtorIntCallCount++; + assertSame(InstrumenterTests.class, callerClass); + assertEquals(123, arg); + throwIfActive(); + } } public void testClassIsInstrumented() throws Exception { @@ -225,7 +280,7 @@ public void testClassIsNotInstrumentedTwice() throws Exception { getTestEntitlementChecker().checkSystemExitCallCount = 0; assertThrows(TestException.class, () -> callStaticMethod(newClass, "systemExit", 123)); - assertThat(getTestEntitlementChecker().checkSystemExitCallCount, is(1)); + assertEquals(1, getTestEntitlementChecker().checkSystemExitCallCount); } public void testClassAllMethodsAreInstrumentedFirstPass() throws Exception { @@ -259,10 +314,10 @@ public void testClassAllMethodsAreInstrumentedFirstPass() throws Exception { getTestEntitlementChecker().checkSystemExitCallCount = 0; assertThrows(TestException.class, () -> callStaticMethod(newClass, "systemExit", 123)); - assertThat(getTestEntitlementChecker().checkSystemExitCallCount, is(1)); + assertEquals(1, getTestEntitlementChecker().checkSystemExitCallCount); assertThrows(TestException.class, () -> callStaticMethod(newClass, "anotherSystemExit", 123)); - assertThat(getTestEntitlementChecker().checkSystemExitCallCount, is(2)); + assertEquals(2, getTestEntitlementChecker().checkSystemExitCallCount); } public void testInstrumenterWorksWithOverloads() throws Exception { @@ -294,8 +349,8 @@ public void testInstrumenterWorksWithOverloads() throws Exception { assertThrows(TestException.class, () -> 
callStaticMethod(newClass, "someStaticMethod", 123)); assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123, "abc")); - assertThat(getTestEntitlementChecker().checkSomeStaticMethodIntCallCount, is(1)); - assertThat(getTestEntitlementChecker().checkSomeStaticMethodIntStringCallCount, is(1)); + assertEquals(1, getTestEntitlementChecker().checkSomeStaticMethodIntCallCount); + assertEquals(1, getTestEntitlementChecker().checkSomeStaticMethodIntStringCallCount); } public void testInstrumenterWorksWithInstanceMethodsAndOverloads() throws Exception { @@ -327,7 +382,41 @@ public void testInstrumenterWorksWithInstanceMethodsAndOverloads() throws Except testTargetClass.someMethod(123); assertThrows(TestException.class, () -> testTargetClass.someMethod(123, "def")); - assertThat(getTestEntitlementChecker().checkSomeInstanceMethodCallCount, is(1)); + assertEquals(1, getTestEntitlementChecker().checkSomeInstanceMethodCallCount); + } + + public void testInstrumenterWorksWithConstructors() throws Exception { + var classToInstrument = ClassToInstrument.class; + + Map methods = Map.of( + new MethodKey(classToInstrument.getName().replace('.', '/'), "<init>", List.of()), + getCheckerMethod(MockEntitlementChecker.class, "checkCtor", Class.class), + new MethodKey(classToInstrument.getName().replace('.', '/'), "<init>", List.of("I")), + getCheckerMethod(MockEntitlementChecker.class, "checkCtor", Class.class, int.class) + ); + + var instrumenter = createInstrumenter(methods); + + byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); + + if (logger.isTraceEnabled()) { + logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); + } + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW", + newBytecode + ); + + getTestEntitlementChecker().isActive = true; + + var ex = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor().newInstance()); + assertThat(ex.getCause(), instanceOf(TestException.class)); + var ex2 = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor(int.class).newInstance(123)); + assertThat(ex2.getCause(), instanceOf(TestException.class)); + + assertEquals(1, getTestEntitlementChecker().checkCtorCallCount); + assertEquals(1, getTestEntitlementChecker().checkCtorIntCallCount); } /** This test doesn't replace classToInstrument in-place but instead loads a separate diff --git a/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java b/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java index 167c93c90df5c..ad0f14bcf4478 100644 --- a/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java +++ b/libs/entitlement/bridge/src/main/java/org/elasticsearch/entitlement/bridge/EntitlementChecker.java @@ -9,6 +9,20 @@ package org.elasticsearch.entitlement.bridge; +import java.net.URL; +import java.net.URLStreamHandlerFactory; + public interface EntitlementChecker { void check$java_lang_System$exit(Class callerClass, int status); + + // URLClassLoader ctor + void check$java_net_URLClassLoader$(Class callerClass, URL[] urls); + + void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent); + + void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory); + + void check$java_net_URLClassLoader$(Class 
callerClass, String name, URL[] urls, ClassLoader parent); + + void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory); } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java index ca57e7b255bca..1f87e067e04f1 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java @@ -169,10 +169,6 @@ private static ElasticsearchEntitlementChecker initChecker() throws IOException } } - private static String internalName(Class c) { - return c.getName().replace('.', '/'); - } - private static final InstrumentationService INSTRUMENTER_FACTORY = new ProviderLocator<>( "entitlement", InstrumentationService.class, diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java index 790416ca5659a..28a080470c043 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/api/ElasticsearchEntitlementChecker.java @@ -13,6 +13,9 @@ import org.elasticsearch.entitlement.runtime.policy.FlagEntitlementType; import org.elasticsearch.entitlement.runtime.policy.PolicyManager; +import java.net.URL; +import java.net.URLStreamHandlerFactory; + /** * Implementation of the {@link EntitlementChecker} interface, providing additional * API methods for managing the checks. 
@@ -29,4 +32,35 @@ public ElasticsearchEntitlementChecker(PolicyManager policyManager) { public void check$java_lang_System$exit(Class callerClass, int status) { policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.SYSTEM_EXIT); } + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls) { + policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.CREATE_CLASSLOADER); + } + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent) { + policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.CREATE_CLASSLOADER); + } + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory) { + policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.CREATE_CLASSLOADER); + } + + @Override + public void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent) { + policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.CREATE_CLASSLOADER); + } + + @Override + public void check$java_net_URLClassLoader$( + Class callerClass, + String name, + URL[] urls, + ClassLoader parent, + URLStreamHandlerFactory factory + ) { + policyManager.checkFlagEntitlement(callerClass, FlagEntitlementType.CREATE_CLASSLOADER); + } } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FlagEntitlementType.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FlagEntitlementType.java index 60490baf41a10..d40235ee12166 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FlagEntitlementType.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/FlagEntitlementType.java @@ -10,5 +10,6 @@ package org.elasticsearch.entitlement.runtime.policy; public enum FlagEntitlementType { - SYSTEM_EXIT; + SYSTEM_EXIT, + CREATE_CLASSLOADER; } diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java index c06dc09758de5..b3fb5b75a1d5a 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyManager.java @@ -66,7 +66,7 @@ public void checkFlagEntitlement(Class callerClass, FlagEntitlementType type) // TODO: this will be checked using policies if (requestingModule.isNamed() && requestingModule.getName().equals("org.elasticsearch.server") - && type == FlagEntitlementType.SYSTEM_EXIT) { + && (type == FlagEntitlementType.SYSTEM_EXIT || type == FlagEntitlementType.CREATE_CLASSLOADER)) { logger.debug("Allowed: caller [{}] in module [{}] has entitlement [{}]", callerClass, requestingModule.getName(), type); return; } diff --git a/qa/entitlements/src/javaRestTest/java/org/elasticsearch/test/entitlements/EntitlementsIT.java b/qa/entitlements/src/javaRestTest/java/org/elasticsearch/test/entitlements/EntitlementsIT.java index 8b3629527f918..f8bae10492ba8 100644 --- a/qa/entitlements/src/javaRestTest/java/org/elasticsearch/test/entitlements/EntitlementsIT.java +++ b/qa/entitlements/src/javaRestTest/java/org/elasticsearch/test/entitlements/EntitlementsIT.java @@ -39,4 +39,11 @@ public void testCheckSystemExit() { ); assertThat(exception.getMessage(), containsString("not_entitled_exception")); } + + 
public void testCheckCreateURLClassLoader() { + var exception = expectThrows(IOException.class, () -> { + client().performRequest(new Request("GET", "/_entitlement/_check_create_url_classloader")); + }); + assertThat(exception.getMessage(), containsString("not_entitled_exception")); + } } diff --git a/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/EntitlementsCheckPlugin.java b/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/EntitlementsCheckPlugin.java index f3821c065eceb..94ad54c8c8ba8 100644 --- a/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/EntitlementsCheckPlugin.java +++ b/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/EntitlementsCheckPlugin.java @@ -22,7 +22,6 @@ import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; -import java.util.Collections; import java.util.List; import java.util.function.Predicate; import java.util.function.Supplier; @@ -42,6 +41,6 @@ public List getRestHandlers( final Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - return Collections.singletonList(new RestEntitlementsCheckSystemExitAction()); + return List.of(new RestEntitlementsCheckSystemExitAction(), new RestEntitlementsCheckClassLoaderAction()); } } diff --git a/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/RestEntitlementsCheckClassLoaderAction.java b/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/RestEntitlementsCheckClassLoaderAction.java new file mode 100644 index 0000000000000..0b5ca28739ed0 --- /dev/null +++ b/qa/entitlements/src/main/java/org/elasticsearch/test/entitlements/RestEntitlementsCheckClassLoaderAction.java @@ -0,0 +1,54 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.test.entitlements; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; + +import java.net.URL; +import java.net.URLClassLoader; +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.GET; + +public class RestEntitlementsCheckClassLoaderAction extends BaseRestHandler { + + private static final Logger logger = LogManager.getLogger(RestEntitlementsCheckClassLoaderAction.class); + + RestEntitlementsCheckClassLoaderAction() {} + + @Override + public List routes() { + return List.of(new Route(GET, "/_entitlement/_check_create_url_classloader")); + } + + @Override + public String getName() { + return "check_classloader_action"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + logger.info("RestEntitlementsCheckClassLoaderAction rest handler [{}]", request.path()); + if (request.path().equals("/_entitlement/_check_create_url_classloader")) { + return channel -> { + logger.info("Calling new URLClassLoader"); + try (var classLoader = new URLClassLoader("test", new URL[0], this.getClass().getClassLoader())) { + logger.info("Created URLClassLoader [{}]", classLoader.getName()); + } + }; + } + + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java index 95e5b00a2805f..b7774259bf289 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java @@ -210,7 +210,7 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { bootstrap.setPluginsLoader(pluginsLoader); if (Boolean.parseBoolean(System.getProperty("es.entitlements.enabled"))) { - logger.info("Bootstrapping Entitlements"); + LogManager.getLogger(Elasticsearch.class).info("Bootstrapping Entitlements"); List> pluginData = new ArrayList<>(); Set moduleBundles = PluginsUtils.getModuleBundles(nodeEnv.modulesFile()); @@ -225,7 +225,7 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { EntitlementBootstrap.bootstrap(pluginData, callerClass -> null); } else { // install SM after natives, shutdown hooks, etc. 
- logger.info("Bootstrapping java SecurityManager"); + LogManager.getLogger(Elasticsearch.class).info("Bootstrapping java SecurityManager"); org.elasticsearch.bootstrap.Security.configure( nodeEnv, SECURITY_FILTER_BAD_DEFAULTS_SETTING.get(args.nodeSettings()), From 9e610894143483ef234d447c420f08ccae73648d Mon Sep 17 00:00:00 2001 From: George Wallace Date: Wed, 27 Nov 2024 03:39:07 -0700 Subject: [PATCH 035/139] [DOCS] : swap allocation sections (#116518) Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> --- .../inference/service-elser.asciidoc | 61 ++++++++++--------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc index 262bdfbca002f..c1cc23c8c9adb 100644 --- a/docs/reference/inference/service-elser.asciidoc +++ b/docs/reference/inference/service-elser.asciidoc @@ -102,10 +102,39 @@ If `adaptive_allocations` is enabled, do not set this value, because it's automa Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. Must be a power of 2. Max allowed value is 32. +[discrete] +[[inference-example-elser-adaptive-allocation]] +==== ELSER service example with adaptive allocations + +When adaptive allocations are enabled, the number of allocations of the model is set automatically based on the current load. + +NOTE: For more information on how to optimize your ELSER endpoints, refer to {ml-docs}/ml-nlp-elser.html#elser-recommendations[the ELSER recommendations] section in the model documentation. +To learn more about model autoscaling, refer to the {ml-docs}/ml-nlp-auto-scale.html[trained model autoscaling] page. + +The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type and configure adaptive allocations. + +The request below will automatically download the ELSER model if it isn't already downloaded and then deploy the model. + +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/my-elser-model +{ + "service": "elser", + "service_settings": { + "adaptive_allocations": { + "enabled": true, + "min_number_of_allocations": 3, + "max_number_of_allocations": 10 + }, + "num_threads": 1 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] [discrete] [[inference-example-elser]] -==== ELSER service example +==== ELSER service example without adaptive allocations The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type. Refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation] for more info. @@ -151,32 +180,4 @@ You might see a 502 bad gateway error in the response when using the {kib} Conso This error usually just reflects a timeout, while the model downloads in the background. You can check the download progress in the {ml-app} UI. If using the Python client, you can set the `timeout` parameter to a higher value. 
-==== - -[discrete] -[[inference-example-elser-adaptive-allocation]] -==== Setting adaptive allocations for the ELSER service - -NOTE: For more information on how to optimize your ELSER endpoints, refer to {ml-docs}/ml-nlp-elser.html#elser-recommendations[the ELSER recommendations] section in the model documentation. -To learn more about model autoscaling, refer to the {ml-docs}/ml-nlp-auto-scale.html[trained model autoscaling] page. - -The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type and configure adaptive allocations. - -The request below will automatically download the ELSER model if it isn't already downloaded and then deploy the model. - -[source,console] ------------------------------------------------------------- -PUT _inference/sparse_embedding/my-elser-model -{ - "service": "elser", - "service_settings": { - "adaptive_allocations": { - "enabled": true, - "min_number_of_allocations": 3, - "max_number_of_allocations": 10 - }, - "num_threads": 1 - } -} ------------------------------------------------------------- -// TEST[skip:TBD] +==== \ No newline at end of file From 9946cea34dc711d6cc48fa49784e804f2421088d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 27 Nov 2024 11:52:23 +0100 Subject: [PATCH 036/139] Turn RankFeatureShardPhase into utility class (#117616) This class has no state, so there is no need to pass instances of it around; all its members can be static, which simplifies node construction and the code overall. --- .../elasticsearch/node/NodeConstruction.java | 1 - .../node/NodeServiceProvider.java | 3 --- .../org/elasticsearch/search/SearchModule.java | 5 ----- .../elasticsearch/search/SearchService.java | 7 ++----- .../rank/feature/RankFeatureShardPhase.java | 8 ++++---- .../rank/RankFeatureShardPhaseTests.java | 18 ++++++------------ .../snapshots/SnapshotResiliencyTests.java | 2 -- .../java/org/elasticsearch/node/MockNode.java | 4 ---- .../search/MockSearchService.java | 3 --- 9 files changed, 12 insertions(+), 39 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 2488ac894a612..795fe9e2771f0 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -1099,7 +1099,6 @@ private void construct( threadPool, scriptService, bigArrays, - searchModule.getRankFeatureShardPhase(), searchModule.getFetchPhase(), responseCollectorService, circuitBreakerService, diff --git a/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java b/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java index 8f2dc4e532ae0..a49958c476416 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java +++ b/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java @@ -35,7 +35,6 @@ import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.SearchService; import org.elasticsearch.search.fetch.FetchPhase; -import org.elasticsearch.search.rank.feature.RankFeatureShardPhase; import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.threadpool.ThreadPool; @@ -119,7 +118,6 @@ SearchService newSearchService( ThreadPool threadPool, ScriptService scriptService, BigArrays bigArrays, - RankFeatureShardPhase rankFeatureShardPhase, FetchPhase fetchPhase, ResponseCollectorService 
responseCollectorService, CircuitBreakerService circuitBreakerService, @@ -132,7 +130,6 @@ SearchService newSearchService( threadPool, scriptService, bigArrays, - rankFeatureShardPhase, fetchPhase, responseCollectorService, circuitBreakerService, diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index b8f50c6f9a62f..09e25350ad4fd 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -231,7 +231,6 @@ import org.elasticsearch.search.rank.RankDoc; import org.elasticsearch.search.rank.RankShardResult; import org.elasticsearch.search.rank.feature.RankFeatureDoc; -import org.elasticsearch.search.rank.feature.RankFeatureShardPhase; import org.elasticsearch.search.rank.feature.RankFeatureShardResult; import org.elasticsearch.search.rescore.QueryRescorerBuilder; import org.elasticsearch.search.rescore.RescorerBuilder; @@ -1299,10 +1298,6 @@ private void registerQuery(QuerySpec spec) { ); } - public RankFeatureShardPhase getRankFeatureShardPhase() { - return new RankFeatureShardPhase(); - } - public FetchPhase getFetchPhase() { return new FetchPhase(fetchSubPhases); } diff --git a/server/src/main/java/org/elasticsearch/search/SearchService.java b/server/src/main/java/org/elasticsearch/search/SearchService.java index a11c4013a9c9b..84bdc017ce970 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchService.java +++ b/server/src/main/java/org/elasticsearch/search/SearchService.java @@ -286,7 +286,6 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final BigArrays bigArrays; private final FetchPhase fetchPhase; - private final RankFeatureShardPhase rankFeatureShardPhase; private volatile Executor searchExecutor; private volatile boolean enableQueryPhaseParallelCollection; @@ -325,7 +324,6 @@ public SearchService( ThreadPool threadPool, ScriptService scriptService, BigArrays bigArrays, - RankFeatureShardPhase rankFeatureShardPhase, FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, @@ -339,7 +337,6 @@ public SearchService( this.scriptService = scriptService; this.responseCollectorService = responseCollectorService; this.bigArrays = bigArrays; - this.rankFeatureShardPhase = rankFeatureShardPhase; this.fetchPhase = fetchPhase; this.multiBucketConsumerService = new MultiBucketConsumerService( clusterService, @@ -751,9 +748,9 @@ public void executeRankFeaturePhase(RankFeatureShardRequest request, SearchShard searchContext.rankFeatureResult().incRef(); return searchContext.rankFeatureResult(); } - rankFeatureShardPhase.prepareForFetch(searchContext, request); + RankFeatureShardPhase.prepareForFetch(searchContext, request); fetchPhase.execute(searchContext, docIds, null); - rankFeatureShardPhase.processFetch(searchContext); + RankFeatureShardPhase.processFetch(searchContext); var rankFeatureResult = searchContext.rankFeatureResult(); rankFeatureResult.incRef(); return rankFeatureResult; diff --git a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java index 68463eecfb11d..e64bbe3c39d79 100644 --- a/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java +++ b/server/src/main/java/org/elasticsearch/search/rank/feature/RankFeatureShardPhase.java @@ -35,9 
+35,9 @@ public final class RankFeatureShardPhase { public static final RankFeatureShardResult EMPTY_RESULT = new RankFeatureShardResult(new RankFeatureDoc[0]); - public RankFeatureShardPhase() {} + private RankFeatureShardPhase() {} - public void prepareForFetch(SearchContext searchContext, RankFeatureShardRequest request) { + public static void prepareForFetch(SearchContext searchContext, RankFeatureShardRequest request) { if (logger.isTraceEnabled()) { logger.trace("{}", new SearchContextSourcePrinter(searchContext)); } @@ -58,7 +58,7 @@ public void prepareForFetch(SearchContext searchContext, RankFeatureShardRequest } } - public void processFetch(SearchContext searchContext) { + public static void processFetch(SearchContext searchContext) { if (logger.isTraceEnabled()) { logger.trace("{}", new SearchContextSourcePrinter(searchContext)); } @@ -92,7 +92,7 @@ public void processFetch(SearchContext searchContext) { } } - private RankFeaturePhaseRankShardContext shardContext(SearchContext searchContext) { + private static RankFeaturePhaseRankShardContext shardContext(SearchContext searchContext) { return searchContext.request().source() != null && searchContext.request().source().rankBuilder() != null ? searchContext.request().source().rankBuilder().buildRankFeaturePhaseShardContext() : null; diff --git a/server/src/test/java/org/elasticsearch/search/rank/RankFeatureShardPhaseTests.java b/server/src/test/java/org/elasticsearch/search/rank/RankFeatureShardPhaseTests.java index 6250d1679fda3..41febe77d54aa 100644 --- a/server/src/test/java/org/elasticsearch/search/rank/RankFeatureShardPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/rank/RankFeatureShardPhaseTests.java @@ -219,8 +219,7 @@ public void testPrepareForFetch() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new RankFeatureShardPhase(); - rankFeatureShardPhase.prepareForFetch(searchContext, request); + RankFeatureShardPhase.prepareForFetch(searchContext, request); assertNotNull(searchContext.fetchFieldsContext()); assertEquals(searchContext.fetchFieldsContext().fields().size(), 1); @@ -248,8 +247,7 @@ public void testPrepareForFetchNoRankFeatureContext() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new RankFeatureShardPhase(); - rankFeatureShardPhase.prepareForFetch(searchContext, request); + RankFeatureShardPhase.prepareForFetch(searchContext, request); assertNull(searchContext.fetchFieldsContext()); assertNull(searchContext.fetchResult()); @@ -274,8 +272,7 @@ public void testPrepareForFetchWhileTaskIsCancelled() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new RankFeatureShardPhase(); - expectThrows(TaskCancelledException.class, () -> rankFeatureShardPhase.prepareForFetch(searchContext, request)); + expectThrows(TaskCancelledException.class, () -> RankFeatureShardPhase.prepareForFetch(searchContext, request)); } } @@ -318,11 +315,10 @@ public void testProcessFetch() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new 
RankFeatureShardPhase(); // this is called as part of the search context initialization // with the ResultsType.RANK_FEATURE type searchContext.addRankFeatureResult(); - rankFeatureShardPhase.processFetch(searchContext); + RankFeatureShardPhase.processFetch(searchContext); assertNotNull(searchContext.rankFeatureResult()); assertNotNull(searchContext.rankFeatureResult().rankFeatureResult()); @@ -365,11 +361,10 @@ public void testProcessFetchEmptyHits() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new RankFeatureShardPhase(); // this is called as part of the search context initialization // with the ResultsType.RANK_FEATURE type searchContext.addRankFeatureResult(); - rankFeatureShardPhase.processFetch(searchContext); + RankFeatureShardPhase.processFetch(searchContext); assertNotNull(searchContext.rankFeatureResult()); assertNotNull(searchContext.rankFeatureResult().rankFeatureResult()); @@ -410,11 +405,10 @@ public void testProcessFetchWhileTaskIsCancelled() { RankFeatureShardRequest request = mock(RankFeatureShardRequest.class); when(request.getDocIds()).thenReturn(new int[] { 4, 9, numDocs - 1 }); - RankFeatureShardPhase rankFeatureShardPhase = new RankFeatureShardPhase(); // this is called as part of the search context initialization // with the ResultsType.RANK_FEATURE type searchContext.addRankFeatureResult(); - expectThrows(TaskCancelledException.class, () -> rankFeatureShardPhase.processFetch(searchContext)); + expectThrows(TaskCancelledException.class, () -> RankFeatureShardPhase.processFetch(searchContext)); } finally { if (searchHits != null) { searchHits.decRef(); diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index cf240550e809d..ceaf7979ed60e 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -180,7 +180,6 @@ import org.elasticsearch.search.SearchService; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.FetchPhase; -import org.elasticsearch.search.rank.feature.RankFeatureShardPhase; import org.elasticsearch.telemetry.TelemetryProvider; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.test.ClusterServiceUtils; @@ -2314,7 +2313,6 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { threadPool, scriptService, bigArrays, - new RankFeatureShardPhase(), new FetchPhase(Collections.emptyList()), responseCollectorService, new NoneCircuitBreakerService(), diff --git a/test/framework/src/main/java/org/elasticsearch/node/MockNode.java b/test/framework/src/main/java/org/elasticsearch/node/MockNode.java index 38c7b1eb04772..7fddeb8491c7f 100644 --- a/test/framework/src/main/java/org/elasticsearch/node/MockNode.java +++ b/test/framework/src/main/java/org/elasticsearch/node/MockNode.java @@ -42,7 +42,6 @@ import org.elasticsearch.search.MockSearchService; import org.elasticsearch.search.SearchService; import org.elasticsearch.search.fetch.FetchPhase; -import org.elasticsearch.search.rank.feature.RankFeatureShardPhase; import org.elasticsearch.tasks.TaskManager; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.test.ESTestCase; @@ -100,7 +99,6 @@ SearchService newSearchService( ThreadPool 
threadPool, ScriptService scriptService, BigArrays bigArrays, - RankFeatureShardPhase rankFeatureShardPhase, FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, @@ -115,7 +113,6 @@ SearchService newSearchService( threadPool, scriptService, bigArrays, - rankFeatureShardPhase, fetchPhase, responseCollectorService, circuitBreakerService, @@ -129,7 +126,6 @@ SearchService newSearchService( threadPool, scriptService, bigArrays, - rankFeatureShardPhase, fetchPhase, responseCollectorService, circuitBreakerService, diff --git a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java index 778a6e3106f49..179e1cd80cd4b 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java @@ -24,7 +24,6 @@ import org.elasticsearch.search.internal.ReaderContext; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.internal.ShardSearchRequest; -import org.elasticsearch.search.rank.feature.RankFeatureShardPhase; import org.elasticsearch.telemetry.tracing.Tracer; import org.elasticsearch.threadpool.ThreadPool; @@ -83,7 +82,6 @@ public MockSearchService( ThreadPool threadPool, ScriptService scriptService, BigArrays bigArrays, - RankFeatureShardPhase rankFeatureShardPhase, FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, @@ -96,7 +94,6 @@ public MockSearchService( threadPool, scriptService, bigArrays, - rankFeatureShardPhase, fetchPhase, responseCollectorService, circuitBreakerService, From 2ed318f21fc015609fa9b09d94115e3465c17615 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Wed, 27 Nov 2024 12:02:36 +0100 Subject: [PATCH 037/139] Remove unnecessary ResponseCollectorService dependency from SearchService (#117573) Small cleanup from a code-review earlier. SearchService isn't using this service itself; it's only used by the transport action, so that's where it should reside. Adjusted constructors accordingly and removed the getter. 
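To illustrate the intended shape, a minimal sketch only: the class body and constructor are heavily abbreviated, and the hypothetical rankShards/use methods merely stand in for the shard-ranking logic in getLocalShardsIterator (see the diff below for the real changes):

    import org.elasticsearch.node.ResponseCollectorService;

    class TransportSearchActionSketch {
        private final ResponseCollectorService responseCollectorService;

        // The collector is now injected where it is actually consumed ...
        TransportSearchActionSketch(ResponseCollectorService responseCollectorService) {
            this.responseCollectorService = responseCollectorService;
        }

        void rankShards() {
            // ... instead of being pulled out of SearchService via a getter,
            // which was the only reason SearchService held a reference to it.
            use(responseCollectorService);
        }

        private void use(ResponseCollectorService collector) {
            // stand-in for handing the collector to the shard iterator ranking
        }
    }
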
--- .../action/search/TransportSearchAction.java | 6 +++++- .../java/org/elasticsearch/node/NodeConstruction.java | 5 +++-- .../java/org/elasticsearch/node/NodeServiceProvider.java | 2 -- .../java/org/elasticsearch/search/SearchService.java | 9 --------- .../action/search/TransportSearchActionTests.java | 1 + .../elasticsearch/snapshots/SnapshotResiliencyTests.java | 2 +- .../src/main/java/org/elasticsearch/node/MockNode.java | 3 --- .../java/org/elasticsearch/search/MockSearchService.java | 3 --- 8 files changed, 10 insertions(+), 21 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 4bca7a562fc38..5d1fb46a53cef 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -69,6 +69,7 @@ import org.elasticsearch.indices.ExecutorSelector; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.node.ResponseCollectorService; import org.elasticsearch.rest.action.search.SearchResponseMetrics; import org.elasticsearch.search.SearchPhaseResult; import org.elasticsearch.search.SearchService; @@ -151,6 +152,7 @@ public class TransportSearchAction extends HandledTransportAction getLocalShardsIterator( concreteIndices, routingMap, searchRequest.preference(), - searchService.getResponseCollectorService(), + responseCollectorService, searchTransportService.getPendingSearchRequests() ); final Map originalIndices = buildPerIndexOriginalIndices( diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 795fe9e2771f0..aec8eb0c3ca67 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -921,6 +921,9 @@ private void construct( final IndexingPressure indexingLimits = new IndexingPressure(settings); final IncrementalBulkService incrementalBulkService = new IncrementalBulkService(client, indexingLimits); + final ResponseCollectorService responseCollectorService = new ResponseCollectorService(clusterService); + modules.bindToInstance(ResponseCollectorService.class, responseCollectorService); + ActionModule actionModule = new ActionModule( settings, clusterModule.getIndexNameExpressionResolver(), @@ -1003,7 +1006,6 @@ private void construct( taskManager, telemetryProvider.getTracer() ); - final ResponseCollectorService responseCollectorService = new ResponseCollectorService(clusterService); final SearchResponseMetrics searchResponseMetrics = new SearchResponseMetrics(telemetryProvider.getMeterRegistry()); final SearchTransportService searchTransportService = new SearchTransportService( transportService, @@ -1100,7 +1102,6 @@ private void construct( scriptService, bigArrays, searchModule.getFetchPhase(), - responseCollectorService, circuitBreakerService, systemIndices.getExecutorSelector(), telemetryProvider.getTracer() diff --git a/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java b/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java index a49958c476416..4b7524a7ac011 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java +++ b/server/src/main/java/org/elasticsearch/node/NodeServiceProvider.java @@ -119,7 +119,6 @@ 
SearchService newSearchService( ScriptService scriptService, BigArrays bigArrays, FetchPhase fetchPhase, - ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, ExecutorSelector executorSelector, Tracer tracer @@ -131,7 +130,6 @@ SearchService newSearchService( scriptService, bigArrays, fetchPhase, - responseCollectorService, circuitBreakerService, executorSelector, tracer diff --git a/server/src/main/java/org/elasticsearch/search/SearchService.java b/server/src/main/java/org/elasticsearch/search/SearchService.java index 84bdc017ce970..e17709ed78318 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchService.java +++ b/server/src/main/java/org/elasticsearch/search/SearchService.java @@ -73,7 +73,6 @@ import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason; -import org.elasticsearch.node.ResponseCollectorService; import org.elasticsearch.script.FieldScript; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.aggregations.AggregationInitializationException; @@ -279,8 +278,6 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final ScriptService scriptService; - private final ResponseCollectorService responseCollectorService; - private final ExecutorSelector executorSelector; private final BigArrays bigArrays; @@ -325,7 +322,6 @@ public SearchService( ScriptService scriptService, BigArrays bigArrays, FetchPhase fetchPhase, - ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, ExecutorSelector executorSelector, Tracer tracer @@ -335,7 +331,6 @@ public SearchService( this.clusterService = clusterService; this.indicesService = indicesService; this.scriptService = scriptService; - this.responseCollectorService = responseCollectorService; this.bigArrays = bigArrays; this.fetchPhase = fetchPhase; this.multiBucketConsumerService = new MultiBucketConsumerService( @@ -1535,10 +1530,6 @@ public int getOpenScrollContexts() { return openScrollContexts.get(); } - public ResponseCollectorService getResponseCollectorService() { - return this.responseCollectorService; - } - public long getDefaultKeepAliveInMillis() { return defaultKeepAlive; } diff --git a/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java b/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java index a9de118c6b859..367508283bb93 100644 --- a/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/TransportSearchActionTests.java @@ -1758,6 +1758,7 @@ protected void doWriteTo(StreamOutput out) throws IOException { new NoneCircuitBreakerService(), transportService, searchService, + null, new SearchTransportService(transportService, client, null), null, clusterService, diff --git a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java index ceaf7979ed60e..b7f33151961ea 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java @@ -2314,7 +2314,6 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { scriptService, bigArrays, new 
FetchPhase(Collections.emptyList()), - responseCollectorService, new NoneCircuitBreakerService(), EmptySystemIndices.INSTANCE.getExecutorSelector(), Tracer.NOOP @@ -2481,6 +2480,7 @@ public RecyclerBytesStreamOutput newNetworkBytesStream() { new NoneCircuitBreakerService(), transportService, searchService, + responseCollectorService, searchTransportService, searchPhaseController, clusterService, diff --git a/test/framework/src/main/java/org/elasticsearch/node/MockNode.java b/test/framework/src/main/java/org/elasticsearch/node/MockNode.java index 7fddeb8491c7f..d3bfacdf7691a 100644 --- a/test/framework/src/main/java/org/elasticsearch/node/MockNode.java +++ b/test/framework/src/main/java/org/elasticsearch/node/MockNode.java @@ -100,7 +100,6 @@ SearchService newSearchService( ScriptService scriptService, BigArrays bigArrays, FetchPhase fetchPhase, - ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, ExecutorSelector executorSelector, Tracer tracer @@ -114,7 +113,6 @@ SearchService newSearchService( scriptService, bigArrays, fetchPhase, - responseCollectorService, circuitBreakerService, executorSelector, tracer @@ -127,7 +125,6 @@ SearchService newSearchService( scriptService, bigArrays, fetchPhase, - responseCollectorService, circuitBreakerService, executorSelector, tracer diff --git a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java index 179e1cd80cd4b..79c61cacb58eb 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/elasticsearch/search/MockSearchService.java @@ -17,7 +17,6 @@ import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.node.MockNode; -import org.elasticsearch.node.ResponseCollectorService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.script.ScriptService; import org.elasticsearch.search.fetch.FetchPhase; @@ -83,7 +82,6 @@ public MockSearchService( ScriptService scriptService, BigArrays bigArrays, FetchPhase fetchPhase, - ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, ExecutorSelector executorSelector, Tracer tracer @@ -95,7 +93,6 @@ public MockSearchService( scriptService, bigArrays, fetchPhase, - responseCollectorService, circuitBreakerService, executorSelector, tracer From 560e0c5d0441a165f4588f8af869053b5202999f Mon Sep 17 00:00:00 2001 From: Bogdan Pintea Date: Wed, 27 Nov 2024 14:59:42 +0100 Subject: [PATCH 038/139] ESQL: fix COUNT filter pushdown (#117503) If a `COUNT` agg has a filter applied, that filter must also be pushed down to source. This currently does not happen, but the issue is masked by two factors: * a logical optimisation, `ExtractAggregateCommonFilter`, that extracts the filter out of the STATS entirely (and then pushes it to source from a `WHERE`); * the physical plan optimisation implementing the push down, `PushStatsToSource`, which currently only applies if there's just one agg function to push down. However, this fix still needs to be applied since: * the defect is still present in versions prior to the introduction of `ExtractAggregateCommonFilter`; * the defect might resurface when the restriction in `PushStatsToSource` is lifted. Fixes #115522.
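To make the intended behaviour concrete, here is a minimal sketch of the combination step (the class and method names are hypothetical; the actual rule below translates the filter with the planner's `TRANSLATOR_HANDLER` and merges it via `Queries.combine`):

```java
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;

final class FilteredCountPushdownSketch {

    /**
     * Sketch of the pushdown: the COUNT's per-aggregation filter is AND-ed
     * (bool MUST) with whatever query is already being pushed to source, so
     * the source-level count only sees documents matching both.
     */
    static QueryBuilder combine(QueryBuilder countFilter, QueryBuilder existingQuery) {
        BoolQueryBuilder combined = QueryBuilders.boolQuery().must(countFilter);
        if (existingQuery != null) {
            combined.must(existingQuery); // e.g. the exists check on the counted field
        }
        return combined;
    }
}
```

For example, for `STATS c = COUNT(hire_date) WHERE emp_no < 10042` the pushed-down query becomes a `bool` `must` of the `exists` check on `hire_date` and the translated `emp_no < 10042` filter, as asserted by the new `testSingleCountWithStatsFilter` test.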
--- docs/changelog/117503.yaml | 6 ++ .../src/main/resources/stats.csv-spec | 31 +++++++++ .../physical/local/PushStatsToSource.java | 11 ++++ .../LocalPhysicalPlanOptimizerTests.java | 66 +++++++++++++++++++ .../esql/optimizer/TestPlannerOptimizer.java | 10 +-- 5 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 docs/changelog/117503.yaml diff --git a/docs/changelog/117503.yaml b/docs/changelog/117503.yaml new file mode 100644 index 0000000000000..d48741262b581 --- /dev/null +++ b/docs/changelog/117503.yaml @@ -0,0 +1,6 @@ +pr: 117503 +summary: Fix COUNT filter pushdown +area: ES|QL +type: bug +issues: + - 115522 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index f95506ff1982f..d76f4c05d955f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -2688,6 +2688,16 @@ c1:long 41 ; +simpleCountOnFieldWithFilteringOnDifferentFieldAndNoGrouping +required_capability: per_agg_filtering +from employees +| stats c1 = count(hire_date) where emp_no < 10042 +; + +c1:long +41 +; + simpleCountOnStarWithFilteringAndNoGrouping required_capability: per_agg_filtering from employees @@ -2698,6 +2708,27 @@ c1:long 41 ; +simpleCountWithFilteringAndNoGroupingOnFieldWithNulls +required_capability: per_agg_filtering +from employees +| stats c1 = count(birth_date) where emp_no <= 10050 +; + +c1:long +40 +; + + +simpleCountWithFilteringAndNoGroupingOnFieldWithMultivalues +required_capability: per_agg_filtering +from employees +| stats c1 = count(job_positions) where emp_no <= 10003 +; + +c1:long +3 +; + commonFilterExtractionWithAliasing required_capability: per_agg_filtering from employees diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java index b0b86b43cd162..21bc360404628 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushStatsToSource.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.util.Queries; import org.elasticsearch.xpack.esql.core.util.StringUtils; import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -25,12 +26,15 @@ import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.AbstractPhysicalOperationProviders; +import org.elasticsearch.xpack.esql.planner.PlannerUtils; import java.util.ArrayList; import java.util.List; +import static java.util.Arrays.asList; import static java.util.Collections.emptyList; import static java.util.Collections.singletonList; +import static org.elasticsearch.xpack.esql.optimizer.rules.physical.local.PushFiltersToSource.canPushToSource; import static org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec.StatsType.COUNT; /** @@ -98,6 +102,13 @@ private 
Tuple, List> pushableStats( } } if (fieldName != null) { + if (count.hasFilter()) { + if (canPushToSource(count.filter()) == false) { + return null; // can't push down + } + var countFilter = PlannerUtils.TRANSLATOR_HANDLER.asQuery(count.filter()); + query = Queries.combine(Queries.Clause.MUST, asList(countFilter.asBuilder(), query)); + } return new EsStatsQueryExec.Stat(fieldName, COUNT, query); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java index 4612ccb425ba2..86f5c812737b1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LocalPhysicalPlanOptimizerTests.java @@ -42,7 +42,9 @@ import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.rules.logical.ExtractAggregateCommonFilter; import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; import org.elasticsearch.xpack.esql.plan.physical.EsStatsQueryExec; @@ -59,6 +61,7 @@ import org.elasticsearch.xpack.esql.planner.FilterTests; import org.elasticsearch.xpack.esql.plugin.QueryPragmas; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; +import org.elasticsearch.xpack.esql.rule.Rule; import org.elasticsearch.xpack.esql.session.Configuration; import org.elasticsearch.xpack.esql.stats.Metrics; import org.elasticsearch.xpack.esql.stats.SearchContextStats; @@ -67,9 +70,11 @@ import org.junit.Before; import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Function; import static java.util.Arrays.asList; import static org.elasticsearch.compute.aggregation.AggregatorMode.FINAL; @@ -380,6 +385,67 @@ public void testMultiCountAllWithFilter() { assertThat(plan.anyMatch(EsQueryExec.class::isInstance), is(true)); } + @SuppressWarnings("unchecked") + public void testSingleCountWithStatsFilter() { + // an optimizer that filters out the ExtractAggregateCommonFilter rule + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)) { + @Override + protected List> batches() { + var oldBatches = super.batches(); + List> newBatches = new ArrayList<>(oldBatches.size()); + for (var batch : oldBatches) { + List> rules = new ArrayList<>(List.of(batch.rules())); + rules.removeIf(r -> r instanceof ExtractAggregateCommonFilter); + newBatches.add(batch.with(rules.toArray(Rule[]::new))); + } + return newBatches; + } + }; + var analyzer = makeAnalyzer("mapping-default.json"); + var plannerOptimizer = new TestPlannerOptimizer(config, analyzer, logicalOptimizer); + var plan = plannerOptimizer.plan(""" + from test + | stats c = count(hire_date) where emp_no < 10042 + """, IS_SV_STATS); + + var limit = as(plan, LimitExec.class); + var agg = as(limit.child(), AggregateExec.class); + assertThat(agg.getMode(), is(FINAL)); + var exchange = as(agg.child(), ExchangeExec.class); + var esStatsQuery = as(exchange.child(), EsStatsQueryExec.class); + + 
Function compact = s -> s.replaceAll("\\s+", ""); + assertThat(compact.apply(esStatsQuery.query().toString()), is(compact.apply(""" + { + "bool": { + "must": [ + { + "exists": { + "field": "hire_date", + "boost": 1.0 + } + }, + { + "esql_single_value": { + "field": "emp_no", + "next": { + "range": { + "emp_no": { + "lt": 10042, + "boost": 1.0 + } + } + }, + "source": "emp_no < 10042@2:36" + } + } + ], + "boost": 1.0 + } + } + """))); + } + /** * Expecting * LimitExec[1000[INTEGER]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java index 595f0aaa91f0d..9fe479dbb8625 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/TestPlannerOptimizer.java @@ -9,7 +9,6 @@ import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.analysis.Analyzer; -import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; @@ -23,19 +22,22 @@ public class TestPlannerOptimizer { private final Analyzer analyzer; private final LogicalPlanOptimizer logicalOptimizer; private final PhysicalPlanOptimizer physicalPlanOptimizer; - private final EsqlFunctionRegistry functionRegistry; private final Mapper mapper; private final Configuration config; public TestPlannerOptimizer(Configuration config, Analyzer analyzer) { + this(config, analyzer, new LogicalPlanOptimizer(new LogicalOptimizerContext(config))); + } + + public TestPlannerOptimizer(Configuration config, Analyzer analyzer, LogicalPlanOptimizer logicalOptimizer) { this.analyzer = analyzer; this.config = config; + this.logicalOptimizer = logicalOptimizer; parser = new EsqlParser(); - logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(config)); physicalPlanOptimizer = new PhysicalPlanOptimizer(new PhysicalOptimizerContext(config)); - functionRegistry = new EsqlFunctionRegistry(); mapper = new Mapper(); + } public PhysicalPlan plan(String query) { From 66108ebeb9c3d526a8d61df73af2191a5282dc8d Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Wed, 27 Nov 2024 16:42:41 +0200 Subject: [PATCH 039/139] Search Queries in parallel - part 2 (#117141) Applies the `assertResponses` assertion optimization to the search IT tests, collapsing repeated `assertResponse` calls that share the same assertions. --- .../search/fields/SearchFieldsIT.java | 65 +-- .../functionscore/DecayFunctionScoreIT.java | 412 ++++++------------ .../search/functionscore/FunctionScoreIT.java | 89 ++-- .../search/nested/SimpleNestedIT.java | 135 ++---- .../search/query/QueryStringIT.java | 75 ++-- .../search/query/SearchQueryIT.java | 119 ++--- .../search/query/SimpleQueryStringIT.java | 86 ++-- .../routing/SearchReplicaSelectionIT.java | 17 +- 8 files changed, 340 insertions(+), 658 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fields/SearchFieldsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fields/SearchFieldsIT.java index 16e5e42e00c9f..0310af3685e3e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fields/SearchFieldsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fields/SearchFieldsIT.java @@ -65,6 +65,7 @@ import static 
org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; @@ -203,18 +204,16 @@ public void testStoredFields() throws Exception { assertThat(response.getHits().getAt(0).getFields().size(), equalTo(0)); assertThat(response.getHits().getAt(0).getFields().get("field2"), nullValue()); }); - assertResponse(prepareSearch().setQuery(matchAllQuery()).addStoredField("field3"), response -> { + assertResponses(response -> { assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); assertThat(response.getHits().getHits().length, equalTo(1)); assertThat(response.getHits().getAt(0).getFields().size(), equalTo(1)); assertThat(response.getHits().getAt(0).getFields().get("field3").getValue().toString(), equalTo("value3")); - }); - assertResponse(prepareSearch().setQuery(matchAllQuery()).addStoredField("*3"), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); - assertThat(response.getHits().getHits().length, equalTo(1)); - assertThat(response.getHits().getAt(0).getFields().size(), equalTo(1)); - assertThat(response.getHits().getAt(0).getFields().get("field3").getValue().toString(), equalTo("value3")); - }); + }, + prepareSearch().setQuery(matchAllQuery()).addStoredField("field3"), + prepareSearch().setQuery(matchAllQuery()).addStoredField("*3"), + prepareSearch().setQuery(matchAllQuery()).addStoredField("f*3") + ); assertResponse( prepareSearch().setQuery(matchAllQuery()).addStoredField("*3").addStoredField("field1").addStoredField("field2"), response -> { @@ -232,12 +231,6 @@ public void testStoredFields() throws Exception { assertThat(response.getHits().getAt(0).getFields().get("field3").getValue().toString(), equalTo("value3")); assertThat(response.getHits().getAt(0).getFields().get("field1").getValue().toString(), equalTo("value1")); }); - assertResponse(prepareSearch().setQuery(matchAllQuery()).addStoredField("f*3"), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); - assertThat(response.getHits().getHits().length, equalTo(1)); - assertThat(response.getHits().getAt(0).getFields().size(), equalTo(1)); - assertThat(response.getHits().getAt(0).getFields().get("field3").getValue().toString(), equalTo("value3")); - }); assertResponse(prepareSearch().setQuery(matchAllQuery()).addStoredField("*"), response -> { assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); assertThat(response.getHits().getHits().length, equalTo(1)); @@ -865,47 +858,7 @@ public void testDocValueFields() throws Exception { if (randomBoolean()) { builder.addDocValueField("*_field"); } - assertResponse(builder, response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); - assertThat(response.getHits().getHits().length, equalTo(1)); - Set fields = new HashSet<>(response.getHits().getAt(0).getFields().keySet()); - assertThat( - fields, - equalTo( - newHashSet( - "byte_field", - "short_field", - "integer_field", - "long_field", - "float_field", - "double_field", - "date_field", - "boolean_field", - "text_field", - "keyword_field", - "binary_field", - "ip_field" - ) - ) - ); - - 
assertThat(response.getHits().getAt(0).getFields().get("byte_field").getValues(), equalTo(List.of(1L))); - assertThat(response.getHits().getAt(0).getFields().get("short_field").getValues(), equalTo(List.of(2L))); - assertThat(response.getHits().getAt(0).getFields().get("integer_field").getValues(), equalTo(List.of(3L))); - assertThat(response.getHits().getAt(0).getFields().get("long_field").getValues(), equalTo(List.of(4L))); - assertThat(response.getHits().getAt(0).getFields().get("float_field").getValues(), equalTo(List.of(5.0))); - assertThat(response.getHits().getAt(0).getFields().get("double_field").getValues(), equalTo(List.of(6.0d))); - assertThat( - response.getHits().getAt(0).getFields().get("date_field").getValue(), - equalTo(DateFormatter.forPattern("date_optional_time").format(date)) - ); - assertThat(response.getHits().getAt(0).getFields().get("boolean_field").getValues(), equalTo(List.of(true))); - assertThat(response.getHits().getAt(0).getFields().get("text_field").getValues(), equalTo(List.of("foo"))); - assertThat(response.getHits().getAt(0).getFields().get("keyword_field").getValues(), equalTo(List.of("foo"))); - assertThat(response.getHits().getAt(0).getFields().get("binary_field").getValues(), equalTo(List.of("KmQ="))); - assertThat(response.getHits().getAt(0).getFields().get("ip_field").getValues(), equalTo(List.of("::1"))); - }); - assertResponse(prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field"), response -> { + assertResponses(response -> { assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); assertThat(response.getHits().getHits().length, equalTo(1)); Set fields = new HashSet<>(response.getHits().getAt(0).getFields().keySet()); @@ -944,7 +897,7 @@ public void testDocValueFields() throws Exception { assertThat(response.getHits().getAt(0).getFields().get("keyword_field").getValues(), equalTo(List.of("foo"))); assertThat(response.getHits().getAt(0).getFields().get("binary_field").getValues(), equalTo(List.of("KmQ="))); assertThat(response.getHits().getAt(0).getFields().get("ip_field").getValues(), equalTo(List.of("::1"))); - }); + }, builder, prepareSearch().setQuery(matchAllQuery()).addDocValueField("*field")); assertResponse( prepareSearch().setQuery(matchAllQuery()) .addDocValueField("byte_field", "#.0") diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/DecayFunctionScoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/DecayFunctionScoreIT.java index 76384253282de..9988624f6a677 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/DecayFunctionScoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/DecayFunctionScoreIT.java @@ -51,6 +51,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.anyOf; @@ -135,64 +136,21 @@ public void testDistanceScoreGeoLinGaussExp() throws Exception { lonlat.add(20f); lonlat.add(11f); - assertHitCount( - client().search( - new SearchRequest(new String[] 
{}).searchType(SearchType.QUERY_THEN_FETCH).source(searchSource().query(baseQuery)) - ), - (numDummyDocs + 2) - ); - - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source(searchSource().query(functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km")))) - ), - response -> { - assertHitCount(response, (numDummyDocs + 2)); - assertThat(response.getHits().getAt(0).getId(), equalTo("1")); - assertThat(response.getHits().getAt(1).getId(), equalTo("2")); - } - ); - // Test Exp - - assertHitCount( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH).source(searchSource().query(baseQuery)) - ), - (numDummyDocs + 2) - ); - - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source(searchSource().query(functionScoreQuery(baseQuery, linearDecayFunction("loc", lonlat, "1000km")))) - ), - response -> { - assertHitCount(response, (numDummyDocs + 2)); - assertThat(response.getHits().getAt(0).getId(), equalTo("1")); - assertThat(response.getHits().getAt(1).getId(), equalTo("2")); - } - ); - - // Test Lin - - assertHitCount( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH).source(searchSource().query(baseQuery)) - ), - (numDummyDocs + 2) - ); - - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source(searchSource().query(functionScoreQuery(baseQuery, exponentialDecayFunction("loc", lonlat, "1000km")))) - ), - response -> { - assertHitCount(response, (numDummyDocs + 2)); - assertThat(response.getHits().getAt(0).getId(), equalTo("1")); - assertThat(response.getHits().getAt(1).getId(), equalTo("2")); - } + assertResponses(response -> { + assertHitCount(response, (numDummyDocs + 2)); + assertThat(response.getHits().getAt(0).getId(), equalTo("1")); + assertThat(response.getHits().getAt(1).getId(), equalTo("2")); + assertHitCount( + (numDummyDocs + 2), + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH).setSource(searchSource().query(baseQuery)) + ); + }, + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource(searchSource().query(functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km")))), + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource(searchSource().query(functionScoreQuery(baseQuery, linearDecayFunction("loc", lonlat, "1000km")))), + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource(searchSource().query(functionScoreQuery(baseQuery, exponentialDecayFunction("loc", lonlat, "1000km")))) ); } @@ -234,77 +192,46 @@ public void testDistanceScoreGeoLinGaussExpWithOffset() throws Exception { indexRandom(true, indexBuilders); - // Test Gauss - - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().size(numDummyDocs + 2) - .query( - functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("num", 1.0, 5.0, 1.0)).boostMode( - CombineFunction.REPLACE - ) - ) - ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (numDummyDocs + 2))); - assertThat(sh.getAt(0).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getScore(), equalTo(sh.getAt(0).getScore())); - for (int i = 0; i < 
numDummyDocs; i++) { - assertThat(sh.getAt(i + 2).getId(), equalTo(Integer.toString(i + 3))); - } + assertResponses(response -> { + SearchHits sh = response.getHits(); + assertThat(sh.getTotalHits().value(), equalTo((long) (numDummyDocs + 2))); + assertThat(sh.getAt(0).getId(), anyOf(equalTo("1"), equalTo("2"))); + assertThat(sh.getAt(1).getId(), anyOf(equalTo("1"), equalTo("2"))); + assertThat(sh.getAt(1).getScore(), equalTo(sh.getAt(0).getScore())); + for (int i = 0; i < numDummyDocs; i++) { + assertThat(sh.getAt(i + 2).getId(), equalTo(Integer.toString(i + 3))); } - ); - - // Test Exp - - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().size(numDummyDocs + 2) - .query( - functionScoreQuery(termQuery("test", "value"), exponentialDecayFunction("num", 1.0, 5.0, 1.0)).boostMode( - CombineFunction.REPLACE - ) + }, + // Test Gauss + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().size(numDummyDocs + 2) + .query( + functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("num", 1.0, 5.0, 1.0)).boostMode( + CombineFunction.REPLACE ) - ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (numDummyDocs + 2))); - assertThat(sh.getAt(0).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getScore(), equalTo(sh.getAt(0).getScore())); - for (int i = 0; i < numDummyDocs; i++) { - assertThat(sh.getAt(i + 2).getId(), equalTo(Integer.toString(i + 3))); - } - } - ); - // Test Lin - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().size(numDummyDocs + 2) - .query( - functionScoreQuery(termQuery("test", "value"), linearDecayFunction("num", 1.0, 20.0, 1.0)).boostMode( - CombineFunction.REPLACE - ) + ) + ), + // Test Exp + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().size(numDummyDocs + 2) + .query( + functionScoreQuery(termQuery("test", "value"), exponentialDecayFunction("num", 1.0, 5.0, 1.0)).boostMode( + CombineFunction.REPLACE ) - ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (numDummyDocs + 2))); - assertThat(sh.getAt(0).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getId(), anyOf(equalTo("1"), equalTo("2"))); - assertThat(sh.getAt(1).getScore(), equalTo(sh.getAt(0).getScore())); - } + ) + ), + // Test Lin + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().size(numDummyDocs + 2) + .query( + functionScoreQuery(termQuery("test", "value"), linearDecayFunction("num", 1.0, 20.0, 1.0)).boostMode( + CombineFunction.REPLACE + ) + ) + ) ); } @@ -355,54 +282,38 @@ public void testBoostModeSettingWorks() throws Exception { ); indexRandom(true, false, indexBuilders); // force no dummy docs - // Test Gauss List lonlat = new ArrayList<>(); lonlat.add(20f); lonlat.add(11f); - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("loc", lonlat, "1000km")).boostMode( - CombineFunction.MULTIPLY - ) + assertResponses(response -> { + SearchHits sh = response.getHits(); + assertThat(sh.getTotalHits().value(), 
equalTo((long) (2))); + assertThat(sh.getAt(0).getId(), equalTo("1")); + assertThat(sh.getAt(1).getId(), equalTo("2")); + }, + // Test Gauss + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("loc", lonlat, "1000km")).boostMode( + CombineFunction.MULTIPLY ) ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (2))); - assertThat(sh.getAt(0).getId(), equalTo("1")); - assertThat(sh.getAt(1).getId(), equalTo("2")); - } - ); - // Test Exp - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source(searchSource().query(termQuery("test", "value"))) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (2))); - assertThat(sh.getAt(0).getId(), equalTo("1")); - assertThat(sh.getAt(1).getId(), equalTo("2")); - } + ), + // Test Exp + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH).setSource(searchSource().query(termQuery("test", "value"))) ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("loc", lonlat, "1000km")).boostMode( - CombineFunction.REPLACE - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(termQuery("test", "value"), gaussDecayFunction("loc", lonlat, "1000km")).boostMode( + CombineFunction.REPLACE ) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (2))); @@ -410,7 +321,6 @@ public void testBoostModeSettingWorks() throws Exception { assertThat(sh.getAt(1).getId(), equalTo("1")); } ); - } public void testParseGeoPoint() throws Exception { @@ -447,44 +357,30 @@ public void testParseGeoPoint() throws Exception { constantScoreQuery(termQuery("test", "value")), ScoreFunctionBuilders.weightFactorFunction(randomIntBetween(1, 10)) ); - GeoPoint point = new GeoPoint(20, 11); - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("loc", point, "1000km")).boostMode( - CombineFunction.REPLACE - ) + + assertResponses(response -> { + SearchHits sh = response.getHits(); + assertThat(sh.getTotalHits().value(), equalTo((long) (1))); + assertThat(sh.getAt(0).getId(), equalTo("1")); + assertThat((double) sh.getAt(0).getScore(), closeTo(1.0f, 1.e-5)); + }, + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("loc", new GeoPoint(20, 11), "1000km")).boostMode( + CombineFunction.REPLACE ) ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (1))); - assertThat(sh.getAt(0).getId(), equalTo("1")); - assertThat((double) sh.getAt(0).getScore(), closeTo(1.0, 1.e-5)); - } - ); - // this is equivalent to new GeoPoint(20, 11); just flipped so scores must be same - float[] coords = { 11, 20 }; - assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, 
gaussDecayFunction("loc", coords, "1000km")).boostMode( - CombineFunction.REPLACE - ) + ), + // new float[] {11,20} is equivalent to new GeoPoint(20, 11); just flipped so scores must be same + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("loc", new float[] { 11, 20 }, "1000km")).boostMode( + CombineFunction.REPLACE ) ) - ), - response -> { - SearchHits sh = response.getHits(); - assertThat(sh.getTotalHits().value(), equalTo((long) (1))); - assertThat(sh.getAt(0).getId(), equalTo("1")); - assertThat((double) sh.getAt(0).getScore(), closeTo(1.0f, 1.e-5)); - } + ) ); } @@ -516,16 +412,14 @@ public void testCombineModes() throws Exception { ); // decay score should return 0.5 for this function and baseQuery should return 2.0f as it's score assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.MULTIPLY - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( + CombineFunction.MULTIPLY ) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (1))); @@ -534,16 +428,14 @@ public void testCombineModes() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.REPLACE - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( + CombineFunction.REPLACE ) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (1))); @@ -552,16 +444,12 @@ public void testCombineModes() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.SUM - ) - ) - ) - ), + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + (searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode(CombineFunction.SUM) + )) + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (1))); @@ -576,16 +464,12 @@ public void testCombineModes() throws Exception { ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.AVG - ) - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode(CombineFunction.AVG) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), 
equalTo((long) (1))); @@ -594,16 +478,12 @@ public void testCombineModes() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.MIN - ) - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode(CombineFunction.MIN) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (1))); @@ -612,16 +492,12 @@ public void testCombineModes() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).searchType(SearchType.QUERY_THEN_FETCH) - .source( - searchSource().query( - functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode( - CombineFunction.MAX - ) - ) + prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) + .setSource( + searchSource().query( + functionScoreQuery(baseQueryBuilder, gaussDecayFunction("num", 0.0, 1.0, null, 0.5)).boostMode(CombineFunction.MAX) ) - ), + ), response -> { SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (1))); @@ -1128,7 +1004,7 @@ public void testMultiFieldOptions() throws Exception { indexRandom(true, doc1, doc2); - assertResponse(client().search(new SearchRequest(new String[] {}).source(searchSource().query(baseQuery))), response -> { + assertResponse(prepareSearch().setSource(searchSource().query(baseQuery)), response -> { assertSearchHits(response, "1", "2"); SearchHits sh = response.getHits(); assertThat(sh.getTotalHits().value(), equalTo((long) (2))); @@ -1138,11 +1014,9 @@ public void testMultiFieldOptions() throws Exception { lonlat.add(20f); lonlat.add(10f); assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query( - functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km").setMultiValueMode(MultiValueMode.MIN)) - ) + prepareSearch().setSource( + searchSource().query( + functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km").setMultiValueMode(MultiValueMode.MIN)) ) ), response -> { @@ -1154,11 +1028,9 @@ public void testMultiFieldOptions() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query( - functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km").setMultiValueMode(MultiValueMode.MAX)) - ) + prepareSearch().setSource( + searchSource().query( + functionScoreQuery(baseQuery, gaussDecayFunction("loc", lonlat, "1000km").setMultiValueMode(MultiValueMode.MAX)) ) ), response -> { @@ -1180,11 +1052,9 @@ public void testMultiFieldOptions() throws Exception { indexRandom(true, doc1, doc2); assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query( - functionScoreQuery(baseQuery, linearDecayFunction("num", "0", "10").setMultiValueMode(MultiValueMode.SUM)) - ) + prepareSearch().setSource( + searchSource().query( + functionScoreQuery(baseQuery, linearDecayFunction("num", "0", "10").setMultiValueMode(MultiValueMode.SUM)) ) ), response -> { @@ -1197,11 +1067,9 @@ public void testMultiFieldOptions() throws Exception { } ); assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - 
searchSource().query( - functionScoreQuery(baseQuery, linearDecayFunction("num", "0", "10").setMultiValueMode(MultiValueMode.AVG)) - ) + prepareSearch().setSource( + searchSource().query( + functionScoreQuery(baseQuery, linearDecayFunction("num", "0", "10").setMultiValueMode(MultiValueMode.AVG)) ) ), response -> { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java index a38c9dc916056..e90740c042de3 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/FunctionScoreIT.java @@ -43,7 +43,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; @@ -137,41 +137,25 @@ public void testMinScoreFunctionScoreBasic() throws Exception { ensureYellow(); Script script = new Script(ScriptType.INLINE, CustomScriptPlugin.NAME, "doc['random_score']", Collections.emptyMap()); - assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query(functionScoreQuery(scriptFunction(script)).setMinScore(minScore)) - ) - ), - response -> { - if (score < minScore) { - assertThat(response.getHits().getTotalHits().value(), is(0L)); - } else { - assertThat(response.getHits().getTotalHits().value(), is(1L)); - } - } - ); - assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query( - functionScoreQuery( - new MatchAllQueryBuilder(), - new FilterFunctionBuilder[] { - new FilterFunctionBuilder(scriptFunction(script)), - new FilterFunctionBuilder(scriptFunction(script)) } - ).scoreMode(FunctionScoreQuery.ScoreMode.AVG).setMinScore(minScore) - ) - ) - ), - response -> { - if (score < minScore) { - assertThat(response.getHits().getTotalHits().value(), is(0L)); - } else { - assertThat(response.getHits().getTotalHits().value(), is(1L)); - } + assertResponses(response -> { + if (score < minScore) { + assertThat(response.getHits().getTotalHits().value(), is(0L)); + } else { + assertThat(response.getHits().getTotalHits().value(), is(1L)); } + }, + prepareSearch().setSource(searchSource().query(functionScoreQuery(scriptFunction(script)).setMinScore(minScore))), + prepareSearch().setSource( + searchSource().query( + functionScoreQuery( + new MatchAllQueryBuilder(), + new FilterFunctionBuilder[] { + new FilterFunctionBuilder(scriptFunction(script)), + new FilterFunctionBuilder(scriptFunction(script)) } + ).scoreMode(FunctionScoreQuery.ScoreMode.AVG).setMinScore(minScore) + ) + ) ); } @@ -195,31 +179,20 @@ public void testMinScoreFunctionScoreManyDocsAndRandomMinScore() throws IOExcept final int finalNumMatchingDocs = numMatchingDocs; - assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query(functionScoreQuery(scriptFunction(script)).setMinScore(minScore)).size(numDocs) - ) - ), - 
response -> assertMinScoreSearchResponses(numDocs, response, finalNumMatchingDocs) - ); - - assertResponse( - client().search( - new SearchRequest(new String[] {}).source( - searchSource().query( - functionScoreQuery( - new MatchAllQueryBuilder(), - new FilterFunctionBuilder[] { - new FilterFunctionBuilder(scriptFunction(script)), - new FilterFunctionBuilder(scriptFunction(script)) } - ).scoreMode(FunctionScoreQuery.ScoreMode.AVG).setMinScore(minScore) - ).size(numDocs) - ) - ), - response -> assertMinScoreSearchResponses(numDocs, response, finalNumMatchingDocs) + assertResponses( + response -> assertMinScoreSearchResponses(numDocs, response, finalNumMatchingDocs), + prepareSearch().setSource(searchSource().query(functionScoreQuery(scriptFunction(script)).setMinScore(minScore)).size(numDocs)), + prepareSearch().setSource( + searchSource().query( + functionScoreQuery( + new MatchAllQueryBuilder(), + new FilterFunctionBuilder[] { + new FilterFunctionBuilder(scriptFunction(script)), + new FilterFunctionBuilder(scriptFunction(script)) } + ).scoreMode(FunctionScoreQuery.ScoreMode.AVG).setMinScore(minScore) + ).size(numDocs) + ) ); - } protected void assertMinScoreSearchResponses(int numDocs, SearchResponse searchResponse, int numMatchingDocs) { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/nested/SimpleNestedIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/nested/SimpleNestedIT.java index 4688201c66201..8225386ed02d2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/nested/SimpleNestedIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/nested/SimpleNestedIT.java @@ -44,6 +44,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -1149,39 +1150,54 @@ public void testSortNestedWithNestedFilter() throws Exception { // With nested filter NestedSortBuilder nestedSort = new NestedSortBuilder("parent.child"); nestedSort.setFilter(QueryBuilders.termQuery("parent.child.filter", true)); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 3); + assertThat(response.getHits().getHits().length, equalTo(3)); + assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); + assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); + assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); + assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); + assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); + assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); + }, prepareSearch().setQuery(matchAllQuery()) .addSort(SortBuilders.fieldSort("parent.child.child_values").setNestedSort(nestedSort).order(SortOrder.ASC)), - response -> { - assertHitCount(response, 3); - assertThat(response.getHits().getHits().length, equalTo(3)); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); - 
assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); - assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); - assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); - } - ); - // Nested path should be automatically detected, expect same results as above search request - assertResponse( prepareSearch().setQuery(matchAllQuery()) - .addSort(SortBuilders.fieldSort("parent.child.child_values").setNestedSort(nestedSort).order(SortOrder.ASC)), - response -> { - assertHitCount(response, 3); - assertThat(response.getHits().getHits().length, equalTo(3)); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); - assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); - assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); - assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); - } + .addSort( + SortBuilders.fieldSort("parent.child.child_obj.value") + .setNestedSort( + new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) + ) + .order(SortOrder.ASC) + ), + // Sort mode: sum with filter + prepareSearch().setQuery(matchAllQuery()) + .addSort( + SortBuilders.fieldSort("parent.child.child_values") + .setNestedSort( + new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) + ) + .sortMode(SortMode.SUM) + .order(SortOrder.ASC) + ), + // Sort mode: avg with filter + prepareSearch().setQuery(matchAllQuery()) + .addSort( + SortBuilders.fieldSort("parent.child.child_values") + .setNestedSort( + new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) + ) + .sortMode(SortMode.AVG) + .order(SortOrder.ASC) + ) ); - nestedSort.setFilter(QueryBuilders.termQuery("parent.filter", false)); assertResponse( prepareSearch().setQuery(matchAllQuery()) - .addSort(SortBuilders.fieldSort("parent.parent_values").setNestedSort(nestedSort).order(SortOrder.ASC)), + .addSort( + SortBuilders.fieldSort("parent.parent_values") + .setNestedSort(nestedSort.setFilter(QueryBuilders.termQuery("parent.filter", false))) + .order(SortOrder.ASC) + ), response -> { assertHitCount(response, 3); assertThat(response.getHits().getHits().length, equalTo(3)); @@ -1215,27 +1231,6 @@ public void testSortNestedWithNestedFilter() throws Exception { assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("6")); } ); - // Check if closest nested type is resolved - assertResponse( - prepareSearch().setQuery(matchAllQuery()) - .addSort( - SortBuilders.fieldSort("parent.child.child_obj.value") - .setNestedSort( - new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) - ) - .order(SortOrder.ASC) - ), - response -> { - assertHitCount(response, 3); - assertThat(response.getHits().getHits().length, equalTo(3)); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); - assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); - assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); - 
assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); - } - ); // Sort mode: sum assertResponse( prepareSearch().setQuery(matchAllQuery()) @@ -1275,28 +1270,6 @@ public void testSortNestedWithNestedFilter() throws Exception { assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("2")); } ); - // Sort mode: sum with filter - assertResponse( - prepareSearch().setQuery(matchAllQuery()) - .addSort( - SortBuilders.fieldSort("parent.child.child_values") - .setNestedSort( - new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) - ) - .sortMode(SortMode.SUM) - .order(SortOrder.ASC) - ), - response -> { - assertHitCount(response, 3); - assertThat(response.getHits().getHits().length, equalTo(3)); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); - assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); - assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); - assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); - } - ); // Sort mode: avg assertResponse( prepareSearch().setQuery(matchAllQuery()) @@ -1336,28 +1309,6 @@ public void testSortNestedWithNestedFilter() throws Exception { assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("1")); } ); - // Sort mode: avg with filter - assertResponse( - prepareSearch().setQuery(matchAllQuery()) - .addSort( - SortBuilders.fieldSort("parent.child.child_values") - .setNestedSort( - new NestedSortBuilder("parent.child").setFilter(QueryBuilders.termQuery("parent.child.filter", true)) - ) - .sortMode(SortMode.AVG) - .order(SortOrder.ASC) - ), - response -> { - assertHitCount(response, 3); - assertThat(response.getHits().getHits().length, equalTo(3)); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[0].getSortValues()[0].toString(), equalTo("1")); - assertThat(response.getHits().getHits()[1].getId(), equalTo("2")); - assertThat(response.getHits().getHits()[1].getSortValues()[0].toString(), equalTo("2")); - assertThat(response.getHits().getHits()[2].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getSortValues()[0].toString(), equalTo("3")); - } - ); } // Issue #9305 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/QueryStringIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/QueryStringIT.java index c8fe9498b156f..28d72518f516e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/QueryStringIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/QueryStringIT.java @@ -30,6 +30,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -50,14 +51,10 @@ public void 
testBasicAllQuery() throws Exception { reqs.add(prepareIndex("test").setId("3").setSource("f3", "foo bar baz")); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("foo")), response -> { - assertHitCount(response, 2L); - assertHits(response.getHits(), "1", "3"); - }); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("bar")), response -> { + assertResponses(response -> { assertHitCount(response, 2L); assertHits(response.getHits(), "1", "3"); - }); + }, prepareSearch("test").setQuery(queryStringQuery("foo")), prepareSearch("test").setQuery(queryStringQuery("bar"))); assertResponse(prepareSearch("test").setQuery(queryStringQuery("Bar")), response -> { assertHitCount(response, 3L); assertHits(response.getHits(), "1", "2", "3"); @@ -70,22 +67,18 @@ public void testWithDate() throws Exception { reqs.add(prepareIndex("test").setId("2").setSource("f1", "bar", "f_date", "2015/09/01")); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("foo bar")), response -> { + assertResponses(response -> { assertHits(response.getHits(), "1", "2"); assertHitCount(response, 2L); - }); + }, + prepareSearch("test").setQuery(queryStringQuery("foo bar")), + prepareSearch("test").setQuery(queryStringQuery("bar \"2015/09/02\"")), + prepareSearch("test").setQuery(queryStringQuery("\"2015/09/02\" \"2015/09/01\"")) + ); assertResponse(prepareSearch("test").setQuery(queryStringQuery("\"2015/09/02\"")), response -> { assertHits(response.getHits(), "1"); assertHitCount(response, 1L); }); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("bar \"2015/09/02\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("\"2015/09/02\" \"2015/09/01\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); } public void testWithLotsOfTypes() throws Exception { @@ -94,22 +87,18 @@ public void testWithLotsOfTypes() throws Exception { reqs.add(prepareIndex("test").setId("2").setSource("f1", "bar", "f_date", "2015/09/01", "f_float", "1.8", "f_ip", "127.0.0.2")); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("foo bar")), response -> { + assertResponses(response -> { assertHits(response.getHits(), "1", "2"); assertHitCount(response, 2L); - }); + }, + prepareSearch("test").setQuery(queryStringQuery("foo bar")), + prepareSearch("test").setQuery(queryStringQuery("127.0.0.2 \"2015/09/02\"")), + prepareSearch("test").setQuery(queryStringQuery("127.0.0.1 OR 1.8")) + ); assertResponse(prepareSearch("test").setQuery(queryStringQuery("\"2015/09/02\"")), response -> { assertHits(response.getHits(), "1"); assertHitCount(response, 1L); }); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("127.0.0.2 \"2015/09/02\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("127.0.0.1 OR 1.8")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); } public void testDocWithAllTypes() throws Exception { @@ -118,23 +107,23 @@ public void testDocWithAllTypes() throws Exception { reqs.add(prepareIndex("test").setId("1").setSource(docBody, XContentType.JSON)); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("foo")), response -> 
assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("Bar")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("Baz")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("19")), response -> assertHits(response.getHits(), "1")); - // nested doesn't match because it's hidden - assertResponse(prepareSearch("test").setQuery(queryStringQuery("1476383971")), response -> assertHits(response.getHits(), "1")); - // bool doesn't match - assertResponse(prepareSearch("test").setQuery(queryStringQuery("7")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("23")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("1293")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("42")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("1.7")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("1.5")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(queryStringQuery("127.0.0.1")), response -> assertHits(response.getHits(), "1")); - // binary doesn't match - // suggest doesn't match - // geo_point doesn't match + assertResponses( + response -> assertHits(response.getHits(), "1"), + prepareSearch("test").setQuery(queryStringQuery("foo")), + prepareSearch("test").setQuery(queryStringQuery("Bar")), + prepareSearch("test").setQuery(queryStringQuery("Baz")), + prepareSearch("test").setQuery(queryStringQuery("19")), + // nested doesn't match because it's hidden + prepareSearch("test").setQuery(queryStringQuery("1476383971")), + // bool doesn't match + prepareSearch("test").setQuery(queryStringQuery("7")), + prepareSearch("test").setQuery(queryStringQuery("23")), + prepareSearch("test").setQuery(queryStringQuery("1293")), + prepareSearch("test").setQuery(queryStringQuery("42")), + prepareSearch("test").setQuery(queryStringQuery("1.7")), + prepareSearch("test").setQuery(queryStringQuery("1.5")), + prepareSearch("test").setQuery(queryStringQuery("127.0.0.1")) + ); } public void testKeywordWithWhitespace() throws Exception { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java index 118aa00fc1b4f..f790cf30e1c0e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -589,19 +589,19 @@ public void testMultiMatchQuery() throws Exception { indicesAdmin().prepareRefresh("test").get(); builder = multiMatchQuery("value1", "field1", "field2").operator(Operator.AND); // Operator only applies on terms inside a field! - // Fields are always OR-ed together. + // Fields are always OR-ed together. assertSearchHitsWithoutFailures(prepareSearch().setQuery(builder), "1"); refresh(); builder = multiMatchQuery("value1", "field1").field("field3", 1.5f).operator(Operator.AND); // Operator only applies on terms inside - // a field! Fields are always OR-ed - // together. + // a field! Fields are always OR-ed + // together. 
assertSearchHitsWithoutFailures(prepareSearch().setQuery(builder), "3", "1"); indicesAdmin().prepareRefresh("test").get(); builder = multiMatchQuery("value1").field("field1").field("field3", 1.5f).operator(Operator.AND); // Operator only applies on terms - // inside a field! Fields are - // always OR-ed together. + // inside a field! Fields are + // always OR-ed together. assertResponse(prepareSearch().setQuery(builder), response -> { assertHitCount(response, 2L); assertSearchHits(response, "3", "1"); @@ -726,25 +726,27 @@ public void testBoolQueryMinShouldMatchBiggerThanNumberOfShouldClauses() throws prepareIndex("test").setId("2").setSource("field2", "value1").get(); refresh(); - BoolQueryBuilder boolQuery = boolQuery().must(termQuery("field1", "value1")) - .should(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(3)); - assertResponse(prepareSearch().setQuery(boolQuery), response -> { + assertResponses(response -> { assertHitCount(response, 1L); assertFirstHit(response, hasId("1")); - }); - boolQuery = boolQuery().must(termQuery("field1", "value1")) + }, + prepareSearch().setQuery( + boolQuery().must(termQuery("field1", "value1")) + .should(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(3)) + ), + prepareSearch().setQuery( + boolQuery().should(termQuery("field1", "value1")) + .should(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(3)) + .minimumShouldMatch(1) + ) + ); + + BoolQueryBuilder boolQuery = boolQuery().must(termQuery("field1", "value1")) .should(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(1)) // Only one should clause is defined, returns no docs. 
.minimumShouldMatch(2); assertHitCount(prepareSearch().setQuery(boolQuery), 0L); - boolQuery = boolQuery().should(termQuery("field1", "value1")) - .should(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(3)) - .minimumShouldMatch(1); - assertResponse(prepareSearch().setQuery(boolQuery), response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("1")); - }); boolQuery = boolQuery().must(termQuery("field1", "value1")) .must(boolQuery().should(termQuery("field1", "value1")).should(termQuery("field1", "value2")).minimumShouldMatch(3)); assertHitCount(prepareSearch().setQuery(boolQuery), 0L); @@ -1449,73 +1451,40 @@ public void testRangeQueryWithTimeZone() throws Exception { .setSource("date", Instant.now().atZone(ZoneOffset.ofHours(1)).toInstant().toEpochMilli(), "num", 4) ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1L); + assertThat(response.getHits().getAt(0).getId(), is("1")); + }, prepareSearch("test").setQuery(QueryBuilders.rangeQuery("date").from("2014-01-01T00:00:00").to("2014-01-01T00:59:00")), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("1")); - } - ); - assertResponse( - prepareSearch("test").setQuery(QueryBuilders.rangeQuery("date").from("2013-12-31T23:00:00").to("2013-12-31T23:59:00")), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("2")); - } - ); - assertResponse( - prepareSearch("test").setQuery(QueryBuilders.rangeQuery("date").from("2014-01-01T01:00:00").to("2014-01-01T01:59:00")), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("3")); - } - ); - // We explicitly define a time zone in the from/to dates so whatever the time zone is, it won't be used - assertResponse( + // We explicitly define a time zone in the from/to dates so whatever the time zone is, it won't be used prepareSearch("test").setQuery( QueryBuilders.rangeQuery("date").from("2014-01-01T00:00:00Z").to("2014-01-01T00:59:00Z").timeZone("+10:00") ), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("1")); - } - ); - assertResponse( + // We define a time zone to be applied to the filter and from/to have no time zone prepareSearch("test").setQuery( - QueryBuilders.rangeQuery("date").from("2013-12-31T23:00:00Z").to("2013-12-31T23:59:00Z").timeZone("+10:00") - ), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("2")); - } + QueryBuilders.rangeQuery("date").from("2014-01-01T03:00:00").to("2014-01-01T03:59:00").timeZone("+03:00") + ) ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1L); + assertThat(response.getHits().getAt(0).getId(), is("2")); + }, + prepareSearch("test").setQuery(QueryBuilders.rangeQuery("date").from("2013-12-31T23:00:00").to("2013-12-31T23:59:00")), prepareSearch("test").setQuery( - QueryBuilders.rangeQuery("date").from("2014-01-01T01:00:00Z").to("2014-01-01T01:59:00Z").timeZone("+10:00") + QueryBuilders.rangeQuery("date").from("2013-12-31T23:00:00Z").to("2013-12-31T23:59:00Z").timeZone("+10:00") ), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("3")); - } - ); - // We define a time zone to be applied to the filter and from/to have no time zone - assertResponse( prepareSearch("test").setQuery( - 
QueryBuilders.rangeQuery("date").from("2014-01-01T03:00:00").to("2014-01-01T03:59:00").timeZone("+03:00") - ), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("1")); - } + QueryBuilders.rangeQuery("date").from("2014-01-01T02:00:00").to("2014-01-01T02:59:00").timeZone("+03:00") + ) ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1L); + assertThat(response.getHits().getAt(0).getId(), is("3")); + }, + prepareSearch("test").setQuery(QueryBuilders.rangeQuery("date").from("2014-01-01T01:00:00").to("2014-01-01T01:59:00")), prepareSearch("test").setQuery( - QueryBuilders.rangeQuery("date").from("2014-01-01T02:00:00").to("2014-01-01T02:59:00").timeZone("+03:00") - ), - response -> { - assertHitCount(response, 1L); - assertThat(response.getHits().getAt(0).getId(), is("2")); - } + QueryBuilders.rangeQuery("date").from("2014-01-01T01:00:00Z").to("2014-01-01T01:59:00Z").timeZone("+10:00") + ) ); assertResponses(response -> { assertHitCount(response, 1L); @@ -1713,8 +1682,8 @@ public void testFieldAliasesForMetaFields() throws Exception { } /** - * Test that wildcard queries on keyword fields get normalized - */ + * Test that wildcard queries on keyword fields get normalized + */ public void testWildcardQueryNormalizationOnKeywordField() { assertAcked( prepareCreate("test").setSettings( diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 522c20b687caa..f9ae30720b33f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -51,6 +51,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHitsWithoutFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasId; @@ -383,14 +384,10 @@ public void testBasicAllQuery() throws Exception { reqs.add(prepareIndex("test").setId("3").setSource("f3", "foo bar baz")); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("foo")), response -> { + assertResponses(response -> { assertHitCount(response, 2L); assertHits(response.getHits(), "1", "3"); - }); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("bar")), response -> { - assertHitCount(response, 2L); - assertHits(response.getHits(), "1", "3"); - }); + }, prepareSearch("test").setQuery(simpleQueryStringQuery("foo")), prepareSearch("test").setQuery(simpleQueryStringQuery("bar"))); assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")), response -> { assertHitCount(response, 3L); assertHits(response.getHits(), "1", "2", "3"); @@ -407,22 +404,18 @@ public void testWithDate() throws Exception { reqs.add(prepareIndex("test").setId("2").setSource("f1", "bar", "f_date", "2015/09/01")); indexRandom(true, false, reqs); - 
assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("foo bar")), response -> { + assertResponses(response -> { assertHits(response.getHits(), "1", "2"); assertHitCount(response, 2L); - }); + }, + prepareSearch("test").setQuery(simpleQueryStringQuery("foo bar")), + prepareSearch("test").setQuery(simpleQueryStringQuery("bar \"2015/09/02\"")), + prepareSearch("test").setQuery(simpleQueryStringQuery("\"2015/09/02\" \"2015/09/01\"")) + ); assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("\"2015/09/02\"")), response -> { assertHits(response.getHits(), "1"); assertHitCount(response, 1L); }); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("bar \"2015/09/02\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("\"2015/09/02\" \"2015/09/01\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); } public void testWithLotsOfTypes() throws Exception { @@ -435,22 +428,18 @@ public void testWithLotsOfTypes() throws Exception { reqs.add(prepareIndex("test").setId("2").setSource("f1", "bar", "f_date", "2015/09/01", "f_float", "1.8", "f_ip", "127.0.0.2")); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("foo bar")), response -> { + assertResponses(response -> { assertHits(response.getHits(), "1", "2"); assertHitCount(response, 2L); - }); + }, + prepareSearch("test").setQuery(simpleQueryStringQuery("foo bar")), + prepareSearch("test").setQuery(simpleQueryStringQuery("127.0.0.2 \"2015/09/02\"")), + prepareSearch("test").setQuery(simpleQueryStringQuery("127.0.0.1 1.8")) + ); assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("\"2015/09/02\"")), response -> { assertHits(response.getHits(), "1"); assertHitCount(response, 1L); }); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("127.0.0.2 \"2015/09/02\"")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("127.0.0.1 1.8")), response -> { - assertHits(response.getHits(), "1", "2"); - assertHitCount(response, 2L); - }); } public void testDocWithAllTypes() throws Exception { @@ -463,34 +452,27 @@ public void testDocWithAllTypes() throws Exception { reqs.add(prepareIndex("test").setId("1").setSource(docBody, XContentType.JSON)); indexRandom(true, false, reqs); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("foo")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("19")), response -> assertHits(response.getHits(), "1")); - // nested doesn't match because it's hidden - assertResponse( + assertResponses( + response -> assertHits(response.getHits(), "1"), + prepareSearch("test").setQuery(simpleQueryStringQuery("foo")), + prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")), + prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")), + prepareSearch("test").setQuery(simpleQueryStringQuery("19")), + // nested doesn't match because it's hidden 
prepareSearch("test").setQuery(simpleQueryStringQuery("1476383971")), - response -> assertHits(response.getHits(), "1") - ); - // bool doesn't match - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("7")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("23")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("1293")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("42")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("1.7")), response -> assertHits(response.getHits(), "1")); - assertResponse(prepareSearch("test").setQuery(simpleQueryStringQuery("1.5")), response -> assertHits(response.getHits(), "1")); - assertResponse( + // bool doesn't match + prepareSearch("test").setQuery(simpleQueryStringQuery("7")), + prepareSearch("test").setQuery(simpleQueryStringQuery("23")), + prepareSearch("test").setQuery(simpleQueryStringQuery("1293")), + prepareSearch("test").setQuery(simpleQueryStringQuery("42")), + prepareSearch("test").setQuery(simpleQueryStringQuery("1.7")), + prepareSearch("test").setQuery(simpleQueryStringQuery("1.5")), prepareSearch("test").setQuery(simpleQueryStringQuery("127.0.0.1")), - response -> assertHits(response.getHits(), "1") - ); - // binary doesn't match - // suggest doesn't match - // geo_point doesn't match - // geo_shape doesn't match - - assertResponse( - prepareSearch("test").setQuery(simpleQueryStringQuery("foo Bar 19 127.0.0.1").defaultOperator(Operator.AND)), - response -> assertHits(response.getHits(), "1") + // binary doesn't match + // suggest doesn't match + // geo_point doesn't match + // geo_shape doesn't match + prepareSearch("test").setQuery(simpleQueryStringQuery("foo Bar 19 127.0.0.1").defaultOperator(Operator.AND)) ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/routing/SearchReplicaSelectionIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/routing/SearchReplicaSelectionIT.java index 06ce330213af8..789da5aac7568 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/routing/SearchReplicaSelectionIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/routing/SearchReplicaSelectionIT.java @@ -24,6 +24,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; @@ -50,18 +51,14 @@ public void testNodeSelection() { // Before we've gathered stats for all nodes, we should try each node once. 
Set nodeIds = new HashSet<>(); - assertResponse(client.prepareSearch().setQuery(matchAllQuery()), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); - nodeIds.add(response.getHits().getAt(0).getShard().getNodeId()); - }); - assertResponse(client.prepareSearch().setQuery(matchAllQuery()), response -> { + assertResponses(response -> { assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); nodeIds.add(response.getHits().getAt(0).getShard().getNodeId()); - }); - assertResponse(client.prepareSearch().setQuery(matchAllQuery()), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(1L)); - nodeIds.add(response.getHits().getAt(0).getShard().getNodeId()); - }); + }, + client.prepareSearch().setQuery(matchAllQuery()), + client.prepareSearch().setQuery(matchAllQuery()), + client.prepareSearch().setQuery(matchAllQuery()) + ); assertEquals(3, nodeIds.size()); // Now after more searches, we should select a node with the lowest ARS rank. From 5c928a431671fd2789c9d58fd26a0e48cb7d6f92 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 27 Nov 2024 07:27:21 -0800 Subject: [PATCH 040/139] Emit deprecation warnings only for new index or template (#117529) Currently, we emit a deprecation warning in the parser of the source field when source mode is used in mappings. However, this behavior causes warnings to be emitted for every mapping update. In tests with assertions enabled, warnings are also triggered for every change to index metadata. As a result, deprecation warnings are inadvertently emitted for index or update requests. This change relocates the deprecation check to the mapper, limiting it to cases where a new index is created or a template is created/updated. Relates to #117524 --- .../index/mapper/MappingParser.java | 9 +++++++++ .../index/mapper/SourceFieldMapper.java | 14 +------------- .../mapper/DocumentParserContextTests.java | 1 - .../index/mapper/SourceFieldMapperTests.java | 17 +---------------- .../index/shard/ShardGetServiceTests.java | 2 -- 5 files changed, 11 insertions(+), 32 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingParser.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingParser.java index f30a0089e4eff..2ca14473c8385 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingParser.java @@ -10,6 +10,8 @@ package org.elasticsearch.index.mapper; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.mapper.MapperService.MergeReason; @@ -31,6 +33,7 @@ public final class MappingParser { private final Supplier, MetadataFieldMapper>> metadataMappersSupplier; private final Map metadataMapperParsers; private final Function documentTypeResolver; + private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(MappingParser.class); MappingParser( Supplier mappingParserContextSupplier, @@ -144,6 +147,12 @@ Mapping parse(@Nullable String type, MergeReason reason, Map map } @SuppressWarnings("unchecked") Map fieldNodeMap = (Map) fieldNode; + if (reason == MergeReason.INDEX_TEMPLATE + && SourceFieldMapper.NAME.equals(fieldName) + && fieldNodeMap.containsKey("mode") + && 
SourceFieldMapper.onOrAfterDeprecateModeVersion(mappingParserContext.indexVersionCreated())) { + deprecationLogger.critical(DeprecationCategory.MAPPINGS, "mapping_source_mode", SourceFieldMapper.DEPRECATION_WARNING); + } MetadataFieldMapper metadataFieldMapper = typeParser.parse(fieldName, fieldNodeMap, mappingParserContext).build(); metadataMappers.put(metadataFieldMapper.getClass(), metadataFieldMapper); assert fieldNodeMap.isEmpty(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index e7c7ec3535b91..b97e04fcddb5d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -18,7 +18,6 @@ import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; -import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.CollectionUtils; @@ -40,7 +39,6 @@ import java.util.Collections; import java.util.List; import java.util.Locale; -import java.util.Map; public class SourceFieldMapper extends MetadataFieldMapper { public static final NodeFeature SYNTHETIC_SOURCE_FALLBACK = new NodeFeature("mapper.source.synthetic_source_fallback"); @@ -310,17 +308,7 @@ private static SourceFieldMapper resolveStaticInstance(final Mode sourceMode) { c.indexVersionCreated().onOrAfter(IndexVersions.SOURCE_MAPPER_LOSSY_PARAMS_CHECK), onOrAfterDeprecateModeVersion(c.indexVersionCreated()) == false ) - ) { - @Override - public MetadataFieldMapper.Builder parse(String name, Map node, MappingParserContext parserContext) - throws MapperParsingException { - assert name.equals(SourceFieldMapper.NAME) : name; - if (onOrAfterDeprecateModeVersion(parserContext.indexVersionCreated()) && node.containsKey("mode")) { - deprecationLogger.critical(DeprecationCategory.MAPPINGS, "mapping_source_mode", SourceFieldMapper.DEPRECATION_WARNING); - } - return super.parse(name, node, parserContext); - } - }; + ); static final class SourceFieldType extends MappedFieldType { private final boolean enabled; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java index a4108caaf4fc3..be36ab9d6eac1 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java @@ -133,6 +133,5 @@ public void testCreateDynamicMapperBuilderContext() throws IOException { assertEquals(ObjectMapper.Defaults.DYNAMIC, resultFromParserContext.getDynamic()); assertEquals(MapperService.MergeReason.MAPPING_UPDATE, resultFromParserContext.getMergeReason()); assertFalse(resultFromParserContext.isInNestedContext()); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/SourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/SourceFieldMapperTests.java index fa173bc64518e..4d6a30849e263 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/SourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/SourceFieldMapperTests.java @@ -65,7 +65,6 @@ protected void 
registerParameters(ParameterChecker checker) throws IOException { topMapping(b -> b.startObject(SourceFieldMapper.NAME).field("mode", "synthetic").endObject()), dm -> { assertTrue(dm.metadataMapper(SourceFieldMapper.class).isSynthetic()); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } ); checker.registerConflictCheck("includes", b -> b.array("includes", "foo*")); @@ -74,7 +73,7 @@ protected void registerParameters(ParameterChecker checker) throws IOException { "mode", topMapping(b -> b.startObject(SourceFieldMapper.NAME).field("mode", "synthetic").endObject()), topMapping(b -> b.startObject(SourceFieldMapper.NAME).field("mode", "stored").endObject()), - dm -> assertWarnings(SourceFieldMapper.DEPRECATION_WARNING) + d -> {} ); } @@ -211,14 +210,12 @@ public void testSyntheticDisabledNotSupported() { ) ); assertThat(e.getMessage(), containsString("Cannot set both [mode] and [enabled] parameters")); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } public void testSyntheticUpdates() throws Exception { MapperService mapperService = createMapperService(""" { "_doc" : { "_source" : { "mode" : "synthetic" } } } """); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); SourceFieldMapper mapper = mapperService.documentMapper().sourceMapper(); assertTrue(mapper.enabled()); assertTrue(mapper.isSynthetic()); @@ -226,7 +223,6 @@ public void testSyntheticUpdates() throws Exception { merge(mapperService, """ { "_doc" : { "_source" : { "mode" : "synthetic" } } } """); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); mapper = mapperService.documentMapper().sourceMapper(); assertTrue(mapper.enabled()); assertTrue(mapper.isSynthetic()); @@ -239,12 +235,10 @@ public void testSyntheticUpdates() throws Exception { """)); assertThat(e.getMessage(), containsString("Cannot update parameter [mode] from [synthetic] to [stored]")); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); merge(mapperService, """ { "_doc" : { "_source" : { "mode" : "disabled" } } } """); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); mapper = mapperService.documentMapper().sourceMapper(); assertFalse(mapper.enabled()); @@ -281,7 +275,6 @@ public void testSupportsNonDefaultParameterValues() throws IOException { topMapping(b -> b.startObject("_source").field("mode", randomBoolean() ? 
"synthetic" : "stored").endObject()) ).documentMapper().sourceMapper(); assertThat(sourceFieldMapper, notNullValue()); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } Exception e = expectThrows( MapperParsingException.class, @@ -313,8 +306,6 @@ public void testSupportsNonDefaultParameterValues() throws IOException { .documentMapper() .sourceMapper() ); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); - assertThat(e.getMessage(), containsString("Parameter [mode=disabled] is not allowed in source")); e = expectThrows( @@ -423,7 +414,6 @@ public void testRecoverySourceWithSyntheticSource() throws IOException { ParsedDocument doc = docMapper.parse(source(b -> { b.field("field1", "value1"); })); assertNotNull(doc.rootDoc().getField("_recovery_source")); assertThat(doc.rootDoc().getField("_recovery_source").binaryValue(), equalTo(new BytesRef("{\"field1\":\"value1\"}"))); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } { Settings settings = Settings.builder().put(INDICES_RECOVERY_SOURCE_ENABLED_SETTING.getKey(), false).build(); @@ -434,7 +424,6 @@ public void testRecoverySourceWithSyntheticSource() throws IOException { DocumentMapper docMapper = mapperService.documentMapper(); ParsedDocument doc = docMapper.parse(source(b -> b.field("field1", "value1"))); assertNull(doc.rootDoc().getField("_recovery_source")); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } } @@ -629,7 +618,6 @@ public void testRecoverySourceWithLogsCustom() throws IOException { ParsedDocument doc = docMapper.parse(source(b -> { b.field("@timestamp", "2012-02-13"); })); assertNotNull(doc.rootDoc().getField("_recovery_source")); assertThat(doc.rootDoc().getField("_recovery_source").binaryValue(), equalTo(new BytesRef("{\"@timestamp\":\"2012-02-13\"}"))); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } { Settings settings = Settings.builder() @@ -640,7 +628,6 @@ public void testRecoverySourceWithLogsCustom() throws IOException { DocumentMapper docMapper = mapperService.documentMapper(); ParsedDocument doc = docMapper.parse(source(b -> b.field("@timestamp", "2012-02-13"))); assertNull(doc.rootDoc().getField("_recovery_source")); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } } @@ -709,7 +696,6 @@ public void testRecoverySourceWithTimeSeriesCustom() throws IOException { doc.rootDoc().getField("_recovery_source").binaryValue(), equalTo(new BytesRef("{\"@timestamp\":\"2012-02-13\",\"field\":\"value1\"}")) ); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } { Settings settings = Settings.builder() @@ -723,7 +709,6 @@ public void testRecoverySourceWithTimeSeriesCustom() throws IOException { source("123", b -> b.field("@timestamp", "2012-02-13").field("field", randomAlphaOfLength(5)), null) ); assertNull(doc.rootDoc().getField("_recovery_source")); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } } } diff --git a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java index 307bc26c44ba6..a49d895f38f67 100644 --- a/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/shard/ShardGetServiceTests.java @@ -21,7 +21,6 @@ import org.elasticsearch.index.engine.VersionConflictEngineException; import org.elasticsearch.index.get.GetResult; import org.elasticsearch.index.mapper.RoutingFieldMapper; -import org.elasticsearch.index.mapper.SourceFieldMapper; import 
org.elasticsearch.search.fetch.subphase.FetchSourceContext; import org.elasticsearch.xcontent.XContentType; @@ -115,7 +114,6 @@ public void testGetFromTranslogWithSyntheticSource() throws IOException { "mode": "synthetic" """; runGetFromTranslogWithOptions(docToIndex, sourceOptions, expectedFetchedSource, "\"long\"", 7L, true); - assertWarnings(SourceFieldMapper.DEPRECATION_WARNING); } public void testGetFromTranslogWithDenseVector() throws IOException { From 418cbbf7b9f175ceba858a684215f42c55c9830e Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Wed, 27 Nov 2024 07:56:54 -0800 Subject: [PATCH 041/139] Remove entitlement parameter (#117597) Removes the "entitlement" parameter from policy parsing. --- .../runtime/policy/PolicyParser.java | 13 -------- .../policy/PolicyParserFailureTests.java | 30 ++++++++----------- .../runtime/policy/test-policy.yaml | 11 ++++--- 3 files changed, 18 insertions(+), 36 deletions(-) diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyParser.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyParser.java index 229ccec3b8b2c..ea6603af99925 100644 --- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyParser.java +++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/runtime/policy/PolicyParser.java @@ -9,7 +9,6 @@ package org.elasticsearch.entitlement.runtime.policy; -import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.yaml.YamlXContent; @@ -31,8 +30,6 @@ */ public class PolicyParser { - protected static final ParseField ENTITLEMENTS_PARSEFIELD = new ParseField("entitlements"); - protected static final String entitlementPackageName = Entitlement.class.getPackage().getName(); protected final XContentParser policyParser; @@ -65,13 +62,6 @@ public Policy parsePolicy() { protected Scope parseScope(String scopeName) throws IOException { try { - if (policyParser.nextToken() != XContentParser.Token.START_OBJECT) { - throw newPolicyParserException(scopeName, "expected object [" + ENTITLEMENTS_PARSEFIELD.getPreferredName() + "]"); - } - if (policyParser.nextToken() != XContentParser.Token.FIELD_NAME - || policyParser.currentName().equals(ENTITLEMENTS_PARSEFIELD.getPreferredName()) == false) { - throw newPolicyParserException(scopeName, "expected object [" + ENTITLEMENTS_PARSEFIELD.getPreferredName() + "]"); - } if (policyParser.nextToken() != XContentParser.Token.START_ARRAY) { throw newPolicyParserException(scopeName, "expected array of "); } @@ -90,9 +80,6 @@ protected Scope parseScope(String scopeName) throws IOException { throw newPolicyParserException(scopeName, "expected closing object"); } } - if (policyParser.nextToken() != XContentParser.Token.END_OBJECT) { - throw newPolicyParserException(scopeName, "expected closing object"); - } return new Scope(scopeName, entitlements); } catch (IOException ioe) { throw new UncheckedIOException(ioe); diff --git a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java index b21d206f3eb6a..de8280ea87fe5 100644 --- a/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java +++ 
b/libs/entitlement/src/test/java/org/elasticsearch/entitlement/runtime/policy/PolicyParserFailureTests.java @@ -29,11 +29,10 @@ public void testParserSyntaxFailures() { public void testEntitlementDoesNotExist() throws IOException { PolicyParserException ppe = expectThrows(PolicyParserException.class, () -> new PolicyParser(new ByteArrayInputStream(""" entitlement-module-name: - entitlements: - - does_not_exist: {} + - does_not_exist: {} """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml").parsePolicy()); assertEquals( - "[3:7] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name]: " + "[2:5] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name]: " + "unknown entitlement type [does_not_exist]", ppe.getMessage() ); @@ -42,23 +41,21 @@ public void testEntitlementDoesNotExist() throws IOException { public void testEntitlementMissingParameter() throws IOException { PolicyParserException ppe = expectThrows(PolicyParserException.class, () -> new PolicyParser(new ByteArrayInputStream(""" entitlement-module-name: - entitlements: - - file: {} + - file: {} """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml").parsePolicy()); assertEquals( - "[3:14] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "[2:12] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "for entitlement type [file]: missing entitlement parameter [path]", ppe.getMessage() ); ppe = expectThrows(PolicyParserException.class, () -> new PolicyParser(new ByteArrayInputStream(""" entitlement-module-name: - entitlements: - - file: - path: test-path + - file: + path: test-path """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml").parsePolicy()); assertEquals( - "[5:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "[4:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "for entitlement type [file]: missing entitlement parameter [actions]", ppe.getMessage() ); @@ -67,15 +64,14 @@ public void testEntitlementMissingParameter() throws IOException { public void testEntitlementExtraneousParameter() throws IOException { PolicyParserException ppe = expectThrows(PolicyParserException.class, () -> new PolicyParser(new ByteArrayInputStream(""" entitlement-module-name: - entitlements: - - file: - path: test-path - actions: - - read - extra: test + - file: + path: test-path + actions: + - read + extra: test """.getBytes(StandardCharsets.UTF_8)), "test-failure-policy.yaml").parsePolicy()); assertEquals( - "[8:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "[7:1] policy parsing error for [test-failure-policy.yaml] in scope [entitlement-module-name] " + "for entitlement type [file]: extraneous entitlement parameter(s) {extra=test}", ppe.getMessage() ); diff --git a/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml b/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml index b58287cfc83b7..f13f574535bec 100644 --- a/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml +++ b/libs/entitlement/src/test/resources/org/elasticsearch/entitlement/runtime/policy/test-policy.yaml @@ -1,7 +1,6 @@ entitlement-module-name: - entitlements: - - file: - path: "test/path/to/file" - actions: - - "read" - - "write" + - file: + 
path: "test/path/to/file" + actions: + - "read" + - "write" From 9022cccba7b617d6ccd0b2ec411dbd1aa6aff0c1 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 27 Nov 2024 11:44:55 -0500 Subject: [PATCH 042/139] ESQL: CATEGORIZE as a BlockHash (#114317) Re-implement `CATEGORIZE` in a way that works for multi-node clusters. This requires that data is first categorized on each data node in a first pass, then the categorizers from each data node are merged on the coordinator node and previously categorized rows are re-categorized. BlockHashes, used in HashAggregations, already work in a very similar way. E.g. for queries like `... | STATS ... BY field1, field2` they map values for `field1` and `field2` to unique integer ids that are then passed to the actual aggregate functions to identify which "bucket" a row belongs to. When passed from the data nodes to the coordinator, the BlockHashes are also merged to obtain unique ids for every value in `field1, field2` that is seen on the coordinator (not only on the local data nodes). Therefore, we re-implement `CATEGORIZE` as a special BlockHash. To choose the correct BlockHash when a query plan is mapped to physical operations, the `AggregateExec` query plan node needs to know that we will be categorizing the field `message` in a query containing `... | STATS ... BY c = CATEGORIZE(message)`. For this reason, _we do not extract the expression_ `c = CATEGORIZE(message)` into an `EVAL` node, in contrast to e.g. `STATS ... BY b = BUCKET(field, 10)`. The expression `c = CATEGORIZE(message)` simply remains inside the `AggregateExec`'s groupings. **Important limitation:** For now, to use `CATEGORIZE` in a `STATS` command, there can be only 1 grouping (the `CATEGORIZE`) overall. --- docs/changelog/114317.yaml | 5 + .../kibana/definition/categorize.json | 4 +- .../esql/functions/types/categorize.asciidoc | 4 +- muted-tests.yml | 18 - .../AbstractCategorizeBlockHash.java | 105 ++++ .../aggregation/blockhash/BlockHash.java | 28 +- .../blockhash/CategorizeRawBlockHash.java | 137 +++++ .../CategorizedIntermediateBlockHash.java | 77 +++ .../operator/HashAggregationOperator.java | 9 + .../GroupingAggregatorFunctionTestCase.java | 1 + .../blockhash/BlockHashTestCase.java | 34 ++ .../aggregation/blockhash/BlockHashTests.java | 22 +- .../blockhash/CategorizeBlockHashTests.java | 406 ++++++++++++++ .../HashAggregationOperatorTests.java | 1 + .../xpack/esql/CsvTestsDataLoader.java | 2 + .../src/main/resources/categorize.csv-spec | 526 +++++++++++++++++- .../resources/mapping-mv_sample_data.json | 16 + .../src/main/resources/mv_sample_data.csv | 8 + .../grouping/CategorizeEvaluator.java | 145 ----- .../xpack/esql/action/EsqlCapabilities.java | 5 +- .../function/grouping/Categorize.java | 76 +-- .../rules/logical/CombineProjections.java | 38 +- .../optimizer/rules/logical/FoldNull.java | 2 + ...laceAggregateNestedExpressionWithEval.java | 31 +- .../physical/local/InsertFieldExtraction.java | 17 +- .../AbstractPhysicalOperationProviders.java | 42 +- .../xpack/esql/analysis/VerifierTests.java | 6 +- .../function/AbstractAggregationTestCase.java | 3 +- .../function/AbstractFunctionTestCase.java | 19 +- .../AbstractScalarFunctionTestCase.java | 1 + .../expression/function/TestCaseSupplier.java | 83 ++- .../function/grouping/CategorizeTests.java | 16 +- .../optimizer/LogicalPlanOptimizerTests.java | 61 ++ .../rules/logical/FoldNullTests.java | 13 + .../categorization/TokenListCategorizer.java | 24 + 35 files changed, 1660 insertions(+), 325 deletions(-) create mode 100644 
docs/changelog/114317.yaml create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTestCase.java create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-mv_sample_data.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_sample_data.csv delete mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java diff --git a/docs/changelog/114317.yaml b/docs/changelog/114317.yaml new file mode 100644 index 0000000000000..9c73fe513e197 --- /dev/null +++ b/docs/changelog/114317.yaml @@ -0,0 +1,5 @@ +pr: 114317 +summary: "ESQL: CATEGORIZE as a `BlockHash`" +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/kibana/definition/categorize.json b/docs/reference/esql/functions/kibana/definition/categorize.json index 386b178d3753f..ca3971a6e05a3 100644 --- a/docs/reference/esql/functions/kibana/definition/categorize.json +++ b/docs/reference/esql/functions/kibana/definition/categorize.json @@ -14,7 +14,7 @@ } ], "variadic" : false, - "returnType" : "integer" + "returnType" : "keyword" }, { "params" : [ @@ -26,7 +26,7 @@ } ], "variadic" : false, - "returnType" : "integer" + "returnType" : "keyword" } ], "preview" : false, diff --git a/docs/reference/esql/functions/types/categorize.asciidoc b/docs/reference/esql/functions/types/categorize.asciidoc index 4917ed313e6d7..5b64971cbc482 100644 --- a/docs/reference/esql/functions/types/categorize.asciidoc +++ b/docs/reference/esql/functions/types/categorize.asciidoc @@ -5,6 +5,6 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== field | result -keyword | integer -text | integer +keyword | keyword +text | keyword |=== diff --git a/muted-tests.yml b/muted-tests.yml index c97e46375c597..8b12bd2dd3365 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -67,9 +67,6 @@ tests: - class: org.elasticsearch.xpack.transform.integration.TransformIT method: testStopWaitForCheckpoint issue: https://github.com/elastic/elasticsearch/issues/106113 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: test {categorize.Categorize SYNC} - issue: https://github.com/elastic/elasticsearch/issues/113722 - class: org.elasticsearch.kibana.KibanaThreadPoolIT method: testBlockedThreadPoolsRejectUserRequests issue: https://github.com/elastic/elasticsearch/issues/113939 @@ -126,12 +123,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT method: testLookbackWithIndicesOptions issue: https://github.com/elastic/elasticsearch/issues/116127 -- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT - method: test {categorize.Categorize SYNC} - issue: https://github.com/elastic/elasticsearch/issues/113054 -- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT - method: test {categorize.Categorize ASYNC} - issue: 
https://github.com/elastic/elasticsearch/issues/113055 - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_start_stop/Test start already started transform} issue: https://github.com/elastic/elasticsearch/issues/98802 @@ -153,9 +144,6 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testAllocationPreventedForRemoval issue: https://github.com/elastic/elasticsearch/issues/116363 -- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT - method: test {categorize.Categorize ASYNC} - issue: https://github.com/elastic/elasticsearch/issues/116373 - class: org.elasticsearch.threadpool.SimpleThreadPoolIT method: testThreadPoolMetrics issue: https://github.com/elastic/elasticsearch/issues/108320 @@ -168,9 +156,6 @@ tests: - class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsCanMatchOnCoordinatorIntegTests method: testSearchableSnapshotShardsAreSkippedBySearchRequestWithoutQueryingAnyNodeWhenTheyAreOutsideOfTheQueryRange issue: https://github.com/elastic/elasticsearch/issues/116523 -- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT - method: test {categorize.Categorize} - issue: https://github.com/elastic/elasticsearch/issues/116434 - class: org.elasticsearch.upgrades.SearchStatesIT method: testBWCSearchStates issue: https://github.com/elastic/elasticsearch/issues/116617 @@ -229,9 +214,6 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_reset/Test reset running transform} issue: https://github.com/elastic/elasticsearch/issues/117473 -- class: org.elasticsearch.xpack.esql.qa.single_node.FieldExtractorIT - method: testConstantKeywordField - issue: https://github.com/elastic/elasticsearch/issues/117524 - class: org.elasticsearch.xpack.esql.qa.multi_node.FieldExtractorIT method: testConstantKeywordField issue: https://github.com/elastic/elasticsearch/issues/117524 diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java new file mode 100644 index 0000000000000..22d3a10facb06 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java @@ -0,0 +1,105 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.BitArray; +import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.ReleasableIterator; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary; +import org.elasticsearch.xpack.ml.aggs.categorization.SerializableTokenListCategory; +import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; + +import java.io.IOException; + +/** + * Base BlockHash implementation for {@code Categorize} grouping function. + */ +public abstract class AbstractCategorizeBlockHash extends BlockHash { + // TODO: this should probably also take an emitBatchSize + private final int channel; + private final boolean outputPartial; + protected final TokenListCategorizer.CloseableTokenListCategorizer categorizer; + + AbstractCategorizeBlockHash(BlockFactory blockFactory, int channel, boolean outputPartial) { + super(blockFactory); + this.channel = channel; + this.outputPartial = outputPartial; + this.categorizer = new TokenListCategorizer.CloseableTokenListCategorizer( + new CategorizationBytesRefHash(new BytesRefHash(2048, blockFactory.bigArrays())), + CategorizationPartOfSpeechDictionary.getInstance(), + 0.70f + ); + } + + protected int channel() { + return channel; + } + + @Override + public Block[] getKeys() { + return new Block[] { outputPartial ? buildIntermediateBlock() : buildFinalBlock() }; + } + + @Override + public IntVector nonEmpty() { + return IntVector.range(0, categorizer.getCategoryCount(), blockFactory); + } + + @Override + public BitArray seenGroupIds(BigArrays bigArrays) { + throw new UnsupportedOperationException(); + } + + @Override + public final ReleasableIterator lookup(Page page, ByteSizeValue targetBlockSize) { + throw new UnsupportedOperationException(); + } + + /** + * Serializes the intermediate state into a single BytesRef block, or an empty Null block if there are no categories. + */ + private Block buildIntermediateBlock() { + if (categorizer.getCategoryCount() == 0) { + return blockFactory.newConstantNullBlock(0); + } + try (BytesStreamOutput out = new BytesStreamOutput()) { + // TODO be more careful here. + out.writeVInt(categorizer.getCategoryCount()); + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + category.writeTo(out); + } + // We're returning a block with N positions just because the Page must have all blocks with the same position count! 
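+            // A sketch of the serialized layout, readable from the writes above:
+            // [vInt categoryCount][category #0][category #1]...[category #N-1].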
+ return blockFactory.newConstantBytesRefBlockWith(out.bytes().toBytesRef(), categorizer.getCategoryCount()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Block buildFinalBlock() { + try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) { + BytesRefBuilder scratch = new BytesRefBuilder(); + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + scratch.copyChars(category.getRegex()); + result.appendBytesRef(scratch.get()); + scratch.clear(); + } + return result.build().asBlock(); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java index 919cb92f79260..ef0f3ceb112c4 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.util.Int3Hash; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.util.LongLongHash; +import org.elasticsearch.compute.aggregation.AggregatorMode; import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; import org.elasticsearch.compute.aggregation.SeenGroupIds; import org.elasticsearch.compute.data.Block; @@ -58,9 +59,7 @@ * leave a big gap, even if we never see {@code null}. *
</p>
*/ -public abstract sealed class BlockHash implements Releasable, SeenGroupIds // - permits BooleanBlockHash, BytesRefBlockHash, DoubleBlockHash, IntBlockHash, LongBlockHash, BytesRef2BlockHash, BytesRef3BlockHash, // - NullBlockHash, PackedValuesBlockHash, BytesRefLongBlockHash, LongLongBlockHash, TimeSeriesBlockHash { +public abstract class BlockHash implements Releasable, SeenGroupIds { protected final BlockFactory blockFactory; @@ -107,7 +106,15 @@ public abstract sealed class BlockHash implements Releasable, SeenGroupIds // @Override public abstract BitArray seenGroupIds(BigArrays bigArrays); - public record GroupSpec(int channel, ElementType elementType) {} + /** + * @param isCategorize Whether this group is a CATEGORIZE() or not. + * May be changed in the future when more stateful grouping functions are added. + */ + public record GroupSpec(int channel, ElementType elementType, boolean isCategorize) { + public GroupSpec(int channel, ElementType elementType) { + this(channel, elementType, false); + } + } /** * Creates a specialized hash table that maps one or more {@link Block}s to ids. @@ -159,6 +166,19 @@ public static BlockHash buildPackedValuesBlockHash(List<GroupSpec> groups, Block return new PackedValuesBlockHash(groups, blockFactory, emitBatchSize); } + /** + * Builds a BlockHash for the Categorize grouping function. + */ + public static BlockHash buildCategorizeBlockHash(List<GroupSpec> groups, AggregatorMode aggregatorMode, BlockFactory blockFactory) { + if (groups.size() != 1) { + throw new IllegalArgumentException("only a single CATEGORIZE group can be used"); + } + + return aggregatorMode.isInputPartial() + ? new CategorizedIntermediateBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial()) + : new CategorizeRawBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial()); + } + /** * Creates a specialized hash table that maps a {@link Block} of the given input element type to ids. */ diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java new file mode 100644 index 0000000000000..bf633e0454384 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java @@ -0,0 +1,137 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; +import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; + +/** + * BlockHash implementation for {@code Categorize} grouping function. + *
<p>
+ * This implementation expects rows, and can't deserialize intermediate states coming from other nodes. + *
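+ *     The category ids assigned here are only meaningful locally; per the PR description,
+ *     the per-node categorizer states are merged on the coordinator before ids are comparable.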
</p>
+ */ +public class CategorizeRawBlockHash extends AbstractCategorizeBlockHash { + private final CategorizeEvaluator evaluator; + + CategorizeRawBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial) { + super(blockFactory, channel, outputPartial); + CategorizationAnalyzer analyzer = new CategorizationAnalyzer( + // TODO: should be the same analyzer as used in Production + new CustomAnalyzer( + TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), + new CharFilterFactory[0], + new TokenFilterFactory[0] + ), + true + ); + this.evaluator = new CategorizeEvaluator(analyzer, categorizer, blockFactory); + } + + @Override + public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { + try (IntBlock result = (IntBlock) evaluator.eval(page.getBlock(channel()))) { + addInput.add(0, result); + } + } + + @Override + public void close() { + evaluator.close(); + } + + /** + * Similar implementation to an Evaluator. + */ + public static final class CategorizeEvaluator implements Releasable { + private final CategorizationAnalyzer analyzer; + + private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; + + private final BlockFactory blockFactory; + + public CategorizeEvaluator( + CategorizationAnalyzer analyzer, + TokenListCategorizer.CloseableTokenListCategorizer categorizer, + BlockFactory blockFactory + ) { + this.analyzer = analyzer; + this.categorizer = categorizer; + this.blockFactory = blockFactory; + } + + public Block eval(BytesRefBlock vBlock) { + BytesRefVector vVector = vBlock.asVector(); + if (vVector == null) { + return eval(vBlock.getPositionCount(), vBlock); + } + IntVector vector = eval(vBlock.getPositionCount(), vVector); + return vector.asBlock(); + } + + public IntBlock eval(int positionCount, BytesRefBlock vBlock) { + try (IntBlock.Builder result = blockFactory.newIntBlockBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + if (vBlock.isNull(p)) { + result.appendNull(); + continue; + } + int first = vBlock.getFirstValueIndex(p); + int count = vBlock.getValueCount(p); + if (count == 1) { + result.appendInt(process(vBlock.getBytesRef(first, vScratch))); + continue; + } + int end = first + count; + result.beginPositionEntry(); + for (int i = first; i < end; i++) { + result.appendInt(process(vBlock.getBytesRef(i, vScratch))); + } + result.endPositionEntry(); + } + return result.build(); + } + } + + public IntVector eval(int positionCount, BytesRefVector vVector) { + try (IntVector.FixedBuilder result = blockFactory.newIntVectorFixedBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + result.appendInt(p, process(vVector.getBytesRef(p, vScratch))); + } + return result.build(); + } + } + + private int process(BytesRef v) { + return categorizer.computeCategory(v.utf8ToString(), analyzer).getId(); + } + + @Override + public void close() { + Releasables.closeExpectNoException(analyzer, categorizer); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java new file mode 100644 index 0000000000000..1bca34a70e5fa --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java @@ -0,0 +1,77 @@ +/* + * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.xpack.ml.aggs.categorization.SerializableTokenListCategory; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * BlockHash implementation for {@code Categorize} grouping function. + *
+ * This implementation expects a single intermediate state in a block, as generated by {@link AbstractCategorizeBlockHash}.
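+ *
+ * A minimal sketch of the merging side (illustrative only; {@code blockFactory},
+ * {@code pageWithSerializedState} and {@code addInput} are assumed to exist in the caller):
+ * <pre>{@code
+ * // the state is a vInt count followed by that many serialized categories; each old
+ * // id (0..count-1) is remapped to the id assigned by the merged categorizer
+ * BlockHash intermediate = new CategorizedIntermediateBlockHash(0, blockFactory, false);
+ * intermediate.add(pageWithSerializedState, addInput); // addInput receives the new ids
+ * }</pre>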
+ */ +public class CategorizedIntermediateBlockHash extends AbstractCategorizeBlockHash { + + CategorizedIntermediateBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial) { + super(blockFactory, channel, outputPartial); + } + + @Override + public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { + if (page.getPositionCount() == 0) { + // No categories + return; + } + BytesRefBlock categorizerState = page.getBlock(channel()); + Map idMap = readIntermediate(categorizerState.getBytesRef(0, new BytesRef())); + try (IntBlock.Builder newIdsBuilder = blockFactory.newIntBlockBuilder(idMap.size())) { + for (int i = 0; i < idMap.size(); i++) { + newIdsBuilder.appendInt(idMap.get(i)); + } + try (IntBlock newIds = newIdsBuilder.build()) { + addInput.add(0, newIds); + } + } + } + + /** + * Read intermediate state from a block. + * + * @return a map from the old category id to the new one. The old ids go from 0 to {@code size - 1}. + */ + private Map readIntermediate(BytesRef bytes) { + Map idMap = new HashMap<>(); + try (StreamInput in = new BytesArray(bytes).streamInput()) { + int count = in.readVInt(); + for (int oldCategoryId = 0; oldCategoryId < count; oldCategoryId++) { + int newCategoryId = categorizer.mergeWireCategory(new SerializableTokenListCategory(in)).getId(); + idMap.put(oldCategoryId, newCategoryId); + } + return idMap; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void close() { + categorizer.close(); + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java index 03a4ca2b0ad5e..a69e8ca767014 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java @@ -14,6 +14,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.compute.Describable; +import org.elasticsearch.compute.aggregation.AggregatorMode; import org.elasticsearch.compute.aggregation.GroupingAggregator; import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; import org.elasticsearch.compute.aggregation.blockhash.BlockHash; @@ -39,11 +40,19 @@ public class HashAggregationOperator implements Operator { public record HashAggregationOperatorFactory( List groups, + AggregatorMode aggregatorMode, List aggregators, int maxPageSize ) implements OperatorFactory { @Override public Operator get(DriverContext driverContext) { + if (groups.stream().anyMatch(BlockHash.GroupSpec::isCategorize)) { + return new HashAggregationOperator( + aggregators, + () -> BlockHash.buildCategorizeBlockHash(groups, aggregatorMode, driverContext.blockFactory()), + driverContext + ); + } return new HashAggregationOperator( aggregators, () -> BlockHash.build(groups, driverContext.blockFactory(), maxPageSize, false), diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java index cb190dfffafb9..1e97bdf5a2e79 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java @@ -105,6 +105,7 @@ private Operator.OperatorFactory simpleWithMode( } return new HashAggregationOperator.HashAggregationOperatorFactory( List.of(new BlockHash.GroupSpec(0, ElementType.LONG)), + mode, List.of(supplier.groupingAggregatorFactory(mode)), randomPageSize() ); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTestCase.java new file mode 100644 index 0000000000000..fa93c0aa1c375 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTestCase.java @@ -0,0 +1,34 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.compute.data.MockBlockFactory; +import org.elasticsearch.indices.breaker.CircuitBreakerService; +import org.elasticsearch.test.ESTestCase; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public abstract class BlockHashTestCase extends ESTestCase { + + final CircuitBreaker breaker = newLimitedBreaker(ByteSizeValue.ofGb(1)); + final BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, mockBreakerService(breaker)); + final MockBlockFactory blockFactory = new MockBlockFactory(breaker, bigArrays); + + // A breaker service that always returns the given breaker for getBreaker(CircuitBreaker.REQUEST) + private static CircuitBreakerService mockBreakerService(CircuitBreaker breaker) { + CircuitBreakerService breakerService = mock(CircuitBreakerService.class); + when(breakerService.getBreaker(CircuitBreaker.REQUEST)).thenReturn(breaker); + return breakerService; + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTests.java index 088e791348840..ede2d68ca2367 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/BlockHashTests.java @@ -11,11 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.MockBigArrays; -import org.elasticsearch.common.util.PageCacheRecycler; import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BooleanBlock; @@ -26,7 +22,6 @@ import org.elasticsearch.compute.data.IntBlock; import 
org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.LongBlock; -import org.elasticsearch.compute.data.MockBlockFactory; import org.elasticsearch.compute.data.OrdinalBytesRefBlock; import org.elasticsearch.compute.data.OrdinalBytesRefVector; import org.elasticsearch.compute.data.Page; @@ -34,8 +29,6 @@ import org.elasticsearch.core.Releasable; import org.elasticsearch.core.ReleasableIterator; import org.elasticsearch.core.Releasables; -import org.elasticsearch.indices.breaker.CircuitBreakerService; -import org.elasticsearch.test.ESTestCase; import org.junit.After; import java.util.ArrayList; @@ -54,14 +47,8 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.startsWith; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -public class BlockHashTests extends ESTestCase { - - final CircuitBreaker breaker = new MockBigArrays.LimitedBreaker("esql-test-breaker", ByteSizeValue.ofGb(1)); - final BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, mockBreakerService(breaker)); - final MockBlockFactory blockFactory = new MockBlockFactory(breaker, bigArrays); +public class BlockHashTests extends BlockHashTestCase { @ParametersFactory public static List params() { @@ -1534,13 +1521,6 @@ private void assertKeys(Block[] actualKeys, Object[][] expectedKeys) { } } - // A breaker service that always returns the given breaker for getBreaker(CircuitBreaker.REQUEST) - static CircuitBreakerService mockBreakerService(CircuitBreaker breaker) { - CircuitBreakerService breakerService = mock(CircuitBreakerService.class); - when(breakerService.getBreaker(CircuitBreaker.REQUEST)).thenReturn(breaker); - return breakerService; - } - IntVector intRange(int startInclusive, int endExclusive) { return IntVector.range(startInclusive, endExclusive, TestBlockFactory.getNonBreakingInstance()); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java new file mode 100644 index 0000000000000..de8a2a44266fe --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java @@ -0,0 +1,406 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.common.util.PageCacheRecycler; +import org.elasticsearch.compute.aggregation.AggregatorMode; +import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; +import org.elasticsearch.compute.aggregation.MaxLongAggregatorFunctionSupplier; +import org.elasticsearch.compute.aggregation.SumLongAggregatorFunctionSupplier; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.CannedSourceOperator; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.HashAggregationOperator; +import org.elasticsearch.compute.operator.LocalSourceOperator; +import org.elasticsearch.compute.operator.PageConsumerOperator; +import org.elasticsearch.core.Releasables; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static org.elasticsearch.compute.operator.OperatorTestCase.runDriver; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; + +public class CategorizeBlockHashTests extends BlockHashTestCase { + + public void testCategorizeRaw() { + final Page page; + final int positions = 7; + try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(positions)) { + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.1")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Disconnected")); + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.2")); + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.3")); + page = new Page(builder.build()); + } + + try (BlockHash hash = new CategorizeRawBlockHash(0, blockFactory, true)) { + hash.add(page, new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + assertEquals(groupIds.getPositionCount(), positions); + + assertEquals(0, groupIds.getInt(0)); + assertEquals(1, groupIds.getInt(1)); + assertEquals(1, groupIds.getInt(2)); + assertEquals(1, groupIds.getInt(3)); + assertEquals(2, groupIds.getInt(4)); + assertEquals(0, groupIds.getInt(5)); + assertEquals(0, groupIds.getInt(6)); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + add(positionOffset, groupIds.asBlock()); + } + + @Override + public void close() { + fail("hashes should not close AddInput"); + } + }); + } finally { + page.releaseBlocks(); + } + + // TODO: 
randomize and try multiple pages. + // TODO: assert the state of the BlockHash after adding pages. Including the categorizer state. + // TODO: also test the lookup method and other stuff. + } + + public void testCategorizeIntermediate() { + Page page1; + int positions1 = 7; + try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(positions1)) { + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.1")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.2")); + builder.appendBytesRef(new BytesRef("Connection error")); + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.3")); + builder.appendBytesRef(new BytesRef("Connected to 10.1.0.4")); + page1 = new Page(builder.build()); + } + Page page2; + int positions2 = 5; + try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(positions2)) { + builder.appendBytesRef(new BytesRef("Disconnected")); + builder.appendBytesRef(new BytesRef("Connected to 10.2.0.1")); + builder.appendBytesRef(new BytesRef("Disconnected")); + builder.appendBytesRef(new BytesRef("Connected to 10.3.0.2")); + builder.appendBytesRef(new BytesRef("System shutdown")); + page2 = new Page(builder.build()); + } + + Page intermediatePage1, intermediatePage2; + + // Fill intermediatePages with the intermediate state from the raw hashes + try ( + BlockHash rawHash1 = new CategorizeRawBlockHash(0, blockFactory, true); + BlockHash rawHash2 = new CategorizeRawBlockHash(0, blockFactory, true) + ) { + rawHash1.add(page1, new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + assertEquals(groupIds.getPositionCount(), positions1); + assertEquals(0, groupIds.getInt(0)); + assertEquals(1, groupIds.getInt(1)); + assertEquals(1, groupIds.getInt(2)); + assertEquals(0, groupIds.getInt(3)); + assertEquals(1, groupIds.getInt(4)); + assertEquals(0, groupIds.getInt(5)); + assertEquals(0, groupIds.getInt(6)); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + add(positionOffset, groupIds.asBlock()); + } + + @Override + public void close() { + fail("hashes should not close AddInput"); + } + }); + intermediatePage1 = new Page(rawHash1.getKeys()[0]); + + rawHash2.add(page2, new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + assertEquals(groupIds.getPositionCount(), positions2); + assertEquals(0, groupIds.getInt(0)); + assertEquals(1, groupIds.getInt(1)); + assertEquals(0, groupIds.getInt(2)); + assertEquals(1, groupIds.getInt(3)); + assertEquals(2, groupIds.getInt(4)); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + add(positionOffset, groupIds.asBlock()); + } + + @Override + public void close() { + fail("hashes should not close AddInput"); + } + }); + intermediatePage2 = new Page(rawHash2.getKeys()[0]); + } finally { + page1.releaseBlocks(); + page2.releaseBlocks(); + } + + try (BlockHash intermediateHash = new CategorizedIntermediateBlockHash(0, blockFactory, true)) { + intermediateHash.add(intermediatePage1, new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + Set values = IntStream.range(0, groupIds.getPositionCount()) + .map(groupIds::getInt) + .boxed() + .collect(Collectors.toSet()); + assertEquals(values, Set.of(0, 1)); + } + + @Override + public void add(int positionOffset, 
IntVector groupIds) { + add(positionOffset, groupIds.asBlock()); + } + + @Override + public void close() { + fail("hashes should not close AddInput"); + } + }); + + intermediateHash.add(intermediatePage2, new GroupingAggregatorFunction.AddInput() { + @Override + public void add(int positionOffset, IntBlock groupIds) { + Set values = IntStream.range(0, groupIds.getPositionCount()) + .map(groupIds::getInt) + .boxed() + .collect(Collectors.toSet()); + // The category IDs {0, 1, 2} should map to groups {0, 2, 3}, because + // 0 matches an existing category (Connected to ...), and the others are new. + assertEquals(values, Set.of(0, 2, 3)); + } + + @Override + public void add(int positionOffset, IntVector groupIds) { + add(positionOffset, groupIds.asBlock()); + } + + @Override + public void close() { + fail("hashes should not close AddInput"); + } + }); + } finally { + intermediatePage1.releaseBlocks(); + intermediatePage2.releaseBlocks(); + } + } + + public void testCategorize_withDriver() { + BigArrays bigArrays = new MockBigArrays(PageCacheRecycler.NON_RECYCLING_INSTANCE, ByteSizeValue.ofMb(256)).withCircuitBreaking(); + CircuitBreaker breaker = bigArrays.breakerService().getBreaker(CircuitBreaker.REQUEST); + DriverContext driverContext = new DriverContext(bigArrays, new BlockFactory(breaker, bigArrays)); + + LocalSourceOperator.BlockSupplier input1 = () -> { + try ( + BytesRefVector.Builder textsBuilder = driverContext.blockFactory().newBytesRefVectorBuilder(10); + LongVector.Builder countsBuilder = driverContext.blockFactory().newLongVectorBuilder(10) + ) { + textsBuilder.appendBytesRef(new BytesRef("a")); + textsBuilder.appendBytesRef(new BytesRef("b")); + textsBuilder.appendBytesRef(new BytesRef("words words words goodbye jan")); + textsBuilder.appendBytesRef(new BytesRef("words words words goodbye nik")); + textsBuilder.appendBytesRef(new BytesRef("words words words goodbye tom")); + textsBuilder.appendBytesRef(new BytesRef("words words words hello jan")); + textsBuilder.appendBytesRef(new BytesRef("c")); + textsBuilder.appendBytesRef(new BytesRef("d")); + countsBuilder.appendLong(1); + countsBuilder.appendLong(2); + countsBuilder.appendLong(800); + countsBuilder.appendLong(80); + countsBuilder.appendLong(8000); + countsBuilder.appendLong(900); + countsBuilder.appendLong(30); + countsBuilder.appendLong(4); + return new Block[] { textsBuilder.build().asBlock(), countsBuilder.build().asBlock() }; + } + }; + LocalSourceOperator.BlockSupplier input2 = () -> { + try ( + BytesRefVector.Builder textsBuilder = driverContext.blockFactory().newBytesRefVectorBuilder(10); + LongVector.Builder countsBuilder = driverContext.blockFactory().newLongVectorBuilder(10) + ) { + textsBuilder.appendBytesRef(new BytesRef("words words words hello nik")); + textsBuilder.appendBytesRef(new BytesRef("words words words hello nik")); + textsBuilder.appendBytesRef(new BytesRef("c")); + textsBuilder.appendBytesRef(new BytesRef("words words words goodbye chris")); + textsBuilder.appendBytesRef(new BytesRef("d")); + textsBuilder.appendBytesRef(new BytesRef("e")); + countsBuilder.appendLong(9); + countsBuilder.appendLong(90); + countsBuilder.appendLong(3); + countsBuilder.appendLong(8); + countsBuilder.appendLong(40); + countsBuilder.appendLong(5); + return new Block[] { textsBuilder.build().asBlock(), countsBuilder.build().asBlock() }; + } + }; + + List intermediateOutput = new ArrayList<>(); + + Driver driver = new Driver( + driverContext, + new LocalSourceOperator(input1), + List.of( + new 
HashAggregationOperator.HashAggregationOperatorFactory( + List.of(makeGroupSpec()), + AggregatorMode.INITIAL, + List.of( + new SumLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL), + new MaxLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL) + ), + 16 * 1024 + ).get(driverContext) + ), + new PageConsumerOperator(intermediateOutput::add), + () -> {} + ); + runDriver(driver); + + driver = new Driver( + driverContext, + new LocalSourceOperator(input2), + List.of( + new HashAggregationOperator.HashAggregationOperatorFactory( + List.of(makeGroupSpec()), + AggregatorMode.INITIAL, + List.of( + new SumLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL), + new MaxLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL) + ), + 16 * 1024 + ).get(driverContext) + ), + new PageConsumerOperator(intermediateOutput::add), + () -> {} + ); + runDriver(driver); + + List finalOutput = new ArrayList<>(); + + driver = new Driver( + driverContext, + new CannedSourceOperator(intermediateOutput.iterator()), + List.of( + new HashAggregationOperator.HashAggregationOperatorFactory( + List.of(makeGroupSpec()), + AggregatorMode.FINAL, + List.of( + new SumLongAggregatorFunctionSupplier(List.of(1, 2)).groupingAggregatorFactory(AggregatorMode.FINAL), + new MaxLongAggregatorFunctionSupplier(List.of(3, 4)).groupingAggregatorFactory(AggregatorMode.FINAL) + ), + 16 * 1024 + ).get(driverContext) + ), + new PageConsumerOperator(finalOutput::add), + () -> {} + ); + runDriver(driver); + + assertThat(finalOutput, hasSize(1)); + assertThat(finalOutput.get(0).getBlockCount(), equalTo(3)); + BytesRefBlock outputTexts = finalOutput.get(0).getBlock(0); + LongBlock outputSums = finalOutput.get(0).getBlock(1); + LongBlock outputMaxs = finalOutput.get(0).getBlock(2); + assertThat(outputSums.getPositionCount(), equalTo(outputTexts.getPositionCount())); + assertThat(outputMaxs.getPositionCount(), equalTo(outputTexts.getPositionCount())); + Map sums = new HashMap<>(); + Map maxs = new HashMap<>(); + for (int i = 0; i < outputTexts.getPositionCount(); i++) { + sums.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputSums.getLong(i)); + maxs.put(outputTexts.getBytesRef(i, new BytesRef()).utf8ToString(), outputMaxs.getLong(i)); + } + assertThat( + sums, + equalTo( + Map.of( + ".*?a.*?", + 1L, + ".*?b.*?", + 2L, + ".*?c.*?", + 33L, + ".*?d.*?", + 44L, + ".*?e.*?", + 5L, + ".*?words.+?words.+?words.+?goodbye.*?", + 8888L, + ".*?words.+?words.+?words.+?hello.*?", + 999L + ) + ) + ); + assertThat( + maxs, + equalTo( + Map.of( + ".*?a.*?", + 1L, + ".*?b.*?", + 2L, + ".*?c.*?", + 30L, + ".*?d.*?", + 40L, + ".*?e.*?", + 5L, + ".*?words.+?words.+?words.+?goodbye.*?", + 8000L, + ".*?words.+?words.+?words.+?hello.*?", + 900L + ) + ) + ); + Releasables.close(() -> Iterators.map(finalOutput.iterator(), (Page p) -> p::releaseBlocks)); + } + + private BlockHash.GroupSpec makeGroupSpec() { + return new BlockHash.GroupSpec(0, ElementType.BYTES_REF, true); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java index f2fa94c1feb08..b2f4ad594936e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java +++ 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java @@ -54,6 +54,7 @@ protected Operator.OperatorFactory simpleWithMode(AggregatorMode mode) { return new HashAggregationOperator.HashAggregationOperatorFactory( List.of(new BlockHash.GroupSpec(0, ElementType.LONG)), + mode, List.of( new SumLongAggregatorFunctionSupplier(sumChannels).groupingAggregatorFactory(mode), new MaxLongAggregatorFunctionSupplier(maxChannels).groupingAggregatorFactory(mode) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index ffbac2829ea4a..9c987a02aca2d 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -61,6 +61,7 @@ public class CsvTestsDataLoader { private static final TestsDataset ALERTS = new TestsDataset("alerts"); private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs"); private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data"); + private static final TestsDataset MV_SAMPLE_DATA = new TestsDataset("mv_sample_data"); private static final TestsDataset SAMPLE_DATA_STR = SAMPLE_DATA.withIndex("sample_data_str") .withTypeMapping(Map.of("client_ip", "keyword")); private static final TestsDataset SAMPLE_DATA_TS_LONG = SAMPLE_DATA.withIndex("sample_data_ts_long") @@ -104,6 +105,7 @@ public class CsvTestsDataLoader { Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP), Map.entry(UL_LOGS.indexName, UL_LOGS), Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA), + Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA), Map.entry(ALERTS.indexName, ALERTS), Map.entry(SAMPLE_DATA_STR.indexName, SAMPLE_DATA_STR), Map.entry(SAMPLE_DATA_TS_LONG.indexName, SAMPLE_DATA_TS_LONG), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index 8e0fcd78f0322..89d9026423204 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,14 +1,524 @@ -categorize -required_capability: categorize +standard aggs +required_capability: categorize_v2 FROM sample_data - | SORT message ASC - | STATS count=COUNT(), values=MV_SORT(VALUES(message)) BY category=CATEGORIZE(message) + | STATS count=COUNT(), + sum=SUM(event_duration), + avg=AVG(event_duration), + count_distinct=COUNT_DISTINCT(event_duration) + BY category=CATEGORIZE(message) + | SORT count DESC, category +; + +count:long | sum:long | avg:double | count_distinct:long | category:keyword + 3 | 7971589 | 2657196.3333333335 | 3 | .*?Connected.+?to.*? + 3 | 14027356 | 4675785.333333333 | 3 | .*?Connection.+?error.*? + 1 | 1232382 | 1232382.0 | 1 | .*?Disconnected.*? +; + +values aggs +required_capability: categorize_v2 + +FROM sample_data + | STATS values=MV_SORT(VALUES(message)), + top=TOP(event_duration, 2, "DESC") + BY category=CATEGORIZE(message) + | SORT category +; + +values:keyword | top:long | category:keyword +[Connected to 10.1.0.1, Connected to 10.1.0.2, Connected to 10.1.0.3] | [3450233, 2764889] | .*?Connected.+?to.*? +[Connection error] | [8268153, 5033755] | .*?Connection.+?error.*? +[Disconnected] | 1232382 | .*?Disconnected.*? 
+; + +mv +required_capability: categorize_v2 + +FROM mv_sample_data + | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | SUM(event_duration):long | category:keyword + 7 | 23231327 | .*?Banana.*? + 3 | 7971589 | .*?Connected.+?to.*? + 3 | 14027356 | .*?Connection.+?error.*? + 1 | 1232382 | .*?Disconnected.*? +; + +row mv +required_capability: categorize_v2 + +ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] + | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | VALUES(str):keyword | category:keyword + 2 | [a, b, c] | .*?connected.+?to.*? + 1 | [a, b, c] | .*?disconnected.*? +; + +with multiple indices +required_capability: categorize_v2 +required_capability: union_types + +FROM sample_data* + | STATS COUNT() BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | category:keyword + 12 | .*?Connected.+?to.*? + 12 | .*?Connection.+?error.*? + 4 | .*?Disconnected.*? +; + +mv with many values +required_capability: categorize_v2 + +FROM employees + | STATS COUNT() BY category=CATEGORIZE(job_positions) + | SORT category + | LIMIT 5 +; + +COUNT():long | category:keyword + 18 | .*?Accountant.*? + 13 | .*?Architect.*? + 11 | .*?Business.+?Analyst.*? + 13 | .*?Data.+?Scientist.*? + 10 | .*?Head.+?Human.+?Resources.*? +; + +# Throws when calling AbstractCategorizeBlockHash.seenGroupIds() - Requires nulls support? +mv with many values-Ignore +required_capability: categorize_v2 + +FROM employees + | STATS SUM(languages) BY category=CATEGORIZE(job_positions) + | SORT category DESC + | LIMIT 3 +; + +SUM(languages):integer | category:keyword + 43 | .*?Accountant.*? + 46 | .*?Architect.*? + 35 | .*?Business.+?Analyst.*? +; + +mv via eval +required_capability: categorize_v2 + +FROM sample_data + | EVAL message = MV_APPEND(message, "Banana") + | STATS COUNT() BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | category:keyword + 7 | .*?Banana.*? + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? +; + +mv via eval const +required_capability: categorize_v2 + +FROM sample_data + | EVAL message = ["Banana", "Bread"] + | STATS COUNT() BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | category:keyword + 7 | .*?Banana.*? + 7 | .*?Bread.*? +; + +mv via eval const without aliases +required_capability: categorize_v2 + +FROM sample_data + | EVAL message = ["Banana", "Bread"] + | STATS COUNT() BY CATEGORIZE(message) + | SORT `CATEGORIZE(message)` +; + +COUNT():long | CATEGORIZE(message):keyword + 7 | .*?Banana.*? + 7 | .*?Bread.*? +; + +mv const in parameter +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) + | SORT c +; + +COUNT():long | c:keyword + 7 | .*?Banana.*? + 7 | .*?Bread.*? +; + +agg alias shadowing +required_capability: categorize_v2 + +FROM sample_data + | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) + | SORT c +; + +warning:Line 2:9: Field 'c' shadowed by field at line 2:24 + +c:keyword +.*?Banana.*? +.*?Bread.*? +; + +chained aggregations using categorize +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(message) + | STATS COUNT() BY category=CATEGORIZE(category) + | SORT category +; + +COUNT():long | category:keyword + 1 | .*?\.\*\?Connected\.\+\?to\.\*\?.*? + 1 | .*?\.\*\?Connection\.\+\?error\.\*\?.*? + 1 | .*?\.\*\?Disconnected\.\*\?.*? 
+; + +stats without aggs +required_capability: categorize_v2 + +FROM sample_data + | STATS BY category=CATEGORIZE(message) + | SORT category +; + +category:keyword +.*?Connected.+?to.*? +.*?Connection.+?error.*? +.*?Disconnected.*? +; + +text field +required_capability: categorize_v2 + +FROM hosts + | STATS COUNT() BY category=CATEGORIZE(host_group) + | SORT category +; + +COUNT():long | category:keyword + 2 | .*?DB.+?servers.*? + 2 | .*?Gateway.+?instances.*? + 5 | .*?Kubernetes.+?cluster.*? +; + +on TO_UPPER +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) + | SORT category +; + +COUNT():long | category:keyword + 3 | .*?CONNECTED.+?TO.*? + 3 | .*?CONNECTION.+?ERROR.*? + 1 | .*?DISCONNECTED.*? +; + +on CONCAT +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) + | SORT category +; + +COUNT():long | category:keyword + 3 | .*?Connected.+?to.+?banana.*? + 3 | .*?Connection.+?error.+?banana.*? + 1 | .*?Disconnected.+?banana.*? +; + +on CONCAT with unicode +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) + | SORT category +; + +COUNT():long | category:keyword + 3 | .*?Connected.+?to.+?👍🏽😊.*? + 3 | .*?Connection.+?error.+?👍🏽😊.*? + 1 | .*?Disconnected.+?👍🏽😊.*? +; + +on REVERSE(CONCAT()) +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) + | SORT category +; + +COUNT():long | category:keyword + 1 | .*?😊👍🏽.+?detcennocsiD.*? + 3 | .*?😊👍🏽.+?ot.+?detcennoC.*? + 3 | .*?😊👍🏽.+?rorre.+?noitcennoC.*? +; + +and then TO_LOWER +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(message) + | EVAL category=TO_LOWER(category) + | SORT category +; + +COUNT():long | category:keyword + 3 | .*?connected.+?to.*? + 3 | .*?connection.+?error.*? + 1 | .*?disconnected.*? +; + +# Throws NPE - Requires nulls support +on const empty string-Ignore +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE("") + | SORT category +; + +COUNT():long | category:keyword + 7 | .*?.*? +; + +# Throws NPE - Requires nulls support +on const empty string from eval-Ignore +required_capability: categorize_v2 + +FROM sample_data + | EVAL x = "" + | STATS COUNT() BY category=CATEGORIZE(x) + | SORT category +; + +COUNT():long | category:keyword + 7 | .*?.*? 
+; + +# Doesn't give the correct results - Requires nulls support +on null-Ignore +required_capability: categorize_v2 + +FROM sample_data + | EVAL x = null + | STATS COUNT() BY category=CATEGORIZE(x) + | SORT category +; + +COUNT():long | category:keyword + 7 | null +; + +# Doesn't give the correct results - Requires nulls support +on null string-Ignore +required_capability: categorize_v2 + +FROM sample_data + | EVAL x = null::string + | STATS COUNT() BY category=CATEGORIZE(x) + | SORT category +; + +COUNT():long | category:keyword + 7 | null +; + +filtering out all data +required_capability: categorize_v2 + +FROM sample_data + | WHERE @timestamp < "2023-10-23T00:00:00Z" + | STATS COUNT() BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | category:keyword +; + +filtering out all data with constant +required_capability: categorize_v2 + +FROM sample_data + | STATS COUNT() BY category=CATEGORIZE(message) + | WHERE false +; + +COUNT():long | category:keyword +; + +drop output columns +required_capability: categorize_v2 + +FROM sample_data + | STATS count=COUNT() BY category=CATEGORIZE(message) + | EVAL x=1 + | DROP count, category +; + +x:integer +1 +1 +1 +; + +category value processing +required_capability: categorize_v2 + +ROW message = ["connected to a", "connected to b", "disconnected"] + | STATS COUNT() BY category=CATEGORIZE(message) + | EVAL category = TO_UPPER(category) | SORT category ; -count:long | values:keyword | category:integer -3 | [Connected to 10.1.0.1, Connected to 10.1.0.2, Connected to 10.1.0.3] | 0 -3 | [Connection error] | 1 -1 | [Disconnected] | 2 +COUNT():long | category:keyword + 2 | .*?CONNECTED.+?TO.*? + 1 | .*?DISCONNECTED.*? +; + +row aliases +required_capability: categorize_v2 + +ROW message = "connected to a" + | EVAL x = message + | STATS COUNT() BY category=CATEGORIZE(x) + | EVAL y = category + | SORT y +; + +COUNT():long | category:keyword | y:keyword + 1 | .*?connected.+?to.+?a.*? | .*?connected.+?to.+?a.*? +; + +from aliases +required_capability: categorize_v2 + +FROM sample_data + | EVAL x = message + | STATS COUNT() BY category=CATEGORIZE(x) + | EVAL y = category + | SORT y +; + +COUNT():long | category:keyword | y:keyword + 3 | .*?Connected.+?to.*? | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? | .*?Disconnected.*? +; + +row aliases with keep +required_capability: categorize_v2 + +ROW message = "connected to a" + | EVAL x = message + | KEEP x + | STATS COUNT() BY category=CATEGORIZE(x) + | EVAL y = category + | KEEP `COUNT()`, y + | SORT y +; + +COUNT():long | y:keyword + 1 | .*?connected.+?to.+?a.*? +; + +from aliases with keep +required_capability: categorize_v2 + +FROM sample_data + | EVAL x = message + | KEEP x + | STATS COUNT() BY category=CATEGORIZE(x) + | EVAL y = category + | KEEP `COUNT()`, y + | SORT y +; + +COUNT():long | y:keyword + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? +; + +row rename +required_capability: categorize_v2 + +ROW message = "connected to a" + | RENAME message as x + | STATS COUNT() BY category=CATEGORIZE(x) + | RENAME category as y + | SORT y +; + +COUNT():long | y:keyword + 1 | .*?connected.+?to.+?a.*? +; + +from rename +required_capability: categorize_v2 + +FROM sample_data + | RENAME message as x + | STATS COUNT() BY category=CATEGORIZE(x) + | RENAME category as y + | SORT y +; + +COUNT():long | y:keyword + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? 
+; + +row drop +required_capability: categorize_v2 + +ROW message = "connected to a" + | STATS c = COUNT() BY category=CATEGORIZE(message) + | DROP category + | SORT c +; + +c:long +1 +; + +from drop +required_capability: categorize_v2 + +FROM sample_data + | STATS c = COUNT() BY category=CATEGORIZE(message) + | DROP category + | SORT c +; + +c:long +1 +3 +3 ; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-mv_sample_data.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-mv_sample_data.json new file mode 100644 index 0000000000000..838a8ba09b45a --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-mv_sample_data.json @@ -0,0 +1,16 @@ +{ + "properties": { + "@timestamp": { + "type": "date" + }, + "client_ip": { + "type": "ip" + }, + "event_duration": { + "type": "long" + }, + "message": { + "type": "keyword" + } + } +} \ No newline at end of file diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_sample_data.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_sample_data.csv new file mode 100644 index 0000000000000..c02a4a7a5845f --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mv_sample_data.csv @@ -0,0 +1,8 @@ +@timestamp:date ,client_ip:ip,event_duration:long,message:keyword +2023-10-23T13:55:01.543Z,172.21.3.15 ,1756467,[Connected to 10.1.0.1, Banana] +2023-10-23T13:53:55.832Z,172.21.3.15 ,5033755,[Connection error, Banana] +2023-10-23T13:52:55.015Z,172.21.3.15 ,8268153,[Connection error, Banana] +2023-10-23T13:51:54.732Z,172.21.3.15 , 725448,[Connection error, Banana] +2023-10-23T13:33:34.937Z,172.21.0.5 ,1232382,[Disconnected, Banana] +2023-10-23T12:27:28.948Z,172.21.2.113,2764889,[Connected to 10.1.0.2, Banana] +2023-10-23T12:15:03.360Z,172.21.2.162,3450233,[Connected to 10.1.0.3, Banana] diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java deleted file mode 100644 index c6349907f9b4b..0000000000000 --- a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeEvaluator.java +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License -// 2.0; you may not use this file except in compliance with the Elastic License -// 2.0. 
-package org.elasticsearch.xpack.esql.expression.function.grouping; - -import java.lang.IllegalArgumentException; -import java.lang.Override; -import java.lang.String; -import java.util.function.Function; -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.IntVector; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.compute.operator.DriverContext; -import org.elasticsearch.compute.operator.EvalOperator; -import org.elasticsearch.compute.operator.Warnings; -import org.elasticsearch.core.Releasables; -import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; -import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; - -/** - * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Categorize}. - * This class is generated. Do not edit it. - */ -public final class CategorizeEvaluator implements EvalOperator.ExpressionEvaluator { - private final Source source; - - private final EvalOperator.ExpressionEvaluator v; - - private final CategorizationAnalyzer analyzer; - - private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; - - private final DriverContext driverContext; - - private Warnings warnings; - - public CategorizeEvaluator(Source source, EvalOperator.ExpressionEvaluator v, - CategorizationAnalyzer analyzer, - TokenListCategorizer.CloseableTokenListCategorizer categorizer, DriverContext driverContext) { - this.source = source; - this.v = v; - this.analyzer = analyzer; - this.categorizer = categorizer; - this.driverContext = driverContext; - } - - @Override - public Block eval(Page page) { - try (BytesRefBlock vBlock = (BytesRefBlock) v.eval(page)) { - BytesRefVector vVector = vBlock.asVector(); - if (vVector == null) { - return eval(page.getPositionCount(), vBlock); - } - return eval(page.getPositionCount(), vVector).asBlock(); - } - } - - public IntBlock eval(int positionCount, BytesRefBlock vBlock) { - try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) { - BytesRef vScratch = new BytesRef(); - position: for (int p = 0; p < positionCount; p++) { - if (vBlock.isNull(p)) { - result.appendNull(); - continue position; - } - if (vBlock.getValueCount(p) != 1) { - if (vBlock.getValueCount(p) > 1) { - warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); - } - result.appendNull(); - continue position; - } - result.appendInt(Categorize.process(vBlock.getBytesRef(vBlock.getFirstValueIndex(p), vScratch), this.analyzer, this.categorizer)); - } - return result.build(); - } - } - - public IntVector eval(int positionCount, BytesRefVector vVector) { - try(IntVector.FixedBuilder result = driverContext.blockFactory().newIntVectorFixedBuilder(positionCount)) { - BytesRef vScratch = new BytesRef(); - position: for (int p = 0; p < positionCount; p++) { - result.appendInt(p, Categorize.process(vVector.getBytesRef(p, vScratch), this.analyzer, this.categorizer)); - } - return result.build(); - } - } - - @Override - public String toString() { - return "CategorizeEvaluator[" + "v=" + v + "]"; - } - - @Override - public void close() { - Releasables.closeExpectNoException(v, analyzer, categorizer); - } - - private Warnings warnings() { - if 
(warnings == null) { - this.warnings = Warnings.createWarnings( - driverContext.warningsMode(), - source.source().getLineNumber(), - source.source().getColumnNumber(), - source.text() - ); - } - return warnings; - } - - static class Factory implements EvalOperator.ExpressionEvaluator.Factory { - private final Source source; - - private final EvalOperator.ExpressionEvaluator.Factory v; - - private final Function analyzer; - - private final Function categorizer; - - public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory v, - Function analyzer, - Function categorizer) { - this.source = source; - this.v = v; - this.analyzer = analyzer; - this.categorizer = categorizer; - } - - @Override - public CategorizeEvaluator get(DriverContext context) { - return new CategorizeEvaluator(source, v.get(context), analyzer.apply(context), categorizer.apply(context), context); - } - - @Override - public String toString() { - return "CategorizeEvaluator[" + "v=" + v + "]"; - } - } -} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 3eaeceaa86564..58748781d1778 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -402,8 +402,11 @@ public enum Cap { /** * Supported the text categorization function "CATEGORIZE". + *
+ * This capability was initially named `CATEGORIZE`, and got renamed after the function started correctly returning keywords.
*/ - CATEGORIZE(Build.current().isSnapshot()), + CATEGORIZE_V2(Build.current().isSnapshot()), /** * QSTR function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 75a9883a77102..31b603ecef889 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -7,20 +7,10 @@ package org.elasticsearch.xpack.esql.expression.function.grouping; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.util.BytesRefHash; -import org.elasticsearch.compute.ann.Evaluator; -import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.compute.operator.EvalOperator.ExpressionEvaluator; -import org.elasticsearch.index.analysis.CharFilterFactory; -import org.elasticsearch.index.analysis.CustomAnalyzer; -import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.xpack.esql.capabilities.Validatable; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; @@ -29,10 +19,6 @@ import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; -import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash; -import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary; -import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; -import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; import java.io.IOException; import java.util.List; @@ -42,16 +28,16 @@ /** * Categorizes text messages. - * - * This implementation is incomplete and comes with the following caveats: - * - it only works correctly on a single node. - * - when running on multiple nodes, category IDs of the different nodes are - * aggregated, even though the same ID can correspond to a totally different - * category - * - the output consists of category IDs, which should be replaced by category - * regexes or keys - * - * TODO(jan, nik): fix this + *
+ * This function has no evaluators, as it works like an aggregation (it accumulates values, stores intermediate states, etc.).
+ *
+ * For the implementation, see:
+ *   - {@link org.elasticsearch.compute.aggregation.blockhash.CategorizedIntermediateBlockHash}
+ *   - {@link org.elasticsearch.compute.aggregation.blockhash.CategorizeRawBlockHash}
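+ *
+ * Example usage, adapted from the spec tests added in this change:
+ * <pre>{@code
+ * FROM sample_data
+ * | STATS count = COUNT() BY category = CATEGORIZE(message)
+ * | SORT category
+ * }</pre>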
*/ public class Categorize extends GroupingFunction implements Validatable { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( @@ -62,7 +48,7 @@ public class Categorize extends GroupingFunction implements Validatable { private final Expression field; - @FunctionInfo(returnType = { "integer" }, description = "Categorizes text messages.") + @FunctionInfo(returnType = "keyword", description = "Categorizes text messages.") public Categorize( Source source, @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field @@ -88,43 +74,13 @@ public String getWriteableName() { @Override public boolean foldable() { - return field.foldable(); - } - - @Evaluator - static int process( - BytesRef v, - @Fixed(includeInToString = false, build = true) CategorizationAnalyzer analyzer, - @Fixed(includeInToString = false, build = true) TokenListCategorizer.CloseableTokenListCategorizer categorizer - ) { - String s = v.utf8ToString(); - try (TokenStream ts = analyzer.tokenStream("text", s)) { - return categorizer.computeCategory(ts, s.length(), 1).getId(); - } catch (IOException e) { - throw new RuntimeException(e); - } + // Categorize cannot be currently folded + return false; } @Override public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { - return new CategorizeEvaluator.Factory( - source(), - toEvaluator.apply(field), - context -> new CategorizationAnalyzer( - // TODO(jan): get the correct analyzer in here, see CategorizationAnalyzerConfig::buildStandardCategorizationAnalyzer - new CustomAnalyzer( - TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), - new CharFilterFactory[0], - new TokenFilterFactory[0] - ), - true - ), - context -> new TokenListCategorizer.CloseableTokenListCategorizer( - new CategorizationBytesRefHash(new BytesRefHash(2048, context.bigArrays())), - CategorizationPartOfSpeechDictionary.getInstance(), - 0.70f - ) - ); + throw new UnsupportedOperationException("CATEGORIZE is only evaluated during aggregations"); } @Override @@ -134,11 +90,11 @@ protected TypeResolution resolveType() { @Override public DataType dataType() { - return DataType.INTEGER; + return DataType.KEYWORD; } @Override - public Expression replaceChildren(List newChildren) { + public Categorize replaceChildren(List newChildren) { return new Categorize(source(), newChildren.get(0)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index 1c256012baeb0..be7096538fb9a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -15,6 +15,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; @@ -61,12 +62,15 @@ protected LogicalPlan rule(UnaryPlan plan) { if (plan instanceof Aggregate a) { if (child instanceof Project p) { var groupings = 
a.groupings(); - List groupingAttrs = new ArrayList<>(a.groupings().size()); + List groupingAttrs = new ArrayList<>(a.groupings().size()); for (Expression grouping : groupings) { if (grouping instanceof Attribute attribute) { groupingAttrs.add(attribute); + } else if (grouping instanceof Alias as && as.child() instanceof Categorize) { + groupingAttrs.add(as); } else { - // After applying ReplaceAggregateNestedExpressionWithEval, groupings can only contain attributes. + // After applying ReplaceAggregateNestedExpressionWithEval, + // groupings (except Categorize) can only contain attributes. throw new EsqlIllegalArgumentException("Expected an Attribute, got {}", grouping); } } @@ -137,23 +141,33 @@ private static List combineProjections(List combineUpperGroupingsAndLowerProjections( - List upperGroupings, + List upperGroupings, List lowerProjections ) { // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..) - AttributeMap aliases = new AttributeMap<>(); + AttributeMap aliases = new AttributeMap<>(); for (NamedExpression ne : lowerProjections) { - // Projections are just aliases for attributes, so casting is safe. - aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne)); + // record the alias + aliases.put(ne.toAttribute(), Alias.unwrap(ne)); } - // Replace any matching attribute directly with the aliased attribute from the projection. - AttributeSet replaced = new AttributeSet(); - for (Attribute attr : upperGroupings) { - // All substitutions happen before; groupings must be attributes at this point. - replaced.add(aliases.resolve(attr, attr)); + AttributeSet seen = new AttributeSet(); + List replaced = new ArrayList<>(); + for (NamedExpression ne : upperGroupings) { + // Duplicated attributes are ignored. + if (ne instanceof Attribute attribute) { + var newExpression = aliases.resolve(attribute, attribute); + if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) { + // Already seen, skip + continue; + } + replaced.add(newExpression); + } else { + // For grouping functions, this will replace nested properties too + replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a))); + } } - return new ArrayList<>(replaced); + return replaced; } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java index 0f08cd66444a3..638fa1b8db456 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNull.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.In; public class FoldNull extends OptimizerRules.OptimizerExpressionRule { @@ -42,6 +43,7 @@ public Expression rule(Expression e) { } } else if (e instanceof Alias == false && e.nullable() == Nullability.TRUE + && e instanceof Categorize == false && Expressions.anyMatch(e.children(), Expressions::isNull)) { return Literal.of(e, null); } diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java index 173940af19935..985e68252a1f9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java @@ -13,6 +13,7 @@ import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; @@ -46,15 +47,29 @@ protected LogicalPlan rule(Aggregate aggregate) { // start with the groupings since the aggs might duplicate it for (int i = 0, s = newGroupings.size(); i < s; i++) { Expression g = newGroupings.get(i); - // move the alias into an eval and replace it with its attribute + // Move the alias into an eval and replace it with its attribute. + // Exception: Categorize is internal to the aggregation and remains in the groupings. We move its child expression into an eval. if (g instanceof Alias as) { - groupingChanged = true; - var attr = as.toAttribute(); - evals.add(as); - evalNames.put(as.name(), attr); - newGroupings.set(i, attr); - if (as.child() instanceof GroupingFunction gf) { - groupingAttributes.put(gf, attr); + if (as.child() instanceof Categorize cat) { + if (cat.field() instanceof Attribute == false) { + groupingChanged = true; + var fieldAs = new Alias(as.source(), as.name(), cat.field(), null, true); + var fieldAttr = fieldAs.toAttribute(); + evals.add(fieldAs); + evalNames.put(fieldAs.name(), fieldAttr); + Categorize replacement = cat.replaceChildren(List.of(fieldAttr)); + newGroupings.set(i, as.replaceChild(replacement)); + groupingAttributes.put(cat, fieldAttr); + } + } else { + groupingChanged = true; + var attr = as.toAttribute(); + evals.add(as); + evalNames.put(as.name(), attr); + newGroupings.set(i, attr); + if (as.child() instanceof GroupingFunction gf) { + groupingAttributes.put(gf, attr); + } } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index ea9cd76bcb9bc..72573821dfeb8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -12,6 +12,7 @@ import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.TypedAttribute; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.optimizer.rules.physical.ProjectAwayColumns; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import 
org.elasticsearch.xpack.esql.plan.physical.EsQueryExec; @@ -58,11 +59,17 @@ public PhysicalPlan apply(PhysicalPlan plan) { * make sure the fields are loaded for the standard hash aggregator. */ if (p instanceof AggregateExec agg && agg.groupings().size() == 1) { - var leaves = new LinkedList<>(); - // TODO: this seems out of place - agg.aggregates().stream().filter(a -> agg.groupings().contains(a) == false).forEach(a -> leaves.addAll(a.collectLeaves())); - var remove = agg.groupings().stream().filter(g -> leaves.contains(g) == false).toList(); - missing.removeAll(Expressions.references(remove)); + // CATEGORIZE requires the standard hash aggregator as well. + if (agg.groupings().get(0).anyMatch(e -> e instanceof Categorize) == false) { + var leaves = new LinkedList<>(); + // TODO: this seems out of place + agg.aggregates() + .stream() + .filter(a -> agg.groupings().contains(a) == false) + .forEach(a -> leaves.addAll(a.collectLeaves())); + var remove = agg.groupings().stream().filter(g -> leaves.contains(g) == false).toList(); + missing.removeAll(Expressions.references(remove)); + } } // add extractor diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java index 94a9246a56f83..a7418654f6b0e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java @@ -29,6 +29,7 @@ import org.elasticsearch.xpack.esql.evaluator.EvalMapper; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; import org.elasticsearch.xpack.esql.expression.function.aggregate.Count; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.LocalExecutionPlannerContext; @@ -52,6 +53,7 @@ public final PhysicalOperation groupingPhysicalOperation( PhysicalOperation source, LocalExecutionPlannerContext context ) { + // The layout this operation will produce. Layout.Builder layout = new Layout.Builder(); Operator.OperatorFactory operatorFactory = null; AggregatorMode aggregatorMode = aggregateExec.getMode(); @@ -95,12 +97,17 @@ public final PhysicalOperation groupingPhysicalOperation( List aggregatorFactories = new ArrayList<>(); List groupSpecs = new ArrayList<>(aggregateExec.groupings().size()); for (Expression group : aggregateExec.groupings()) { - var groupAttribute = Expressions.attribute(group); - if (groupAttribute == null) { + Attribute groupAttribute = Expressions.attribute(group); + // In case of `... BY groupAttribute = CATEGORIZE(sourceGroupAttribute)` the actual source attribute is different. + Attribute sourceGroupAttribute = (aggregatorMode.isInputPartial() == false + && group instanceof Alias as + && as.child() instanceof Categorize categorize) ? 
Expressions.attribute(categorize.field()) : groupAttribute; + if (sourceGroupAttribute == null) { throw new EsqlIllegalArgumentException("Unexpected non-named expression[{}] as grouping in [{}]", group, aggregateExec); } - Layout.ChannelSet groupAttributeLayout = new Layout.ChannelSet(new HashSet<>(), groupAttribute.dataType()); - groupAttributeLayout.nameIds().add(groupAttribute.id()); + Layout.ChannelSet groupAttributeLayout = new Layout.ChannelSet(new HashSet<>(), sourceGroupAttribute.dataType()); + groupAttributeLayout.nameIds() + .add(group instanceof Alias as && as.child() instanceof Categorize ? groupAttribute.id() : sourceGroupAttribute.id()); /* * Check for aliasing in aggregates which occurs in two cases (due to combining project + stats): @@ -119,7 +126,7 @@ public final PhysicalOperation groupingPhysicalOperation( // check if there's any alias used in grouping - no need for the final reduction since the intermediate data // is in the output form // if the group points to an alias declared in the aggregate, use the alias child as source - else if (aggregatorMode == AggregatorMode.INITIAL || aggregatorMode == AggregatorMode.INTERMEDIATE) { + else if (aggregatorMode.isOutputPartial()) { if (groupAttribute.semanticEquals(a.toAttribute())) { groupAttribute = attr; break; @@ -129,8 +136,8 @@ else if (aggregatorMode == AggregatorMode.INITIAL || aggregatorMode == Aggregato } } layout.append(groupAttributeLayout); - Layout.ChannelAndType groupInput = source.layout.get(groupAttribute.id()); - groupSpecs.add(new GroupSpec(groupInput == null ? null : groupInput.channel(), groupAttribute)); + Layout.ChannelAndType groupInput = source.layout.get(sourceGroupAttribute.id()); + groupSpecs.add(new GroupSpec(groupInput == null ? null : groupInput.channel(), sourceGroupAttribute, group)); } if (aggregatorMode == AggregatorMode.FINAL) { @@ -164,6 +171,7 @@ else if (aggregatorMode == AggregatorMode.INITIAL || aggregatorMode == Aggregato } else { operatorFactory = new HashAggregationOperatorFactory( groupSpecs.stream().map(GroupSpec::toHashGroupSpec).toList(), + aggregatorMode, aggregatorFactories, context.pageSize(aggregateExec.estimatedRowSize()) ); @@ -178,10 +186,14 @@ else if (aggregatorMode == AggregatorMode.INITIAL || aggregatorMode == Aggregato /*** * Creates a standard layout for intermediate aggregations, typically used across exchanges. * Puts the group first, followed by each aggregation. - * - * It's similar to the code above (groupingPhysicalOperation) but ignores the factory creation. + *
<p>
+ * It's similar to the code above (groupingPhysicalOperation) but ignores the factory creation. + *
</p>
*/ public static List intermediateAttributes(List aggregates, List groupings) { + // TODO: This should take CATEGORIZE into account: + // it currently works because the CATEGORIZE intermediate state is just 1 block with the same type as the function return, + // so the attribute generated here is the expected one var aggregateMapper = new AggregateMapper(); List attrs = new ArrayList<>(); @@ -304,12 +316,20 @@ private static AggregatorFunctionSupplier supplier(AggregateFunction aggregateFu throw new EsqlIllegalArgumentException("aggregate functions must extend ToAggregator"); } - private record GroupSpec(Integer channel, Attribute attribute) { + /** + * The input configuration of this group. + * + * @param channel The source channel of this group + * @param attribute The attribute, source of this group + * @param expression The expression being used to group + */ + private record GroupSpec(Integer channel, Attribute attribute, Expression expression) { BlockHash.GroupSpec toHashGroupSpec() { if (channel == null) { throw new EsqlIllegalArgumentException("planned to use ordinals but tried to use the hash instead"); } - return new BlockHash.GroupSpec(channel, elementType()); + + return new BlockHash.GroupSpec(channel, elementType(), Alias.unwrap(expression) instanceof Categorize); } ElementType elementType() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index f25b19c4e5d1c..355073fcc873f 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1821,7 +1821,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1850,7 +1850,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1865,7 +1865,7 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java index db5d8e03458ea..df1675ba22568 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/AbstractAggregationTestCase.java @@ -111,7 +111,8 @@ protected static List 
withNoRowsExpectingNull(List anyNullIsNull( oc.getExpectedTypeError(), null, null, - null + null, + oc.canBuildEvaluator() ); })); @@ -260,7 +261,8 @@ protected static List anyNullIsNull( oc.getExpectedTypeError(), null, null, - null + null, + oc.canBuildEvaluator() ); })); } @@ -648,18 +650,7 @@ protected static List randomizeBytesRefsOffset(List data, String expectedTypeError) Class foldingExceptionClass, String foldingExceptionMessage, Object extra + ) { + this( + data, + evaluatorToString, + expectedType, + matcher, + expectedWarnings, + expectedBuildEvaluatorWarnings, + expectedTypeError, + foldingExceptionClass, + foldingExceptionMessage, + extra, + data.stream().allMatch(d -> d.forceLiteral || DataType.isRepresentable(d.type)) + ); + } + + TestCase( + List data, + Matcher evaluatorToString, + DataType expectedType, + Matcher matcher, + String[] expectedWarnings, + String[] expectedBuildEvaluatorWarnings, + String expectedTypeError, + Class foldingExceptionClass, + String foldingExceptionMessage, + Object extra, + boolean canBuildEvaluator ) { this.source = Source.EMPTY; this.data = data; @@ -1442,10 +1470,10 @@ public static TestCase typeError(List data, String expectedTypeError) this.expectedWarnings = expectedWarnings; this.expectedBuildEvaluatorWarnings = expectedBuildEvaluatorWarnings; this.expectedTypeError = expectedTypeError; - this.canBuildEvaluator = data.stream().allMatch(d -> d.forceLiteral || DataType.isRepresentable(d.type)); this.foldingExceptionClass = foldingExceptionClass; this.foldingExceptionMessage = foldingExceptionMessage; this.extra = extra; + this.canBuildEvaluator = canBuildEvaluator; } public Source getSource() { @@ -1520,6 +1548,25 @@ public Object extra() { return extra; } + /** + * Build a new {@link TestCase} with new {@link #data}. + */ + public TestCase withData(List data) { + return new TestCase( + data, + evaluatorToString, + expectedType, + matcher, + expectedWarnings, + expectedBuildEvaluatorWarnings, + expectedTypeError, + foldingExceptionClass, + foldingExceptionMessage, + extra, + canBuildEvaluator + ); + } + /** * Build a new {@link TestCase} with new {@link #extra()}. */ @@ -1534,7 +1581,8 @@ public TestCase withExtra(Object extra) { expectedTypeError, foldingExceptionClass, foldingExceptionMessage, - extra + extra, + canBuildEvaluator ); } @@ -1549,7 +1597,8 @@ public TestCase withWarning(String warning) { expectedTypeError, foldingExceptionClass, foldingExceptionMessage, - extra + extra, + canBuildEvaluator ); } @@ -1568,7 +1617,8 @@ public TestCase withBuildEvaluatorWarning(String warning) { expectedTypeError, foldingExceptionClass, foldingExceptionMessage, - extra + extra, + canBuildEvaluator ); } @@ -1592,7 +1642,30 @@ public TestCase withFoldingException(Class clazz, String me expectedTypeError, clazz, message, - extra + extra, + canBuildEvaluator + ); + } + + /** + * Build a new {@link TestCase} that can't build an evaluator. + *
<p>
+ * Useful for special cases that can't be executed, but should still be considered. + *
</p>
+ */ + public TestCase withoutEvaluator() { + return new TestCase( + data, + evaluatorToString, + expectedType, + matcher, + expectedWarnings, + expectedBuildEvaluatorWarnings, + expectedTypeError, + foldingExceptionClass, + foldingExceptionMessage, + extra, + false ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java index f93389d5cb659..d29ac635e4bb7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/grouping/CategorizeTests.java @@ -23,6 +23,12 @@ import static org.hamcrest.Matchers.equalTo; +/** + * Dummy test implementation for Categorize. Used just to generate documentation. + *
<p>
+ * Most test cases are currently skipped as this function can't build an evaluator. + *
</p>
+ */ public class CategorizeTests extends AbstractScalarFunctionTestCase { public CategorizeTests(@Name("TestCase") Supplier testCaseSupplier) { this.testCase = testCaseSupplier.get(); @@ -37,11 +43,11 @@ public static Iterable parameters() { "text with " + dataType.typeName(), List.of(dataType), () -> new TestCaseSupplier.TestCase( - List.of(new TestCaseSupplier.TypedData(new BytesRef("blah blah blah"), dataType, "f")), - "CategorizeEvaluator[v=Attribute[channel=0]]", - DataType.INTEGER, - equalTo(0) - ) + List.of(new TestCaseSupplier.TypedData(new BytesRef(""), dataType, "field")), + "", + DataType.KEYWORD, + equalTo(new BytesRef("")) + ).withoutEvaluator() ) ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index a11a9cef82989..2b4fb6ad68972 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -57,6 +57,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial; import org.elasticsearch.xpack.esql.expression.function.aggregate.Values; import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToDouble; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToInteger; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToLong; @@ -1203,6 +1204,33 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg assertThat(Expressions.names(agg.groupings()), contains("first_name")); } + /** + * Expects + * Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[CATEGORIZE(first_name{f}#18) AS cat],[SUM(salary{f}#22,true[BOOLEAN]) AS s, cat{r}#10]] + * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] + */ + public void testCombineProjectionWithCategorizeGrouping() { + var plan = plan(""" + from test + | eval k = first_name, k1 = k + | stats s = sum(salary) by cat = CATEGORIZE(k) + | keep s, cat + """); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + assertThat(agg.child(), instanceOf(EsRelation.class)); + + assertThat(Expressions.names(agg.aggregates()), contains("s", "cat")); + assertThat(Expressions.names(agg.groupings()), contains("cat")); + + var categorizeAlias = as(agg.groupings().get(0), Alias.class); + var categorize = as(categorizeAlias.child(), Categorize.class); + var categorizeField = as(categorize.field(), FieldAttribute.class); + assertThat(categorizeField.name(), is("first_name")); + } + /** * Expects * Limit[1000[INTEGER]] @@ -3909,6 +3937,39 @@ public void testNestedExpressionsInGroups() { assertThat(eval.fields().get(0).name(), is("emp_no % 2")); } + /** + * Expects + * Limit[1000[INTEGER]] + * \_Aggregate[STANDARD,[CATEGORIZE(CATEGORIZE(CONCAT(first_name, "abc")){r$}#18) AS CATEGORIZE(CONCAT(first_name, "abc"))],[CO + * UNT(salary{f}#13,true[BOOLEAN]) AS c, CATEGORIZE(CONCAT(first_name, "abc")){r}#3]] + * \_Eval[[CONCAT(first_name{f}#9,[61 62 63][KEYWORD]) AS CATEGORIZE(CONCAT(first_name, "abc"))]] + * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
+ */ + public void testNestedExpressionsInGroupsWithCategorize() { + var plan = optimizedPlan(""" + from test + | stats c = count(salary) by CATEGORIZE(CONCAT(first_name, "abc")) + """); + + var limit = as(plan, Limit.class); + var agg = as(limit.child(), Aggregate.class); + var groupings = agg.groupings(); + var categorizeAlias = as(groupings.get(0), Alias.class); + var categorize = as(categorizeAlias.child(), Categorize.class); + var aggs = agg.aggregates(); + assertThat(aggs.get(1), is(categorizeAlias.toAttribute())); + + var eval = as(agg.child(), Eval.class); + assertThat(eval.fields(), hasSize(1)); + var evalFieldAlias = as(eval.fields().get(0), Alias.class); + var evalField = as(evalFieldAlias.child(), Concat.class); + + assertThat(evalFieldAlias.name(), is("CATEGORIZE(CONCAT(first_name, \"abc\"))")); + assertThat(categorize.field(), is(evalFieldAlias.toAttribute())); + assertThat(evalField.source().text(), is("CONCAT(first_name, \"abc\")")); + assertThat(categorizeAlias.source(), is(evalFieldAlias.source())); + } + /** * Expects * Limit[1000[INTEGER]] diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java index 89117b5d4e729..ae31576184938 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/FoldNullTests.java @@ -28,6 +28,8 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.Percentile; import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialCentroid; import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum; +import org.elasticsearch.xpack.esql.expression.function.grouping.Bucket; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToString; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateExtract; import org.elasticsearch.xpack.esql.expression.function.scalar.date.DateFormat; @@ -267,6 +269,17 @@ public void testNullFoldableDoesNotApplyToIsNullAndNotNull() { } } + public void testNullBucketGetsFolded() { + FoldNull foldNull = new FoldNull(); + assertEquals(NULL, foldNull.rule(new Bucket(EMPTY, NULL, NULL, NULL, NULL))); + } + + public void testNullCategorizeGroupingNotFolded() { + FoldNull foldNull = new FoldNull(); + Categorize categorize = new Categorize(EMPTY, NULL); + assertEquals(categorize, foldNull.rule(categorize)); + } + private void assertNullLiteral(Expression expression) { assertEquals(Literal.class, expression.getClass()); assertNull(expression.fold()); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java index d0088edcb0805..e4257270ce641 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java @@ -19,6 +19,7 @@ import org.elasticsearch.search.aggregations.AggregationReduceContext; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategory.TokenAndWeight; +import 
org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; import java.io.IOException; import java.nio.charset.StandardCharsets; @@ -83,6 +84,8 @@ public void close() { @Nullable private final CategorizationPartOfSpeechDictionary partOfSpeechDictionary; + private final List categoriesById; + /** * Categories stored in such a way that the most common are accessed first. * This is implemented as an {@link ArrayList} with bespoke ordering rather @@ -108,9 +111,18 @@ public TokenListCategorizer( this.lowerThreshold = threshold; this.upperThreshold = (1.0f + threshold) / 2.0f; this.categoriesByNumMatches = new ArrayList<>(); + this.categoriesById = new ArrayList<>(); cacheRamUsage(0); } + public TokenListCategory computeCategory(String s, CategorizationAnalyzer analyzer) { + try (TokenStream ts = analyzer.tokenStream("text", s)) { + return computeCategory(ts, s.length(), 1); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + public TokenListCategory computeCategory(TokenStream ts, int unfilteredStringLen, long numDocs) throws IOException { assert partOfSpeechDictionary != null : "This version of computeCategory should only be used when a part-of-speech dictionary is available"; @@ -301,6 +313,7 @@ private synchronized TokenListCategory computeCategory( maxUnfilteredStringLen, numDocs ); + categoriesById.add(newCategory); categoriesByNumMatches.add(newCategory); cacheRamUsage(newCategory.ramBytesUsed()); return repositionCategory(newCategory, newIndex); @@ -412,6 +425,17 @@ static float similarity(List left, int leftWeight, List toCategories(int size) { + return categoriesByNumMatches.stream() + .limit(size) + .map(category -> new SerializableTokenListCategory(category, bytesRefHash)) + .toList(); + } + + public List toCategoriesById() { + return categoriesById.stream().map(category -> new SerializableTokenListCategory(category, bytesRefHash)).toList(); + } + public InternalCategorizationAggregation.Bucket[] toOrderedBuckets(int size) { return categoriesByNumMatches.stream() .limit(size) From 31ebc5f33fece5e32a4350c13bcd385ee20aabcc Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 27 Nov 2024 13:51:02 -0500 Subject: [PATCH 043/139] Bump versions after 8.15.5 release --- .buildkite/pipelines/periodic-packaging.yml | 6 +++--- .buildkite/pipelines/periodic.yml | 6 +++--- .ci/bwcVersions | 2 +- server/src/main/java/org/elasticsearch/Version.java | 1 + .../main/resources/org/elasticsearch/TransportVersions.csv | 1 + .../resources/org/elasticsearch/index/IndexVersions.csv | 1 + 6 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index a49e486176484..c1b10a46c62a7 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -273,8 +273,8 @@ steps: env: BWC_VERSION: 8.14.3 - - label: "{{matrix.image}} / 8.15.4 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.4 + - label: "{{matrix.image}} / 8.15.6 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.6 timeout_in_minutes: 300 matrix: setup: @@ -287,7 +287,7 @@ steps: machineType: custom-16-32768 buildDirectory: /dev/shm/bk env: - BWC_VERSION: 8.15.4 + BWC_VERSION: 8.15.6 - label: "{{matrix.image}} / 8.16.2 / packaging-tests-upgrade" command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true 
destructiveDistroUpgradeTest.v8.16.2 diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml index aa1db893df8cc..69d11ef1dabb6 100644 --- a/.buildkite/pipelines/periodic.yml +++ b/.buildkite/pipelines/periodic.yml @@ -287,8 +287,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 8.15.4 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.4#bwcTest + - label: 8.15.6 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.6#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -297,7 +297,7 @@ steps: buildDirectory: /dev/shm/bk preemptible: true env: - BWC_VERSION: 8.15.4 + BWC_VERSION: 8.15.6 retry: automatic: - exit_status: "-1" diff --git a/.ci/bwcVersions b/.ci/bwcVersions index a8d6dda4fb0c2..826091807ce57 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -14,7 +14,7 @@ BWC_VERSION: - "8.12.2" - "8.13.4" - "8.14.3" - - "8.15.4" + - "8.15.6" - "8.16.2" - "8.17.0" - "8.18.0" diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java index 7b65547a7d591..24aa5bd261d7e 100644 --- a/server/src/main/java/org/elasticsearch/Version.java +++ b/server/src/main/java/org/elasticsearch/Version.java @@ -187,6 +187,7 @@ public class Version implements VersionId, ToXContentFragment { public static final Version V_8_15_2 = new Version(8_15_02_99); public static final Version V_8_15_3 = new Version(8_15_03_99); public static final Version V_8_15_4 = new Version(8_15_04_99); + public static final Version V_8_15_6 = new Version(8_15_06_99); public static final Version V_8_16_0 = new Version(8_16_00_99); public static final Version V_8_16_1 = new Version(8_16_01_99); public static final Version V_8_16_2 = new Version(8_16_02_99); diff --git a/server/src/main/resources/org/elasticsearch/TransportVersions.csv b/server/src/main/resources/org/elasticsearch/TransportVersions.csv index 6191922f13094..faeb7fe848159 100644 --- a/server/src/main/resources/org/elasticsearch/TransportVersions.csv +++ b/server/src/main/resources/org/elasticsearch/TransportVersions.csv @@ -132,5 +132,6 @@ 8.15.2,8702003 8.15.3,8702003 8.15.4,8702003 +8.15.5,8702003 8.16.0,8772001 8.16.1,8772004 diff --git a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv index f84d69af727ac..1fc8bd8648ad6 100644 --- a/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv +++ b/server/src/main/resources/org/elasticsearch/index/IndexVersions.csv @@ -132,5 +132,6 @@ 8.15.2,8512000 8.15.3,8512000 8.15.4,8512000 +8.15.5,8512000 8.16.0,8518000 8.16.1,8518000 From 807d994c5b956841546c2ce40eb2cd8ddd6a339d Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 27 Nov 2024 13:52:47 -0500 Subject: [PATCH 044/139] Prune changelogs after 8.15.5 release --- docs/changelog/114193.yaml | 5 ----- docs/changelog/114227.yaml | 6 ------ docs/changelog/114268.yaml | 5 ----- docs/changelog/114521.yaml | 5 ----- docs/changelog/114548.yaml | 5 ----- docs/changelog/116277.yaml | 6 ------ docs/changelog/116292.yaml | 5 ----- docs/changelog/116357.yaml | 5 ----- docs/changelog/116382.yaml | 5 ----- docs/changelog/116408.yaml | 6 ------ docs/changelog/116478.yaml | 5 ----- docs/changelog/116650.yaml | 5 ----- docs/changelog/116676.yaml | 5 ----- docs/changelog/116915.yaml | 5 ----- docs/changelog/116918.yaml | 5 ----- docs/changelog/116942.yaml | 5 ----- docs/changelog/116995.yaml | 5 ----- docs/changelog/117182.yaml | 6 ------ 18 
files changed, 94 deletions(-) delete mode 100644 docs/changelog/114193.yaml delete mode 100644 docs/changelog/114227.yaml delete mode 100644 docs/changelog/114268.yaml delete mode 100644 docs/changelog/114521.yaml delete mode 100644 docs/changelog/114548.yaml delete mode 100644 docs/changelog/116277.yaml delete mode 100644 docs/changelog/116292.yaml delete mode 100644 docs/changelog/116357.yaml delete mode 100644 docs/changelog/116382.yaml delete mode 100644 docs/changelog/116408.yaml delete mode 100644 docs/changelog/116478.yaml delete mode 100644 docs/changelog/116650.yaml delete mode 100644 docs/changelog/116676.yaml delete mode 100644 docs/changelog/116915.yaml delete mode 100644 docs/changelog/116918.yaml delete mode 100644 docs/changelog/116942.yaml delete mode 100644 docs/changelog/116995.yaml delete mode 100644 docs/changelog/117182.yaml diff --git a/docs/changelog/114193.yaml b/docs/changelog/114193.yaml deleted file mode 100644 index f18f9359007b8..0000000000000 --- a/docs/changelog/114193.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 114193 -summary: Add postal_code support to the City and Enterprise databases -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/114227.yaml b/docs/changelog/114227.yaml deleted file mode 100644 index 9b508f07c9e5a..0000000000000 --- a/docs/changelog/114227.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 114227 -summary: Ignore conflicting fields during dynamic mapping update -area: Mapping -type: bug -issues: - - 114228 diff --git a/docs/changelog/114268.yaml b/docs/changelog/114268.yaml deleted file mode 100644 index 5e4457005d7d3..0000000000000 --- a/docs/changelog/114268.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 114268 -summary: Support more maxmind fields in the geoip processor -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/114521.yaml b/docs/changelog/114521.yaml deleted file mode 100644 index c3a9c7cdd0848..0000000000000 --- a/docs/changelog/114521.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 114521 -summary: Add support for registered country fields for maxmind geoip databases -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/114548.yaml b/docs/changelog/114548.yaml deleted file mode 100644 index b9692bcb2d10c..0000000000000 --- a/docs/changelog/114548.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 114548 -summary: Support IPinfo database configurations -area: Ingest Node -type: enhancement -issues: [] diff --git a/docs/changelog/116277.yaml b/docs/changelog/116277.yaml deleted file mode 100644 index 62262b7797783..0000000000000 --- a/docs/changelog/116277.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 116277 -summary: Update Semantic Query To Handle Zero Size Responses -area: Vector Search -type: bug -issues: - - 116083 diff --git a/docs/changelog/116292.yaml b/docs/changelog/116292.yaml deleted file mode 100644 index f741c67bea155..0000000000000 --- a/docs/changelog/116292.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116292 -summary: Add missing header in `put_data_lifecycle` rest-api-spec -area: Data streams -type: bug -issues: [] diff --git a/docs/changelog/116357.yaml b/docs/changelog/116357.yaml deleted file mode 100644 index a1a7831eab9ca..0000000000000 --- a/docs/changelog/116357.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116357 -summary: Add tracking for query rule types -area: Relevance -type: enhancement -issues: [] diff --git a/docs/changelog/116382.yaml b/docs/changelog/116382.yaml deleted file mode 100644 index c941fb6eaa1e4..0000000000000 --- a/docs/changelog/116382.yaml +++ 
/dev/null @@ -1,5 +0,0 @@ -pr: 116382 -summary: Validate missing shards after the coordinator rewrite -area: Search -type: bug -issues: [] diff --git a/docs/changelog/116408.yaml b/docs/changelog/116408.yaml deleted file mode 100644 index 5f4c8459778a6..0000000000000 --- a/docs/changelog/116408.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 116408 -summary: Propagating nested `inner_hits` to the parent compound retriever -area: Ranking -type: bug -issues: - - 116397 diff --git a/docs/changelog/116478.yaml b/docs/changelog/116478.yaml deleted file mode 100644 index ec50799eb2019..0000000000000 --- a/docs/changelog/116478.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116478 -summary: Semantic text simple partial update -area: Search -type: bug -issues: [] diff --git a/docs/changelog/116650.yaml b/docs/changelog/116650.yaml deleted file mode 100644 index d314a918aede9..0000000000000 --- a/docs/changelog/116650.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116650 -summary: Fix bug in ML autoscaling when some node info is unavailable -area: Machine Learning -type: bug -issues: [] diff --git a/docs/changelog/116676.yaml b/docs/changelog/116676.yaml deleted file mode 100644 index 8c6671e177499..0000000000000 --- a/docs/changelog/116676.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116676 -summary: Fix handling of time exceeded exception in fetch phase -area: Search -type: bug -issues: [] diff --git a/docs/changelog/116915.yaml b/docs/changelog/116915.yaml deleted file mode 100644 index 9686f0023a14a..0000000000000 --- a/docs/changelog/116915.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116915 -summary: Improve message about insecure S3 settings -area: Snapshot/Restore -type: enhancement -issues: [] diff --git a/docs/changelog/116918.yaml b/docs/changelog/116918.yaml deleted file mode 100644 index 3b04b4ae4a69a..0000000000000 --- a/docs/changelog/116918.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116918 -summary: Split searchable snapshot into multiple repo operations -area: Snapshot/Restore -type: enhancement -issues: [] diff --git a/docs/changelog/116942.yaml b/docs/changelog/116942.yaml deleted file mode 100644 index 5037e8c59cd85..0000000000000 --- a/docs/changelog/116942.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116942 -summary: Fix handling of bulk requests with semantic text fields and delete ops -area: Relevance -type: bug -issues: [] diff --git a/docs/changelog/116995.yaml b/docs/changelog/116995.yaml deleted file mode 100644 index a0467c630edf3..0000000000000 --- a/docs/changelog/116995.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 116995 -summary: "Apm-data: disable date_detection for all apm data streams" -area: Data streams -type: enhancement -issues: [] \ No newline at end of file diff --git a/docs/changelog/117182.yaml b/docs/changelog/117182.yaml deleted file mode 100644 index b5398bec1ef30..0000000000000 --- a/docs/changelog/117182.yaml +++ /dev/null @@ -1,6 +0,0 @@ -pr: 117182 -summary: Change synthetic source logic for `constant_keyword` -area: Mapping -type: bug -issues: - - 117083 From a46547c8dcf8b58d822b2e30639fe35e4687883b Mon Sep 17 00:00:00 2001 From: Brian Seeders Date: Wed, 27 Nov 2024 15:26:23 -0500 Subject: [PATCH 045/139] [CI] Pull in the latest mutes from base branch for PRs at runtime (#117587) --- .buildkite/hooks/pre-command | 4 ++++ .buildkite/hooks/pre-command.bat | 3 +++ .buildkite/scripts/get-latest-test-mutes.sh | 20 +++++++++++++++++++ .../internal/test/MutedTestsBuildService.java | 12 ++++++----- 4 files changed, 34 insertions(+), 5 deletions(-) create mode 100755 
.buildkite/scripts/get-latest-test-mutes.sh diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 0ece129a3c238..f25092bc6d42f 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -47,6 +47,8 @@ export GRADLE_BUILD_CACHE_PASSWORD BUILDKITE_API_TOKEN=$(vault read -field=token secret/ci/elastic-elasticsearch/buildkite-api-token) export BUILDKITE_API_TOKEN +export GH_TOKEN="$VAULT_GITHUB_TOKEN" + if [[ "${USE_LUCENE_SNAPSHOT_CREDS:-}" == "true" ]]; then data=$(.buildkite/scripts/get-legacy-secret.sh aws-elastic/creds/lucene-snapshots) @@ -117,3 +119,5 @@ if [[ -f /etc/os-release ]] && grep -q '"Amazon Linux 2"' /etc/os-release; then echo "$(hostname -i | cut -d' ' -f 2) $(hostname -f)." | sudo tee /etc/dnsmasq.hosts sudo systemctl restart dnsmasq.service fi + +.buildkite/scripts/get-latest-test-mutes.sh diff --git a/.buildkite/hooks/pre-command.bat b/.buildkite/hooks/pre-command.bat index fe7c2371de0e5..752c2bf23eb14 100644 --- a/.buildkite/hooks/pre-command.bat +++ b/.buildkite/hooks/pre-command.bat @@ -15,9 +15,12 @@ set BUILD_NUMBER=%BUILDKITE_BUILD_NUMBER% set COMPOSE_HTTP_TIMEOUT=120 set JOB_BRANCH=%BUILDKITE_BRANCH% +set GH_TOKEN=%VAULT_GITHUB_TOKEN% + set GRADLE_BUILD_CACHE_USERNAME=vault read -field=username secret/ci/elastic-elasticsearch/migrated/gradle-build-cache set GRADLE_BUILD_CACHE_PASSWORD=vault read -field=password secret/ci/elastic-elasticsearch/migrated/gradle-build-cache bash.exe -c "nohup bash .buildkite/scripts/setup-monitoring.sh /dev/null 2>&1 &" +bash.exe -c "bash .buildkite/scripts/get-latest-test-mutes.sh" exit /b 0 diff --git a/.buildkite/scripts/get-latest-test-mutes.sh b/.buildkite/scripts/get-latest-test-mutes.sh new file mode 100755 index 0000000000000..5721e29f1b773 --- /dev/null +++ b/.buildkite/scripts/get-latest-test-mutes.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +if [[ ! "${BUILDKITE_PULL_REQUEST:-}" || "${BUILDKITE_AGENT_META_DATA_PROVIDER:-}" == "k8s" ]]; then + exit 0 +fi + +testMuteBranch="${BUILDKITE_PULL_REQUEST_BASE_BRANCH:-main}" +testMuteFile="$(mktemp)" + +# If this PR contains changes to muted-tests.yml, we disable this functionality +# Otherwise, we wouldn't be able to test unmutes +if [[ ! 
$(gh pr diff "$BUILDKITE_PULL_REQUEST" --name-only | grep 'muted-tests.yml') ]]; then + gh api -H 'Accept: application/vnd.github.v3.raw' "repos/elastic/elasticsearch/contents/muted-tests.yml?ref=$testMuteBranch" > "$testMuteFile" + + if [[ -s "$testMuteFile" ]]; then + mkdir -p ~/.gradle + # This is using gradle.properties instead of an env var so that it's easily compatible with the Windows pre-command hook + echo "org.gradle.project.org.elasticsearch.additional.muted.tests=$testMuteFile" >> ~/.gradle/gradle.properties + fi +fi diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/MutedTestsBuildService.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/MutedTestsBuildService.java index 1dfa3bbb29aa2..df3d1c9b70a94 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/MutedTestsBuildService.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/MutedTestsBuildService.java @@ -28,10 +28,12 @@ import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.Collections; +import java.util.LinkedHashSet; import java.util.List; +import java.util.Set; public abstract class MutedTestsBuildService implements BuildService { - private final List excludePatterns = new ArrayList<>(); + private final Set excludePatterns = new LinkedHashSet<>(); private final ObjectMapper objectMapper = new ObjectMapper(new YAMLFactory()); public MutedTestsBuildService() { @@ -43,23 +45,23 @@ public MutedTestsBuildService() { } } - public List getExcludePatterns() { + public Set getExcludePatterns() { return excludePatterns; } - private List buildExcludePatterns(File file) { + private Set buildExcludePatterns(File file) { List mutedTests; try (InputStream is = new BufferedInputStream(new FileInputStream(file))) { mutedTests = objectMapper.readValue(is, MutedTests.class).getTests(); if (mutedTests == null) { - return Collections.emptyList(); + return Collections.emptySet(); } } catch (IOException e) { throw new UncheckedIOException(e); } - List excludes = new ArrayList<>(); + Set excludes = new LinkedHashSet<>(); if (mutedTests.isEmpty() == false) { for (MutedTestsBuildService.MutedTest mutedTest : mutedTests) { if (mutedTest.getClassName() != null && mutedTest.getMethods().isEmpty() == false) { From 7a98e31f9db4e7155eecc3563284640ea8b5dbf1 Mon Sep 17 00:00:00 2001 From: Brendan Cully Date: Wed, 27 Nov 2024 12:30:02 -0800 Subject: [PATCH 046/139] Make VerifyingIndexInput public (#117518) This way we can verify store files as we read them directly, without going through a store abstraction we may not have if we copy lucene files around. --- server/src/main/java/org/elasticsearch/index/store/Store.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index 887fe486b6003..e6b499c07f189 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -1217,14 +1217,14 @@ public static String digestToString(long digest) { * mechanism that is used in some repository plugins (S3 for example). However, the checksum is only calculated on * the first read. All consecutive reads of the same data are not used to calculate the checksum. 
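To illustrate what the widened visibility enables, here is a minimal sketch (illustrative only, not code from this patch) of streaming a Lucene file through `Store.VerifyingIndexInput` and verifying its checksum without going through a `Store` instance. The directory handle, file name, and buffer size are placeholder assumptions:

```java
import java.io.IOException;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.elasticsearch.index.store.Store;

final class VerifyWhileCopyingSketch {
    static void copyAndVerify(Directory dir, String fileName) throws IOException {
        try (IndexInput raw = dir.openInput(fileName, IOContext.READONCE)) {
            // Constructible outside the Store class now that the constructor is public.
            Store.VerifyingIndexInput verifying = new Store.VerifyingIndexInput(raw);
            byte[] buffer = new byte[8192];
            long remaining = verifying.length();
            // Read the entire file, footer included, so the running checksum is complete.
            while (remaining > 0) {
                int chunk = (int) Math.min(buffer.length, remaining);
                verifying.readBytes(buffer, 0, chunk);
                // ... hand buffer[0, chunk) to whatever is copying the file ...
                remaining -= chunk;
            }
            Store.verify(verifying); // throws CorruptIndexException if the checksum mismatches
        }
    }
}
```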
*/ - static class VerifyingIndexInput extends ChecksumIndexInput { + public static class VerifyingIndexInput extends ChecksumIndexInput { private final IndexInput input; private final Checksum digest; private final long checksumPosition; private final byte[] checksum = new byte[8]; private long verifiedPosition = 0; - VerifyingIndexInput(IndexInput input) { + public VerifyingIndexInput(IndexInput input) { this(input, new BufferedChecksum(new CRC32())); } From e33e1a03da31c88e4fa7bbaa074fa33ecd4c68ab Mon Sep 17 00:00:00 2001 From: Michael Peterson Date: Wed, 27 Nov 2024 16:14:57 -0500 Subject: [PATCH 047/139] ESQL: async search responses have CCS metadata while searches are running (#117265) ES|QL async search responses now include CCS metadata while the query is still running. The CCS metadata will be present only if a remote cluster is queried and the user requested it with the `include_ccs_metadata: true` setting on the original request to `POST /_query/async`. The setting cannot be modified in the query to `GET /_query/async/:id`. The core change is that the EsqlExecutionInfo object is set on the EsqlQueryTask, which is used for async ES|QL queries, so that calls to `GET /_query/async/:id` have access to the same EsqlExecutionInfo object that is being updated as the planning and query progress. Secondly, the overall `took` time is now always present on ES|QL responses, even for async-searches while the query is still running. The took time shows a "took-so-far" value and will change upon refresh until the query has finished. This is present regardless of the `include_ccs_metadata` setting. Example response showing in progress state of the query: ``` GET _query/async/FlhaeTBxUU0yU2xhVzM2TlRLY3F1eXcceWlSWWZlRDhUVTJEUGFfZUROaDdtUTo0MDQwNA ``` ```json { "id": "FlhaeTBxUU0yU2xhVzM2TlRLY3F1eXcceWlSWWZlRDhUVTJEUGFfZUROaDdtUTo0MDQwNA==", "is_running": true, "took": 2032, "columns": [], "values": [], "_clusters": { "total": 3, "successful": 1, "running": 2, "skipped": 0, "partial": 0, "failed": 0, "details": { "(local)": { "status": "running", "indices": "web_traffic", "_shards": { "total": 2, "skipped": 0 } }, "remote1": { "status": "running", "indices": "web_traffic" }, "remote2": { "status": "successful", "indices": "web_traffic", "took": 180, "_shards": { "total": 2, "successful": 2, "skipped": 0, "failed": 0 } } } } } ``` --- docs/changelog/117265.yaml | 5 + .../esql/action/CrossClusterAsyncQueryIT.java | 522 ++++++++++++++++++ .../esql/action/CrossClustersQueryIT.java | 9 +- .../xpack/esql/action/EsqlExecutionInfo.java | 13 +- .../xpack/esql/action/EsqlQueryResponse.java | 7 +- .../xpack/esql/action/EsqlQueryTask.java | 13 +- .../xpack/esql/plugin/ComputeListener.java | 29 +- .../xpack/esql/plugin/ComputeService.java | 26 +- .../esql/plugin/TransportEsqlQueryAction.java | 23 +- .../xpack/esql/session/EsqlSession.java | 1 + .../esql/action/EsqlQueryResponseTests.java | 3 +- .../esql/plugin/ComputeListenerTests.java | 16 +- 12 files changed, 634 insertions(+), 33 deletions(-) create mode 100644 docs/changelog/117265.yaml create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterAsyncQueryIT.java diff --git a/docs/changelog/117265.yaml b/docs/changelog/117265.yaml new file mode 100644 index 0000000000000..ec6605155538d --- /dev/null +++ b/docs/changelog/117265.yaml @@ -0,0 +1,5 @@ +pr: 117265 +summary: Async search responses have CCS metadata while searches are running +area: ES|QL +type: enhancement +issues: [] diff --git 
a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterAsyncQueryIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterAsyncQueryIT.java new file mode 100644 index 0000000000000..440582dcfbb45 --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterAsyncQueryIT.java @@ -0,0 +1,522 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.action; + +import org.elasticsearch.ElasticsearchTimeoutException; +import org.elasticsearch.action.bulk.BulkRequestBuilder; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.compute.operator.exchange.ExchangeService; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.mapper.OnScriptError; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.ScriptPlugin; +import org.elasticsearch.script.LongFieldScript; +import org.elasticsearch.script.ScriptContext; +import org.elasticsearch.script.ScriptEngine; +import org.elasticsearch.search.lookup.SearchLookup; +import org.elasticsearch.test.AbstractMultiClustersTestCase; +import org.elasticsearch.test.XContentTestUtils; +import org.elasticsearch.transport.RemoteClusterAware; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.elasticsearch.xpack.core.async.DeleteAsyncResultRequest; +import org.elasticsearch.xpack.core.async.GetAsyncResultRequest; +import org.elasticsearch.xpack.core.async.TransportDeleteAsyncResultAction; +import org.elasticsearch.xpack.esql.plugin.EsqlPlugin; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.core.TimeValue.timeValueMillis; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; + +public class CrossClusterAsyncQueryIT extends AbstractMultiClustersTestCase { + + private static final String REMOTE_CLUSTER_1 = "cluster-a"; + private static final String REMOTE_CLUSTER_2 = "remote-b"; + private static String LOCAL_INDEX = "logs-1"; + private static String REMOTE_INDEX = "logs-2"; + private static final String INDEX_WITH_RUNTIME_MAPPING = "blocking"; + + @Override + protected Collection remoteClusterAlias() { + return List.of(REMOTE_CLUSTER_1, 
REMOTE_CLUSTER_2); + } + + @Override + protected Map skipUnavailableForRemoteClusters() { + return Map.of(REMOTE_CLUSTER_1, randomBoolean(), REMOTE_CLUSTER_2, randomBoolean()); + } + + @Override + protected Collection> nodePlugins(String clusterAlias) { + List> plugins = new ArrayList<>(super.nodePlugins(clusterAlias)); + plugins.add(EsqlPlugin.class); + plugins.add(EsqlAsyncActionIT.LocalStateEsqlAsync.class); // allows the async_search DELETE action + plugins.add(InternalExchangePlugin.class); + plugins.add(PauseFieldPlugin.class); + return plugins; + } + + public static class InternalExchangePlugin extends Plugin { + @Override + public List> getSettings() { + return List.of( + Setting.timeSetting( + ExchangeService.INACTIVE_SINKS_INTERVAL_SETTING, + TimeValue.timeValueSeconds(30), + Setting.Property.NodeScope + ) + ); + } + } + + @Before + public void resetPlugin() { + PauseFieldPlugin.allowEmitting = new CountDownLatch(1); + PauseFieldPlugin.startEmitting = new CountDownLatch(1); + } + + public static class PauseFieldPlugin extends Plugin implements ScriptPlugin { + public static CountDownLatch startEmitting = new CountDownLatch(1); + public static CountDownLatch allowEmitting = new CountDownLatch(1); + + @Override + public ScriptEngine getScriptEngine(Settings settings, Collection> contexts) { + return new ScriptEngine() { + @Override + + public String getType() { + return "pause"; + } + + @Override + @SuppressWarnings("unchecked") + public FactoryType compile( + String name, + String code, + ScriptContext context, + Map params + ) { + if (context == LongFieldScript.CONTEXT) { + return (FactoryType) new LongFieldScript.Factory() { + @Override + public LongFieldScript.LeafFactory newFactory( + String fieldName, + Map params, + SearchLookup searchLookup, + OnScriptError onScriptError + ) { + return ctx -> new LongFieldScript(fieldName, params, searchLookup, onScriptError, ctx) { + @Override + public void execute() { + startEmitting.countDown(); + try { + assertTrue(allowEmitting.await(30, TimeUnit.SECONDS)); + } catch (InterruptedException e) { + throw new AssertionError(e); + } + emit(1); + } + }; + } + }; + } + throw new IllegalStateException("unsupported type " + context); + } + + @Override + public Set> getSupportedContexts() { + return Set.of(LongFieldScript.CONTEXT); + } + }; + } + } + + /** + * Includes testing for CCS metadata in the GET /_query/async/:id response while the search is still running + */ + public void testSuccessfulPathways() throws Exception { + Map testClusterInfo = setupClusters(3); + int localNumShards = (Integer) testClusterInfo.get("local.num_shards"); + int remote1NumShards = (Integer) testClusterInfo.get("remote1.num_shards"); + int remote2NumShards = (Integer) testClusterInfo.get("remote2.blocking_index.num_shards"); + + Tuple includeCCSMetadata = randomIncludeCCSMetadata(); + Boolean requestIncludeMeta = includeCCSMetadata.v1(); + boolean responseExpectMeta = includeCCSMetadata.v2(); + + AtomicReference asyncExecutionId = new AtomicReference<>(); + + String q = "FROM logs-*,cluster-a:logs-*,remote-b:blocking | STATS total=sum(const) | LIMIT 10"; + try (EsqlQueryResponse resp = runAsyncQuery(q, requestIncludeMeta, null, TimeValue.timeValueMillis(100))) { + assertTrue(resp.isRunning()); + assertNotNull("async execution id is null", resp.asyncExecutionId()); + asyncExecutionId.set(resp.asyncExecutionId().get()); + // executionInfo may or may not be set on the initial response when there is a relatively low wait_for_completion_timeout + // so we do not check 
for it here + } + + // wait until we know that the query against 'remote-b:blocking' has started + PauseFieldPlugin.startEmitting.await(30, TimeUnit.SECONDS); + + // wait until the query of 'cluster-a:logs-*' has finished (it is not blocked since we are not searching the 'blocking' index on it) + assertBusy(() -> { + try (EsqlQueryResponse asyncResponse = getAsyncResponse(asyncExecutionId.get())) { + EsqlExecutionInfo executionInfo = asyncResponse.getExecutionInfo(); + assertNotNull(executionInfo); + EsqlExecutionInfo.Cluster clusterA = executionInfo.getCluster("cluster-a"); + assertThat(clusterA.getStatus(), not(equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING))); + } + }); + + /* at this point: + * the query against cluster-a should be finished + * the query against remote-b should be running (blocked on the PauseFieldPlugin.allowEmitting CountDown) + * the query against the local cluster should be running because it has a STATS clause that needs to wait on remote-b + */ + try (EsqlQueryResponse asyncResponse = getAsyncResponse(asyncExecutionId.get())) { + EsqlExecutionInfo executionInfo = asyncResponse.getExecutionInfo(); + assertThat(asyncResponse.isRunning(), is(true)); + assertThat( + executionInfo.clusterAliases(), + equalTo(Set.of(REMOTE_CLUSTER_1, REMOTE_CLUSTER_2, RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY)) + ); + assertThat(executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING), equalTo(2)); + assertThat(executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL), equalTo(1)); + + EsqlExecutionInfo.Cluster clusterA = executionInfo.getCluster(REMOTE_CLUSTER_1); + assertThat(clusterA.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(clusterA.getTotalShards(), greaterThanOrEqualTo(1)); + assertThat(clusterA.getSuccessfulShards(), equalTo(clusterA.getTotalShards())); + assertThat(clusterA.getSkippedShards(), equalTo(0)); + assertThat(clusterA.getFailedShards(), equalTo(0)); + assertThat(clusterA.getFailures().size(), equalTo(0)); + assertThat(clusterA.getTook().millis(), greaterThanOrEqualTo(0L)); + + EsqlExecutionInfo.Cluster local = executionInfo.getCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); + // should still be RUNNING since the local cluster has to do a STATS on the coordinator, waiting on remoteB + assertThat(local.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING)); + assertThat(clusterA.getTotalShards(), greaterThanOrEqualTo(1)); + + EsqlExecutionInfo.Cluster remoteB = executionInfo.getCluster(REMOTE_CLUSTER_2); + // should still be RUNNING since we haven't released the countdown lock to proceed + assertThat(remoteB.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING)); + assertNull(remoteB.getSuccessfulShards()); // should not be filled in until query is finished + + assertClusterMetadataInResponse(asyncResponse, responseExpectMeta, 3); + } + + // allow remoteB query to proceed + PauseFieldPlugin.allowEmitting.countDown(); + + // wait until both remoteB and local queries have finished + assertBusy(() -> { + try (EsqlQueryResponse asyncResponse = getAsyncResponse(asyncExecutionId.get())) { + EsqlExecutionInfo executionInfo = asyncResponse.getExecutionInfo(); + assertNotNull(executionInfo); + EsqlExecutionInfo.Cluster remoteB = executionInfo.getCluster(REMOTE_CLUSTER_2); + assertThat(remoteB.getStatus(), not(equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING))); + EsqlExecutionInfo.Cluster local = executionInfo.getCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); + 
assertThat(local.getStatus(), not(equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING))); + assertThat(asyncResponse.isRunning(), is(false)); + } + }); + + try (EsqlQueryResponse asyncResponse = getAsyncResponse(asyncExecutionId.get())) { + EsqlExecutionInfo executionInfo = asyncResponse.getExecutionInfo(); + assertNotNull(executionInfo); + assertThat(executionInfo.overallTook().millis(), greaterThanOrEqualTo(1L)); + + EsqlExecutionInfo.Cluster clusterA = executionInfo.getCluster(REMOTE_CLUSTER_1); + assertThat(clusterA.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(clusterA.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(clusterA.getTotalShards(), equalTo(remote1NumShards)); + assertThat(clusterA.getSuccessfulShards(), equalTo(remote1NumShards)); + assertThat(clusterA.getSkippedShards(), equalTo(0)); + assertThat(clusterA.getFailedShards(), equalTo(0)); + assertThat(clusterA.getFailures().size(), equalTo(0)); + + EsqlExecutionInfo.Cluster remoteB = executionInfo.getCluster(REMOTE_CLUSTER_2); + assertThat(remoteB.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(remoteB.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(remoteB.getTotalShards(), equalTo(remote2NumShards)); + assertThat(remoteB.getSuccessfulShards(), equalTo(remote2NumShards)); + assertThat(remoteB.getSkippedShards(), equalTo(0)); + assertThat(remoteB.getFailedShards(), equalTo(0)); + assertThat(remoteB.getFailures().size(), equalTo(0)); + + EsqlExecutionInfo.Cluster local = executionInfo.getCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY); + assertThat(local.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(local.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(local.getTotalShards(), equalTo(localNumShards)); + assertThat(local.getSuccessfulShards(), equalTo(localNumShards)); + assertThat(local.getSkippedShards(), equalTo(0)); + assertThat(local.getFailedShards(), equalTo(0)); + assertThat(local.getFailures().size(), equalTo(0)); + } finally { + AcknowledgedResponse acknowledgedResponse = deleteAsyncId(asyncExecutionId.get()); + assertThat(acknowledgedResponse.isAcknowledged(), is(true)); + } + } + + public void testAsyncQueriesWithLimit0() throws IOException { + setupClusters(3); + Tuple includeCCSMetadata = randomIncludeCCSMetadata(); + Boolean requestIncludeMeta = includeCCSMetadata.v1(); + boolean responseExpectMeta = includeCCSMetadata.v2(); + + final TimeValue waitForCompletion = TimeValue.timeValueNanos(randomFrom(1L, Long.MAX_VALUE)); + String asyncExecutionId = null; + try (EsqlQueryResponse resp = runAsyncQuery("FROM logs*,*:logs* | LIMIT 0", requestIncludeMeta, null, waitForCompletion)) { + EsqlExecutionInfo executionInfo = resp.getExecutionInfo(); + if (resp.isRunning()) { + asyncExecutionId = resp.asyncExecutionId().get(); + assertThat(resp.columns().size(), equalTo(0)); + assertThat(resp.values().hasNext(), is(false)); // values should be empty list + + } else { + assertThat(resp.columns().size(), equalTo(4)); + assertThat(resp.columns().contains(new ColumnInfoImpl("const", "long")), is(true)); + assertThat(resp.columns().contains(new ColumnInfoImpl("id", "keyword")), is(true)); + assertThat(resp.columns().contains(new ColumnInfoImpl("tag", "keyword")), is(true)); + assertThat(resp.columns().contains(new ColumnInfoImpl("v", "long")), is(true)); + assertThat(resp.values().hasNext(), is(false)); // values should be empty list + + assertNotNull(executionInfo); + 
assertThat(executionInfo.isCrossClusterSearch(), is(true)); + long overallTookMillis = executionInfo.overallTook().millis(); + assertThat(overallTookMillis, greaterThanOrEqualTo(0L)); + assertThat(executionInfo.includeCCSMetadata(), equalTo(responseExpectMeta)); + assertThat(executionInfo.clusterAliases(), equalTo(Set.of(LOCAL_CLUSTER, REMOTE_CLUSTER_1, REMOTE_CLUSTER_2))); + + EsqlExecutionInfo.Cluster remoteCluster = executionInfo.getCluster(REMOTE_CLUSTER_1); + assertThat(remoteCluster.getIndexExpression(), equalTo("logs*")); + assertThat(remoteCluster.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(remoteCluster.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(remoteCluster.getTook().millis(), lessThanOrEqualTo(overallTookMillis)); + assertThat(remoteCluster.getTotalShards(), equalTo(0)); + assertThat(remoteCluster.getSuccessfulShards(), equalTo(0)); + assertThat(remoteCluster.getSkippedShards(), equalTo(0)); + assertThat(remoteCluster.getFailedShards(), equalTo(0)); + + EsqlExecutionInfo.Cluster remote2Cluster = executionInfo.getCluster(REMOTE_CLUSTER_2); + assertThat(remote2Cluster.getIndexExpression(), equalTo("logs*")); + assertThat(remote2Cluster.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(remote2Cluster.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(remote2Cluster.getTook().millis(), lessThanOrEqualTo(overallTookMillis)); + assertThat(remote2Cluster.getTotalShards(), equalTo(0)); + assertThat(remote2Cluster.getSuccessfulShards(), equalTo(0)); + assertThat(remote2Cluster.getSkippedShards(), equalTo(0)); + assertThat(remote2Cluster.getFailedShards(), equalTo(0)); + + EsqlExecutionInfo.Cluster localCluster = executionInfo.getCluster(LOCAL_CLUSTER); + assertThat(localCluster.getIndexExpression(), equalTo("logs*")); + assertThat(localCluster.getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); + assertThat(localCluster.getTook().millis(), greaterThanOrEqualTo(0L)); + assertThat(localCluster.getTook().millis(), lessThanOrEqualTo(overallTookMillis)); + assertThat(localCluster.getTotalShards(), equalTo(0)); + assertThat(localCluster.getSuccessfulShards(), equalTo(0)); + assertThat(localCluster.getSkippedShards(), equalTo(0)); + assertThat(localCluster.getFailedShards(), equalTo(0)); + + assertClusterMetadataInResponse(resp, responseExpectMeta, 3); + } + } finally { + if (asyncExecutionId != null) { + AcknowledgedResponse acknowledgedResponse = deleteAsyncId(asyncExecutionId); + assertThat(acknowledgedResponse.isAcknowledged(), is(true)); + } + } + } + + protected EsqlQueryResponse runAsyncQuery(String query, Boolean ccsMetadata, QueryBuilder filter, TimeValue waitCompletionTime) { + EsqlQueryRequest request = EsqlQueryRequest.asyncEsqlQueryRequest(); + request.query(query); + request.pragmas(AbstractEsqlIntegTestCase.randomPragmas()); + request.profile(randomInt(5) == 2); + request.columnar(randomBoolean()); + if (ccsMetadata != null) { + request.includeCCSMetadata(ccsMetadata); + } + request.waitForCompletionTimeout(waitCompletionTime); + request.keepOnCompletion(false); + if (filter != null) { + request.filter(filter); + } + return runAsyncQuery(request); + } + + protected EsqlQueryResponse runAsyncQuery(EsqlQueryRequest request) { + try { + return client(LOCAL_CLUSTER).execute(EsqlQueryAction.INSTANCE, request).actionGet(30, TimeUnit.SECONDS); + } catch (ElasticsearchTimeoutException e) { + throw new AssertionError("timeout waiting for query response", e); + } + } + +
AcknowledgedResponse deleteAsyncId(String id) { + try { + DeleteAsyncResultRequest request = new DeleteAsyncResultRequest(id); + return client().execute(TransportDeleteAsyncResultAction.TYPE, request).actionGet(30, TimeUnit.SECONDS); + } catch (ElasticsearchTimeoutException e) { + throw new AssertionError("timeout waiting for DELETE response", e); + } + } + + EsqlQueryResponse getAsyncResponse(String id) { + try { + var getResultsRequest = new GetAsyncResultRequest(id).setWaitForCompletionTimeout(timeValueMillis(1)); + return client().execute(EsqlAsyncGetResultAction.INSTANCE, getResultsRequest).actionGet(30, TimeUnit.SECONDS); + } catch (ElasticsearchTimeoutException e) { + throw new AssertionError("timeout waiting for GET async result", e); + } + } + + private static void assertClusterMetadataInResponse(EsqlQueryResponse resp, boolean responseExpectMeta, int numClusters) { + try { + final Map<String, Object> esqlResponseAsMap = XContentTestUtils.convertToMap(resp); + final Object clusters = esqlResponseAsMap.get("_clusters"); + if (responseExpectMeta) { + assertNotNull(clusters); + // test a few entries to ensure it looks correct (other tests do a full analysis of the metadata in the response) + @SuppressWarnings("unchecked") + Map<String, Object> inner = (Map<String, Object>) clusters; + assertTrue(inner.containsKey("total")); + assertThat((int) inner.get("total"), equalTo(numClusters)); + assertTrue(inner.containsKey("details")); + } else { + assertNull(clusters); + } + } catch (IOException e) { + fail("Could not convert EsqlQueryResponse to Map: " + e); + } + } + + /** + * @return a Tuple where v1 is the value to send to runQuery (can be null; null means use the default value) + * and v2 is whether to expect CCS metadata in the response (cannot be null) + */ + public static Tuple<Boolean, Boolean> randomIncludeCCSMetadata() { + return switch (randomIntBetween(1, 3)) { + case 1 -> new Tuple<>(Boolean.TRUE, Boolean.TRUE); + case 2 -> new Tuple<>(Boolean.FALSE, Boolean.FALSE); + case 3 -> new Tuple<>(null, Boolean.FALSE); + default -> throw new AssertionError("should not get here"); + }; + } + + Map<String, Object> setupClusters(int numClusters) throws IOException { + assert numClusters == 2 || numClusters == 3 : "only 2 or 3 clusters are supported, got: " + numClusters; + int numShardsLocal = randomIntBetween(1, 5); + populateLocalIndices(LOCAL_INDEX, numShardsLocal); + + int numShardsRemote = randomIntBetween(1, 5); + populateRemoteIndices(REMOTE_CLUSTER_1, REMOTE_INDEX, numShardsRemote); + + Map<String, Object> clusterInfo = new HashMap<>(); + clusterInfo.put("local.num_shards", numShardsLocal); + clusterInfo.put("local.index", LOCAL_INDEX); + clusterInfo.put("remote1.num_shards", numShardsRemote); + clusterInfo.put("remote1.index", REMOTE_INDEX); + + if (numClusters == 3) { + int numShardsRemote2 = randomIntBetween(1, 5); + populateRemoteIndices(REMOTE_CLUSTER_2, REMOTE_INDEX, numShardsRemote2); + populateRemoteIndicesWithRuntimeMapping(REMOTE_CLUSTER_2); + clusterInfo.put("remote2.index", REMOTE_INDEX); + clusterInfo.put("remote2.num_shards", numShardsRemote2); + clusterInfo.put("remote2.blocking_index", INDEX_WITH_RUNTIME_MAPPING); + clusterInfo.put("remote2.blocking_index.num_shards", 1); + } + + String skipUnavailableKey = Strings.format("cluster.remote.%s.skip_unavailable", REMOTE_CLUSTER_1); + Setting<?> skipUnavailableSetting = cluster(REMOTE_CLUSTER_1).clusterService().getClusterSettings().get(skipUnavailableKey); + boolean skipUnavailable = (boolean) cluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY).clusterService() + .getClusterSettings() + .get(skipUnavailableSetting); +
clusterInfo.put("remote.skip_unavailable", skipUnavailable); + + return clusterInfo; + } + + void populateLocalIndices(String indexName, int numShards) { + Client localClient = client(LOCAL_CLUSTER); + assertAcked( + localClient.admin() + .indices() + .prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", numShards)) + .setMapping("id", "type=keyword", "tag", "type=keyword", "v", "type=long", "const", "type=long") + ); + for (int i = 0; i < 10; i++) { + localClient.prepareIndex(indexName).setSource("id", "local-" + i, "tag", "local", "v", i).get(); + } + localClient.admin().indices().prepareRefresh(indexName).get(); + } + + void populateRemoteIndicesWithRuntimeMapping(String clusterAlias) throws IOException { + XContentBuilder mapping = JsonXContent.contentBuilder().startObject(); + mapping.startObject("runtime"); + { + mapping.startObject("const"); + { + mapping.field("type", "long"); + mapping.startObject("script").field("source", "").field("lang", "pause").endObject(); + } + mapping.endObject(); + } + mapping.endObject(); + mapping.endObject(); + client(clusterAlias).admin().indices().prepareCreate(INDEX_WITH_RUNTIME_MAPPING).setMapping(mapping).get(); + BulkRequestBuilder bulk = client(clusterAlias).prepareBulk(INDEX_WITH_RUNTIME_MAPPING) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); + for (int i = 0; i < 10; i++) { + bulk.add(new IndexRequest().source("foo", i)); + } + bulk.get(); + } + + void populateRemoteIndices(String clusterAlias, String indexName, int numShards) throws IOException { + Client remoteClient = client(clusterAlias); + assertAcked( + remoteClient.admin() + .indices() + .prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", numShards)) + .setMapping("id", "type=keyword", "tag", "type=keyword", "v", "type=long") + ); + for (int i = 0; i < 10; i++) { + remoteClient.prepareIndex(indexName).setSource("id", "remote-" + i, "tag", "remote", "v", i * i).get(); + } + remoteClient.admin().indices().prepareRefresh(indexName).get(); + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersQueryIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersQueryIT.java index 6801e1f4eb404..596c70e57ccd6 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersQueryIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersQueryIT.java @@ -61,6 +61,10 @@ public class CrossClustersQueryIT extends AbstractMultiClustersTestCase { private static final String REMOTE_CLUSTER_1 = "cluster-a"; private static final String REMOTE_CLUSTER_2 = "remote-b"; + private static String LOCAL_INDEX = "logs-1"; + private static String IDX_ALIAS = "alias1"; + private static String FILTERED_IDX_ALIAS = "alias-filtered-1"; + private static String REMOTE_INDEX = "logs-2"; @Override protected Collection remoteClusterAlias() { @@ -1278,11 +1282,6 @@ Map setupTwoClusters() { return setupClusters(2); } - private static String LOCAL_INDEX = "logs-1"; - private static String IDX_ALIAS = "alias1"; - private static String FILTERED_IDX_ALIAS = "alias-filtered-1"; - private static String REMOTE_INDEX = "logs-2"; - Map setupClusters(int numClusters) { assert numClusters == 2 || numClusters == 3 : "2 or 3 clusters supported not: " + numClusters; int numShardsLocal = randomIntBetween(1, 5); diff --git 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java index 80bb2afe57122..ba7a7e8266845 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java @@ -169,6 +169,17 @@ public TimeValue overallTook() { return overallTook; } + /** + * How much time the query took since starting. + */ + public TimeValue tookSoFar() { + if (relativeStartNanos == null) { + return new TimeValue(0); + } else { + return new TimeValue(System.nanoTime() - relativeStartNanos, TimeUnit.NANOSECONDS); + } + } + public Set clusterAliases() { return clusterInfo.keySet(); } @@ -478,7 +489,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws { builder.field(STATUS_FIELD.getPreferredName(), getStatus().toString()); builder.field(INDICES_FIELD.getPreferredName(), indexExpression); - if (took != null) { + if (took != null && status != Status.RUNNING) { builder.field(TOOK.getPreferredName(), took.millis()); } if (totalShards != null) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponse.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponse.java index 4e59d5419fe6f..77aed298baea5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponse.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponse.java @@ -196,8 +196,11 @@ public Iterator toXContentChunked(ToXContent.Params params } b.field("is_running", isRunning); } - if (executionInfo != null && executionInfo.overallTook() != null) { - b.field("took", executionInfo.overallTook().millis()); + if (executionInfo != null) { + long tookInMillis = executionInfo.overallTook() == null + ? 
executionInfo.tookSoFar().millis() + : executionInfo.overallTook().millis(); + b.field("took", tookInMillis); } if (dropNullColumns) { b.append(ResponseXContentUtils.allColumns(columns, "all_columns")) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryTask.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryTask.java index b12cf4eb354bf..f896a25317102 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryTask.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlQueryTask.java @@ -17,6 +17,8 @@ public class EsqlQueryTask extends StoredAsyncTask { + private EsqlExecutionInfo executionInfo; + public EsqlQueryTask( long id, String type, @@ -29,10 +31,19 @@ public EsqlQueryTask( TimeValue keepAlive ) { super(id, type, action, description, parentTaskId, headers, originHeaders, asyncExecutionId, keepAlive); + this.executionInfo = null; + } + + public void setExecutionInfo(EsqlExecutionInfo executionInfo) { + this.executionInfo = executionInfo; + } + + public EsqlExecutionInfo executionInfo() { + return executionInfo; } @Override public EsqlQueryResponse getCurrentResult() { - return new EsqlQueryResponse(List.of(), List.of(), null, false, getExecutionId().getEncoded(), true, true, null); + return new EsqlQueryResponse(List.of(), List.of(), null, false, getExecutionId().getEncoded(), true, true, executionInfo); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java index 49af4a593e6e5..8d041ffbdf0e4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeListener.java @@ -112,6 +112,7 @@ private ComputeListener( if (runningOnRemoteCluster()) { // for remote executions - this ComputeResponse is created on the remote cluster/node and will be serialized and // received by the acquireCompute method callback on the coordinating cluster + setFinalStatusAndShardCounts(clusterAlias, executionInfo); EsqlExecutionInfo.Cluster cluster = esqlExecutionInfo.getCluster(clusterAlias); result = new ComputeResponse( collectedProfiles.isEmpty() ? 
List.of() : collectedProfiles.stream().toList(), @@ -126,19 +127,33 @@ private ComputeListener( if (coordinatingClusterIsSearchedInCCS()) { // if not already marked as SKIPPED, mark the local cluster as finished once the coordinator and all // data nodes have finished processing - executionInfo.swapCluster(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, (k, v) -> { - if (v.getStatus() != EsqlExecutionInfo.Cluster.Status.SKIPPED) { - return new EsqlExecutionInfo.Cluster.Builder(v).setStatus(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL).build(); - } else { - return v; - } - }); + setFinalStatusAndShardCounts(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY, executionInfo); } } delegate.onResponse(result); }, e -> delegate.onFailure(failureCollector.getFailure()))); } + private static void setFinalStatusAndShardCounts(String clusterAlias, EsqlExecutionInfo executionInfo) { + executionInfo.swapCluster(clusterAlias, (k, v) -> { + // TODO: once PARTIAL status is supported (partial results work to come), modify this code as needed + if (v.getStatus() != EsqlExecutionInfo.Cluster.Status.SKIPPED) { + assert v.getTotalShards() != null && v.getSkippedShards() != null : "Null total or skipped shard count: " + v; + return new EsqlExecutionInfo.Cluster.Builder(v).setStatus(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL) + /* + * Total and skipped shard counts are set early in execution (after can-match). + * Until ES|QL supports shard-level partial results, we just set all non-skipped shards + * as successful and none are failed. + */ + .setSuccessfulShards(v.getTotalShards()) + .setFailedShards(0) + .build(); + } else { + return v; + } + }); + } + /** * @return true if the "local" querying/coordinator cluster is being searched in a cross-cluster search */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 6a0d1bf9bb035..73266551f169c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -178,6 +178,7 @@ public void execute( null ); String local = RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY; + updateShardCountForCoordinatorOnlyQuery(execInfo); try (var computeListener = ComputeListener.create(local, transportService, rootTask, execInfo, listener.map(r -> { updateExecutionInfoAfterCoordinatorOnlyQuery(execInfo); return new Result(physicalPlan.output(), collectedPages, r.getProfiles(), execInfo); @@ -260,6 +261,22 @@ public void execute( } } + // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries) + private static void updateShardCountForCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) { + if (execInfo.isCrossClusterSearch()) { + for (String clusterAlias : execInfo.clusterAliases()) { + execInfo.swapCluster( + clusterAlias, + (k, v) -> new EsqlExecutionInfo.Cluster.Builder(v).setTotalShards(0) + .setSuccessfulShards(0) + .setSkippedShards(0) + .setFailedShards(0) + .build() + ); + } + } + } + // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries) private static void updateExecutionInfoAfterCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) { execInfo.markEndQuery(); // TODO: revisit this time recording model as part of INLINESTATS improvements @@ -267,11 +284,7 @@ private static void updateExecutionInfoAfterCoordinatorOnlyQuery(EsqlExecutionIn assert 
execInfo.planningTookTime() != null : "Planning took time should be set on EsqlExecutionInfo but is null"; for (String clusterAlias : execInfo.clusterAliases()) { execInfo.swapCluster(clusterAlias, (k, v) -> { - var builder = new EsqlExecutionInfo.Cluster.Builder(v).setTook(execInfo.overallTook()) - .setTotalShards(0) - .setSuccessfulShards(0) - .setSkippedShards(0) - .setFailedShards(0); + var builder = new EsqlExecutionInfo.Cluster.Builder(v).setTook(execInfo.overallTook()); if (v.getStatus() == EsqlExecutionInfo.Cluster.Status.RUNNING) { builder.setStatus(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL); } @@ -324,9 +337,8 @@ private void startComputeOnDataNodes( executionInfo.swapCluster( clusterAlias, (k, v) -> new EsqlExecutionInfo.Cluster.Builder(v).setTotalShards(dataNodeResult.totalShards()) - .setSuccessfulShards(dataNodeResult.totalShards()) + // do not set successful or failed shard count here - do it when search is done .setSkippedShards(dataNodeResult.skippedShards()) - .setFailedShards(0) .build() ); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java index fdc6e06a11032..76bfb95d07926 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java @@ -151,6 +151,8 @@ private void doExecuteForked(Task task, EsqlQueryRequest request, ActionListener @Override public void execute(EsqlQueryRequest request, EsqlQueryTask task, ActionListener listener) { + // set EsqlExecutionInfo on async-search task so that it is accessible to GET _query/async while the query is still running + task.setExecutionInfo(createEsqlExecutionInfo(request)); ActionListener.run(listener, l -> innerExecute(task, request, l)); } @@ -170,10 +172,9 @@ private void innerExecute(Task task, EsqlQueryRequest request, ActionListener remoteClusterService.isSkipUnavailable(clusterAlias), - request.includeCCSMetadata() - ); + // async-query uses EsqlQueryTask, so pull the EsqlExecutionInfo out of the task + // sync query uses CancellableTask which does not have EsqlExecutionInfo, so create one + EsqlExecutionInfo executionInfo = getOrCreateExecutionInfo(task, request); PlanRunner planRunner = (plan, resultListener) -> computeService.execute( sessionId, (CancellableTask) task, @@ -194,6 +195,18 @@ private void innerExecute(Task task, EsqlQueryRequest request, ActionListener remoteClusterService.isSkipUnavailable(clusterAlias), request.includeCCSMetadata()); + } + private EsqlQueryResponse toResponse(Task task, EsqlQueryRequest request, Configuration configuration, Result result) { List columns = result.schema().stream().map(c -> new ColumnInfoImpl(c.name(), c.dataType().outputType())).toList(); EsqlQueryResponse.Profile profile = configuration.profile() ? 
new EsqlQueryResponse.Profile(result.profiles()) : null; @@ -269,7 +282,7 @@ public EsqlQueryResponse initialResponse(EsqlQueryTask task) { asyncExecutionId, true, // is_running true, // isAsync - null + task.executionInfo() ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 8f65914d1c30d..021596c31f65d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -147,6 +147,7 @@ public String sessionId() { * Execute an ESQL request. */ public void execute(EsqlQueryRequest request, EsqlExecutionInfo executionInfo, PlanRunner planRunner, ActionListener listener) { + assert executionInfo != null : "Null EsqlExecutionInfo"; LOGGER.debug("ESQL query:\n{}", request.query()); analyzedPlan( parse(request.query(), request.params()), diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java index 35364089127cc..f7b402b909732 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java @@ -519,14 +519,15 @@ static EsqlQueryResponse fromXContent(XContentParser parser) { } public void testChunkResponseSizeColumnar() { - int sizeClusterDetails = 14; try (EsqlQueryResponse resp = randomResponse(true, null)) { + int sizeClusterDetails = 14; int columnCount = resp.pages().get(0).getBlockCount(); int bodySize = resp.pages().stream().mapToInt(p -> p.getPositionCount() * p.getBlockCount()).sum() + columnCount * 2; assertChunkCount(resp, r -> 5 + sizeClusterDetails + bodySize); } try (EsqlQueryResponse resp = randomResponseAsync(true, null, true)) { + int sizeClusterDetails = resp.isRunning() ? 
13 : 14; // overall took time not present when is_running=true int columnCount = resp.pages().get(0).getBlockCount(); int bodySize = resp.pages().stream().mapToInt(p -> p.getPositionCount() * p.getBlockCount()).sum() + columnCount * 2; assertChunkCount(resp, r -> 7 + sizeClusterDetails + bodySize); // is_running diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java index 625cb5628d039..b606f99df437c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ComputeListenerTests.java @@ -353,10 +353,7 @@ public void testAcquireComputeRunningOnRemoteClusterFillsInTookTime() { assertThat(response.getTook().millis(), greaterThanOrEqualTo(0L)); assertThat(executionInfo.getCluster(remoteAlias).getTook().millis(), greaterThanOrEqualTo(0L)); assertThat(executionInfo.getCluster(remoteAlias).getTook(), equalTo(response.getTook())); - - // the status in the (remote) executionInfo will still be RUNNING, since the SUCCESSFUL status gets set on the querying - // cluster executionInfo in the acquireCompute CCS listener, NOT present in this test - see testCollectComputeResultsInCCSListener - assertThat(executionInfo.getCluster(remoteAlias).getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.RUNNING)); + assertThat(executionInfo.getCluster(remoteAlias).getStatus(), equalTo(EsqlExecutionInfo.Cluster.Status.SUCCESSFUL)); Mockito.verifyNoInteractions(transportService.getTaskManager()); } @@ -376,6 +373,17 @@ public void testAcquireComputeRunningOnQueryingClusterFillsInTookTime() { // fully filled in for cross-cluster searches executionInfo.swapCluster(localCluster, (k, v) -> new EsqlExecutionInfo.Cluster(localCluster, "logs*", false)); executionInfo.swapCluster("my_remote", (k, v) -> new EsqlExecutionInfo.Cluster("my_remote", "my_remote:logs*", false)); + + // before acquire-compute, can-match (SearchShards) runs filling in total shards and skipped shards, so simulate that here + executionInfo.swapCluster( + localCluster, + (k, v) -> new EsqlExecutionInfo.Cluster.Builder(v).setTotalShards(10).setSkippedShards(1).build() + ); + executionInfo.swapCluster( + "my_remote", + (k, v) -> new EsqlExecutionInfo.Cluster.Builder(v).setTotalShards(10).setSkippedShards(1).build() + ); + try ( ComputeListener computeListener = ComputeListener.create( // whereRunning=localCluster simulates running on the querying cluster From c2e4afcfd584fe35aa88a9b9840cf5ff4c3c80b6 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 27 Nov 2024 13:23:20 -0800 Subject: [PATCH 048/139] Try to finish remote sink once (#117592) Currently, we have three clients fetching pages by default, each with its own lifecycle. This can result in scenarios where more than one request is sent to complete the remote sink. While this does not cause correctness issues, it is inefficient, especially for cross-cluster requests. This change tracks the status of the remote sink and tries to send only one finish request per remote sink. 
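[Editor's note] The change below boils down to an idempotency guard: whichever page fetcher wins a compare-and-set sends the single finish request, and every other caller gets an immediate "already finished" response. A minimal, self-contained sketch of the pattern, using hypothetical names (RemoteSinkClient, sendFinishRequest) rather than the actual ESQL classes:

import java.util.concurrent.atomic.AtomicBoolean;

class RemoteSinkClient {
    private final AtomicBoolean finished = new AtomicBoolean(false);

    // Each of the (by default three) concurrent page fetchers may call this.
    void finishRemoteSink(Runnable sendFinishRequest) {
        // compareAndSet lets exactly one caller through, so at most one
        // finish request goes over the wire per remote sink.
        if (finished.compareAndSet(false, true)) {
            sendFinishRequest.run();
        }
        // Losing callers simply fall through: the finish is already done or in flight.
    }
}

In the actual TransportRemoteSink (diff below), the same AtomicBoolean also short-circuits regular page fetches once the sink is known to be finished, and it is set on fetch failure so that no stray finish request follows an error.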
--- .../operator/exchange/ExchangeService.java | 28 +++++++++++++++++++ .../exchange/ExchangeServiceTests.java | 9 ++++++ 2 files changed, 37 insertions(+) diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java index d633270b5c595..a943a90d02e87 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java @@ -42,6 +42,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.Executor; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; /** @@ -292,6 +293,7 @@ static final class TransportRemoteSink implements RemoteSink { final Executor responseExecutor; final AtomicLong estimatedPageSizeInBytes = new AtomicLong(0L); + final AtomicBoolean finished = new AtomicBoolean(false); TransportRemoteSink( TransportService transportService, @@ -311,6 +313,32 @@ static final class TransportRemoteSink implements RemoteSink { @Override public void fetchPageAsync(boolean allSourcesFinished, ActionListener listener) { + if (allSourcesFinished) { + if (finished.compareAndSet(false, true)) { + doFetchPageAsync(true, listener); + } else { + // already finished or promised + listener.onResponse(new ExchangeResponse(blockFactory, null, true)); + } + } else { + // already finished + if (finished.get()) { + listener.onResponse(new ExchangeResponse(blockFactory, null, true)); + return; + } + doFetchPageAsync(false, ActionListener.wrap(r -> { + if (r.finished()) { + finished.set(true); + } + listener.onResponse(r); + }, e -> { + finished.set(true); + listener.onFailure(e); + })); + } + } + + private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { final long reservedBytes = allSourcesFinished ? 0 : estimatedPageSizeInBytes.get(); if (reservedBytes > 0) { // This doesn't fully protect ESQL from OOM, but reduces the likelihood. 
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java index 8949f61b7420d..4178f02898d79 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java @@ -449,6 +449,15 @@ public void testConcurrentWithTransportActions() { ExchangeService exchange1 = new ExchangeService(Settings.EMPTY, threadPool, ESQL_TEST_EXECUTOR, blockFactory()); exchange1.registerTransportHandler(node1); AbstractSimpleTransportTestCase.connectToNode(node0, node1.getLocalNode()); + Set finishingRequests = ConcurrentCollections.newConcurrentSet(); + node1.addRequestHandlingBehavior(ExchangeService.EXCHANGE_ACTION_NAME, (handler, request, channel, task) -> { + final ExchangeRequest exchangeRequest = (ExchangeRequest) request; + if (exchangeRequest.sourcesFinished()) { + String exchangeId = exchangeRequest.exchangeId(); + assertTrue("tried to finish [" + exchangeId + "] twice", finishingRequests.add(exchangeId)); + } + handler.messageReceived(request, channel, task); + }); try (exchange0; exchange1; node0; node1) { String exchangeId = "exchange"; From 656b5f94804a9efe9329041a933e92075400f592 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Wed, 27 Nov 2024 14:31:30 -0800 Subject: [PATCH 049/139] Refactor PluginsLoader to better support tests (#117522) This refactors the way PluginsLoader is created to better support various types of testing. --- .../script/ScriptScoreBenchmark.java | 2 +- .../bootstrap/Elasticsearch.java | 2 +- .../elasticsearch/plugins/PluginsLoader.java | 71 ++++++++++++------- .../plugins/PluginsServiceTests.java | 12 ++-- .../plugins/MockPluginsService.java | 13 ++-- .../bench/WatcherScheduleEngineBenchmark.java | 5 +- 6 files changed, 61 insertions(+), 44 deletions(-) diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java index d44586ef4901a..b44f04c3a26a4 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java @@ -77,7 +77,7 @@ public class ScriptScoreBenchmark { private final PluginsService pluginsService = new PluginsService( Settings.EMPTY, null, - new PluginsLoader(null, Path.of(System.getProperty("plugins.dir"))) + PluginsLoader.createPluginsLoader(null, Path.of(System.getProperty("plugins.dir"))) ); private final ScriptModule scriptModule = new ScriptModule(Settings.EMPTY, pluginsService.filterPlugins(ScriptPlugin.class).toList()); diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java index b7774259bf289..c06ea9305aef8 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java @@ -206,7 +206,7 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { ); // load the plugin Java modules and layers now for use in entitlements - var pluginsLoader = new PluginsLoader(nodeEnv.modulesFile(), nodeEnv.pluginsFile()); + var pluginsLoader = 
PluginsLoader.createPluginsLoader(nodeEnv.modulesFile(), nodeEnv.pluginsFile()); bootstrap.setPluginsLoader(pluginsLoader); if (Boolean.parseBoolean(System.getProperty("es.entitlements.enabled"))) { diff --git a/server/src/main/java/org/elasticsearch/plugins/PluginsLoader.java b/server/src/main/java/org/elasticsearch/plugins/PluginsLoader.java index 6b3eda6c0c9b4..aa21e5c64d903 100644 --- a/server/src/main/java/org/elasticsearch/plugins/PluginsLoader.java +++ b/server/src/main/java/org/elasticsearch/plugins/PluginsLoader.java @@ -118,15 +118,30 @@ public static LayerAndLoader ofLoader(ClassLoader loader) { * @param modulesDirectory The directory modules exist in, or null if modules should not be loaded from the filesystem * @param pluginsDirectory The directory plugins exist in, or null if plugins should not be loaded from the filesystem */ - @SuppressWarnings("this-escape") - public PluginsLoader(Path modulesDirectory, Path pluginsDirectory) { + public static PluginsLoader createPluginsLoader(Path modulesDirectory, Path pluginsDirectory) { + return createPluginsLoader(modulesDirectory, pluginsDirectory, true); + } - Map> qualifiedExports = new HashMap<>(ModuleQualifiedExportsService.getBootServices()); - addServerExportsService(qualifiedExports); + /** + * Constructs a new PluginsLoader + * + * @param modulesDirectory The directory modules exist in, or null if modules should not be loaded from the filesystem + * @param pluginsDirectory The directory plugins exist in, or null if plugins should not be loaded from the filesystem + * @param withServerExports {@code true} to add server module exports + */ + public static PluginsLoader createPluginsLoader(Path modulesDirectory, Path pluginsDirectory, boolean withServerExports) { + Map> qualifiedExports; + if (withServerExports) { + qualifiedExports = new HashMap<>(ModuleQualifiedExportsService.getBootServices()); + addServerExportsService(qualifiedExports); + } else { + qualifiedExports = Collections.emptyMap(); + } Set seenBundles = new LinkedHashSet<>(); // load (elasticsearch) module layers + List moduleDescriptors; if (modulesDirectory != null) { try { Set modules = PluginsUtils.getModuleBundles(modulesDirectory); @@ -140,6 +155,7 @@ public PluginsLoader(Path modulesDirectory, Path pluginsDirectory) { } // load plugin layers + List pluginDescriptors; if (pluginsDirectory != null) { try { // TODO: remove this leniency, but tests bogusly rely on it @@ -158,7 +174,28 @@ public PluginsLoader(Path modulesDirectory, Path pluginsDirectory) { pluginDescriptors = Collections.emptyList(); } - this.loadedPluginLayers = Collections.unmodifiableMap(loadPluginLayers(seenBundles, qualifiedExports)); + Map loadedPluginLayers = new LinkedHashMap<>(); + Map> transitiveUrls = new HashMap<>(); + List sortedBundles = PluginsUtils.sortBundles(seenBundles); + if (sortedBundles.isEmpty() == false) { + Set systemLoaderURLs = JarHell.parseModulesAndClassPath(); + for (PluginBundle bundle : sortedBundles) { + PluginsUtils.checkBundleJarHell(systemLoaderURLs, bundle, transitiveUrls); + loadPluginLayer(bundle, loadedPluginLayers, qualifiedExports); + } + } + + return new PluginsLoader(moduleDescriptors, pluginDescriptors, loadedPluginLayers); + } + + PluginsLoader( + List moduleDescriptors, + List pluginDescriptors, + Map loadedPluginLayers + ) { + this.moduleDescriptors = moduleDescriptors; + this.pluginDescriptors = pluginDescriptors; + this.loadedPluginLayers = loadedPluginLayers; } public List moduleDescriptors() { @@ -173,25 +210,7 @@ public Stream pluginLayers() 
{ return loadedPluginLayers.values().stream().map(Function.identity()); } - private Map loadPluginLayers( - Set bundles, - Map> qualifiedExports - ) { - Map loaded = new LinkedHashMap<>(); - Map> transitiveUrls = new HashMap<>(); - List sortedBundles = PluginsUtils.sortBundles(bundles); - if (sortedBundles.isEmpty() == false) { - Set systemLoaderURLs = JarHell.parseModulesAndClassPath(); - for (PluginBundle bundle : sortedBundles) { - PluginsUtils.checkBundleJarHell(systemLoaderURLs, bundle, transitiveUrls); - loadPluginLayer(bundle, loaded, qualifiedExports); - } - } - - return loaded; - } - - private void loadPluginLayer( + private static void loadPluginLayer( PluginBundle bundle, Map loaded, Map> qualifiedExports @@ -211,7 +230,7 @@ private void loadPluginLayer( } final ClassLoader parentLoader = ExtendedPluginsClassLoader.create( - getClass().getClassLoader(), + PluginsLoader.class.getClassLoader(), extendedPlugins.stream().map(LoadedPluginLayer::spiClassLoader).toList() ); LayerAndLoader spiLayerAndLoader = null; @@ -427,7 +446,7 @@ private static List parentLayersOrBoot(List parentLaye } } - protected void addServerExportsService(Map> qualifiedExports) { + private static void addServerExportsService(Map> qualifiedExports) { var exportsService = new ModuleQualifiedExportsService(serverModule) { @Override protected void addExports(String pkg, Module target) { diff --git a/server/src/test/java/org/elasticsearch/plugins/PluginsServiceTests.java b/server/src/test/java/org/elasticsearch/plugins/PluginsServiceTests.java index 015bc72747bf2..79d8f98c7dca6 100644 --- a/server/src/test/java/org/elasticsearch/plugins/PluginsServiceTests.java +++ b/server/src/test/java/org/elasticsearch/plugins/PluginsServiceTests.java @@ -18,7 +18,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.IndexModule; -import org.elasticsearch.jdk.ModuleQualifiedExportsService; import org.elasticsearch.plugin.analysis.CharFilterFactory; import org.elasticsearch.plugins.scanners.PluginInfo; import org.elasticsearch.plugins.spi.BarPlugin; @@ -66,12 +65,11 @@ public class PluginsServiceTests extends ESTestCase { public static class FilterablePlugin extends Plugin implements ScriptPlugin {} static PluginsService newPluginsService(Settings settings) { - return new PluginsService(settings, null, new PluginsLoader(null, TestEnvironment.newEnvironment(settings).pluginsFile()) { - @Override - protected void addServerExportsService(Map> qualifiedExports) { - // tests don't run modular - } - }); + return new PluginsService( + settings, + null, + PluginsLoader.createPluginsLoader(null, TestEnvironment.newEnvironment(settings).pluginsFile(), false) + ); } static PluginsService newMockPluginsService(List> classpathPlugins) { diff --git a/test/framework/src/main/java/org/elasticsearch/plugins/MockPluginsService.java b/test/framework/src/main/java/org/elasticsearch/plugins/MockPluginsService.java index 9e96396493bdf..a9a825af3b865 100644 --- a/test/framework/src/main/java/org/elasticsearch/plugins/MockPluginsService.java +++ b/test/framework/src/main/java/org/elasticsearch/plugins/MockPluginsService.java @@ -16,7 +16,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.env.Environment; -import org.elasticsearch.jdk.ModuleQualifiedExportsService; import org.elasticsearch.plugins.spi.SPIClassIterator; import java.lang.reflect.Constructor; @@ -43,13 +42,11 @@ public 
class MockPluginsService extends PluginsService { * @param classpathPlugins Plugins that exist in the classpath which should be loaded */ public MockPluginsService(Settings settings, Environment environment, Collection> classpathPlugins) { - super(settings, environment.configFile(), new PluginsLoader(environment.modulesFile(), environment.pluginsFile()) { - - @Override - protected void addServerExportsService(Map> qualifiedExports) { - // tests don't run modular - } - }); + super( + settings, + environment.configFile(), + new PluginsLoader(Collections.emptyList(), Collections.emptyList(), Collections.emptyMap()) + ); List pluginsLoaded = new ArrayList<>(); diff --git a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/bench/WatcherScheduleEngineBenchmark.java b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/bench/WatcherScheduleEngineBenchmark.java index 99fb626ad9474..59dc1db88e991 100644 --- a/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/bench/WatcherScheduleEngineBenchmark.java +++ b/x-pack/plugin/watcher/src/test/java/org/elasticsearch/xpack/watcher/test/bench/WatcherScheduleEngineBenchmark.java @@ -109,7 +109,10 @@ public static void main(String[] args) throws Exception { // First clean everything and index the watcher (but not via put alert api!) try ( - Node node = new Node(internalNodeEnv, new PluginsLoader(internalNodeEnv.modulesFile(), internalNodeEnv.pluginsFile())).start() + Node node = new Node( + internalNodeEnv, + PluginsLoader.createPluginsLoader(internalNodeEnv.modulesFile(), internalNodeEnv.pluginsFile()) + ).start() ) { final Client client = node.client(); ClusterHealthResponse response = client.admin().cluster().prepareHealth(TimeValue.THIRTY_SECONDS).setWaitForNodes("2").get(); From 77626d686b62fc85ce91d65cfff8adf631f84bcd Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Wed, 27 Nov 2024 16:45:22 -0800 Subject: [PATCH 050/139] Unmute FieldExtractorIT (#117669) Fixed in #117529 Closes #117524 Closes #117531 --- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 8b12bd2dd3365..5cf16fdf3da0a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -214,14 +214,8 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=transform/transforms_reset/Test reset running transform} issue: https://github.com/elastic/elasticsearch/issues/117473 -- class: org.elasticsearch.xpack.esql.qa.multi_node.FieldExtractorIT - method: testConstantKeywordField - issue: https://github.com/elastic/elasticsearch/issues/117524 - class: org.elasticsearch.repositories.s3.RepositoryS3EcsClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117525 -- class: org.elasticsearch.xpack.esql.qa.mixed.FieldExtractorIT - method: testConstantKeywordField - issue: https://github.com/elastic/elasticsearch/issues/117531 - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} issue: https://github.com/elastic/elasticsearch/issues/116777 From bb93f1f3ce8f1460e48a4b86d3b0fee72b4fa4b1 Mon Sep 17 00:00:00 2001 From: Michael Peterson Date: Wed, 27 Nov 2024 21:14:19 -0500 Subject: [PATCH 051/139] Adjusted testChunkResponseSizeColumnar to always expected the overall took time in the async response (#117673) --- .../xpack/esql/action/EsqlQueryResponseTests.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java index f7b402b909732..35364089127cc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/action/EsqlQueryResponseTests.java @@ -519,15 +519,14 @@ static EsqlQueryResponse fromXContent(XContentParser parser) { } public void testChunkResponseSizeColumnar() { + int sizeClusterDetails = 14; try (EsqlQueryResponse resp = randomResponse(true, null)) { - int sizeClusterDetails = 14; int columnCount = resp.pages().get(0).getBlockCount(); int bodySize = resp.pages().stream().mapToInt(p -> p.getPositionCount() * p.getBlockCount()).sum() + columnCount * 2; assertChunkCount(resp, r -> 5 + sizeClusterDetails + bodySize); } try (EsqlQueryResponse resp = randomResponseAsync(true, null, true)) { - int sizeClusterDetails = resp.isRunning() ? 13 : 14; // overall took time not present when is_running=true int columnCount = resp.pages().get(0).getBlockCount(); int bodySize = resp.pages().stream().mapToInt(p -> p.getPositionCount() * p.getBlockCount()).sum() + columnCount * 2; assertChunkCount(resp, r -> 7 + sizeClusterDetails + bodySize); // is_running From c3ac2bd58a5c406982212def72580cc25e89761a Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:23:28 +0100 Subject: [PATCH 052/139] [DOCS] Add Elastic Rerank usage docs (#117625) --- .../inference/service-elasticsearch.asciidoc | 41 +++++++-- .../reranking/semantic-reranking.asciidoc | 20 +++-- docs/reference/search/retriever.asciidoc | 83 +++++++++++++++++-- 3 files changed, 121 insertions(+), 23 deletions(-) diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 0103b425faefe..cd06e6d7b2f64 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -69,15 +69,15 @@ include::inference-shared.asciidoc[tag=service-settings] These settings are specific to the `elasticsearch` service. -- -`adaptive_allocations`::: -(Optional, object) -include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation] - `deployment_id`::: (Optional, string) The `deployment_id` of an existing trained model deployment. When `deployment_id` is used the `model_id` is optional. +`adaptive_allocations`::: +(Optional, object) +include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation] + `enabled`:::: (Optional, Boolean) include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=adaptive-allocation-enabled] @@ -119,7 +119,6 @@ include::inference-shared.asciidoc[tag=task-settings] Returns the document instead of only the index. Defaults to `true`. ===== - [discrete] [[inference-example-elasticsearch-elser]] ==== ELSER via the `elasticsearch` service @@ -137,7 +136,7 @@ PUT _inference/sparse_embedding/my-elser-model "adaptive_allocations": { <1> "enabled": true, "min_number_of_allocations": 1, - "max_number_of_allocations": 10 + "max_number_of_allocations": 4 }, "num_threads": 1, "model_id": ".elser_model_2" <2> @@ -150,6 +149,34 @@ PUT _inference/sparse_embedding/my-elser-model Valid values are `.elser_model_2` and `.elser_model_2_linux-x86_64`. For further details, refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation]. 
+[discrete] +[[inference-example-elastic-reranker]] +==== Elastic Rerank via the `elasticsearch` service + +The following example shows how to create an {infer} endpoint called `my-elastic-rerank` to perform a `rerank` task type using the built-in Elastic Rerank cross-encoder model. + +The API request below will automatically download the Elastic Rerank model if it isn't already downloaded and then deploy the model. +Once deployed, the model can be used for semantic re-ranking with a <>. + +[source,console] +------------------------------------------------------------ +PUT _inference/rerank/my-elastic-rerank +{ + "service": "elasticsearch", + "service_settings": { + "model_id": ".rerank-v1", <1> + "num_threads": 1, + "adaptive_allocations": { <2> + "enabled": true, + "min_number_of_allocations": 1, + "max_number_of_allocations": 4 + } + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The `model_id` must be the ID of the built-in Elastic Rerank model: `.rerank-v1`. +<2> {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[Adaptive allocations] will be enabled with the minimum of 1 and the maximum of 10 allocations. [discrete] [[inference-example-elasticsearch]] @@ -186,7 +213,7 @@ If using the Python client, you can set the `timeout` parameter to a higher valu [discrete] [[inference-example-eland]] -==== Models uploaded by Eland via the elasticsearch service +==== Models uploaded by Eland via the `elasticsearch` service The following example shows how to create an {infer} endpoint called `my-msmarco-minilm-model` to perform a `text_embedding` task type. diff --git a/docs/reference/reranking/semantic-reranking.asciidoc b/docs/reference/reranking/semantic-reranking.asciidoc index 4ebe90e44708e..e1e2abd224a8e 100644 --- a/docs/reference/reranking/semantic-reranking.asciidoc +++ b/docs/reference/reranking/semantic-reranking.asciidoc @@ -85,14 +85,16 @@ In {es}, semantic re-rankers are implemented using the {es} <> using the `rerank` task type -** Integrate directly with the <> using the `rerank` task type -** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic re-ranking. -*** Then set up an <> with the `rerank` task type -. *Create a `rerank` task using the <>*. +. *Select and configure a re-ranking model*. +You have the following options: +.. Use the <> cross-encoder model via the inference API's {es} service. +.. Use the <> to create a `rerank` endpoint. +.. Use the <> to create a `rerank` endpoint. +.. Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Then set up an <> with the `rerank` endpoint type. ++ +Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic re-ranking. + +. *Create a `rerank` endpoint using the <>*. The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the re-ranking task. . *Define a `text_similarity_reranker` retriever in your search request*. 
The retriever syntax makes it simple to configure both the retrieval and re-ranking of search results in a single API call. @@ -117,7 +119,7 @@ POST _search } }, "field": "text", - "inference_id": "my-cohere-rerank-model", + "inference_id": "elastic-rerank", "inference_text": "How often does the moon hide the sun?", "rank_window_size": 100, "min_score": 0.5 diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 86a81f1d155d2..b90b7e312c790 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -11,6 +11,7 @@ This allows for complex behavior to be depicted in a tree-like structure, called [TIP] ==== Refer to <> for a high level overview of the retrievers abstraction. +Refer to <> for additional examples. ==== The following retrievers are available: @@ -382,16 +383,17 @@ Refer to <> for a high level overview of semantic re-ranking ===== Prerequisites -To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. -The `rerank` task should be set up with a machine learning model that can compute text similarity. +To use `text_similarity_reranker` you must first set up an inference endpoint for the `rerank` task using the <>. +The endpoint should be set up with a machine learning model that can compute text similarity. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. -Currently you can: +You have the following options: -* Integrate directly with the <> using the `rerank` task type -* Integrate directly with the <> using the `rerank` task type +* Use the built-in <> cross-encoder model via the inference API's {es} service. +* Use the <> with the `rerank` task type. +* Use the <> with the `rerank` task type. * Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] using the `text_similarity` NLP task type. -** Then set up an <> with the `rerank` task type +** Then set up an <> with the `rerank` task type. ** Refer to the <> on this page for a step-by-step guide. ===== Parameters @@ -436,13 +438,70 @@ Note that score calculations vary depending on the model used. Applies the specified <> to the child <>. If the child retriever already specifies any filters, then this top-level filter is applied in conjunction with the filter defined in the child retriever. +[discrete] +[[text-similarity-reranker-retriever-example-elastic-rerank]] +==== Example: Elastic Rerank + +This example demonstrates how to deploy the Elastic Rerank model and use it to re-rank search results using the `text_similarity_reranker` retriever. + +Follow these steps: + +. Create an inference endpoint for the `rerank` task using the <>. ++ +[source,console] +---- +PUT _inference/rerank/my-elastic-rerank +{ + "service": "elasticsearch", + "service_settings": { + "model_id": ".rerank-v1", + "num_threads": 1, + "adaptive_allocations": { <1> + "enabled": true, + "min_number_of_allocations": 1, + "max_number_of_allocations": 10 + } + } +} +---- +// TEST[skip:uses ML] +<1> {ml-docs}/ml-nlp-auto-scale.html#nlp-model-adaptive-allocations[Adaptive allocations] will be enabled with the minimum of 1 and the maximum of 10 allocations. ++ +.
Define a `text_similarity_rerank` retriever: ++ +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" + } + } + } + }, + "field": "text", + "inference_id": "my-elastic-rerank", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:uses ML] + [discrete] [[text-similarity-reranker-retriever-example-cohere]] ==== Example: Cohere Rerank This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminates the need to generate and store embeddings for all indexed documents. -This requires a <> using the `rerank` task type. +This requires a <> that is set up for the `rerank` task type. [source,console] ---- @@ -680,6 +739,12 @@ GET movies/_search <1> The `rule` retriever is the outermost retriever, applying rules to the search results that were previously reranked using the `rrf` retriever. <2> The `rrf` retriever returns results from all of its sub-retrievers, and the output of the `rrf` retriever is used as input to the `rule` retriever. +[discrete] +[[retriever-common-parameters]] +=== Common usage guidelines + +[discrete] +[[retriever-size-pagination]] ==== Using `from` and `size` with a retriever tree The <> and <> @@ -688,12 +753,16 @@ parameters are provided globally as part of the general They are applied to all retrievers in a retriever tree, unless a specific retriever overrides the `size` parameter using a different parameter such as `rank_window_size`. Though, the final search hits are always limited to `size`. +[discrete] +[[retriever-aggregations]] ==== Using aggregations with a retriever tree <> are globally specified as part of a search request. The query used for an aggregation is the combination of all leaf retrievers as `should` clauses in a <>. +[discrete] +[[retriever-restrictions]] ==== Restrictions on search parameters when specifying a retriever When a retriever is specified as part of a search, the following elements are not allowed at the top-level. From 79d70686b3ba86dcab4694d46e5a81de74ba06f8 Mon Sep 17 00:00:00 2001 From: kosabogi <105062005+kosabogi@users.noreply.github.com> Date: Thu, 28 Nov 2024 09:26:16 +0100 Subject: [PATCH 053/139] Fixes typo (#117684) --- .../ml/trained-models/apis/get-trained-models-stats.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ml/trained-models/apis/get-trained-models-stats.asciidoc b/docs/reference/ml/trained-models/apis/get-trained-models-stats.asciidoc index beff87e6ec6e6..b55f022a5d168 100644 --- a/docs/reference/ml/trained-models/apis/get-trained-models-stats.asciidoc +++ b/docs/reference/ml/trained-models/apis/get-trained-models-stats.asciidoc @@ -235,7 +235,7 @@ The reason for the current state. Usually only populated when the `routing_state (string) The current routing state. -- -* `starting`: The model is attempting to allocate on this model, inference calls are not yet accepted. +* `starting`: The model is attempting to allocate on this node, inference calls are not yet accepted. * `started`: The model is allocated and ready to accept inference requests. * `stopping`: The model is being deallocated from this node. * `stopped`: The model is fully deallocated from this node. 
From dc7ea9eff9a5897fabc2fb9dd3bb291eee77ca11 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 28 Nov 2024 09:40:38 +0100 Subject: [PATCH 054/139] ESQL: Fix LookupJoin output (#117639) * Fix output methods related to LookupJoin * Add tests with subsequent EVAL * Fix BinaryPlan.computeReferences This must not just use the references from its own output. Not only is this wrong, it also leads to failures when we call the .references() method on unresolved plans. --- .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 +- .../src/main/resources/lookup-join.csv-spec | 67 +++++++++++++++---- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../xpack/esql/analysis/Analyzer.java | 15 ++--- .../xpack/esql/plan/QueryPlan.java | 5 ++ .../xpack/esql/plan/logical/BinaryPlan.java | 7 -- .../xpack/esql/plan/logical/join/Join.java | 48 ++++--------- .../esql/plan/logical/join/LookupJoin.java | 43 +++--------- .../xpack/esql/session/EsqlSession.java | 4 -- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- .../xpack/esql/analysis/AnalyzerTests.java | 5 +- 11 files changed, 91 insertions(+), 111 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 5df85d1004dd1..8f4522573f880 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -47,7 +47,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V2; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC; @@ -125,7 +125,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP.capabilityName())); + assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V2.capabilityName())); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 605bf78c20a32..11786fb905c60 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -3,22 
+3,22 @@ // Reuses the sample dataset and commands from enrich.csv-spec // -basicOnTheDataNode -required_capability: join_lookup +//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) +basicOnTheDataNode-Ignore +required_capability: join_lookup_v2 -//TODO: this returns different results in CI then locally -// sometimes null, sometimes spanish (likely related to the execution order) FROM employees | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code -| WHERE emp_no < 500 -| KEEP emp_no, language_name +| WHERE emp_no >= 10091 AND emp_no < 10094 | SORT emp_no -| LIMIT 1 +| KEEP emp_no, language_code, language_name ; -emp_no:integer | language_name:keyword -//10091 | Spanish +emp_no:integer | language_code:integer | language_name:keyword +10091 | 3 | Spanish +10092 | 1 | English +10093 | 3 | Spanish ; basicRow-Ignore @@ -33,16 +33,55 @@ language_code:keyword | language_name:keyword ; basicOnTheCoordinator -required_capability: join_lookup +required_capability: join_lookup_v2 + +FROM employees +| SORT emp_no +| LIMIT 3 +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup ON language_code +| KEEP emp_no, language_code, language_name +; + +emp_no:integer | language_code:integer | language_name:keyword +10001 | 2 | French +10002 | 5 | null +10003 | 4 | German +; + +//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) +subsequentEvalOnTheDataNode-Ignore +required_capability: join_lookup_v2 + +FROM employees +| EVAL language_code = languages +| LOOKUP JOIN languages_lookup ON language_code +| WHERE emp_no >= 10091 AND emp_no < 10094 +| SORT emp_no +| KEEP emp_no, language_code, language_name +| EVAL language_name = TO_LOWER(language_name), language_code_x2 = 2*language_code +; + +emp_no:integer | language_code:integer | language_name:keyword | language_code_x2:integer +10091 | 3 | spanish | 6 +10092 | 1 | english | 2 +10093 | 3 | spanish | 6 +; + +subsequentEvalOnTheCoordinator +required_capability: join_lookup_v2 FROM employees | SORT emp_no -| LIMIT 1 +| LIMIT 3 | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code -| KEEP emp_no, language_name +| KEEP emp_no, language_code, language_name +| EVAL language_name = TO_LOWER(language_name), language_code_x2 = 2*language_code ; -emp_no:integer | language_name:keyword -10001 | French +emp_no:integer | language_code:integer | language_name:keyword | language_code_x2:integer +10001 | 2 | french | 4 +10002 | 5 | null | 10 +10003 | 4 | german | 8 ; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 58748781d1778..d8004f73f613f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -524,7 +524,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP(Build.current().isSnapshot()), + JOIN_LOOKUP_V2(Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java index dde7bc09ac615..b847508d2b161 100644 --- 
a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Analyzer.java @@ -21,7 +21,6 @@ import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.EmptyAttribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; @@ -609,8 +608,7 @@ private Join resolveLookupJoin(LookupJoin join) { JoinConfig config = join.config(); // for now, support only (LEFT) USING clauses JoinType type = config.type(); - // rewrite the join into a equi-join between the field with the same name between left and right - // per SQL standard, the USING columns are placed first in the output, followed by the rest of left, then right + // rewrite the join into an equi-join between the field with the same name between left and right if (type instanceof UsingJoinType using) { List cols = using.columns(); // the lookup cannot be resolved, bail out @@ -632,14 +630,9 @@ private Join resolveLookupJoin(LookupJoin join) { // resolve the using columns against the left and the right side then assemble the new join config List leftKeys = resolveUsingColumns(cols, join.left().output(), "left"); List rightKeys = resolveUsingColumns(cols, join.right().output(), "right"); - List output = new ArrayList<>(join.left().output()); - // the order is stable (since the AttributeSet preservers the insertion order) - output.addAll(join.right().outputSet().subtract(new AttributeSet(rightKeys))); - - // update the config - pick the left keys as those in the output - type = new UsingJoinType(coreJoin, rightKeys); - config = new JoinConfig(type, leftKeys, leftKeys, rightKeys); - join = new LookupJoin(join.source(), join.left(), join.right(), config, output); + + config = new JoinConfig(coreJoin, leftKeys, leftKeys, rightKeys); + join = new LookupJoin(join.source(), join.left(), join.right(), config); } // everything else is unsupported for now else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QueryPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QueryPlan.java index ef8c3983faf2e..02373cc62e81f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QueryPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/QueryPlan.java @@ -33,6 +33,10 @@ public QueryPlan(Source source, List children) { super(source, children); } + /** + * The ordered list of attributes (i.e. columns) this plan produces when executed. + * Must be called only on resolved plans, otherwise may throw an exception or return wrong results. + */ public abstract List output(); public AttributeSet outputSet() { @@ -87,6 +91,7 @@ public AttributeSet references() { /** * This very likely needs to be overridden for {@link QueryPlan#references} to be correct when inheriting. + * This can be called on unresolved plans and therefore must not rely on calls to {@link QueryPlan#output()}. 
*/ protected AttributeSet computeReferences() { return Expressions.references(expressions()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/BinaryPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/BinaryPlan.java index e65cdda4b6069..91cd7f7a15840 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/BinaryPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/BinaryPlan.java @@ -6,8 +6,6 @@ */ package org.elasticsearch.xpack.esql.plan.logical; -import org.elasticsearch.xpack.esql.core.expression.AttributeSet; -import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.tree.Source; import java.util.Arrays; @@ -45,11 +43,6 @@ public final BinaryPlan replaceRight(LogicalPlan newRight) { return replaceChildren(left, newRight); } - protected AttributeSet computeReferences() { - // TODO: this needs to be driven by the join config - return Expressions.references(output()); - } - public abstract BinaryPlan replaceChildren(LogicalPlan left, LogicalPlan right); @Override diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java index 0e182646d914a..dd6b3ea3455f7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java @@ -10,9 +10,8 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.Nullability; +import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -23,9 +22,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT; import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.RIGHT; @@ -107,37 +108,24 @@ public static List computeOutput(List leftOutput, List output; // TODO: make the other side nullable + Set matchFieldNames = config.matchFields().stream().map(NamedExpression::name).collect(Collectors.toSet()); if (LEFT.equals(joinType)) { - // right side becomes nullable and overrides left - // output = merge(leftOutput, makeNullable(rightOutput)); - output = merge(leftOutput, rightOutput); + // right side becomes nullable and overrides left except for match fields, which we preserve from the left + List rightOutputWithoutMatchFields = rightOutput.stream() + .filter(attr -> matchFieldNames.contains(attr.name()) == false) + .toList(); + output = mergeOutputAttributes(rightOutputWithoutMatchFields, leftOutput); } else if (RIGHT.equals(joinType)) { - // left side becomes nullable and overrides right - // output = merge(makeNullable(leftOutput), 
rightOutput); - output = merge(leftOutput, rightOutput); + List leftOutputWithoutMatchFields = leftOutput.stream() + .filter(attr -> matchFieldNames.contains(attr.name()) == false) + .toList(); + output = mergeOutputAttributes(leftOutputWithoutMatchFields, rightOutput); } else { throw new IllegalArgumentException(joinType.joinName() + " unsupported"); } return output; } - /** - * Merge the two lists of attributes into one and preserves order. - */ - private static List merge(List left, List right) { - // use linked hash map to preserve order - Map nameToAttribute = Maps.newLinkedHashMapWithExpectedSize(left.size() + right.size()); - for (Attribute a : left) { - nameToAttribute.put(a.name(), a); - } - for (Attribute a : right) { - // override the existing entry in place - nameToAttribute.compute(a.name(), (name, existing) -> a); - } - - return new ArrayList<>(nameToAttribute.values()); - } - /** * Make fields references, so we don't check if they exist in the index. * We do this for fields that we know don't come from the index. @@ -161,14 +149,6 @@ public static List makeReference(List output) { return out; } - private static List makeNullable(List output) { - List out = new ArrayList<>(output.size()); - for (Attribute a : output) { - out.add(a.withNullability(Nullability.TRUE)); - } - return out; - } - @Override public boolean expressionsResolved() { return config.expressionsResolved(); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java index 2ee9213f45b36..57c8cb00baa32 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java @@ -16,7 +16,6 @@ import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.UsingJoinType; import java.util.List; -import java.util.Objects; import static java.util.Collections.emptyList; import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT; @@ -26,10 +25,8 @@ */ public class LookupJoin extends Join implements SurrogateLogicalPlan { - private final List output; - public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, List joinFields) { - this(source, left, right, new UsingJoinType(LEFT, joinFields), emptyList(), emptyList(), emptyList(), emptyList()); + this(source, left, right, new UsingJoinType(LEFT, joinFields), emptyList(), emptyList(), emptyList()); } public LookupJoin( @@ -39,15 +36,13 @@ public LookupJoin( JoinType type, List joinFields, List leftFields, - List rightFields, - List output + List rightFields ) { - this(source, left, right, new JoinConfig(type, joinFields, leftFields, rightFields), output); + this(source, left, right, new JoinConfig(type, joinFields, leftFields, rightFields)); } - public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, JoinConfig joinConfig, List output) { + public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, JoinConfig joinConfig) { super(source, left, right, joinConfig); - this.output = output; } /** @@ -55,20 +50,14 @@ public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, JoinConfig */ @Override public LogicalPlan surrogate() { - JoinConfig cfg = config(); - JoinConfig newConfig = new JoinConfig(LEFT, cfg.matchFields(), cfg.leftFields(), cfg.rightFields()); - Join normalized = new Join(source(), left(), right(), newConfig); + Join 
normalized = new Join(source(), left(), right(), config()); // TODO: decide whether to introduce USING or just basic ON semantics - keep the ordering out for now - return new Project(source(), normalized, output); - } - - public List output() { - return output; + return new Project(source(), normalized, output()); } @Override public Join replaceChildren(LogicalPlan left, LogicalPlan right) { - return new LookupJoin(source(), left, right, config(), output); + return new LookupJoin(source(), left, right, config()); } @Override @@ -81,23 +70,7 @@ protected NodeInfo info() { config().type(), config().matchFields(), config().leftFields(), - config().rightFields(), - output + config().rightFields() ); } - - @Override - public int hashCode() { - return Objects.hash(super.hashCode(), output); - } - - @Override - public boolean equals(Object obj) { - if (super.equals(obj) == false) { - return false; - } - - LookupJoin other = (LookupJoin) obj; - return Objects.equals(output, other.output); - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 021596c31f65d..3b0f9ab578df9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -79,7 +79,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Predicate; import java.util.stream.Collectors; import static org.elasticsearch.index.query.QueryBuilders.boolQuery; @@ -466,8 +465,6 @@ static Set fieldNames(LogicalPlan parsed, Set enrichPolicyMatchF // ie "from test | eval lang = languages + 1 | keep *l" should consider both "languages" and "*l" as valid fields to ask for AttributeSet keepCommandReferences = new AttributeSet(); AttributeSet keepJoinReferences = new AttributeSet(); - List> keepMatches = new ArrayList<>(); - List keepPatterns = new ArrayList<>(); parsed.forEachDown(p -> {// go over each plan top-down if (p instanceof RegexExtract re) { // for Grok and Dissect @@ -501,7 +498,6 @@ static Set fieldNames(LogicalPlan parsed, Set enrichPolicyMatchF references.add(ua); if (p instanceof Keep) { keepCommandReferences.add(ua); - keepMatches.add(up::match); } }); if (p instanceof Keep) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index c745801bf505f..6763988eac638 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -263,7 +263,7 @@ public final void test() throws Throwable { ); assumeFalse( "lookup join disabled for csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V2.capabilityName()) ); if (Build.current().isSnapshot()) { assertThat( diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index 2770ed1f336ae..e0ebc92afa95d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ 
-1945,9 +1945,10 @@ public void testLookup() {
             .item(startsWith("job{f}"))
             .item(startsWith("job.raw{f}"))
             /*
-             * Int key is returned as a full field (despite the rename)
+             * Int is a reference here because we renamed it in project.
+             * If we hadn't it'd be a field and that'd be fine.
              */
-            .item(containsString("int{f}"))
+            .item(containsString("int{r}"))
             .item(startsWith("last_name{f}"))
             .item(startsWith("long_noidx{f}"))
             .item(startsWith("salary{f}"))

From 11ffe8831793a5cad91b5bb5fb63e2365286451a Mon Sep 17 00:00:00 2001
From: Armin Braun
Date: Thu, 28 Nov 2024 09:54:42 +0100
Subject: [PATCH 055/139] Speedup HealthNodeTaskExecutor CS listener (#113436)

This method was quite slow in tests because there's an expensive
assertion in `ClusterApplierService.state()` that we run when calling
`ClusterService.localNode()`
---
 .../selection/HealthNodeTaskExecutor.java     | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/health/node/selection/HealthNodeTaskExecutor.java b/server/src/main/java/org/elasticsearch/health/node/selection/HealthNodeTaskExecutor.java
index 3efad1aee26b0..5991bc248ba76 100644
--- a/server/src/main/java/org/elasticsearch/health/node/selection/HealthNodeTaskExecutor.java
+++ b/server/src/main/java/org/elasticsearch/health/node/selection/HealthNodeTaskExecutor.java
@@ -182,8 +182,8 @@ void startTask(ClusterChangedEvent event) {

     // visible for testing
     void shuttingDown(ClusterChangedEvent event) {
-        DiscoveryNode node = clusterService.localNode();
-        if (isNodeShuttingDown(event, node.getId())) {
+        if (isNodeShuttingDown(event)) {
+            var node = event.state().getNodes().getLocalNode();
             abortTaskIfApplicable("node [{" + node.getName() + "}{" + node.getId() + "}] shutting down");
         }
     }
@@ -198,9 +198,18 @@ void abortTaskIfApplicable(String reason) {
         }
     }

-    private static boolean isNodeShuttingDown(ClusterChangedEvent event, String nodeId) {
-        return event.previousState().metadata().nodeShutdowns().contains(nodeId) == false
-            && event.state().metadata().nodeShutdowns().contains(nodeId);
+    private static boolean isNodeShuttingDown(ClusterChangedEvent event) {
+        if (event.metadataChanged() == false) {
+            return false;
+        }
+        var shutdownsOld = event.previousState().metadata().nodeShutdowns();
+        var shutdownsNew = event.state().metadata().nodeShutdowns();
+        if (shutdownsNew == shutdownsOld) {
+            return false;
+        }
+        String nodeId = event.state().nodes().getLocalNodeId();
+        return shutdownsOld.contains(nodeId) == false && shutdownsNew.contains(nodeId);
+
     }

     public static List getNamedXContentParsers() {

From d4bcd979a5b9196f23b00d97cb17aad1679818c8 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Thu, 28 Nov 2024 10:05:26 +0100
Subject: [PATCH 056/139] Update synthetic source legacy license cutoff date.
 (#117658)

Update default cutoff date from 2024-12-12T00:00 UTC to 2025-02-01T00:00 UTC.
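
For illustration, the minimal sketch below shows how the new default translates to epoch millis; `CutoffDateSketch` and its `isBeforeCutoff` helper are hypothetical stand-ins mirroring the licence start-date comparison, not code taken from the diff below.

[source,java]
----
import java.time.LocalDateTime;
import java.time.ZoneOffset;

class CutoffDateSketch {
    // 2025-02-01T00:00 UTC, matching DEFAULT_CUTOFF_DATE in the diff below.
    static final long DEFAULT_CUTOFF_DATE =
        LocalDateTime.of(2025, 2, 1, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli();

    // Hypothetical helper: licenses that started before the cutoff keep the legacy fallback.
    static boolean isBeforeCutoff(long licenseStartDateMillis) {
        return licenseStartDateMillis < DEFAULT_CUTOFF_DATE;
    }

    public static void main(String[] args) {
        long licenseStart = LocalDateTime.of(2024, 12, 20, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli();
        System.out.println(isBeforeCutoff(licenseStart)); // true: 2024-12-20 precedes the cutoff
    }
}
----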
--- .../xpack/logsdb/SyntheticSourceLicenseService.java | 2 +- .../SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java index 71de2f7909835..26a672fb1c903 100644 --- a/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java +++ b/x-pack/plugin/logsdb/src/main/java/org/elasticsearch/xpack/logsdb/SyntheticSourceLicenseService.java @@ -29,7 +29,7 @@ final class SyntheticSourceLicenseService { // You can only override this property if you received explicit approval from Elastic. static final String CUTOFF_DATE_SYS_PROP_NAME = "es.mapping.synthetic_source_fallback_to_stored_source.cutoff_date_restricted_override"; private static final Logger LOGGER = LogManager.getLogger(SyntheticSourceLicenseService.class); - static final long DEFAULT_CUTOFF_DATE = LocalDateTime.of(2024, 12, 12, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + static final long DEFAULT_CUTOFF_DATE = LocalDateTime.of(2025, 2, 1, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); /** * A setting that determines whether source mode should always be stored source. Regardless of licence. diff --git a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java index 939d7d892a48d..eda0d87868745 100644 --- a/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java +++ b/x-pack/plugin/logsdb/src/test/java/org/elasticsearch/xpack/logsdb/SyntheticSourceIndexSettingsProviderLegacyLicenseTests.java @@ -98,7 +98,7 @@ public void testGetAdditionalIndexSettingsTsdb() throws IOException { } public void testGetAdditionalIndexSettingsTsdbAfterCutoffDate() throws Exception { - long start = LocalDateTime.of(2024, 12, 20, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); + long start = LocalDateTime.of(2025, 2, 2, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); License license = createGoldOrPlatinumLicense(start); long time = LocalDateTime.of(2024, 12, 31, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli(); var licenseState = new XPackLicenseState(() -> time, new XPackLicenseStatus(license.operationMode(), true, null)); From 5d686973084e926a2dbec96a311a6684807f5406 Mon Sep 17 00:00:00 2001 From: David Kyle Date: Thu, 28 Nov 2024 09:36:59 +0000 Subject: [PATCH 057/139] [ML] Delete accidental changelog for a non issue (#117636) --- docs/changelog/117235.yaml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 docs/changelog/117235.yaml diff --git a/docs/changelog/117235.yaml b/docs/changelog/117235.yaml deleted file mode 100644 index dbf0b4cc18388..0000000000000 --- a/docs/changelog/117235.yaml +++ /dev/null @@ -1,5 +0,0 @@ -pr: 117235 -summary: "Deprecate `ChunkingOptions` parameter" -area: ES|QL -type: enhancement -issues: [] From 6a4b68d263fe3533fc44e90d779537b48ffaf5f6 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 28 Nov 2024 10:53:39 +0100 Subject: [PATCH 058/139] Add source mode stats to MappingStats (#117463) --- docs/reference/cluster/stats.asciidoc | 5 +- .../test/cluster.stats/40_source_modes.yml | 50 ++++++++++ 
server/src/main/java/module-info.java | 3 +- .../org/elasticsearch/TransportVersions.java | 3 + .../cluster/stats/ClusterStatsFeatures.java | 26 ++++++ .../admin/cluster/stats/MappingStats.java | 55 ++++++++++- ...lasticsearch.features.FeatureSpecification | 1 + .../cluster/stats/MappingStatsTests.java | 92 ++++++++++++++++++- .../ClusterStatsMonitoringDocTests.java | 3 +- 9 files changed, 226 insertions(+), 12 deletions(-) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/40_source_modes.yml create mode 100644 server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsFeatures.java diff --git a/docs/reference/cluster/stats.asciidoc b/docs/reference/cluster/stats.asciidoc index bd818a538f78b..d875417bde51a 100644 --- a/docs/reference/cluster/stats.asciidoc +++ b/docs/reference/cluster/stats.asciidoc @@ -1644,7 +1644,10 @@ The API returns the following response: "total_deduplicated_mapping_size": "0b", "total_deduplicated_mapping_size_in_bytes": 0, "field_types": [], - "runtime_field_types": [] + "runtime_field_types": [], + "source_modes" : { + "stored": 0 + } }, "analysis": { "char_filter_types": [], diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/40_source_modes.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/40_source_modes.yml new file mode 100644 index 0000000000000..64bbad7fb1c6d --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/40_source_modes.yml @@ -0,0 +1,50 @@ +--- +test source modes: + - requires: + cluster_features: ["cluster.stats.source_modes"] + reason: requires source modes features + + - do: + indices.create: + index: test-synthetic + body: + settings: + index: + mapping: + source.mode: synthetic + + - do: + indices.create: + index: test-stored + + - do: + indices.create: + index: test-disabled + body: + settings: + index: + mapping: + source.mode: disabled + + - do: + bulk: + refresh: true + body: + - '{ "create": { "_index": "test-synthetic" } }' + - '{ "name": "aaaa", "some_string": "AaAa", "some_int": 1000, "some_double": 123.456789, "some_bool": true }' + - '{ "create": { "_index": "test-stored" } }' + - '{ "name": "bbbb", "some_string": "BbBb", "some_int": 2000, "some_double": 321.987654, "some_bool": false }' + - '{ "create": { "_index": "test-disabled" } }' + - '{ "name": "cccc", "some_string": "CcCc", "some_int": 3000, "some_double": 421.484654, "some_bool": false }' + + - do: + search: + index: test-* + - match: { hits.total.value: 3 } + + - do: + cluster.stats: { } + + - match: { indices.mappings.source_modes.disabled: 1 } + - match: { indices.mappings.source_modes.stored: 1 } + - match: { indices.mappings.source_modes.synthetic: 1 } diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 35d1a44624b0f..63dbac3a72487 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -433,7 +433,8 @@ org.elasticsearch.search.SearchFeatures, org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, - org.elasticsearch.reservedstate.service.FileSettingsFeatures; + org.elasticsearch.reservedstate.service.FileSettingsFeatures, + org.elasticsearch.action.admin.cluster.stats.ClusterStatsFeatures; uses org.elasticsearch.plugins.internal.SettingsExtension; uses RestExtension; diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java 
b/server/src/main/java/org/elasticsearch/TransportVersions.java index dda7d7e5d4c4c..a1315ccf66701 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -205,10 +205,13 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_ENRICH_RUNTIME_WARNINGS = def(8_796_00_0); public static final TransportVersion INGEST_PIPELINE_CONFIGURATION_AS_MAP = def(8_797_00_0); public static final TransportVersion LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE_FIX_8_17 = def(8_797_00_1); + public static final TransportVersion SOURCE_MODE_TELEMETRY_FIX_8_17 = def(8_797_00_2); public static final TransportVersion INDEXING_PRESSURE_THROTTLING_STATS = def(8_798_00_0); public static final TransportVersion REINDEX_DATA_STREAMS = def(8_799_00_0); public static final TransportVersion ESQL_REMOVE_NODE_LEVEL_PLAN = def(8_800_00_0); public static final TransportVersion LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE = def(8_801_00_0); + public static final TransportVersion SOURCE_MODE_TELEMETRY = def(8_802_00_0); + /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsFeatures.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsFeatures.java new file mode 100644 index 0000000000000..6e85093a52cdd --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/ClusterStatsFeatures.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.action.admin.cluster.stats; + +import org.elasticsearch.features.FeatureSpecification; +import org.elasticsearch.features.NodeFeature; + +import java.util.Set; + +/** + * Spec for cluster stats features. 
+ */ +public class ClusterStatsFeatures implements FeatureSpecification { + + @Override + public Set getFeatures() { + return Set.of(MappingStats.SOURCE_MODES_FEATURE); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/MappingStats.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/MappingStats.java index d2e5973169919..1bc2e1d13c864 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/MappingStats.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/stats/MappingStats.java @@ -9,6 +9,7 @@ package org.elasticsearch.action.admin.cluster.stats; +import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.MappingMetadata; @@ -19,6 +20,8 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.Nullable; +import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -31,6 +34,7 @@ import java.util.HashSet; import java.util.IdentityHashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.OptionalLong; @@ -44,6 +48,8 @@ */ public final class MappingStats implements ToXContentFragment, Writeable { + static final NodeFeature SOURCE_MODES_FEATURE = new NodeFeature("cluster.stats.source_modes"); + private static final Pattern DOC_PATTERN = Pattern.compile("doc[\\[.]"); private static final Pattern SOURCE_PATTERN = Pattern.compile("params\\._source"); @@ -53,6 +59,8 @@ public final class MappingStats implements ToXContentFragment, Writeable { public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) { Map fieldTypes = new HashMap<>(); Set concreteFieldNames = new HashSet<>(); + // Account different source modes based on index.mapping.source.mode setting: + Map sourceModeUsageCount = new HashMap<>(); Map runtimeFieldTypes = new HashMap<>(); final Map mappingCounts = new IdentityHashMap<>(metadata.getMappingsByHash().size()); for (IndexMetadata indexMetadata : metadata) { @@ -62,6 +70,9 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) { continue; } AnalysisStats.countMapping(mappingCounts, indexMetadata); + + var sourceMode = SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.get(indexMetadata.getSettings()); + sourceModeUsageCount.merge(sourceMode.toString().toLowerCase(Locale.ENGLISH), 1, Integer::sum); } final AtomicLong totalFieldCount = new AtomicLong(); final AtomicLong totalDeduplicatedFieldCount = new AtomicLong(); @@ -175,12 +186,14 @@ public static MappingStats of(Metadata metadata, Runnable ensureNotCancelled) { for (MappingMetadata mappingMetadata : metadata.getMappingsByHash().values()) { totalMappingSizeBytes += mappingMetadata.source().compressed().length; } + return new MappingStats( totalFieldCount.get(), totalDeduplicatedFieldCount.get(), totalMappingSizeBytes, fieldTypes.values(), - runtimeFieldTypes.values() + runtimeFieldTypes.values(), + sourceModeUsageCount ); } @@ -215,17 +228,20 @@ private static int countOccurrences(String script, Pattern pattern) { private final List fieldTypeStats; private final List runtimeFieldStats; + private final Map sourceModeUsageCount; MappingStats( long totalFieldCount, long 
totalDeduplicatedFieldCount, long totalMappingSizeBytes, Collection fieldTypeStats, - Collection runtimeFieldStats + Collection runtimeFieldStats, + Map sourceModeUsageCount ) { this.totalFieldCount = totalFieldCount; this.totalDeduplicatedFieldCount = totalDeduplicatedFieldCount; this.totalMappingSizeBytes = totalMappingSizeBytes; + this.sourceModeUsageCount = sourceModeUsageCount; List stats = new ArrayList<>(fieldTypeStats); stats.sort(Comparator.comparing(IndexFeatureStats::getName)); this.fieldTypeStats = Collections.unmodifiableList(stats); @@ -246,6 +262,10 @@ private static int countOccurrences(String script, Pattern pattern) { } fieldTypeStats = in.readCollectionAsImmutableList(FieldStats::new); runtimeFieldStats = in.readCollectionAsImmutableList(RuntimeFieldStats::new); + var transportVersion = in.getTransportVersion(); + sourceModeUsageCount = canReadOrWriteSourceModeTelemetry(transportVersion) + ? in.readImmutableMap(StreamInput::readString, StreamInput::readVInt) + : Map.of(); } @Override @@ -257,6 +277,15 @@ public void writeTo(StreamOutput out) throws IOException { } out.writeCollection(fieldTypeStats); out.writeCollection(runtimeFieldStats); + var transportVersion = out.getTransportVersion(); + if (canReadOrWriteSourceModeTelemetry(transportVersion)) { + out.writeMap(sourceModeUsageCount, StreamOutput::writeVInt); + } + } + + private static boolean canReadOrWriteSourceModeTelemetry(TransportVersion version) { + return version.isPatchFrom(TransportVersions.SOURCE_MODE_TELEMETRY_FIX_8_17) + || version.onOrAfter(TransportVersions.SOURCE_MODE_TELEMETRY); } private static OptionalLong ofNullable(Long l) { @@ -300,6 +329,10 @@ public List getRuntimeFieldStats() { return runtimeFieldStats; } + public Map getSourceModeUsageCount() { + return sourceModeUsageCount; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject("mappings"); @@ -326,6 +359,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws st.toXContent(builder, params); } builder.endArray(); + builder.startObject("source_modes"); + var entries = sourceModeUsageCount.entrySet().stream().sorted(Map.Entry.comparingByKey()).toList(); + for (var entry : entries) { + builder.field(entry.getKey(), entry.getValue()); + } + builder.endObject(); builder.endObject(); return builder; } @@ -344,11 +383,19 @@ public boolean equals(Object o) { && Objects.equals(totalDeduplicatedFieldCount, that.totalDeduplicatedFieldCount) && Objects.equals(totalMappingSizeBytes, that.totalMappingSizeBytes) && fieldTypeStats.equals(that.fieldTypeStats) - && runtimeFieldStats.equals(that.runtimeFieldStats); + && runtimeFieldStats.equals(that.runtimeFieldStats) + && sourceModeUsageCount.equals(that.sourceModeUsageCount); } @Override public int hashCode() { - return Objects.hash(totalFieldCount, totalDeduplicatedFieldCount, totalMappingSizeBytes, fieldTypeStats, runtimeFieldStats); + return Objects.hash( + totalFieldCount, + totalDeduplicatedFieldCount, + totalMappingSizeBytes, + fieldTypeStats, + runtimeFieldStats, + sourceModeUsageCount + ); } } diff --git a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification index 3955fc87bf392..12965152f260c 100644 --- a/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification +++ 
b/server/src/main/resources/META-INF/services/org.elasticsearch.features.FeatureSpecification @@ -23,3 +23,4 @@ org.elasticsearch.search.retriever.RetrieversFeatures org.elasticsearch.script.ScriptFeatures org.elasticsearch.reservedstate.service.FileSettingsFeatures org.elasticsearch.cluster.routing.RoutingFeatures +org.elasticsearch.action.admin.cluster.stats.ClusterStatsFeatures diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/MappingStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/MappingStatsTests.java index 2c374c7d26dee..96954458c18c4 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/MappingStatsTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/stats/MappingStatsTests.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.io.stream.Writeable.Reader; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.mapper.SourceFieldMapper; import org.elasticsearch.script.Script; import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.test.AbstractWireSerializingTestCase; @@ -29,7 +30,15 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Locale; +import java.util.Map; + +import static org.elasticsearch.index.mapper.SourceFieldMapper.Mode.DISABLED; +import static org.elasticsearch.index.mapper.SourceFieldMapper.Mode.STORED; +import static org.elasticsearch.index.mapper.SourceFieldMapper.Mode.SYNTHETIC; +import static org.hamcrest.Matchers.equalTo; public class MappingStatsTests extends AbstractWireSerializingTestCase { @@ -203,7 +212,10 @@ public void testToXContent() { "doc_max" : 0, "doc_total" : 0 } - ] + ], + "source_modes" : { + "stored" : 2 + } } }""", Strings.toString(mappingStats, true, true)); } @@ -332,7 +344,10 @@ public void testToXContentWithSomeSharedMappings() { "doc_max" : 0, "doc_total" : 0 } - ] + ], + "source_modes" : { + "stored" : 3 + } } }""", Strings.toString(mappingStats, true, true)); } @@ -362,7 +377,24 @@ protected MappingStats createTestInstance() { if (randomBoolean()) { runtimeFieldStats.add(randomRuntimeFieldStats("long")); } - return new MappingStats(randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), stats, runtimeFieldStats); + Map sourceModeUsageCount = randomBoolean() + ? 
Map.of() + : Map.of( + STORED.toString().toLowerCase(Locale.ENGLISH), + randomNonNegativeInt(), + SYNTHETIC.toString().toLowerCase(Locale.ENGLISH), + randomNonNegativeInt(), + DISABLED.toString().toLowerCase(Locale.ENGLISH), + randomNonNegativeInt() + ); + return new MappingStats( + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + stats, + runtimeFieldStats, + sourceModeUsageCount + ); } private static FieldStats randomFieldStats(String type) { @@ -410,7 +442,8 @@ protected MappingStats mutateInstance(MappingStats instance) { long totalFieldCount = instance.getTotalFieldCount().getAsLong(); long totalDeduplicatedFieldCount = instance.getTotalDeduplicatedFieldCount().getAsLong(); long totalMappingSizeBytes = instance.getTotalMappingSizeBytes().getAsLong(); - switch (between(1, 5)) { + var sourceModeUsageCount = new HashMap<>(instance.getSourceModeUsageCount()); + switch (between(1, 6)) { case 1 -> { boolean remove = fieldTypes.size() > 0 && randomBoolean(); if (remove) { @@ -435,8 +468,22 @@ protected MappingStats mutateInstance(MappingStats instance) { case 3 -> totalFieldCount = randomValueOtherThan(totalFieldCount, ESTestCase::randomNonNegativeLong); case 4 -> totalDeduplicatedFieldCount = randomValueOtherThan(totalDeduplicatedFieldCount, ESTestCase::randomNonNegativeLong); case 5 -> totalMappingSizeBytes = randomValueOtherThan(totalMappingSizeBytes, ESTestCase::randomNonNegativeLong); + case 6 -> { + if (sourceModeUsageCount.isEmpty() == false) { + sourceModeUsageCount.remove(sourceModeUsageCount.keySet().stream().findFirst().get()); + } else { + sourceModeUsageCount.put("stored", randomNonNegativeInt()); + } + } } - return new MappingStats(totalFieldCount, totalDeduplicatedFieldCount, totalMappingSizeBytes, fieldTypes, runtimeFieldTypes); + return new MappingStats( + totalFieldCount, + totalDeduplicatedFieldCount, + totalMappingSizeBytes, + fieldTypes, + runtimeFieldTypes, + sourceModeUsageCount + ); } public void testDenseVectorType() { @@ -531,4 +578,39 @@ public void testWriteTo() throws IOException { assertEquals(instance.getFieldTypeStats(), deserialized.getFieldTypeStats()); assertEquals(instance.getRuntimeFieldStats(), deserialized.getRuntimeFieldStats()); } + + public void testSourceModes() { + var builder = Metadata.builder(); + int numStoredIndices = randomIntBetween(1, 5); + int numSyntheticIndices = randomIntBetween(1, 5); + int numDisabledIndices = randomIntBetween(1, 5); + for (int i = 0; i < numSyntheticIndices; i++) { + IndexMetadata.Builder indexMetadata = new IndexMetadata.Builder("foo-synthetic-" + i).settings( + indexSettings(IndexVersion.current(), 4, 1).put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic") + ); + builder.put(indexMetadata); + } + for (int i = 0; i < numStoredIndices; i++) { + IndexMetadata.Builder indexMetadata; + if (randomBoolean()) { + indexMetadata = new IndexMetadata.Builder("foo-stored-" + i).settings( + indexSettings(IndexVersion.current(), 4, 1).put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "stored") + ); + } else { + indexMetadata = new IndexMetadata.Builder("foo-stored-" + i).settings(indexSettings(IndexVersion.current(), 4, 1)); + } + builder.put(indexMetadata); + } + for (int i = 0; i < numDisabledIndices; i++) { + IndexMetadata.Builder indexMetadata = new IndexMetadata.Builder("foo-disabled-" + i).settings( + indexSettings(IndexVersion.current(), 4, 1).put(SourceFieldMapper.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "disabled") + ); + 
builder.put(indexMetadata); + } + var mappingStats = MappingStats.of(builder.build(), () -> {}); + assertThat(mappingStats.getSourceModeUsageCount().get("synthetic"), equalTo(numSyntheticIndices)); + assertThat(mappingStats.getSourceModeUsageCount().get("stored"), equalTo(numStoredIndices)); + assertThat(mappingStats.getSourceModeUsageCount().get("disabled"), equalTo(numDisabledIndices)); + } + } diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java index 9458442557694..f4d50df4ff613 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/cluster/ClusterStatsMonitoringDocTests.java @@ -572,7 +572,8 @@ public void testToXContent() throws IOException { "total_deduplicated_field_count": 0, "total_deduplicated_mapping_size_in_bytes": 0, "field_types": [], - "runtime_field_types": [] + "runtime_field_types": [], + "source_modes": {} }, "analysis": { "char_filter_types": [], From 64dfed4e1f0610014f01fc7285fccac831a62c74 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Thu, 28 Nov 2024 11:01:52 +0100 Subject: [PATCH 059/139] ESQL: Mute CATEGORIZE optimizer tests on release builds (#117690) --- .../xpack/esql/optimizer/LogicalPlanOptimizerTests.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 2b4fb6ad68972..8373528531902 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.TestBlockFactory; import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.analysis.Analyzer; import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; import org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils; @@ -1211,6 +1212,8 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] */ public void testCombineProjectionWithCategorizeGrouping() { + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + var plan = plan(""" from test | eval k = first_name, k1 = k @@ -3946,6 +3949,8 @@ public void testNestedExpressionsInGroups() { * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
*/ public void testNestedExpressionsInGroupsWithCategorize() { + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + var plan = optimizedPlan(""" from test | stats c = count(salary) by CATEGORIZE(CONCAT(first_name, "abc")) From 146cb39143f93b6ce453229abf5be08335a75366 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Thu, 28 Nov 2024 13:46:24 +0100 Subject: [PATCH 060/139] ESQL - enabling scoring with METADATA _score (#113120) * ESQL - enabling scoring with METADATA _score Co-authored-by: ChrisHegarty --- docs/changelog/113120.yaml | 5 + muted-tests.yml | 6 + .../search/sort/SortBuilder.java | 15 +- .../core/expression/MetadataAttribute.java | 5 +- .../compute/lucene/LuceneOperator.java | 5 +- .../compute/lucene/LuceneSourceOperator.java | 96 ++++-- .../lucene/LuceneTopNSourceOperator.java | 141 +++++++-- .../elasticsearch/compute/OperatorTests.java | 3 +- .../LuceneQueryExpressionEvaluatorTests.java | 33 +- .../lucene/LuceneSourceOperatorTests.java | 31 +- .../LuceneTopNSourceOperatorScoringTests.java | 151 +++++++++ .../lucene/LuceneTopNSourceOperatorTests.java | 50 ++- .../ValueSourceReaderTypeConversionTests.java | 9 +- .../ValuesSourceReaderOperatorTests.java | 9 +- .../src/main/resources/qstr-function.csv-spec | 1 - .../src/main/resources/scoring.csv-spec | 285 +++++++++++++++++ .../xpack/esql/action/EsqlActionTaskIT.java | 7 +- .../xpack/esql/action/LookupFromIndexIT.java | 3 +- .../xpack/esql/plugin/MatchFunctionIT.java | 299 ++++++++++++++++++ .../xpack/esql/plugin/MatchOperatorIT.java | 51 +++ .../xpack/esql/plugin/QueryStringIT.java | 96 ++++++ .../xpack/esql/action/EsqlCapabilities.java | 7 +- .../xpack/esql/analysis/Verifier.java | 9 + .../local/LucenePushdownPredicates.java | 5 + .../physical/local/PushTopNToSource.java | 18 +- .../local/ReplaceSourceAttributes.java | 14 +- .../xpack/esql/parser/LogicalPlanBuilder.java | 4 +- .../xpack/esql/plan/physical/EsQueryExec.java | 14 + .../planner/EsPhysicalOperationProviders.java | 14 +- .../xpack/esql/analysis/VerifierTests.java | 25 ++ .../optimizer/PhysicalPlanOptimizerTests.java | 62 ++++ .../physical/local/PushTopNToSourceTests.java | 193 ++++++++++- 32 files changed, 1570 insertions(+), 96 deletions(-) create mode 100644 docs/changelog/113120.yaml create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorScoringTests.java create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec create mode 100644 x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchFunctionIT.java diff --git a/docs/changelog/113120.yaml b/docs/changelog/113120.yaml new file mode 100644 index 0000000000000..801167d61c19c --- /dev/null +++ b/docs/changelog/113120.yaml @@ -0,0 +1,5 @@ +pr: 113120 +summary: ESQL - enabling scoring with METADATA `_score` +area: ES|QL +type: enhancement +issues: [] diff --git a/muted-tests.yml b/muted-tests.yml index 5cf16fdf3da0a..fdadc747289bb 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -224,6 +224,12 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117591 - class: org.elasticsearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117596 +- class: "org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT" + method: "test {scoring.*}" + issue: https://github.com/elastic/elasticsearch/issues/117641 +- class: "org.elasticsearch.xpack.esql.qa.single_node.EsqlSpecIT" + method: "test 
{scoring.*}" + issue: https://github.com/elastic/elasticsearch/issues/117641 # Examples: # diff --git a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java index 0ac3b42dd5b10..5832b93b9462f 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java @@ -158,6 +158,11 @@ private static void parseCompoundSortField(XContentParser parser, List buildSort(List> sortBuilders, SearchExecutionContext context) throws IOException { + return buildSort(sortBuilders, context, true); + } + + public static Optional buildSort(List> sortBuilders, SearchExecutionContext context, boolean optimize) + throws IOException { List sortFields = new ArrayList<>(sortBuilders.size()); List sortFormats = new ArrayList<>(sortBuilders.size()); for (SortBuilder builder : sortBuilders) { @@ -172,9 +177,13 @@ public static Optional buildSort(List> sortBuilde if (sortFields.size() > 1) { sort = true; } else { - SortField sortField = sortFields.get(0); - if (sortField.getType() == SortField.Type.SCORE && sortField.getReverse() == false) { - sort = false; + if (optimize) { + SortField sortField = sortFields.get(0); + if (sortField.getType() == SortField.Type.SCORE && sortField.getReverse() == false) { + sort = false; + } else { + sort = true; + } } else { sort = true; } diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java index 6e4e9292bfc99..0f1cfbb85039c 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/MetadataAttribute.java @@ -31,6 +31,7 @@ public class MetadataAttribute extends TypedAttribute { public static final String TIMESTAMP_FIELD = "@timestamp"; public static final String TSID_FIELD = "_tsid"; + public static final String SCORE = "_score"; static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( Attribute.class, @@ -50,7 +51,9 @@ public class MetadataAttribute extends TypedAttribute { SourceFieldMapper.NAME, tuple(DataType.SOURCE, false), IndexModeFieldMapper.NAME, - tuple(DataType.KEYWORD, true) + tuple(DataType.KEYWORD, true), + SCORE, + tuple(DataType.DOUBLE, false) ); private final boolean searchable; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java index 6f75298e95dd7..bbc3ace3716ba 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneOperator.java @@ -79,6 +79,7 @@ public abstract static class Factory implements SourceOperator.SourceOperatorFac protected final DataPartitioning dataPartitioning; protected final int taskConcurrency; protected final int limit; + protected final ScoreMode scoreMode; protected final LuceneSliceQueue sliceQueue; /** @@ -95,6 +96,7 @@ protected Factory( ScoreMode scoreMode ) { this.limit = limit; + this.scoreMode = scoreMode; this.dataPartitioning = dataPartitioning; var weightFunction = weightFunction(queryFunction, scoreMode); this.sliceQueue = 
LuceneSliceQueue.create(contexts, weightFunction, dataPartitioning, taskConcurrency); @@ -438,7 +440,8 @@ static Function weightFunction(Function 0) { - --remainingDocs; - docsBuilder.appendInt(doc); - currentPagePos++; - } else { - throw new CollectionTerminatedException(); - } + class LimitingCollector implements LeafCollector { + @Override + public void setScorer(Scorable scorer) {} + + @Override + public void collect(int doc) throws IOException { + if (remainingDocs > 0) { + --remainingDocs; + docsBuilder.appendInt(doc); + currentPagePos++; + } else { + throw new CollectionTerminatedException(); } - }; + } + } + + final class ScoringCollector extends LuceneSourceOperator.LimitingCollector { + private Scorable scorable; + + @Override + public void setScorer(Scorable scorer) { + this.scorable = scorer; + } + + @Override + public void collect(int doc) throws IOException { + super.collect(doc); + scoreBuilder.appendDouble(scorable.score()); + } } @Override @@ -139,15 +179,27 @@ public Page getCheckedOutput() throws IOException { IntBlock shard = null; IntBlock leaf = null; IntVector docs = null; + DoubleVector scores = null; + DocBlock docBlock = null; try { shard = blockFactory.newConstantIntBlockWith(scorer.shardContext().index(), currentPagePos); leaf = blockFactory.newConstantIntBlockWith(scorer.leafReaderContext().ord, currentPagePos); docs = docsBuilder.build(); docsBuilder = blockFactory.newIntVectorBuilder(Math.min(remainingDocs, maxPageSize)); - page = new Page(currentPagePos, new DocVector(shard.asVector(), leaf.asVector(), docs, true).asBlock()); + docBlock = new DocVector(shard.asVector(), leaf.asVector(), docs, true).asBlock(); + shard = null; + leaf = null; + docs = null; + if (scoreBuilder == null) { + page = new Page(currentPagePos, docBlock); + } else { + scores = scoreBuilder.build(); + scoreBuilder = blockFactory.newDoubleVectorBuilder(Math.min(remainingDocs, maxPageSize)); + page = new Page(currentPagePos, docBlock, scores.asBlock()); + } } finally { if (page == null) { - Releasables.closeExpectNoException(shard, leaf, docs); + Releasables.closeExpectNoException(shard, leaf, docs, docBlock, scores); } } currentPagePos = 0; @@ -160,7 +212,7 @@ public Page getCheckedOutput() throws IOException { @Override public void close() { - docsBuilder.close(); + Releasables.close(docsBuilder, scoreBuilder); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperator.java index 0f600958b93b3..8da62963ffb64 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperator.java @@ -10,15 +10,22 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.TopFieldCollector; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopFieldCollectorManager; +import org.apache.lucene.search.TopScoreDocCollectorManager; 
import org.elasticsearch.common.Strings; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.DocBlock; import org.elasticsearch.compute.data.DocVector; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.DoubleVector; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.IntVector; import org.elasticsearch.compute.data.Page; @@ -29,17 +36,21 @@ import org.elasticsearch.search.sort.SortBuilder; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Optional; import java.util.function.Function; import java.util.stream.Collectors; +import static org.apache.lucene.search.ScoreMode.COMPLETE; +import static org.apache.lucene.search.ScoreMode.TOP_DOCS; + /** * Source operator that builds Pages out of the output of a TopFieldCollector (aka TopN) */ public final class LuceneTopNSourceOperator extends LuceneOperator { - public static final class Factory extends LuceneOperator.Factory { + public static class Factory extends LuceneOperator.Factory { private final int maxPageSize; private final List> sorts; @@ -50,16 +61,17 @@ public Factory( int taskConcurrency, int maxPageSize, int limit, - List> sorts + List> sorts, + boolean scoring ) { - super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, ScoreMode.TOP_DOCS); + super(contexts, queryFunction, dataPartitioning, taskConcurrency, limit, scoring ? COMPLETE : TOP_DOCS); this.maxPageSize = maxPageSize; this.sorts = sorts; } @Override public SourceOperator get(DriverContext driverContext) { - return new LuceneTopNSourceOperator(driverContext.blockFactory(), maxPageSize, sorts, limit, sliceQueue); + return new LuceneTopNSourceOperator(driverContext.blockFactory(), maxPageSize, sorts, limit, sliceQueue, scoreMode); } public int maxPageSize() { @@ -75,6 +87,8 @@ public String describe() { + maxPageSize + ", limit = " + limit + + ", scoreMode = " + + scoreMode + ", sorts = [" + notPrettySorts + "]]"; @@ -93,17 +107,20 @@ public String describe() { private PerShardCollector perShardCollector; private final List> sorts; private final int limit; + private final ScoreMode scoreMode; public LuceneTopNSourceOperator( BlockFactory blockFactory, int maxPageSize, List> sorts, int limit, - LuceneSliceQueue sliceQueue + LuceneSliceQueue sliceQueue, + ScoreMode scoreMode ) { super(blockFactory, maxPageSize, sliceQueue); this.sorts = sorts; this.limit = limit; + this.scoreMode = scoreMode; } @Override @@ -145,7 +162,7 @@ private Page collect() throws IOException { try { if (perShardCollector == null || perShardCollector.shardContext.index() != scorer.shardContext().index()) { // TODO: share the bottom between shardCollectors - perShardCollector = new PerShardCollector(scorer.shardContext(), sorts, limit); + perShardCollector = newPerShardCollector(scorer.shardContext(), sorts, limit); } var leafCollector = perShardCollector.getLeafCollector(scorer.leafReaderContext()); scorer.scoreNextRange(leafCollector, scorer.leafReaderContext().reader().getLiveDocs(), maxPageSize); @@ -171,7 +188,7 @@ private Page emit(boolean startEmitting) { assert isEmitting() == false : "offset=" + offset + " score_docs=" + Arrays.toString(scoreDocs); offset = 0; if (perShardCollector != null) { - scoreDocs = perShardCollector.topFieldCollector.topDocs().scoreDocs; + scoreDocs = perShardCollector.collector.topDocs().scoreDocs; } else { scoreDocs = new ScoreDoc[0]; } @@ -183,10 +200,13 @@ private Page 
emit(boolean startEmitting) { IntBlock shard = null; IntVector segments = null; IntVector docs = null; + DocBlock docBlock = null; + DoubleBlock scores = null; Page page = null; try ( IntVector.Builder currentSegmentBuilder = blockFactory.newIntVectorFixedBuilder(size); - IntVector.Builder currentDocsBuilder = blockFactory.newIntVectorFixedBuilder(size) + IntVector.Builder currentDocsBuilder = blockFactory.newIntVectorFixedBuilder(size); + DoubleVector.Builder currentScoresBuilder = scoreVectorOrNull(size); ) { int start = offset; offset += size; @@ -196,53 +216,130 @@ private Page emit(boolean startEmitting) { int segment = ReaderUtil.subIndex(doc, leafContexts); currentSegmentBuilder.appendInt(segment); currentDocsBuilder.appendInt(doc - leafContexts.get(segment).docBase); // the offset inside the segment + if (currentScoresBuilder != null) { + float score = getScore(scoreDocs[i]); + currentScoresBuilder.appendDouble(score); + } } shard = blockFactory.newConstantIntBlockWith(perShardCollector.shardContext.index(), size); segments = currentSegmentBuilder.build(); docs = currentDocsBuilder.build(); - page = new Page(size, new DocVector(shard.asVector(), segments, docs, null).asBlock()); + docBlock = new DocVector(shard.asVector(), segments, docs, null).asBlock(); + shard = null; + segments = null; + docs = null; + if (currentScoresBuilder == null) { + page = new Page(size, docBlock); + } else { + scores = currentScoresBuilder.build().asBlock(); + page = new Page(size, docBlock, scores); + } } finally { if (page == null) { - Releasables.closeExpectNoException(shard, segments, docs); + Releasables.closeExpectNoException(shard, segments, docs, docBlock, scores); } } pagesEmitted++; return page; } + private float getScore(ScoreDoc scoreDoc) { + if (scoreDoc instanceof FieldDoc fieldDoc) { + if (Float.isNaN(fieldDoc.score)) { + if (sorts != null) { + return (Float) fieldDoc.fields[sorts.size() + 1]; + } else { + return (Float) fieldDoc.fields[0]; + } + } else { + return fieldDoc.score; + } + } else { + return scoreDoc.score; + } + } + + private DoubleVector.Builder scoreVectorOrNull(int size) { + if (scoreMode.needsScores()) { + return blockFactory.newDoubleVectorFixedBuilder(size); + } else { + return null; + } + } + @Override protected void describe(StringBuilder sb) { sb.append(", limit = ").append(limit); + sb.append(", scoreMode = ").append(scoreMode); String notPrettySorts = sorts.stream().map(Strings::toString).collect(Collectors.joining(",")); sb.append(", sorts = [").append(notPrettySorts).append("]"); } - static final class PerShardCollector { + PerShardCollector newPerShardCollector(ShardContext shardContext, List> sorts, int limit) throws IOException { + Optional sortAndFormats = shardContext.buildSort(sorts); + if (sortAndFormats.isEmpty()) { + throw new IllegalStateException("sorts must not be disabled in TopN"); + } + if (scoreMode.needsScores() == false) { + return new NonScoringPerShardCollector(shardContext, sortAndFormats.get().sort, limit); + } else { + SortField[] sortFields = sortAndFormats.get().sort.getSort(); + if (sortFields != null && sortFields.length == 1 && sortFields[0].needsScores() && sortFields[0].getReverse() == false) { + // SORT _score DESC + return new ScoringPerShardCollector( + shardContext, + new TopScoreDocCollectorManager(limit, null, limit, false).newCollector() + ); + } else { + // SORT ..., _score, ... 
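+                // Editor's gloss: the composed sort below is [user sorts..., FIELD_DOC, FIELD_SCORE].
+                // FIELD_DOC already totally orders any ties, so the trailing FIELD_SCORE cannot change
+                // the ranking; it is appended so the score is materialized into FieldDoc.fields, where
+                // getScore() above reads it back from index sorts.size() + 1.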
+ var sort = new Sort(); + if (sortFields != null) { + var l = new ArrayList<>(Arrays.asList(sortFields)); + l.add(SortField.FIELD_DOC); + l.add(SortField.FIELD_SCORE); + sort = new Sort(l.toArray(SortField[]::new)); + } + return new ScoringPerShardCollector( + shardContext, + new TopFieldCollectorManager(sort, limit, null, limit, false).newCollector() + ); + } + } + } + + abstract static class PerShardCollector { private final ShardContext shardContext; - private final TopFieldCollector topFieldCollector; + private final TopDocsCollector collector; private int leafIndex; private LeafCollector leafCollector; private Thread currentThread; - PerShardCollector(ShardContext shardContext, List> sorts, int limit) throws IOException { + PerShardCollector(ShardContext shardContext, TopDocsCollector collector) { this.shardContext = shardContext; - Optional sortAndFormats = shardContext.buildSort(sorts); - if (sortAndFormats.isEmpty()) { - throw new IllegalStateException("sorts must not be disabled in TopN"); - } - - // We don't use CollectorManager here as we don't retrieve the total hits and sort by score. - this.topFieldCollector = new TopFieldCollectorManager(sortAndFormats.get().sort, limit, null, 0, false).newCollector(); + this.collector = collector; } LeafCollector getLeafCollector(LeafReaderContext leafReaderContext) throws IOException { if (currentThread != Thread.currentThread() || leafIndex != leafReaderContext.ord) { - leafCollector = topFieldCollector.getLeafCollector(leafReaderContext); + leafCollector = collector.getLeafCollector(leafReaderContext); leafIndex = leafReaderContext.ord; currentThread = Thread.currentThread(); } return leafCollector; } } + + static final class NonScoringPerShardCollector extends PerShardCollector { + NonScoringPerShardCollector(ShardContext shardContext, Sort sort, int limit) { + // We don't use CollectorManager here as we don't retrieve the total hits and sort by score. 
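+            // Editor's gloss on the arguments: numHits = limit, after = null (no search-after),
+            // totalHitsThreshold = 0 (no total-hit counting, letting Lucene terminate collection early),
+            // supportsConcurrency = false (each shard gets its own collector).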
+ super(shardContext, new TopFieldCollectorManager(sort, limit, null, 0, false).newCollector()); + } + } + + static final class ScoringPerShardCollector extends PerShardCollector { + ScoringPerShardCollector(ShardContext shardContext, TopDocsCollector topDocsCollector) { + super(shardContext, topDocsCollector); + } + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java index 0d39a5bf8227e..e6ef10e53ec7c 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java @@ -394,7 +394,8 @@ static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, Query qu randomFrom(DataPartitioning.values()), randomIntBetween(1, 10), randomPageSize(), - limit + limit, + false // no scoring ); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java index beca522878358..ffaee536b443e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneQueryExpressionEvaluatorTests.java @@ -27,6 +27,8 @@ import org.elasticsearch.compute.data.BooleanVector; import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.DocBlock; +import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.lucene.LuceneQueryExpressionEvaluator.DenseCollector; @@ -120,8 +122,9 @@ public void testTermQueryShuffled() throws IOException { private void assertTermQuery(String term, List results) { int matchCount = 0; for (Page page : results) { - BytesRefVector terms = page.getBlock(1).asVector(); - BooleanVector matches = page.getBlock(2).asVector(); + int initialBlockIndex = initialBlockIndex(page); + BytesRefVector terms = page.getBlock(initialBlockIndex).asVector(); + BooleanVector matches = page.getBlock(initialBlockIndex + 1).asVector(); for (int i = 0; i < page.getPositionCount(); i++) { BytesRef termAtPosition = terms.getBytesRef(i, new BytesRef()); assertThat(matches.getBoolean(i), equalTo(termAtPosition.utf8ToString().equals(term))); @@ -155,8 +158,9 @@ private void testTermsQuery(boolean shuffleDocs) throws IOException { List results = runQuery(values, new TermInSetQuery(MultiTermQuery.CONSTANT_SCORE_REWRITE, FIELD, matchingBytes), shuffleDocs); int matchCount = 0; for (Page page : results) { - BytesRefVector terms = page.getBlock(1).asVector(); - BooleanVector matches = page.getBlock(2).asVector(); + int initialBlockIndex = initialBlockIndex(page); + BytesRefVector terms = page.getBlock(initialBlockIndex).asVector(); + BooleanVector matches = page.getBlock(initialBlockIndex + 1).asVector(); for (int i = 0; i < page.getPositionCount(); i++) { BytesRef termAtPosition = terms.getBytesRef(i, new BytesRef()); assertThat(matches.getBoolean(i), equalTo(matching.contains(termAtPosition.utf8ToString()))); @@ -207,7 +211,7 @@ private List runQuery(Set values, Query query, boolean shuffleDocs List results = new ArrayList<>(); Driver 
driver = new Driver( driverContext, - luceneOperatorFactory(reader, new MatchAllDocsQuery(), LuceneOperator.NO_LIMIT).get(driverContext), + luceneOperatorFactory(reader, new MatchAllDocsQuery(), LuceneOperator.NO_LIMIT, scoring).get(driverContext), operators, new TestResultPageSinkOperator(results::add), () -> {} @@ -248,7 +252,21 @@ private DriverContext driverContext() { return new DriverContext(blockFactory.bigArrays(), blockFactory); } - static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, Query query, int limit) { + // Scores are not interesting to this test, but enabled conditionally and effectively ignored just for coverage. + private final boolean scoring = randomBoolean(); + + // Returns the initial block index, ignoring the score block if scoring is enabled + private int initialBlockIndex(Page page) { + assert page.getBlock(0) instanceof DocBlock : "expected doc block at index 0"; + if (scoring) { + assert page.getBlock(1) instanceof DoubleBlock : "expected double block at index 1"; + return 2; + } else { + return 1; + } + } + + static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, Query query, int limit, boolean scoring) { final ShardContext searchContext = new LuceneSourceOperatorTests.MockShardContext(reader, 0); return new LuceneSourceOperator.Factory( List.of(searchContext), @@ -256,7 +274,8 @@ static LuceneOperator.Factory luceneOperatorFactory(IndexReader reader, Query qu randomFrom(DataPartitioning.values()), randomIntBetween(1, 10), randomPageSize(), - limit + limit, + scoring ); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java index 626190c04c501..2dcc5e20d3f98 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSourceOperatorTests.java @@ -17,6 +17,8 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.compute.data.DocBlock; +import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; @@ -63,10 +65,10 @@ public void closeIndex() throws IOException { @Override protected LuceneSourceOperator.Factory simple() { - return simple(randomFrom(DataPartitioning.values()), between(1, 10_000), 100); + return simple(randomFrom(DataPartitioning.values()), between(1, 10_000), 100, scoring); } - private LuceneSourceOperator.Factory simple(DataPartitioning dataPartitioning, int numDocs, int limit) { + private LuceneSourceOperator.Factory simple(DataPartitioning dataPartitioning, int numDocs, int limit, boolean scoring) { int commitEvery = Math.max(1, numDocs / 10); try ( RandomIndexWriter writer = new RandomIndexWriter( @@ -91,7 +93,7 @@ private LuceneSourceOperator.Factory simple(DataPartitioning dataPartitioning, i ShardContext ctx = new MockShardContext(reader, 0); Function queryFunction = c -> new MatchAllDocsQuery(); int maxPageSize = between(10, Math.max(10, numDocs)); - return new LuceneSourceOperator.Factory(List.of(ctx), queryFunction, dataPartitioning, 1, maxPageSize, limit); + return new LuceneSourceOperator.Factory(List.of(ctx), 
queryFunction, dataPartitioning, 1, maxPageSize, limit, scoring); } @Override @@ -101,7 +103,10 @@ protected Matcher expectedToStringOfSimple() { @Override protected Matcher expectedDescriptionOfSimple() { - return matchesRegex("LuceneSourceOperator\\[dataPartitioning = (DOC|SHARD|SEGMENT), maxPageSize = \\d+, limit = 100]"); + return matchesRegex( + "LuceneSourceOperator" + + "\\[dataPartitioning = (DOC|SHARD|SEGMENT), maxPageSize = \\d+, limit = 100, scoreMode = (COMPLETE|COMPLETE_NO_SCORES)]" + ); } // TODO tests for the other data partitioning configurations @@ -149,7 +154,7 @@ public void testShardDataPartitioningWithCranky() { } private void testSimple(DriverContext ctx, int size, int limit) { - LuceneSourceOperator.Factory factory = simple(DataPartitioning.SHARD, size, limit); + LuceneSourceOperator.Factory factory = simple(DataPartitioning.SHARD, size, limit, scoring); Operator.OperatorFactory readS = ValuesSourceReaderOperatorTests.factory(reader, S_FIELD, ElementType.LONG); List results = new ArrayList<>(); @@ -164,7 +169,7 @@ private void testSimple(DriverContext ctx, int size, int limit) { } for (Page page : results) { - LongBlock sBlock = page.getBlock(1); + LongBlock sBlock = page.getBlock(initialBlockIndex(page)); for (int p = 0; p < page.getPositionCount(); p++) { assertThat(sBlock.getLong(sBlock.getFirstValueIndex(p)), both(greaterThanOrEqualTo(0L)).and(lessThan((long) size))); } @@ -174,6 +179,20 @@ private void testSimple(DriverContext ctx, int size, int limit) { assertThat(results, hasSize(both(greaterThanOrEqualTo(minPages)).and(lessThanOrEqualTo(maxPages)))); } + // Scores are not interesting to this test, but enabled conditionally and effectively ignored just for coverage. + private final boolean scoring = randomBoolean(); + + // Returns the initial block index, ignoring the score block if scoring is enabled + private int initialBlockIndex(Page page) { + assert page.getBlock(0) instanceof DocBlock : "expected doc block at index 0"; + if (scoring) { + assert page.getBlock(1) instanceof DoubleBlock : "expected double block at index 1"; + return 2; + } else { + return 1; + } + } + /** * Creates a mock search context with the given index reader. * The returned mock search context can be used to test with {@link LuceneOperator}. diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorScoringTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorScoringTests.java new file mode 100644 index 0000000000000..a0fa1c2c01c0a --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorScoringTests.java @@ -0,0 +1,151 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.compute.lucene; + +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedNumericSelector; +import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.elasticsearch.compute.data.DoubleBlock; +import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.Driver; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.compute.operator.OperatorTestCase; +import org.elasticsearch.compute.operator.TestResultPageSinkOperator; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.SortAndFormats; +import org.elasticsearch.search.sort.SortBuilder; +import org.hamcrest.Matcher; +import org.junit.After; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.function.Function; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.matchesRegex; + +public class LuceneTopNSourceOperatorScoringTests extends LuceneTopNSourceOperatorTests { + private static final MappedFieldType S_FIELD = new NumberFieldMapper.NumberFieldType("s", NumberFieldMapper.NumberType.LONG); + private Directory directory = newDirectory(); + private IndexReader reader; + + @After + private void closeIndex() throws IOException { + IOUtils.close(reader, directory); + } + + @Override + protected LuceneTopNSourceOperator.Factory simple() { + return simple(DataPartitioning.SHARD, 10_000, 100); + } + + private LuceneTopNSourceOperator.Factory simple(DataPartitioning dataPartitioning, int size, int limit) { + int commitEvery = Math.max(1, size / 10); + try ( + RandomIndexWriter writer = new RandomIndexWriter( + random(), + directory, + newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE) + ) + ) { + for (int d = 0; d < size; d++) { + List doc = new ArrayList<>(); + doc.add(new SortedNumericDocValuesField("s", d)); + writer.addDocument(doc); + if (d % commitEvery == 0) { + writer.commit(); + } + } + reader = writer.getReader(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + ShardContext ctx = new LuceneSourceOperatorTests.MockShardContext(reader, 0) { + @Override + public Optional buildSort(List> sorts) { + SortField field = new SortedNumericSortField("s", SortField.Type.LONG, false, SortedNumericSelector.Type.MIN); + return Optional.of(new SortAndFormats(new Sort(field), new DocValueFormat[] { null })); + } + }; + Function queryFunction = c -> new MatchAllDocsQuery(); + int taskConcurrency = 0; + int maxPageSize = between(10, Math.max(10, size)); + List> sorts = List.of(new FieldSortBuilder("s")); + return new LuceneTopNSourceOperator.Factory( + List.of(ctx), + 
queryFunction, + dataPartitioning, + taskConcurrency, + maxPageSize, + limit, + sorts, + true // scoring + ); + } + + @Override + protected Matcher expectedToStringOfSimple() { + return matchesRegex("LuceneTopNSourceOperator\\[maxPageSize = \\d+, limit = 100, scoreMode = COMPLETE, sorts = \\[\\{.+}]]"); + } + + @Override + protected Matcher expectedDescriptionOfSimple() { + return matchesRegex( + "LuceneTopNSourceOperator" + + "\\[dataPartitioning = (DOC|SHARD|SEGMENT), maxPageSize = \\d+, limit = 100, scoreMode = COMPLETE, sorts = \\[\\{.+}]]" + ); + } + + @Override + protected void testSimple(DriverContext ctx, int size, int limit) { + LuceneTopNSourceOperator.Factory factory = simple(DataPartitioning.SHARD, size, limit); + Operator.OperatorFactory readS = ValuesSourceReaderOperatorTests.factory(reader, S_FIELD, ElementType.LONG); + + List results = new ArrayList<>(); + OperatorTestCase.runDriver( + new Driver(ctx, factory.get(ctx), List.of(readS.get(ctx)), new TestResultPageSinkOperator(results::add), () -> {}) + ); + OperatorTestCase.assertDriverContext(ctx); + + long expectedS = 0; + int maxPageSize = factory.maxPageSize(); + for (Page page : results) { + if (limit - expectedS < maxPageSize) { + assertThat(page.getPositionCount(), equalTo((int) (limit - expectedS))); + } else { + assertThat(page.getPositionCount(), equalTo(maxPageSize)); + } + DoubleBlock sBlock = page.getBlock(1); + for (int p = 0; p < page.getPositionCount(); p++) { + assertThat(sBlock.getDouble(sBlock.getFirstValueIndex(p)), equalTo(1.0d)); + expectedS++; + } + } + int pages = (int) Math.ceil((float) Math.min(size, limit) / maxPageSize); + assertThat(results, hasSize(pages)); + } +} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorTests.java index 938c4ce5c9f7d..d9a0b70b7931e 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneTopNSourceOperatorTests.java @@ -20,6 +20,8 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.breaker.CircuitBreakingException; +import org.elasticsearch.compute.data.DocBlock; +import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.compute.data.LongBlock; import org.elasticsearch.compute.data.Page; @@ -56,7 +58,7 @@ public class LuceneTopNSourceOperatorTests extends AnyOperatorTestCase { private IndexReader reader; @After - public void closeIndex() throws IOException { + private void closeIndex() throws IOException { IOUtils.close(reader, directory); } @@ -105,19 +107,25 @@ public Optional buildSort(List> sorts) { taskConcurrency, maxPageSize, limit, - sorts + sorts, + scoring ); } @Override protected Matcher expectedToStringOfSimple() { - return matchesRegex("LuceneTopNSourceOperator\\[maxPageSize = \\d+, limit = 100, sorts = \\[\\{.+}]]"); + var s = scoring ? "COMPLETE" : "TOP_DOCS"; + return matchesRegex("LuceneTopNSourceOperator\\[maxPageSize = \\d+, limit = 100, scoreMode = " + s + ", sorts = \\[\\{.+}]]"); } @Override protected Matcher expectedDescriptionOfSimple() { + var s = scoring ? 
"COMPLETE" : "TOP_DOCS"; return matchesRegex( - "LuceneTopNSourceOperator\\[dataPartitioning = (DOC|SHARD|SEGMENT), maxPageSize = \\d+, limit = 100, sorts = \\[\\{.+}]]" + "LuceneTopNSourceOperator" + + "\\[dataPartitioning = (DOC|SHARD|SEGMENT), maxPageSize = \\d+, limit = 100, scoreMode = " + + s + + ", sorts = \\[\\{.+}]]" ); } @@ -137,12 +145,24 @@ public void testShardDataPartitioningWithCranky() { } } - private void testShardDataPartitioning(DriverContext context) { + void testShardDataPartitioning(DriverContext context) { int size = between(1_000, 20_000); int limit = between(10, size); testSimple(context, size, limit); } + public void testWithCranky() { + try { + int size = between(1_000, 20_000); + int limit = between(10, size); + testSimple(crankyDriverContext(), size, limit); + logger.info("cranky didn't break"); + } catch (CircuitBreakingException e) { + logger.info("broken", e); + assertThat(e.getMessage(), equalTo(CrankyCircuitBreakerService.ERROR_MESSAGE)); + } + } + public void testEmpty() { testEmpty(driverContext()); } @@ -157,11 +177,11 @@ public void testEmptyWithCranky() { } } - private void testEmpty(DriverContext context) { + void testEmpty(DriverContext context) { testSimple(context, 0, between(10, 10_000)); } - private void testSimple(DriverContext ctx, int size, int limit) { + protected void testSimple(DriverContext ctx, int size, int limit) { LuceneTopNSourceOperator.Factory factory = simple(DataPartitioning.SHARD, size, limit); Operator.OperatorFactory readS = ValuesSourceReaderOperatorTests.factory(reader, S_FIELD, ElementType.LONG); @@ -178,7 +198,7 @@ private void testSimple(DriverContext ctx, int size, int limit) { } else { assertThat(page.getPositionCount(), equalTo(factory.maxPageSize())); } - LongBlock sBlock = page.getBlock(1); + LongBlock sBlock = page.getBlock(initialBlockIndex(page)); for (int p = 0; p < page.getPositionCount(); p++) { assertThat(sBlock.getLong(sBlock.getFirstValueIndex(p)), equalTo(expectedS++)); } @@ -186,4 +206,18 @@ private void testSimple(DriverContext ctx, int size, int limit) { int pages = (int) Math.ceil((float) Math.min(size, limit) / factory.maxPageSize()); assertThat(results, hasSize(pages)); } + + // Scores are not interesting to this test, but enabled conditionally and effectively ignored just for coverage. 
+ private final boolean scoring = randomBoolean(); + + // Returns the initial block index, ignoring the score block if scoring is enabled + private int initialBlockIndex(Page page) { + assert page.getBlock(0) instanceof DocBlock : "expected doc block at index 0"; + if (scoring) { + assert page.getBlock(1) instanceof DoubleBlock : "expected double block at index 1"; + return 2; + } else { + return 1; + } + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java index f6d81af7c14e5..f31573f121a71 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValueSourceReaderTypeConversionTests.java @@ -265,7 +265,8 @@ private SourceOperator simpleInput(DriverContext context, int size, int commitEv DataPartitioning.SHARD, 1,// randomIntBetween(1, 10), pageSize, - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); return luceneFactory.get(context); } @@ -1292,7 +1293,8 @@ public void testWithNulls() throws IOException { randomFrom(DataPartitioning.values()), randomIntBetween(1, 10), randomPageSize(), - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); var vsShardContext = new ValuesSourceReaderOperator.ShardContext(reader(indexKey), () -> SourceLoader.FROM_STORED_SOURCE); try ( @@ -1450,7 +1452,8 @@ public void testManyShards() throws IOException { DataPartitioning.SHARD, randomIntBetween(1, 10), 1000, - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); // TODO add index2 MappedFieldType ft = mapperService(indexKey).fieldType("key"); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java index c8dd6f87be5fc..95b313b0b5412 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java @@ -170,7 +170,8 @@ private SourceOperator simpleInput(DriverContext context, int size, int commitEv DataPartitioning.SHARD, randomIntBetween(1, 10), pageSize, - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); return luceneFactory.get(context); } @@ -1301,7 +1302,8 @@ public void testWithNulls() throws IOException { randomFrom(DataPartitioning.values()), randomIntBetween(1, 10), randomPageSize(), - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); try ( Driver driver = new Driver( @@ -1524,7 +1526,8 @@ public void testManyShards() throws IOException { DataPartitioning.SHARD, randomIntBetween(1, 10), 1000, - LuceneOperator.NO_LIMIT + LuceneOperator.NO_LIMIT, + false // no scoring ); MappedFieldType ft = mapperService.fieldType("key"); var readerFactory = new ValuesSourceReaderOperator.Factory( diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/qstr-function.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/qstr-function.csv-spec index 6039dc05b6c44..2c84bdae6b32e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/qstr-function.csv-spec +++ 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/qstr-function.csv-spec @@ -100,7 +100,6 @@ book_no:keyword | title:text 7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) ; - qstrWithMultivaluedTextField required_capability: qstr_function diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec new file mode 100644 index 0000000000000..d4c7b8c59fdbc --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/scoring.csv-spec @@ -0,0 +1,285 @@ +############################################### +# Tests for scoring support +# + +singleQstrBoostScoringSorted +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:Lord Rings^2") +| eval c_score = ceil(_score) +| keep book_no, title, c_score +| sort c_score desc, book_no asc +| LIMIT 2; + +book_no:keyword | title:text | c_score:double +2675 | The Lord of the Rings - Boxed Set | 6.0 +4023 | A Tolkien Compass: Including J. R. R. Tolkien's Guide to the Names in The Lord of the Rings | 6.0 +; + +singleMatchWithKeywordFieldScoring +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where author.keyword:"William Faulkner" +| keep book_no, author, _score +| sort book_no; + +book_no:keyword | author:text | _score:double +2713 | William Faulkner | 2.3142893314361572 +2883 | William Faulkner | 2.3142893314361572 +4724 | William Faulkner | 2.3142893314361572 +4977 | William Faulkner | 2.3142893314361572 +5119 | William Faulkner | 2.3142893314361572 +5404 | William Faulkner | 2.3142893314361572 +5578 | William Faulkner | 2.3142893314361572 +8077 | William Faulkner | 2.3142893314361572 +9896 | William Faulkner | 2.3142893314361572 +; + +qstrWithFieldAndScoringSortedEval +required_capability: qstr_function +required_capability: metadata_score + +from books metadata _score +| where qstr("title:rings") +| sort _score desc +| eval _score::long +| keep book_no, title, _score +| limit 3; + +book_no:keyword | title:text | _score:double +2675 | The Lord of the Rings - Boxed Set | 2.7583377361297607 +7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 1) | 1.9239964485168457 +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9239964485168457 +; + +qstrWithFieldAndScoringSorted +required_capability: qstr_function +required_capability: metadata_score + +from books metadata _score +| where qstr("title:rings") +| sort _score desc, book_no desc +| keep book_no, title, _score +| limit 3; + +book_no:keyword | title:text | _score:double +2675 | The Lord of the Rings - Boxed Set | 2.7583377361297607 +7140 | The Lord of the Rings Poster Collection: Six Paintings by Alan Lee (No. 
1) | 1.9239964485168457 +2714 | Return of the King Being the Third Part of The Lord of the Rings | 1.9239964485168457 +; + +singleQstrScoringManipulated +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:William Faulkner") +| eval add_score = ceil(_score) + 1 +| keep book_no, author, add_score +| sort book_no +| LIMIT 2; + +book_no:keyword | author:text | add_score:double +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | 2.0 +2713 | William Faulkner | 7.0 +; + +testMultiValuedFieldWithConjunctionWithScore +required_capability: match_function +required_capability: metadata_score + +from employees metadata _score +| where match(job_positions, "Data Scientist") and match(job_positions, "Support Engineer") +| keep emp_no, first_name, last_name, job_positions, _score; + +emp_no:integer | first_name:keyword | last_name:keyword | job_positions:keyword | _score:double +10043 | Yishay | Tzvieli | [Data Scientist, Python Developer, Support Engineer] | 5.233309745788574 +; + +testMatchAndQueryStringFunctionsWithScore +required_capability: match_function +required_capability: metadata_score + +from employees metadata _score +| where match(job_positions, "Data Scientist") and qstr("job_positions: (Support Engineer) and gender: F") +| keep emp_no, first_name, last_name, job_positions, _score; +ignoreOrder:true + +emp_no:integer | first_name:keyword | last_name:keyword | job_positions:keyword | _score:double +10041 | Uri | Lenart | [Data Scientist, Head Human Resources, Internship, Senior Team Lead] | 3.509873867034912 +10043 | Yishay | Tzvieli | [Data Scientist, Python Developer, Support Engineer] | 5.233309745788574 +; + +multipleWhereWithMatchScoringNoSort +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"short stories" +| where author:"Ursula K. Le Guin" +| keep book_no, title, author, _score; + +ignoreOrder:true +book_no:keyword | title:text | author:text | _score:double +8480 | The wind's twelve quarters: Short stories | Ursula K. Le Guin | 14.489097595214844 +; + +multipleWhereWithMatchScoring +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"short stories" +| where author:"Ursula K. Le Guin" +| keep book_no, title, author, _score +| sort book_no; + +book_no:keyword | title:text | author:text | _score:double +8480 | The wind's twelve quarters: Short stories | Ursula K. Le Guin | 14.489097595214844 +; + +combinedMatchWithFunctionsScoring +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"Tolkien" AND author:"Tolkien" AND year > 2000 +| where mv_count(author) == 1 +| keep book_no, title, author, year, _score +| sort book_no; + +book_no:keyword | title:text | author:text | year:integer | _score:double +5335 | Letters of J R R Tolkien | J.R.R. 
Tolkien | 2014 | 5.448054313659668 +; + +singleQstrScoring +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:William Faulkner") +| keep book_no, author, _score +| sort book_no +| LIMIT 2; + +book_no:keyword | author:text | _score:double +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | 0.9976131916046143 +2713 | William Faulkner | 5.9556169509887695 +; + +singleQstrScoringGrok +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:Lord Rings") +| GROK title "%{WORD:title} %{WORD}" +| sort _score desc +| keep book_no, title, _score +| LIMIT 3; + +book_no:keyword | title:keyword | _score:double +8875 | The | 2.9505908489227295 +4023 | A | 2.8327860832214355 +2675 | The | 2.7583377361297607 +; + +combinedMatchWithScoringEvalNoSort +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"Tolkien" AND author:"Tolkien" AND year > 2000 +| where mv_count(author) == 1 +| eval c_score = ceil(_score) +| keep book_no, title, author, year, c_score; + +ignoreOrder:true +book_no:keyword | title:text | author:text | year:integer | c_score:double +5335 | Letters of J R R Tolkien | J.R.R. Tolkien | 2014 | 6 +; + +singleQstrScoringRename +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:Lord Rings") +| rename _score as rank +| sort rank desc +| keep book_no, rank +| LIMIT 3; + +book_no:keyword | rank:double +8875 | 2.9505908489227295 +4023 | 2.8327860832214355 +2675 | 2.7583377361297607 +; + +singleMatchWithTextFieldScoring +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where author:"William Faulkner" +| sort book_no +| keep book_no, author, _score +| limit 5; + +book_no:keyword | author:text | _score:double +2378 | [Carol Faulkner, Holly Byers Ochoa, Lucretia Mott] | 0.9976131916046143 +2713 | William Faulkner | 4.272439002990723 +2847 | Colleen Faulkner | 1.7401835918426514 +2883 | William Faulkner | 4.272439002990723 +3293 | Danny Faulkner | 1.7401835918426514 +; + +combinedMatchWithFunctionsScoringNoSort +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"Tolkien" AND author:"Tolkien" AND year > 2000 +| where mv_count(author) == 1 +| keep book_no, title, author, year, _score; + +ignoreOrder:true +book_no:keyword | title:text | author:text | year:integer | _score:double +5335 | Letters of J R R Tolkien | J.R.R. Tolkien | 2014 | 5.448054313659668 +; + +combinedMatchWithScoringEval +required_capability: metadata_score +required_capability: match_operator_colon + +from books metadata _score +| where title:"Tolkien" AND author:"Tolkien" AND year > 2000 +| where mv_count(author) == 1 +| eval c_score = ceil(_score) +| keep book_no, title, author, year, c_score +| sort book_no; + +book_no:keyword | title:text | author:text | year:integer | c_score:double +5335 | Letters of J R R Tolkien | J.R.R. 
Tolkien | 2014 | 6 +; + +singleQstrScoringEval +required_capability: metadata_score +required_capability: qstr_function + +from books metadata _score +| where qstr("author:Lord Rings") +| eval c_score = ceil(_score) +| keep book_no, c_score +| sort book_no desc +| LIMIT 3; + +book_no:keyword | c_score:double +8875 | 3.0 +7350 | 2.0 +7140 | 3.0 +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java index 56453a291ea81..1939f81353c0e 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionTaskIT.java @@ -89,7 +89,7 @@ public void setup() { assumeTrue("requires query pragmas", canUseQueryPragmas()); nodeLevelReduction = randomBoolean(); READ_DESCRIPTION = """ - \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = 2147483647] + \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = 2147483647, scoreMode = COMPLETE_NO_SCORES] \\_ValuesSourceReaderOperator[fields = [pause_me]] \\_AggregationOperator[mode = INITIAL, aggs = sum of longs] \\_ExchangeSinkOperator""".replace("pageSize()", Integer.toString(pageSize())); @@ -448,6 +448,7 @@ protected void doRun() throws Exception { public void testTaskContentsForTopNQuery() throws Exception { READ_DESCRIPTION = ("\\_LuceneTopNSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = 1000, " + + "scoreMode = TOP_DOCS, " + "sorts = [{\"pause_me\":{\"order\":\"asc\",\"missing\":\"_last\",\"unmapped_type\":\"long\"}}]]\n" + "\\_ValuesSourceReaderOperator[fields = [pause_me]]\n" + "\\_ProjectOperator[projection = [1]]\n" @@ -482,7 +483,7 @@ public void testTaskContentsForTopNQuery() throws Exception { public void testTaskContentsForLimitQuery() throws Exception { String limit = Integer.toString(randomIntBetween(pageSize() + 1, 2 * numberOfDocs())); READ_DESCRIPTION = """ - \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = limit()] + \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = limit(), scoreMode = COMPLETE_NO_SCORES] \\_ValuesSourceReaderOperator[fields = [pause_me]] \\_ProjectOperator[projection = [1]] \\_ExchangeSinkOperator""".replace("pageSize()", Integer.toString(pageSize())).replace("limit()", limit); @@ -511,7 +512,7 @@ public void testTaskContentsForLimitQuery() throws Exception { public void testTaskContentsForGroupingStatsQuery() throws Exception { READ_DESCRIPTION = """ - \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = 2147483647] + \\_LuceneSourceOperator[dataPartitioning = SHARD, maxPageSize = pageSize(), limit = 2147483647, scoreMode = COMPLETE_NO_SCORES] \\_ValuesSourceReaderOperator[fields = [foo]] \\_OrdinalsGroupingOperator(aggs = max of longs) \\_ExchangeSinkOperator""".replace("pageSize()", Integer.toString(pageSize())); diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java index 5c0c13b48df3b..3b9359fe66d40 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java +++ 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/LookupFromIndexIT.java @@ -148,7 +148,8 @@ public void testLookupIndex() throws IOException { DataPartitioning.SEGMENT, 1, 10000, - DocIdSetIterator.NO_MORE_DOCS + DocIdSetIterator.NO_MORE_DOCS, + false // no scoring ); ValuesSourceReaderOperator.Factory reader = new ValuesSourceReaderOperator.Factory( List.of( diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchFunctionIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchFunctionIT.java new file mode 100644 index 0000000000000..99f7d48a0d636 --- /dev/null +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchFunctionIT.java @@ -0,0 +1,299 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.WriteRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; +import org.elasticsearch.xpack.esql.action.EsqlQueryResponse; +import org.junit.Before; + +import java.util.List; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.CoreMatchers.containsString; + +//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE,org.elasticsearch.compute:TRACE", reason = "debug") +public class MatchFunctionIT extends AbstractEsqlIntegTestCase { + + @Before + public void setupIndex() { + createAndPopulateIndex(); + } + + @Override + protected EsqlQueryResponse run(EsqlQueryRequest request) { + assumeTrue("match function capability not available", EsqlCapabilities.Cap.MATCH_FUNCTION.isEnabled()); + return super.run(request); + } + + public void testSimpleWhereMatch() { + var query = """ + FROM test + | WHERE match(content, "fox") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(6))); + } + } + + public void testCombinedWhereMatch() { + var query = """ + FROM test + | WHERE match(content, "fox") AND id > 5 + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(6))); + } + } + + public void testMultipleMatch() { + var query = """ + FROM test + | WHERE match(content, "fox") AND match(content, "brown") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(1), List.of(6))); + } + } + + public void testMultipleWhereMatch() { + var query = """ + FROM test + | WHERE match(content, "fox") AND match(content, "brown") + | EVAL summary = 
CONCAT("document with id: ", to_str(id), "and content: ", content) + | SORT summary + | LIMIT 4 + | WHERE match(content, "brown fox") + | KEEP id + """; + + var error = expectThrows(ElasticsearchException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[MATCH] function cannot be used after LIMIT")); + } + + public void testNotWhereMatch() { + var query = """ + FROM test + | WHERE NOT match(content, "brown fox") + | KEEP id + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id")); + assertColumnTypes(resp.columns(), List.of("integer")); + assertValues(resp.values(), List.of(List.of(5))); + } + } + + public void testWhereMatchWithScoring() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE match(content, "fox") + | KEEP id, _score + | SORT id ASC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testWhereMatchWithScoringDifferentSort() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE match(content, "fox") + | KEEP id, _score + | SORT id DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(6, 0.9114001989364624), List.of(1, 1.156558871269226))); + } + } + + public void testWhereMatchWithScoringSortScore() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE match(content, "fox") + | KEEP id, _score + | SORT _score DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testWhereMatchWithScoringNoSort() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE content:"fox" + | KEEP id, _score + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testNonExistingColumn() { + var query = """ + FROM test + | WHERE something:"fox" + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("Unknown column [something]")); + } + + public void testWhereMatchEvalColumn() { + var query = """ + FROM test + | EVAL upper_content = to_upper(content) + | WHERE upper_content:"FOX" + | KEEP id + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("[:] operator cannot operate on [upper_content], which is not a field from an index mapping") + ); + } + + public void 
testWhereMatchOverWrittenColumn() { + var query = """ + FROM test + | DROP content + | EVAL content = CONCAT("document with ID ", to_str(id)) + | WHERE content:"document" + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("[:] operator cannot operate on [content], which is not a field from an index mapping") + ); + } + + public void testWhereMatchAfterStats() { + var query = """ + FROM test + | STATS count(*) + | WHERE content:"fox" + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("Unknown column [content]")); + } + + public void testWhereMatchWithFunctions() { + var query = """ + FROM test + | WHERE content:"fox" OR to_upper(content) == "FOX" + """; + var error = expectThrows(ElasticsearchException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString( + "Invalid condition [content:\"fox\" OR to_upper(content) == \"FOX\"]. " + + "[:] operator can't be used as part of an or condition" + ) + ); + } + + public void testWhereMatchWithRow() { + var query = """ + ROW content = "a brown fox" + | WHERE content:"fox" + """; + + var error = expectThrows(ElasticsearchException.class, () -> run(query)); + assertThat( + error.getMessage(), + containsString("[:] operator cannot operate on [\"a brown fox\"], which is not a field from an index mapping") + ); + } + + public void testMatchWithinEval() { + var query = """ + FROM test + | EVAL matches_query = content:"fox" + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("[:] operator is only supported in WHERE commands")); + } + + public void testMatchWithNonTextField() { + var query = """ + FROM test + | WHERE id:"fox" + """; + + var error = expectThrows(VerificationException.class, () -> run(query)); + assertThat(error.getMessage(), containsString("first argument of [id:\"fox\"] must be [string], found value [id] type [integer]")); + } + + private void createAndPopulateIndex() { + var indexName = "test"; + var client = client().admin().indices(); + var CreateRequest = client.prepareCreate(indexName) + .setSettings(Settings.builder().put("index.number_of_shards", 1)) + .setMapping("id", "type=integer", "content", "type=text"); + assertAcked(CreateRequest); + client().prepareBulk() + .add(new IndexRequest(indexName).id("1").source("id", 1, "content", "This is a brown fox")) + .add(new IndexRequest(indexName).id("2").source("id", 2, "content", "This is a brown dog")) + .add(new IndexRequest(indexName).id("3").source("id", 3, "content", "This dog is really brown")) + .add(new IndexRequest(indexName).id("4").source("id", 4, "content", "The dog is brown but this document is very very long")) + .add(new IndexRequest(indexName).id("5").source("id", 5, "content", "There is also a white cat")) + .add(new IndexRequest(indexName).id("6").source("id", 6, "content", "The quick brown fox jumps over the lazy dog")) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + ensureYellow(indexName); + } +} diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java index 3b647583f1129..6a360eb319abb 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java +++ 
b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/MatchOperatorIT.java @@ -14,6 +14,7 @@ import org.elasticsearch.test.junit.annotations.TestLogging; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.junit.Before; import java.util.List; @@ -105,6 +106,56 @@ public void testNotWhereMatch() { } } + public void testWhereMatchWithScoring() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE content:"fox" + | KEEP id, _score + | SORT id ASC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testWhereMatchWithScoringDifferentSort() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE content:"fox" + | KEEP id, _score + | SORT id + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + + public void testWhereMatchWithScoringNoSort() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE content:"fox" + | KEEP id, _score + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder(resp.values(), List.of(List.of(1, 1.156558871269226), List.of(6, 0.9114001989364624))); + } + } + public void testNonExistingColumn() { var query = """ FROM test diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java index 03af16d29e9b4..a3d1ac931528c 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/QueryStringIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.index.query.QueryShardException; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.AbstractEsqlIntegTestCase; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.junit.Before; import java.util.List; @@ -137,4 +138,99 @@ private void createAndPopulateIndex() { .get(); ensureYellow(indexName); } + + public void testWhereQstrWithScoring() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE qstr("content: fox") + | KEEP id, _score + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder( + resp.values(), + List.of( + List.of(2, 0.3028995096683502), + List.of(3, 
0.3028995096683502), + List.of(4, 0.2547692656517029), + List.of(5, 0.28161853551864624) + ) + ); + + } + } + + public void testWhereQstrWithScoringSorted() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE qstr("content:fox fox") + | KEEP id, _score + | SORT _score DESC + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValues( + resp.values(), + List.of( + List.of(3, 1.5605685710906982), + List.of(2, 0.6057990193367004), + List.of(5, 0.5632370710372925), + List.of(4, 0.5095385313034058) + ) + ); + + } + } + + public void testWhereQstrWithScoringNoSort() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE qstr("content: fox") + | KEEP id, _score + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder( + resp.values(), + List.of( + List.of(2, 0.3028995096683502), + List.of(3, 0.3028995096683502), + List.of(4, 0.2547692656517029), + List.of(5, 0.28161853551864624) + ) + ); + } + } + + public void testWhereQstrWithNonPushableAndScoring() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var query = """ + FROM test + METADATA _score + | WHERE qstr("content: fox") + AND abs(id) > 0 + | EVAL c_score = ceil(_score) + | KEEP id, c_score + | SORT id DESC + | LIMIT 2 + """; + + try (var resp = run(query)) { + assertColumnNames(resp.columns(), List.of("id", "c_score")); + assertColumnTypes(resp.columns(), List.of("integer", "double")); + assertValuesInAnyOrder(resp.values(), List.of(List.of(5, 1.0), List.of(4, 1.0))); + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index d8004f73f613f..9bd4211855699 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -539,7 +539,12 @@ public enum Cap { /** * Fix for https://github.com/elastic/elasticsearch/issues/114714, again */ - FIX_STATS_BY_FOLDABLE_EXPRESSION_2,; + FIX_STATS_BY_FOLDABLE_EXPRESSION_2, + + /** + * Support the "METADATA _score" directive to enable _score column. 
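+ * (Illustrative usage, taken from the tests in this change: {@code FROM test METADATA _score | WHERE match(first_name, "john") | SORT _score DESC}.)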
+ */ + METADATA_SCORE(Build.current().isSnapshot()); private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 2be13398dab2f..5f8c011cff53a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -19,6 +19,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; @@ -221,6 +222,7 @@ else if (p instanceof Lookup lookup) { checkFullTextQueryFunctions(p, failures); }); checkRemoteEnrich(plan, failures); + checkMetadataScoreNameReserved(plan, failures); if (failures.isEmpty()) { checkLicense(plan, licenseState, failures); @@ -234,6 +236,13 @@ else if (p instanceof Lookup lookup) { return failures; } + private static void checkMetadataScoreNameReserved(LogicalPlan p, Set failures) { + // _score can only be set as metadata attribute + if (p.inputSet().stream().anyMatch(a -> MetadataAttribute.SCORE.equals(a.name()) && (a instanceof MetadataAttribute) == false)) { + failures.add(fail(p, "`" + MetadataAttribute.SCORE + "` is a reserved METADATA attribute")); + } + } + private void checkSort(LogicalPlan p, Set failures) { if (p instanceof OrderBy ob) { ob.order().forEach(o -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java index feb8717f007b7..8046d6bc56607 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/LucenePushdownPredicates.java @@ -9,6 +9,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.stats.SearchStats; @@ -59,6 +60,10 @@ default boolean isPushableFieldAttribute(Expression exp) { return false; } + default boolean isPushableMetadataAttribute(Expression exp) { + return exp instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE); + } + /** * The default implementation of this has no access to SearchStats, so it can only make decisions based on the FieldAttribute itself. 
* In particular, it assumes TEXT fields have no exact subfields (underlying keyword field), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java index 925e144b69fcc..2b531257e594a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSource.java @@ -14,6 +14,7 @@ import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.expression.Order; @@ -57,6 +58,7 @@ * */ public class PushTopNToSource extends PhysicalOptimizerRules.ParameterizedOptimizerRule { + @Override protected PhysicalPlan rule(TopNExec topNExec, LocalPhysicalOptimizerContext ctx) { Pushable pushable = evaluatePushable(topNExec, LucenePushdownPredicates.from(ctx.searchStats())); @@ -155,6 +157,8 @@ && canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) { order.nullsPosition() ) ); + } else if (lucenePushdownPredicates.isPushableMetadataAttribute(order.child())) { + pushableSorts.add(new EsQueryExec.ScoreSort(order.direction())); } else if (order.child() instanceof ReferenceAttribute referenceAttribute) { Attribute resolvedAttribute = aliasReplacedBy.resolve(referenceAttribute, referenceAttribute); if (distances.containsKey(resolvedAttribute.id())) { @@ -192,13 +196,23 @@ && canPushDownOrders(topNExec.order(), lucenePushdownPredicates)) { private static boolean canPushDownOrders(List orders, LucenePushdownPredicates lucenePushdownPredicates) { // allow only exact FieldAttributes (no expressions) for sorting - return orders.stream().allMatch(o -> lucenePushdownPredicates.isPushableFieldAttribute(o.child())); + return orders.stream() + .allMatch( + o -> lucenePushdownPredicates.isPushableFieldAttribute(o.child()) + || lucenePushdownPredicates.isPushableMetadataAttribute(o.child()) + ); } private static List buildFieldSorts(List orders) { List sorts = new ArrayList<>(orders.size()); for (Order o : orders) { - sorts.add(new EsQueryExec.FieldSort(((FieldAttribute) o.child()).exactAttribute(), o.direction(), o.nullsPosition())); + if (o.child() instanceof FieldAttribute fa) { + sorts.add(new EsQueryExec.FieldSort(fa.exactAttribute(), o.direction(), o.nullsPosition())); + } else if (o.child() instanceof MetadataAttribute ma && MetadataAttribute.SCORE.equals(ma.name())) { + sorts.add(new EsQueryExec.ScoreSort(o.direction())); + } else { + assert false : "unexpected ordering on expression type " + o.child().getClass(); + } } return sorts; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java index 74ea6f99e5e59..11e386ddd046c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/ReplaceSourceAttributes.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; +import java.util.ArrayList; import java.util.List; import static org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection.UP; @@ -29,6 +30,8 @@ public ReplaceSourceAttributes() { @Override protected PhysicalPlan rule(EsSourceExec plan) { var docId = new FieldAttribute(plan.source(), EsQueryExec.DOC_ID_FIELD.getName(), EsQueryExec.DOC_ID_FIELD); + final List attributes = new ArrayList<>(); + attributes.add(docId); if (plan.indexMode() == IndexMode.TIME_SERIES) { Attribute tsid = null, timestamp = null; for (Attribute attr : plan.output()) { @@ -42,9 +45,14 @@ protected PhysicalPlan rule(EsSourceExec plan) { if (tsid == null || timestamp == null) { throw new IllegalStateException("_tsid or @timestamp are missing from the time-series source"); } - return new EsQueryExec(plan.source(), plan.index(), plan.indexMode(), List.of(docId, tsid, timestamp), plan.query()); - } else { - return new EsQueryExec(plan.source(), plan.index(), plan.indexMode(), List.of(docId), plan.query()); + attributes.add(tsid); + attributes.add(timestamp); } + plan.output().forEach(attr -> { + if (attr instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)) { + attributes.add(ma); + } + }); + return new EsQueryExec(plan.source(), plan.index(), plan.indexMode(), attributes, plan.query()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java index 99e03b3653f79..24398afa18010 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/LogicalPlanBuilder.java @@ -16,6 +16,7 @@ import org.elasticsearch.dissect.DissectParser; import org.elasticsearch.index.IndexMode; import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.common.Failure; import org.elasticsearch.xpack.esql.core.expression.Alias; import org.elasticsearch.xpack.esql.core.expression.Attribute; @@ -276,7 +277,8 @@ public LogicalPlan visitFromCommand(EsqlBaseParser.FromCommandContext ctx) { for (var c : metadataOptionContext.UNQUOTED_SOURCE()) { String id = c.getText(); Source src = source(c); - if (MetadataAttribute.isSupported(id) == false) { + if (MetadataAttribute.isSupported(id) == false // TODO: drop check below once METADATA_SCORE is no longer snapshot-only + || (EsqlCapabilities.Cap.METADATA_SCORE.isEnabled() == false && MetadataAttribute.SCORE.equals(id))) { throw new ParsingException(src, "unsupported metadata field [" + id + "]"); } Attribute a = metadataMap.put(id, MetadataAttribute.create(src, id)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java index 82848fb2f1062..267b9e613abef 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/EsQueryExec.java @@ -15,6 +15,7 @@ import 
org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.GeoDistanceSortBuilder; +import org.elasticsearch.search.sort.ScoreSortBuilder; import org.elasticsearch.search.sort.SortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.xpack.esql.core.expression.Attribute; @@ -94,6 +95,19 @@ public SortBuilder sortBuilder() { } } + public record ScoreSort(Order.OrderDirection direction) implements Sort { + @Override + public SortBuilder sortBuilder() { + return new ScoreSortBuilder(); + } + + @Override + public FieldAttribute field() { + // TODO: refactor this: not all Sorts are backed by FieldAttributes + return null; + } + } + public EsQueryExec(Source source, EsIndex index, IndexMode indexMode, List attributes, QueryBuilder query) { this(source, index, indexMode, attributes, query, null, null, null); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index ab0d68b152262..15f5b6579098d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -51,6 +51,7 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.MultiTypeEsField; import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction; @@ -165,7 +166,10 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec, assert esQueryExec.estimatedRowSize() != null : "estimated row size not initialized"; int rowEstimatedSize = esQueryExec.estimatedRowSize(); int limit = esQueryExec.limit() != null ? 
(Integer) esQueryExec.limit().fold() : NO_LIMIT; - if (sorts != null && sorts.isEmpty() == false) { + boolean scoring = esQueryExec.attrs() + .stream() + .anyMatch(a -> a instanceof MetadataAttribute && a.name().equals(MetadataAttribute.SCORE)); + if ((sorts != null && sorts.isEmpty() == false)) { List> sortBuilders = new ArrayList<>(sorts.size()); for (Sort sort : sorts) { sortBuilders.add(sort.sortBuilder()); @@ -177,7 +181,8 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec, context.queryPragmas().taskConcurrency(), context.pageSize(rowEstimatedSize), limit, - sortBuilders + sortBuilders, + scoring ); } else { if (esQueryExec.indexMode() == IndexMode.TIME_SERIES) { @@ -195,7 +200,8 @@ public final PhysicalOperation sourcePhysicalOperation(EsQueryExec esQueryExec, context.queryPragmas().dataPartitioning(), context.queryPragmas().taskConcurrency(), context.pageSize(rowEstimatedSize), - limit + limit, + scoring ); } } @@ -273,7 +279,7 @@ public IndexSearcher searcher() { @Override public Optional buildSort(List> sorts) throws IOException { - return SortBuilder.buildSort(sorts, ctx); + return SortBuilder.buildSort(sorts, ctx, false); } @Override diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 355073fcc873f..6074601535477 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.esql.VerificationException; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.type.EsField; import org.elasticsearch.xpack.esql.core.type.InvalidMappedField; @@ -21,6 +22,7 @@ import org.elasticsearch.xpack.esql.parser.EsqlParser; import org.elasticsearch.xpack.esql.parser.QueryParam; import org.elasticsearch.xpack.esql.parser.QueryParams; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -1754,6 +1756,29 @@ public void testToDatePeriodToTimeDurationWithInvalidType() { ); } + public void testNonMetadataScore() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + assertEquals("1:12: `_score` is a reserved METADATA attribute", error("from foo | eval _score = 10")); + + assertEquals( + "1:48: `_score` is a reserved METADATA attribute", + error("from foo metadata _score | where qstr(\"bar\") | eval _score = _score + 1") + ); + } + + public void testScoreRenaming() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + assertEquals("1:33: `_score` is a reserved METADATA attribute", error("from foo METADATA _id, _score | rename _id as _score")); + + assertTrue(passes("from foo metadata _score | rename _score as foo").stream().anyMatch(a -> a.name().equals("foo"))); + } + + private List passes(String query) { + LogicalPlan logicalPlan = defaultAnalyzer.analyze(parser.createStatement(query)); + assertTrue(logicalPlan.resolved()); + return logicalPlan.output(); + } + public void testIntervalAsString() { // DateTrunc for (String interval : List.of("1 minu", "1 dy", "1.5 minutes", "0.5 
days", "minutes 1", "day 5")) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index f3ba11457a715..1f131f79c3d0e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.EsqlTestUtils.TestConfigurableSearchStats; import org.elasticsearch.xpack.esql.EsqlTestUtils.TestConfigurableSearchStats.Config; import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.elasticsearch.xpack.esql.analysis.Analyzer; import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; import org.elasticsearch.xpack.esql.analysis.EnrichResolution; @@ -63,6 +64,7 @@ import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialAggregateFunction; import org.elasticsearch.xpack.esql.expression.function.aggregate.SpatialCentroid; import org.elasticsearch.xpack.esql.expression.function.aggregate.Sum; +import org.elasticsearch.xpack.esql.expression.function.fulltext.Match; import org.elasticsearch.xpack.esql.expression.function.scalar.math.Round; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialContains; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.SpatialDisjoint; @@ -6581,6 +6583,66 @@ public void testLookupThenTopN() { ); } + public void testScore() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var plan = physicalPlan(""" + from test metadata _score + | where match(first_name, "john") + | keep _score + """); + + ProjectExec outerProject = as(plan, ProjectExec.class); + LimitExec limitExec = as(outerProject.child(), LimitExec.class); + ExchangeExec exchange = as(limitExec.child(), ExchangeExec.class); + FragmentExec frag = as(exchange.child(), FragmentExec.class); + + LogicalPlan opt = logicalOptimizer.optimize(frag.fragment()); + Limit limit = as(opt, Limit.class); + Filter filter = as(limit.child(), Filter.class); + + Match match = as(filter.condition(), Match.class); + assertTrue(match.field() instanceof FieldAttribute); + assertEquals("first_name", ((FieldAttribute) match.field()).field().getName()); + + EsRelation esRelation = as(filter.child(), EsRelation.class); + assertTrue(esRelation.optimized()); + assertTrue(esRelation.resolved()); + assertTrue(esRelation.output().stream().anyMatch(a -> a.name().equals(MetadataAttribute.SCORE) && a instanceof MetadataAttribute)); + } + + public void testScoreTopN() { + assumeTrue("'METADATA _score' is disabled", EsqlCapabilities.Cap.METADATA_SCORE.isEnabled()); + var plan = physicalPlan(""" + from test metadata _score + | where match(first_name, "john") + | keep _score + | sort _score desc + """); + + ProjectExec projectExec = as(plan, ProjectExec.class); + TopNExec topNExec = as(projectExec.child(), TopNExec.class); + ExchangeExec exchange = as(topNExec.child(), ExchangeExec.class); + FragmentExec frag = as(exchange.child(), FragmentExec.class); + + LogicalPlan opt = logicalOptimizer.optimize(frag.fragment()); + TopN topN = as(opt, TopN.class); + List order = topN.order(); + Order scoreOrer = order.getFirst(); + assertEquals(Order.OrderDirection.DESC, scoreOrer.direction()); + Expression 
child = scoreOrder.child(); + assertTrue(child instanceof MetadataAttribute ma && ma.name().equals(MetadataAttribute.SCORE)); + Filter filter = as(topN.child(), Filter.class); + + Match match = as(filter.condition(), Match.class); + assertTrue(match.field() instanceof FieldAttribute); + assertEquals("first_name", ((FieldAttribute) match.field()).field().getName()); + + EsRelation esRelation = as(filter.child(), EsRelation.class); + assertTrue(esRelation.optimized()); + assertTrue(esRelation.resolved()); + assertTrue(esRelation.output().stream().anyMatch(a -> a.name().equals(MetadataAttribute.SCORE) && a instanceof MetadataAttribute)); + } + @SuppressWarnings("SameParameterValue") private static void assertFilterCondition( Filter filter, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java index 98f0af8e4b8e6..2429bcb1a1b04 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/PushTopNToSourceTests.java @@ -20,6 +20,7 @@ import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; +import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; import org.elasticsearch.xpack.esql.core.expression.Nullability; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -64,6 +65,13 @@ public void testSimpleSortField() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleScoreSortField() { + // FROM index METADATA _score | SORT _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false).scoreSort().limit(10); + assertPushdownSort(query); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortMultipleFields() { // FROM index | SORT field, integer, double | LIMIT 10 var query = from("index").sort("field").sort("integer").sort("double").limit(10); @@ -71,6 +79,13 @@ public void testSimpleSortMultipleFields() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortMultipleFieldsAndScore() { + // FROM index | SORT field, integer, double, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false).sort("field").sort("integer").sort("double").scoreSort().limit(10); + assertPushdownSort(query); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortFieldAndEvalLiteral() { // FROM index | EVAL x = 1 | SORT field | LIMIT 10 var query = from("index").eval("x", e -> e.i(1)).sort("field").limit(10); @@ -78,6 +93,13 @@ public void testSimpleSortFieldAndEvalLiteral() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortFieldScoreAndEvalLiteral() { + // FROM index METADATA _score | EVAL x = 1 | SORT field, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false).eval("x", e -> e.i(1)).sort("field").scoreSort().limit(10); + assertPushdownSort(query, List.of(EvalExec.class, EsQueryExec.class)); + 
assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortFieldWithAlias() { // FROM index | EVAL x = field | SORT field | LIMIT 10 var query = from("index").eval("x", b -> b.field("field")).sort("field").limit(10); @@ -98,6 +120,21 @@ public void testSimpleSortMultipleFieldsWithAliases() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortMultipleFieldsWithAliasesAndScore() { + // FROM index | EVAL x = field, y = integer, z = double | SORT field, integer, double, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("x", b -> b.field("field")) + .eval("y", b -> b.field("integer")) + .eval("z", b -> b.field("double")) + .sort("field") + .sort("integer") + .sort("double") + .scoreSort() + .limit(10); + assertPushdownSort(query, List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortFieldAsAlias() { // FROM index | EVAL x = field | SORT x | LIMIT 10 var query = from("index").eval("x", b -> b.field("field")).sort("x").limit(10); @@ -105,6 +142,13 @@ public void testSimpleSortFieldAsAlias() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortFieldAsAliasAndScore() { + // FROM index METADATA _score | EVAL x = field | SORT x, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false).eval("x", b -> b.field("field")).sort("x").scoreSort().limit(10); + assertPushdownSort(query, Map.of("x", "field"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortFieldAndEvalSumLiterals() { // FROM index | EVAL sum = 1 + 2 | SORT field | LIMIT 10 var query = from("index").eval("sum", b -> b.add(b.i(1), b.i(2))).sort("field").limit(10); @@ -112,6 +156,17 @@ public void testSimpleSortFieldAndEvalSumLiterals() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortFieldAndEvalSumLiteralsAndScore() { + // FROM index METADATA _score | EVAL sum = 1 + 2 | SORT field, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("sum", b -> b.add(b.i(1), b.i(2))) + .sort("field") + .scoreSort() + .limit(10); + assertPushdownSort(query, List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortFieldAndEvalSumLiteralAndField() { // FROM index | EVAL sum = 1 + integer | SORT integer | LIMIT 10 var query = from("index").eval("sum", b -> b.add(b.i(1), b.field("integer"))).sort("integer").limit(10); @@ -119,6 +174,17 @@ public void testSimpleSortFieldAndEvalSumLiteralAndField() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSimpleSortFieldAndEvalSumLiteralAndFieldAndScore() { + // FROM index METADATA _score | EVAL sum = 1 + integer | SORT integer, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("sum", b -> b.add(b.i(1), b.field("integer"))) + .sort("integer") + .scoreSort() + .limit(10); + assertPushdownSort(query, List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSimpleSortEvalSumLiteralAndField() { // FROM index | EVAL sum = 1 + integer | SORT sum | LIMIT 10 var query = 
from("index").eval("sum", b -> b.add(b.i(1), b.field("integer"))).sort("sum").limit(10); @@ -144,6 +210,14 @@ public void testSortGeoPointField() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoPointFieldAnsScore() { + // FROM index METADATA _score | SORT location, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false).sort("location", Order.OrderDirection.ASC).scoreSort().limit(10); + // NOTE: while geo_point is not sortable, this is checked during logical planning and the physical planner does not know or care + assertPushdownSort(query); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunction() { // FROM index | EVAL distance = ST_DISTANCE(location, POINT(1 2)) | SORT distance | LIMIT 10 var query = from("index").eval("distance", b -> b.distance("location", "POINT(1 2)")) @@ -154,6 +228,18 @@ public void testSortGeoDistanceFunction() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionAndScore() { + // FROM index METADATA _score | EVAL distance = ST_DISTANCE(location, POINT(1 2)) | SORT distance, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("distance", b -> b.distance("location", "POINT(1 2)")) + .sort("distance", Order.OrderDirection.ASC) + .scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertPushdownSort(query, Map.of("distance", "location"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunctionInverted() { // FROM index | EVAL distance = ST_DISTANCE(POINT(1 2), location) | SORT distance | LIMIT 10 var query = from("index").eval("distance", b -> b.distance("POINT(1 2)", "location")) @@ -164,6 +250,18 @@ public void testSortGeoDistanceFunctionInverted() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionInvertedAndScore() { + // FROM index METADATA _score | EVAL distance = ST_DISTANCE(POINT(1 2), location) | SORT distance, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("distance", b -> b.distance("POINT(1 2)", "location")) + .sort("distance", Order.OrderDirection.ASC) + .scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertPushdownSort(query, Map.of("distance", "location"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunctionLiterals() { // FROM index | EVAL distance = ST_DISTANCE(POINT(2 1), POINT(1 2)) | SORT distance | LIMIT 10 var query = from("index").eval("distance", b -> b.distance("POINT(2 1)", "POINT(1 2)")) @@ -174,6 +272,18 @@ public void testSortGeoDistanceFunctionLiterals() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionLiteralsAndScore() { + // FROM index METADATA _score | EVAL distance = ST_DISTANCE(POINT(2 1), POINT(1 2)) | SORT distance, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("distance", b -> b.distance("POINT(2 1)", "POINT(1 2)")) + .sort("distance", Order.OrderDirection.ASC) + 
.scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertNoPushdownSort(query, "sort on foldable distance function"); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunctionAndFieldsWithAliases() { // FROM index | EVAL distance = ST_DISTANCE(location, POINT(1 2)), x = field | SORT distance, field, integer | LIMIT 10 var query = from("index").eval("distance", b -> b.distance("location", "POINT(1 2)")) @@ -187,6 +297,21 @@ public void testSortGeoDistanceFunctionAndFieldsWithAliases() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionAndFieldsWithAliasesAndScore() { + // FROM index | EVAL distance = ST_DISTANCE(location, POINT(1 2)), x = field | SORT distance, field, integer, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("distance", b -> b.distance("location", "POINT(1 2)")) + .eval("x", b -> b.field("field")) + .sort("distance", Order.OrderDirection.ASC) + .sort("field", Order.OrderDirection.DESC) + .sort("integer", Order.OrderDirection.DESC) + .scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertPushdownSort(query, query.orders, Map.of("distance", "location"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunctionAndFieldsAndAliases() { // FROM index | EVAL distance = ST_DISTANCE(location, POINT(1 2)), x = field | SORT distance, x, integer | LIMIT 10 var query = from("index").eval("distance", b -> b.distance("location", "POINT(1 2)")) @@ -200,6 +325,21 @@ public void testSortGeoDistanceFunctionAndFieldsAndAliases() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionAndFieldsAndAliasesAndScore() { + // FROM index | EVAL distance = ST_DISTANCE(location, POINT(1 2)), x = field | SORT distance, x, integer, _score | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("distance", b -> b.distance("location", "POINT(1 2)")) + .eval("x", b -> b.field("field")) + .sort("distance", Order.OrderDirection.ASC) + .sort("x", Order.OrderDirection.DESC) + .sort("integer", Order.OrderDirection.DESC) + .scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertPushdownSort(query, query.orders, Map.of("distance", "location", "x", "field"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + public void testSortGeoDistanceFunctionAndFieldsAndManyAliases() { // FROM index // | EVAL loc = location, loc2 = loc, loc3 = loc2, distance = ST_DISTANCE(loc3, POINT(1 2)), x = field @@ -219,6 +359,27 @@ public void testSortGeoDistanceFunctionAndFieldsAndManyAliases() { assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); } + public void testSortGeoDistanceFunctionAndFieldsAndManyAliasesAndScore() { + // FROM index METADATA _score + // | EVAL loc = location, loc2 = loc, loc3 = loc2, distance = ST_DISTANCE(loc3, POINT(1 2)), x = field + // | SORT distance, x, integer, _score + // | LIMIT 10 + var query = from("index").metadata("_score", DOUBLE, false) + .eval("loc", b -> b.field("location")) + 
.eval("loc2", b -> b.ref("loc")) + .eval("loc3", b -> b.ref("loc2")) + .eval("distance", b -> b.distance("loc3", "POINT(1 2)")) + .eval("x", b -> b.field("field")) + .sort("distance", Order.OrderDirection.ASC) + .sort("x", Order.OrderDirection.DESC) + .sort("integer", Order.OrderDirection.DESC) + .scoreSort() + .limit(10); + // The pushed-down sort will use the underlying field 'location', not the sorted reference field 'distance' + assertPushdownSort(query, Map.of("distance", "location", "x", "field"), List.of(EvalExec.class, EsQueryExec.class)); + assertNoPushdownSort(query.asTimeSeries(), "for time series index mode"); + } + private static void assertPushdownSort(TestPhysicalPlanBuilder builder) { assertPushdownSort(builder, null, List.of(EsQueryExec.class)); } @@ -289,9 +450,12 @@ private static void assertPushdownSort( assertThat("Expect sorts count to match", sorts.size(), is(expectedSorts.size())); for (int i = 0; i < expectedSorts.size(); i++) { String name = ((Attribute) expectedSorts.get(i).child()).name(); - String fieldName = sorts.get(i).field().fieldName(); - assertThat("Expect sort[" + i + "] name to match", fieldName, is(sortName(name, fieldMap))); - assertThat("Expect sort[" + i + "] direction to match", sorts.get(i).direction(), is(expectedSorts.get(i).direction())); + EsQueryExec.Sort sort = sorts.get(i); + if (sort.field() != null) { + String fieldName = sort.field().fieldName(); + assertThat("Expect sort[" + i + "] name to match", fieldName, is(sortName(name, fieldMap))); + } + assertThat("Expect sort[" + i + "] direction to match", sort.direction(), is(expectedSorts.get(i).direction())); } } @@ -317,6 +481,7 @@ static class TestPhysicalPlanBuilder { private final String index; private final LinkedHashMap fields; private final LinkedHashMap refs; + private final LinkedHashMap metadata; private IndexMode indexMode; private final List aliases = new ArrayList<>(); private final List orders = new ArrayList<>(); @@ -327,6 +492,7 @@ private TestPhysicalPlanBuilder(String index, IndexMode indexMode) { this.indexMode = indexMode; this.fields = new LinkedHashMap<>(); this.refs = new LinkedHashMap<>(); + this.metadata = new LinkedHashMap<>(); addSortableFieldAttributes(this.fields); } @@ -346,6 +512,11 @@ static TestPhysicalPlanBuilder from(String index) { return new TestPhysicalPlanBuilder(index, IndexMode.STANDARD); } + TestPhysicalPlanBuilder metadata(String metadataAttribute, DataType dataType, boolean searchable) { + metadata.put(metadataAttribute, new MetadataAttribute(Source.EMPTY, metadataAttribute, dataType, searchable)); + return this; + } + public TestPhysicalPlanBuilder eval(Alias... 
aliases) { if (orders.isEmpty() == false) { throw new IllegalArgumentException("Eval must be before sort"); } @@ -376,6 +547,22 @@ public TestPhysicalPlanBuilder sort(String field) { return sort(field, Order.OrderDirection.ASC); } + public TestPhysicalPlanBuilder scoreSort(Order.OrderDirection direction) { + orders.add( + new Order( + Source.EMPTY, + MetadataAttribute.create(Source.EMPTY, MetadataAttribute.SCORE), + direction, + Order.NullsPosition.LAST + ) + ); + return this; + } + + public TestPhysicalPlanBuilder scoreSort() { + return scoreSort(Order.OrderDirection.DESC); + } + public TestPhysicalPlanBuilder sort(String field, Order.OrderDirection direction) { Attribute attr = refs.get(field); if (attr == null) { From 6b94a91633fc846fe02ac8cf3173d613af27bc01 Mon Sep 17 00:00:00 2001 From: Iván Cea Fontenla Date: Thu, 28 Nov 2024 16:07:07 +0100 Subject: [PATCH 061/139] ESQL: Add nulls support to Categorize (#117655) Handle nulls and empty strings (which resolve to null) in the Categorize grouping function. Also, implement `seenGroupIds()`; without it, some queries involving nulls would fail. --- docs/changelog/117655.yaml | 5 + .../AbstractCategorizeBlockHash.java | 37 +++++- .../blockhash/CategorizeRawBlockHash.java | 12 +- .../CategorizedIntermediateBlockHash.java | 19 ++- .../blockhash/CategorizeBlockHashTests.java | 72 +++++++---- .../src/main/resources/categorize.csv-spec | 122 ++++++++++-------- .../xpack/esql/action/EsqlCapabilities.java | 5 +- .../xpack/esql/analysis/VerifierTests.java | 6 +- .../optimizer/LogicalPlanOptimizerTests.java | 4 +- .../categorization/TokenListCategorizer.java | 2 + 10 files changed, 186 insertions(+), 98 deletions(-) create mode 100644 docs/changelog/117655.yaml diff --git a/docs/changelog/117655.yaml b/docs/changelog/117655.yaml new file mode 100644 index 0000000000000..f2afd3570f104 --- /dev/null +++ b/docs/changelog/117655.yaml @@ -0,0 +1,5 @@ +pr: 117655 +summary: Add nulls support to Categorize +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java index 22d3a10facb06..0e89d77820883 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java @@ -13,8 +13,10 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.BitArray; import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.compute.aggregation.SeenGroupIds; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.IntBlock; import org.elasticsearch.compute.data.IntVector; @@ -31,11 +33,21 @@ * Base BlockHash implementation for {@code Categorize} grouping function. 
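+ * Ordinal 0 ({@code NULL_ORD}) is reserved for the null category; ids coming from the categorizer are shifted up by one accordingly.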
*/ public abstract class AbstractCategorizeBlockHash extends BlockHash { + protected static final int NULL_ORD = 0; + // TODO: this should probably also take an emitBatchSize private final int channel; private final boolean outputPartial; protected final TokenListCategorizer.CloseableTokenListCategorizer categorizer; + /** + * Store whether we've seen any {@code null} values. + *
<p>
+ * Null gets the {@link #NULL_ORD} ord. + *
</p>
+ */ + protected boolean seenNull = false; + AbstractCategorizeBlockHash(BlockFactory blockFactory, int channel, boolean outputPartial) { super(blockFactory); this.channel = channel; @@ -58,12 +70,12 @@ public Block[] getKeys() { @Override public IntVector nonEmpty() { - return IntVector.range(0, categorizer.getCategoryCount(), blockFactory); + return IntVector.range(seenNull ? 0 : 1, categorizer.getCategoryCount() + 1, blockFactory); } @Override public BitArray seenGroupIds(BigArrays bigArrays) { - throw new UnsupportedOperationException(); + return new SeenGroupIds.Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays); } @Override @@ -76,24 +88,39 @@ public final ReleasableIterator lookup(Page page, ByteSizeValue target */ private Block buildIntermediateBlock() { if (categorizer.getCategoryCount() == 0) { - return blockFactory.newConstantNullBlock(0); + return blockFactory.newConstantNullBlock(seenNull ? 1 : 0); } try (BytesStreamOutput out = new BytesStreamOutput()) { // TODO be more careful here. + out.writeBoolean(seenNull); out.writeVInt(categorizer.getCategoryCount()); for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { category.writeTo(out); } // We're returning a block with N positions just because the Page must have all blocks with the same position count! - return blockFactory.newConstantBytesRefBlockWith(out.bytes().toBytesRef(), categorizer.getCategoryCount()); + int positionCount = categorizer.getCategoryCount() + (seenNull ? 1 : 0); + return blockFactory.newConstantBytesRefBlockWith(out.bytes().toBytesRef(), positionCount); } catch (IOException e) { throw new RuntimeException(e); } } private Block buildFinalBlock() { + BytesRefBuilder scratch = new BytesRefBuilder(); + + if (seenNull) { + try (BytesRefBlock.Builder result = blockFactory.newBytesRefBlockBuilder(categorizer.getCategoryCount())) { + result.appendNull(); + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + scratch.copyChars(category.getRegex()); + result.appendBytesRef(scratch.get()); + scratch.clear(); + } + return result.build(); + } + } + try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) { - BytesRefBuilder scratch = new BytesRefBuilder(); for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { scratch.copyChars(category.getRegex()); result.appendBytesRef(scratch.get()); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java index bf633e0454384..0d0a2fef2f82b 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java @@ -64,7 +64,7 @@ public void close() { /** * Similar implementation to an Evaluator. 
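+ * Null inputs (and empty strings, which analyze to null) map to the reserved {@code NULL_ORD} ordinal and set {@code seenNull}.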
*/ - public static final class CategorizeEvaluator implements Releasable { + public final class CategorizeEvaluator implements Releasable { private final CategorizationAnalyzer analyzer; private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; @@ -95,7 +95,8 @@ public IntBlock eval(int positionCount, BytesRefBlock vBlock) { BytesRef vScratch = new BytesRef(); for (int p = 0; p < positionCount; p++) { if (vBlock.isNull(p)) { - result.appendNull(); + seenNull = true; + result.appendInt(NULL_ORD); continue; } int first = vBlock.getFirstValueIndex(p); @@ -126,7 +127,12 @@ public IntVector eval(int positionCount, BytesRefVector vVector) { } private int process(BytesRef v) { - return categorizer.computeCategory(v.utf8ToString(), analyzer).getId(); + var category = categorizer.computeCategory(v.utf8ToString(), analyzer); + if (category == null) { + seenNull = true; + return NULL_ORD; + } + return category.getId() + 1; } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java index 1bca34a70e5fa..c774d3b26049d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java @@ -40,9 +40,19 @@ public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { return; } BytesRefBlock categorizerState = page.getBlock(channel()); + if (categorizerState.areAllValuesNull()) { + seenNull = true; + try (var newIds = blockFactory.newConstantIntVector(NULL_ORD, 1)) { + addInput.add(0, newIds); + } + return; + } + Map idMap = readIntermediate(categorizerState.getBytesRef(0, new BytesRef())); try (IntBlock.Builder newIdsBuilder = blockFactory.newIntBlockBuilder(idMap.size())) { - for (int i = 0; i < idMap.size(); i++) { + int fromId = idMap.containsKey(0) ? 
0 : 1; + int toId = fromId + idMap.size(); + for (int i = fromId; i < toId; i++) { newIdsBuilder.appendInt(idMap.get(i)); } try (IntBlock newIds = newIdsBuilder.build()) { @@ -59,10 +69,15 @@ public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { private Map readIntermediate(BytesRef bytes) { Map idMap = new HashMap<>(); try (StreamInput in = new BytesArray(bytes).streamInput()) { + if (in.readBoolean()) { + seenNull = true; + idMap.put(NULL_ORD, NULL_ORD); + } int count = in.readVInt(); for (int oldCategoryId = 0; oldCategoryId < count; oldCategoryId++) { int newCategoryId = categorizer.mergeWireCategory(new SerializableTokenListCategory(in)).getId(); - idMap.put(oldCategoryId, newCategoryId); + // +1 because the 0 ordinal is reserved for null + idMap.put(oldCategoryId + 1, newCategoryId + 1); } return idMap; } catch (IOException e) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java index de8a2a44266fe..dd7a87dc4a574 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java @@ -52,7 +52,8 @@ public class CategorizeBlockHashTests extends BlockHashTestCase { public void testCategorizeRaw() { final Page page; - final int positions = 7; + boolean withNull = randomBoolean(); + final int positions = 7 + (withNull ? 1 : 0); try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(positions)) { builder.appendBytesRef(new BytesRef("Connected to 10.1.0.1")); builder.appendBytesRef(new BytesRef("Connection error")); @@ -61,6 +62,13 @@ public void testCategorizeRaw() { builder.appendBytesRef(new BytesRef("Disconnected")); builder.appendBytesRef(new BytesRef("Connected to 10.1.0.2")); builder.appendBytesRef(new BytesRef("Connected to 10.1.0.3")); + if (withNull) { + if (randomBoolean()) { + builder.appendNull(); + } else { + builder.appendBytesRef(new BytesRef("")); + } + } page = new Page(builder.build()); } @@ -70,13 +78,16 @@ public void testCategorizeRaw() { public void add(int positionOffset, IntBlock groupIds) { assertEquals(groupIds.getPositionCount(), positions); - assertEquals(0, groupIds.getInt(0)); - assertEquals(1, groupIds.getInt(1)); - assertEquals(1, groupIds.getInt(2)); - assertEquals(1, groupIds.getInt(3)); - assertEquals(2, groupIds.getInt(4)); - assertEquals(0, groupIds.getInt(5)); - assertEquals(0, groupIds.getInt(6)); + assertEquals(1, groupIds.getInt(0)); + assertEquals(2, groupIds.getInt(1)); + assertEquals(2, groupIds.getInt(2)); + assertEquals(2, groupIds.getInt(3)); + assertEquals(3, groupIds.getInt(4)); + assertEquals(1, groupIds.getInt(5)); + assertEquals(1, groupIds.getInt(6)); + if (withNull) { + assertEquals(0, groupIds.getInt(7)); + } } @Override @@ -100,7 +111,8 @@ public void close() { public void testCategorizeIntermediate() { Page page1; - int positions1 = 7; + boolean withNull = randomBoolean(); + int positions1 = 7 + (withNull ? 
1 : 0); try (BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(positions1)) { builder.appendBytesRef(new BytesRef("Connected to 10.1.0.1")); builder.appendBytesRef(new BytesRef("Connection error")); @@ -109,6 +121,13 @@ public void testCategorizeIntermediate() { builder.appendBytesRef(new BytesRef("Connection error")); builder.appendBytesRef(new BytesRef("Connected to 10.1.0.3")); builder.appendBytesRef(new BytesRef("Connected to 10.1.0.4")); + if (withNull) { + if (randomBoolean()) { + builder.appendNull(); + } else { + builder.appendBytesRef(new BytesRef("")); + } + } page1 = new Page(builder.build()); } Page page2; @@ -133,13 +152,16 @@ public void testCategorizeIntermediate() { @Override public void add(int positionOffset, IntBlock groupIds) { assertEquals(groupIds.getPositionCount(), positions1); - assertEquals(0, groupIds.getInt(0)); - assertEquals(1, groupIds.getInt(1)); - assertEquals(1, groupIds.getInt(2)); - assertEquals(0, groupIds.getInt(3)); - assertEquals(1, groupIds.getInt(4)); - assertEquals(0, groupIds.getInt(5)); - assertEquals(0, groupIds.getInt(6)); + assertEquals(1, groupIds.getInt(0)); + assertEquals(2, groupIds.getInt(1)); + assertEquals(2, groupIds.getInt(2)); + assertEquals(1, groupIds.getInt(3)); + assertEquals(2, groupIds.getInt(4)); + assertEquals(1, groupIds.getInt(5)); + assertEquals(1, groupIds.getInt(6)); + if (withNull) { + assertEquals(0, groupIds.getInt(7)); + } } @Override @@ -158,11 +180,11 @@ public void close() { @Override public void add(int positionOffset, IntBlock groupIds) { assertEquals(groupIds.getPositionCount(), positions2); - assertEquals(0, groupIds.getInt(0)); - assertEquals(1, groupIds.getInt(1)); - assertEquals(0, groupIds.getInt(2)); - assertEquals(1, groupIds.getInt(3)); - assertEquals(2, groupIds.getInt(4)); + assertEquals(1, groupIds.getInt(0)); + assertEquals(2, groupIds.getInt(1)); + assertEquals(1, groupIds.getInt(2)); + assertEquals(2, groupIds.getInt(3)); + assertEquals(3, groupIds.getInt(4)); } @Override @@ -189,7 +211,11 @@ public void add(int positionOffset, IntBlock groupIds) { .map(groupIds::getInt) .boxed() .collect(Collectors.toSet()); - assertEquals(values, Set.of(0, 1)); + if (withNull) { + assertEquals(Set.of(0, 1, 2), values); + } else { + assertEquals(Set.of(1, 2), values); + } } @Override @@ -212,7 +238,7 @@ public void add(int positionOffset, IntBlock groupIds) { .collect(Collectors.toSet()); // The category IDs {0, 1, 2} should map to groups {0, 2, 3}, because // 0 matches an existing category (Connected to ...), and the others are new. 
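+ // With ordinal 0 now reserved for null, those category IDs surface here as groups {1, 3, 4}.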
- assertEquals(values, Set.of(0, 2, 3)); + assertEquals(Set.of(1, 3, 4), values); } @Override diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index 89d9026423204..547c430ed7518 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,5 +1,5 @@ standard aggs -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS count=COUNT(), @@ -17,7 +17,7 @@ count:long | sum:long | avg:double | count_distinct:long | category:keyw ; values aggs -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS values=MV_SORT(VALUES(message)), @@ -33,7 +33,7 @@ values:keyword | top ; mv -required_capability: categorize_v2 +required_capability: categorize_v3 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) @@ -48,7 +48,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; row mv -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) @@ -61,7 +61,7 @@ COUNT():long | VALUES(str):keyword | category:keyword ; with multiple indices -required_capability: categorize_v2 +required_capability: categorize_v3 required_capability: union_types FROM sample_data* @@ -76,7 +76,7 @@ COUNT():long | category:keyword ; mv with many values -required_capability: categorize_v2 +required_capability: categorize_v3 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) @@ -92,24 +92,37 @@ COUNT():long | category:keyword 10 | .*?Head.+?Human.+?Resources.*? ; -# Throws when calling AbstractCategorizeBlockHash.seenGroupIds() - Requires nulls support? -mv with many values-Ignore -required_capability: categorize_v2 +mv with many values and SUM +required_capability: categorize_v3 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) - | SORT category DESC + | SORT category | LIMIT 3 ; -SUM(languages):integer | category:keyword - 43 | .*?Accountant.*? - 46 | .*?Architect.*? - 35 | .*?Business.+?Analyst.*? +SUM(languages):long | category:keyword + 43 | .*?Accountant.*? + 46 | .*?Architect.*? + 35 | .*?Business.+?Analyst.*? +; + +mv with many values and nulls and SUM +required_capability: categorize_v3 + +FROM employees + | STATS SUM(languages) BY category=CATEGORIZE(job_positions) + | SORT category DESC + | LIMIT 2 +; + +SUM(languages):long | category:keyword + 27 | null + 46 | .*?Tech.+?Lead.*? 
; mv via eval -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") @@ -125,7 +138,7 @@ COUNT():long | category:keyword ; mv via eval const -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -139,7 +152,7 @@ COUNT():long | category:keyword ; mv via eval const without aliases -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -153,7 +166,7 @@ COUNT():long | CATEGORIZE(message):keyword ; mv const in parameter -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -166,7 +179,7 @@ COUNT():long | c:keyword ; agg alias shadowing -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -181,7 +194,7 @@ c:keyword ; chained aggregations using categorize -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -196,7 +209,7 @@ COUNT():long | category:keyword ; stats without aggs -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS BY category=CATEGORIZE(message) @@ -210,7 +223,7 @@ category:keyword ; text field -required_capability: categorize_v2 +required_capability: categorize_v3 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) @@ -221,10 +234,11 @@ COUNT():long | category:keyword 2 | .*?DB.+?servers.*? 2 | .*?Gateway.+?instances.*? 5 | .*?Kubernetes.+?cluster.*? + 1 | null ; on TO_UPPER -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) @@ -238,7 +252,7 @@ COUNT():long | category:keyword ; on CONCAT -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) @@ -252,7 +266,7 @@ COUNT():long | category:keyword ; on CONCAT with unicode -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) @@ -266,7 +280,7 @@ COUNT():long | category:keyword ; on REVERSE(CONCAT()) -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) @@ -280,7 +294,7 @@ COUNT():long | category:keyword ; and then TO_LOWER -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -294,9 +308,8 @@ COUNT():long | category:keyword 1 | .*?disconnected.*? ; -# Throws NPE - Requires nulls support -on const empty string-Ignore -required_capability: categorize_v2 +on const empty string +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") @@ -304,12 +317,11 @@ FROM sample_data ; COUNT():long | category:keyword - 7 | .*?.*? + 7 | null ; -# Throws NPE - Requires nulls support -on const empty string from eval-Ignore -required_capability: categorize_v2 +on const empty string from eval +required_capability: categorize_v3 FROM sample_data | EVAL x = "" @@ -318,26 +330,24 @@ FROM sample_data ; COUNT():long | category:keyword - 7 | .*?.*? 
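For context on the ordinal bookkeeping used throughout this patch, here is a minimal sketch (illustrative only, not part of the change) of the id remapping performed in CategorizedIntermediateBlockHash#readIntermediate; `WireIn` and `mergeCategory` are hypothetical stand-ins for StreamInput and `categorizer.mergeWireCategory(...).getId()`:

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Hypothetical reader standing in for StreamInput plus the categorizer merge step.
interface WireIn {
    boolean readBoolean() throws IOException; // seenNull flag written by the partial hash
    int readVInt() throws IOException;        // number of serialized categories
    int mergeCategory() throws IOException;   // merges one wire category, returns its local 0-based id
}

// Remaps serialized category ids to local group ids, reserving ordinal 0 for null.
static Map<Integer, Integer> remapIds(WireIn in) throws IOException {
    Map<Integer, Integer> idMap = new HashMap<>();
    if (in.readBoolean()) {
        idMap.put(0, 0); // null keeps the reserved ordinal 0
    }
    int count = in.readVInt();
    for (int oldId = 0; oldId < count; oldId++) {
        idMap.put(oldId + 1, in.mergeCategory() + 1); // +1 on both sides: ordinal 0 is reserved for null
    }
    return idMap;
}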
+ 7 | null ; -# Doesn't give the correct results - Requires nulls support -on null-Ignore -required_capability: categorize_v2 +on null +required_capability: categorize_v3 FROM sample_data | EVAL x = null - | STATS COUNT() BY category=CATEGORIZE(x) + | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(x) | SORT category ; -COUNT():long | category:keyword - 7 | null +COUNT():long | SUM(event_duration):long | category:keyword + 7 | 23231327 | null ; -# Doesn't give the correct results - Requires nulls support -on null string-Ignore -required_capability: categorize_v2 +on null string +required_capability: categorize_v3 FROM sample_data | EVAL x = null::string @@ -350,7 +360,7 @@ COUNT():long | category:keyword ; filtering out all data -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" @@ -362,7 +372,7 @@ COUNT():long | category:keyword ; filtering out all data with constant -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -373,7 +383,7 @@ COUNT():long | category:keyword ; drop output columns -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) @@ -388,7 +398,7 @@ x:integer ; category value processing -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -402,7 +412,7 @@ COUNT():long | category:keyword ; row aliases -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = "connected to a" | EVAL x = message @@ -416,7 +426,7 @@ COUNT():long | category:keyword | y:keyword ; from aliases -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | EVAL x = message @@ -432,7 +442,7 @@ COUNT():long | category:keyword | y:keyword ; row aliases with keep -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = "connected to a" | EVAL x = message @@ -448,7 +458,7 @@ COUNT():long | y:keyword ; from aliases with keep -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | EVAL x = message @@ -466,7 +476,7 @@ COUNT():long | y:keyword ; row rename -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = "connected to a" | RENAME message as x @@ -480,7 +490,7 @@ COUNT():long | y:keyword ; from rename -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | RENAME message as x @@ -496,7 +506,7 @@ COUNT():long | y:keyword ; row drop -required_capability: categorize_v2 +required_capability: categorize_v3 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -509,7 +519,7 @@ c:long ; from drop -required_capability: categorize_v2 +required_capability: categorize_v3 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 9bd4211855699..77a3e2840977f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -402,11 +402,8 @@ public enum Cap { /** 
 * Supported the text categorization function "CATEGORIZE".
- * <p>
- * This capability was initially named `CATEGORIZE`, and got renamed after the function started correctly returning keywords.
- * </p>
*/ - CATEGORIZE_V2(Build.current().isSnapshot()), + CATEGORIZE_V3(Build.current().isSnapshot()), /** * QSTR function diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 6074601535477..dd14e8dd82123 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1846,7 +1846,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1875,7 +1875,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1890,7 +1890,7 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 8373528531902..e98f2b88b33c9 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1212,7 +1212,7 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] */ public void testCombineProjectionWithCategorizeGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); var plan = plan(""" from test @@ -3949,7 +3949,7 @@ public void testNestedExpressionsInGroups() { * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
*/ public void testNestedExpressionsInGroupsWithCategorize() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V2.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); var plan = optimizedPlan(""" from test diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java index e4257270ce641..7fef6cdafa372 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/aggs/categorization/TokenListCategorizer.java @@ -115,6 +115,7 @@ public TokenListCategorizer( cacheRamUsage(0); } + @Nullable public TokenListCategory computeCategory(String s, CategorizationAnalyzer analyzer) { try (TokenStream ts = analyzer.tokenStream("text", s)) { return computeCategory(ts, s.length(), 1); @@ -123,6 +124,7 @@ public TokenListCategory computeCategory(String s, CategorizationAnalyzer analyz } } + @Nullable public TokenListCategory computeCategory(TokenStream ts, int unfilteredStringLen, long numDocs) throws IOException { assert partOfSpeechDictionary != null : "This version of computeCategory should only be used when a part-of-speech dictionary is available"; From 3c70cd081d40c36a5ac375b009932a0ce5eff1bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mariusz=20J=C3=B3zala?= <377355+jozala@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:20:05 +0100 Subject: [PATCH 062/139] Revert "[CI] Ignore error about missing UBI artifact (#117506)" (#117704) This reverts commit 219372efaaf46a3b496df2142d3091d3434e67ec. This ignore is no longer necessary since the change to release-manager has been applied. --- .buildkite/scripts/dra-workflow.sh | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/.buildkite/scripts/dra-workflow.sh b/.buildkite/scripts/dra-workflow.sh index bbfa81f51b286..f2dc40ca1927f 100755 --- a/.buildkite/scripts/dra-workflow.sh +++ b/.buildkite/scripts/dra-workflow.sh @@ -75,7 +75,6 @@ find "$WORKSPACE" -type d -path "*/build/distributions" -exec chmod a+w {} \; echo --- Running release-manager -set +e # Artifacts should be generated docker run --rm \ --name release-manager \ @@ -92,16 +91,4 @@ docker run --rm \ --version "$ES_VERSION" \ --artifact-set main \ --dependency "beats:https://artifacts-${WORKFLOW}.elastic.co/beats/${BEATS_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json" \ - --dependency "ml-cpp:https://artifacts-${WORKFLOW}.elastic.co/ml-cpp/${ML_CPP_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json" \ -2>&1 | tee release-manager.log -EXIT_CODE=$? 
-set -e - -# This failure is just generating a ton of noise right now, so let's just ignore it -# This should be removed once this issue has been fixed -if grep "elasticsearch-ubi-9.0.0-SNAPSHOT-docker-image.tar.gz" release-manager.log; then - echo "Ignoring error about missing ubi artifact" - exit 0 -fi - -exit "$EXIT_CODE" + --dependency "ml-cpp:https://artifacts-${WORKFLOW}.elastic.co/ml-cpp/${ML_CPP_BUILD_ID}/manifest-${ES_VERSION}${VERSION_SUFFIX}.json" From 54db9470207df11f07475a6e8d4837b29515a4d7 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Thu, 28 Nov 2024 07:33:35 -0800 Subject: [PATCH 063/139] Fix scaled_float test (#117662) --- .../index/mapper/extras/ScaledFloatFieldMapperTests.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapperTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapperTests.java index dc9bc96f107a0..83fe07170d6e7 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapperTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapperTests.java @@ -527,7 +527,13 @@ protected Number randomNumber() { public void testEncodeDecodeExactScalingFactor() { double v = randomValue(); - assertThat(encodeDecode(1 / v, v), equalTo(1 / v)); + double expected = 1 / v; + // We don't produce infinities while decoding. See #testDecodeHandlingInfinity(). + if (Double.isInfinite(expected)) { + var sign = expected == Double.POSITIVE_INFINITY ? 1 : -1; + expected = sign * Double.MAX_VALUE; + } + assertThat(encodeDecode(1 / v, v), equalTo(expected)); } /** From ab604ada78d779a18b82465d51829006540ce546 Mon Sep 17 00:00:00 2001 From: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Date: Thu, 28 Nov 2024 16:34:57 +0100 Subject: [PATCH 064/139] [DOCS] Update tutorial example (#117538) --- .../full-text-filtering-tutorial.asciidoc | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc b/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc index fee4b797da724..a024305588cae 100644 --- a/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc +++ b/docs/reference/quickstart/full-text-filtering-tutorial.asciidoc @@ -511,8 +511,9 @@ In this tutorial scenario it's useful for when users have complex requirements f Let's create a query that addresses the following user needs: -* Must be a vegetarian main course +* Must be a vegetarian recipe * Should contain "curry" or "spicy" in the title or description +* Should be a main course * Must not be a dessert * Must have a rating of at least 4.5 * Should prefer recipes published in the last month @@ -524,16 +525,7 @@ GET /cooking_blog/_search "query": { "bool": { "must": [ - { - "term": { - "category.keyword": "Main Course" - } - }, - { - "term": { - "tags": "vegetarian" - } - }, + { "term": { "tags": "vegetarian" } }, { "range": { "rating": { @@ -543,10 +535,18 @@ GET /cooking_blog/_search } ], "should": [ + { + "term": { + "category": "Main Course" + } + }, { "multi_match": { "query": "curry spicy", - "fields": ["title^2", "description"] + "fields": [ + "title^2", + "description" + ] } }, { @@ -590,12 +590,12 @@ GET /cooking_blog/_search "value": 1, "relation": "eq" }, - "max_score": 7.9835095, + "max_score": 7.444513, "hits": [ { "_index": 
"cooking_blog", "_id": "2", - "_score": 7.9835095, + "_score": 7.444513, "_source": { "title": "Spicy Thai Green Curry: A Vegetarian Adventure", <1> "description": "Dive into the flavors of Thailand with this vibrant green curry. Packed with vegetables and aromatic herbs, this dish is both healthy and satisfying. Don't worry about the heat - you can easily adjust the spice level to your liking.", <2> @@ -619,8 +619,8 @@ GET /cooking_blog/_search <1> The title contains "Spicy" and "Curry", matching our should condition. With the default <> behavior, this field contributes most to the relevance score. <2> While the description also contains matching terms, only the best matching field's score is used by default. <3> The recipe was published within the last month, satisfying our recency preference. -<4> The "Main Course" category matches our `must` condition. -<5> The "vegetarian" tag satisfies another `must` condition, while "curry" and "spicy" tags align with our `should` preferences. +<4> The "Main Course" category satisfies another `should` condition. +<5> The "vegetarian" tag satisfies a `must` condition, while "curry" and "spicy" tags align with our `should` preferences. <6> The rating of 4.6 meets our minimum rating requirement of 4.5. ============== From f096c317c06052dc26c00b72448eda4743ab5965 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Thu, 28 Nov 2024 19:38:37 +0200 Subject: [PATCH 065/139] fix/SearchStatesIt_failures (#117618) Investigate and unmute automatically muted tests --- docs/changelog/117618.yaml | 5 +++++ muted-tests.yml | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) create mode 100644 docs/changelog/117618.yaml diff --git a/docs/changelog/117618.yaml b/docs/changelog/117618.yaml new file mode 100644 index 0000000000000..5de29e2fe768c --- /dev/null +++ b/docs/changelog/117618.yaml @@ -0,0 +1,5 @@ +pr: 117618 +summary: SearchStatesIt failures reported by CI +area: Search +type: bug +issues: [116617, 116618] diff --git a/muted-tests.yml b/muted-tests.yml index fdadc747289bb..d703cfaa1b9aa 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -156,12 +156,6 @@ tests: - class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsCanMatchOnCoordinatorIntegTests method: testSearchableSnapshotShardsAreSkippedBySearchRequestWithoutQueryingAnyNodeWhenTheyAreOutsideOfTheQueryRange issue: https://github.com/elastic/elasticsearch/issues/116523 -- class: org.elasticsearch.upgrades.SearchStatesIT - method: testBWCSearchStates - issue: https://github.com/elastic/elasticsearch/issues/116617 -- class: org.elasticsearch.upgrades.SearchStatesIT - method: testCanMatch - issue: https://github.com/elastic/elasticsearch/issues/116618 - class: org.elasticsearch.reservedstate.service.RepositoriesFileSettingsIT method: testSettingsApplied issue: https://github.com/elastic/elasticsearch/issues/116694 From 8350ff29ba18c7d03d652b107532415705426da9 Mon Sep 17 00:00:00 2001 From: John Verwolf Date: Thu, 28 Nov 2024 13:25:02 -0800 Subject: [PATCH 066/139] Extensible Completion Postings Formats (#111494) Allows the Completion Postings Format to be extensible by providing an implementation of the CompletionsPostingsFormatExtension SPIs. 
--- docs/changelog/111494.yaml | 5 ++++ server/src/main/java/module-info.java | 6 +++- .../index/codec/PerFieldFormatSupplier.java | 24 ++++++++++++++-- .../index/mapper/CompletionFieldMapper.java | 5 ---- .../index/mapper/MappingLookup.java | 17 ----------- .../CompletionsPostingsFormatExtension.java | 28 +++++++++++++++++++ 6 files changed, 59 insertions(+), 26 deletions(-) create mode 100644 docs/changelog/111494.yaml create mode 100644 server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java diff --git a/docs/changelog/111494.yaml b/docs/changelog/111494.yaml new file mode 100644 index 0000000000000..6c7b84bb04798 --- /dev/null +++ b/docs/changelog/111494.yaml @@ -0,0 +1,5 @@ +pr: 111494 +summary: Extensible Completion Postings Formats +area: "Suggesters" +type: enhancement +issues: [] diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 63dbac3a72487..d572d3b90fec8 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -7,6 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ +import org.elasticsearch.internal.CompletionsPostingsFormatExtension; import org.elasticsearch.plugins.internal.RestExtension; /** The Elasticsearch Server Module. */ @@ -288,7 +289,8 @@ to org.elasticsearch.serverless.version, org.elasticsearch.serverless.buildinfo, - org.elasticsearch.serverless.constants; + org.elasticsearch.serverless.constants, + org.elasticsearch.serverless.codec; exports org.elasticsearch.lucene.analysis.miscellaneous; exports org.elasticsearch.lucene.grouping; exports org.elasticsearch.lucene.queries; @@ -395,6 +397,7 @@ org.elasticsearch.stateless, org.elasticsearch.settings.secure, org.elasticsearch.serverless.constants, + org.elasticsearch.serverless.codec, org.elasticsearch.serverless.apifiltering, org.elasticsearch.internal.security; @@ -414,6 +417,7 @@ uses org.elasticsearch.node.internal.TerminationHandlerProvider; uses org.elasticsearch.internal.VersionExtension; uses org.elasticsearch.internal.BuildExtension; + uses CompletionsPostingsFormatExtension; uses org.elasticsearch.features.FeatureSpecification; uses org.elasticsearch.plugins.internal.LoggingDataProvider; diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 9c2a08a69002c..4d3d37ab4f3af 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -20,10 +20,15 @@ import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat; import org.elasticsearch.index.codec.postings.ES812PostingsFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormat; +import org.elasticsearch.index.mapper.CompletionFieldMapper; import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; +import org.elasticsearch.internal.CompletionsPostingsFormatExtension; +import org.elasticsearch.plugins.ExtensionLoader; + +import java.util.ServiceLoader; /** * Class that encapsulates the logic of figuring out the most appropriate file format for a given field, across postings, doc values and @@ -53,15 +58,28 @@ public PostingsFormat getPostingsFormatForField(String field) { private 
PostingsFormat internalGetPostingsFormatForField(String field) { if (mapperService != null) { - final PostingsFormat format = mapperService.mappingLookup().getPostingsFormat(field); - if (format != null) { - return format; + Mapper mapper = mapperService.mappingLookup().getMapper(field); + if (mapper instanceof CompletionFieldMapper) { + return PostingsFormatHolder.POSTINGS_FORMAT; } } // return our own posting format using PFOR return es812PostingsFormat; } + private static class PostingsFormatHolder { + private static final PostingsFormat POSTINGS_FORMAT = getPostingsFormat(); + + private static PostingsFormat getPostingsFormat() { + String defaultName = "Completion912"; // Caution: changing this name will result in exceptions if a field is created during a + // rolling upgrade and the new codec (specified by the name) is not available on all nodes in the cluster. + String codecName = ExtensionLoader.loadSingleton(ServiceLoader.load(CompletionsPostingsFormatExtension.class)) + .map(CompletionsPostingsFormatExtension::getFormatName) + .orElse(defaultName); + return PostingsFormat.forName(codecName); + } + } + boolean useBloomFilter(String field) { if (mapperService == null) { return false; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index 53ccccdbd4bab..bb229c795a83e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -8,7 +8,6 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; @@ -344,10 +343,6 @@ public CompletionFieldType fieldType() { return (CompletionFieldType) super.fieldType(); } - static PostingsFormat postingsFormat() { - return PostingsFormat.forName("Completion912"); - } - @Override public boolean parsesArrayValue() { return true; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 2f78e11761448..ce3f8cfb53184 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.codecs.PostingsFormat; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.cluster.metadata.InferenceFieldMetadata; import org.elasticsearch.index.IndexSettings; @@ -21,7 +20,6 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -58,7 +56,6 @@ private CacheKey() {} private final Map indexAnalyzersMap; private final List indexTimeScriptMappers; private final Mapping mapping; - private final Set completionFields; private final int totalFieldsCount; /** @@ -161,7 +158,6 @@ private MappingLookup( this.nestedLookup = NestedLookup.build(nestedMappers); final Map indexAnalyzersMap = new HashMap<>(); - final Set completionFields = new HashSet<>(); final List indexTimeScriptMappers = new ArrayList<>(); for (FieldMapper mapper : mappers) { if (objects.containsKey(mapper.fullPath())) { @@ -174,9 +170,6 @@ private MappingLookup( if (mapper.hasScript()) { 
indexTimeScriptMappers.add(mapper); } - if (mapper instanceof CompletionFieldMapper) { - completionFields.add(mapper.fullPath()); - } } for (FieldAliasMapper aliasMapper : aliasMappers) { @@ -211,7 +204,6 @@ private MappingLookup( this.objectMappers = Map.copyOf(objects); this.runtimeFieldMappersCount = runtimeFields.size(); this.indexAnalyzersMap = Map.copyOf(indexAnalyzersMap); - this.completionFields = Set.copyOf(completionFields); this.indexTimeScriptMappers = List.copyOf(indexTimeScriptMappers); runtimeFields.stream().flatMap(RuntimeField::asMappedFieldTypes).map(MappedFieldType::name).forEach(this::validateDoesNotShadow); @@ -285,15 +277,6 @@ public Iterable fieldMappers() { return fieldMappers.values(); } - /** - * Gets the postings format for a particular field - * @param field the field to retrieve a postings format for - * @return the postings format for the field, or {@code null} if the default format should be used - */ - public PostingsFormat getPostingsFormat(String field) { - return completionFields.contains(field) ? CompletionFieldMapper.postingsFormat() : null; - } - void checkLimits(IndexSettings settings) { checkFieldLimit(settings.getMappingTotalFieldsLimit()); checkObjectDepthLimit(settings.getMappingDepthLimit()); diff --git a/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java b/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java new file mode 100644 index 0000000000000..bb28d4dd6c901 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/internal/CompletionsPostingsFormatExtension.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.internal; + +import org.apache.lucene.search.suggest.document.CompletionPostingsFormat; + +/** + * Allows plugging-in the Completions Postings Format. + */ +public interface CompletionsPostingsFormatExtension { + + /** + * Returns the name of the {@link CompletionPostingsFormat} that Elasticsearch should use. Should return null if the extension + * is not enabled. + *
+ * Note that the name must match a codec that is available on all nodes in the cluster, otherwise IndexCorruptionExceptions will occur.
+ * A feature can be used to protect against this scenario, or alternatively, the codec code can be rolled out prior to its usage by this
+ * extension.
+ */
+ String getFormatName();
+}

From 2895f1e900b2f41704fd507845102a281cff437e Mon Sep 17 00:00:00 2001
From: Ed Savage
Date: Fri, 29 Nov 2024 11:37:45 +1300
Subject: [PATCH 067/139] [ML] Remove deprecated sort from reindex operation (#117606)

Sort in reindex is deprecated. This PR removes its use from within the
reindexing step of dataframe analytics.

Testing indicates that having the destination index sorted is a "nice to have"
and not necessary for the DFA functionality to succeed.

---
 docs/changelog/117606.yaml                                   | 5 +++++
 .../xpack/ml/dataframe/steps/ReindexingStep.java             | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 docs/changelog/117606.yaml

diff --git a/docs/changelog/117606.yaml b/docs/changelog/117606.yaml
new file mode 100644
index 0000000000000..ea61099a1a6b4
--- /dev/null
+++ b/docs/changelog/117606.yaml
@@ -0,0 +1,5 @@
+pr: 117606
+summary: Remove deprecated sort from reindex operation within dataframe analytics procedure
+area: Machine Learning
+type: enhancement
+issues: []

diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/ReindexingStep.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/ReindexingStep.java
index 0ccdd1eb64601..2a6d6eb329503 100644
--- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/ReindexingStep.java
+++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/dataframe/steps/ReindexingStep.java
@@ -27,13 +27,11 @@
 import org.elasticsearch.core.Nullable;
 import org.elasticsearch.core.TimeValue;
 import org.elasticsearch.index.IndexNotFoundException;
-import org.elasticsearch.index.mapper.SeqNoFieldMapper;
 import org.elasticsearch.index.reindex.BulkByScrollResponse;
 import org.elasticsearch.index.reindex.BulkByScrollTask;
 import org.elasticsearch.index.reindex.ReindexAction;
 import org.elasticsearch.index.reindex.ReindexRequest;
 import org.elasticsearch.script.Script;
-import org.elasticsearch.search.sort.SortOrder;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.tasks.TaskCancelledException;
 import org.elasticsearch.tasks.TaskId;
@@ -147,7 +145,6 @@ protected void doExecute(ActionListener listener) {
 reindexRequest.setSourceQuery(config.getSource().getParsedQuery());
 reindexRequest.getSearchRequest().allowPartialSearchResults(false);
 reindexRequest.getSearchRequest().source().fetchSource(config.getSource().getSourceFiltering());
-reindexRequest.getSearchRequest().source().sort(SeqNoFieldMapper.NAME, SortOrder.ASC);
 reindexRequest.setDestIndex(config.getDest().getIndex());

 // We explicitly set slices to 1 as we cannot parallelize in order to have the incremental id

From c35777a175f10a49ae860d28aa16b40d6f66c49a Mon Sep 17 00:00:00 2001
From: Rene Groeschke
Date: Fri, 29 Nov 2024 02:26:34 +0100
Subject: [PATCH 068/139] [Build] Declare mirror for eclipse p2 repository (#117732)

The Spotless plugin directly resolves dependencies from p2, which causes
`java.io.IOException: Failed to load eclipse jdt formatter` issues if that
repo is not accessible. This is a workaround for the eclipse p2 default
repository being down, which causes all our CI jobs to fail.
The artifacts in question, which we want to cache, live in `~/.m2/repository`.
---
 .../conventions/precommit/FormattingPrecommitPlugin.java | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java b/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java
index ea9009172c7e2..41c0b4d67e1df 100644
--- a/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java
+++ b/build-conventions/src/main/java/org/elasticsearch/gradle/internal/conventions/precommit/FormattingPrecommitPlugin.java
@@ -17,6 +17,8 @@
 import org.gradle.api.Project;

 import java.io.File;
+import java.util.Arrays;
+import java.util.Map;

 /**
  * This plugin configures formatting for Java source using Spotless
@@ -64,7 +66,8 @@ public void apply(Project project) {
 java.importOrderFile(new File(elasticsearchWorkspace, importOrderPath));

 // Most formatting is done through the Eclipse formatter
-java.eclipse().configFile(new File(elasticsearchWorkspace, formatterConfigPath));
+java.eclipse().withP2Mirrors(Map.of("https://download.eclipse.org/", "https://mirror.umd.edu/eclipse/"))
+    .configFile(new File(elasticsearchWorkspace, formatterConfigPath));

 // Ensure blank lines are actually empty. Since formatters are applied in
 // order, apply this one last, otherwise non-empty blank lines can creep

From e54c7cf5edd4ffd24725412015b5d3db1e7ce5a4 Mon Sep 17 00:00:00 2001
From: David Roberts
Date: Fri, 29 Nov 2024 02:19:48 +0000
Subject: [PATCH 069/139] [ML] Disable machine learning on macOS x86_64 (#104125)

As previously advised in #104087, machine learning functionality will no
longer be available on macOS x86_64. Machine learning functionality is still
available on macOS by using an arm64 machine (Apple silicon). It is also
possible to run Elasticsearch with machine learning functionality within a
Docker container on macOS x86_64.

This PR should be merged to main after the branch is split for the last minor
release scheduled for before December 2024. For example, suppose 8.17.0 is
scheduled for release in November 2024 and 8.18.0 is scheduled for release in
January 2025. Then this PR should be merged to main after the 8.17 branch is
split.

Once this PR is merged, a followup PR should be opened against the ml-cpp repo
to remove the build system for darwin-x86_64. It has been confirmed that with
this change in place the Elasticsearch build system works with an ml-cpp
bundle that does not contain a platform/darwin-x86_64 directory. It still
produces an Elasticsearch build that will run provided xpack.ml.enabled is not
explicitly set to true. After the build system for darwin-x86_64 has been
removed from the ml-cpp repo, we will be able to do another PyTorch upgrade
without having to worry about tweaking the build system to work on Intel
macOS.
--------- Co-authored-by: Ed Savage Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- docs/changelog/104125.yaml | 18 +++++++++++++++ .../xpack/core/XPackSettings.java | 22 +++++++++++++++++-- .../xpack/ml/MachineLearning.java | 11 ---------- 3 files changed, 38 insertions(+), 13 deletions(-) create mode 100644 docs/changelog/104125.yaml diff --git a/docs/changelog/104125.yaml b/docs/changelog/104125.yaml new file mode 100644 index 0000000000000..e5c5ea6a3f1cd --- /dev/null +++ b/docs/changelog/104125.yaml @@ -0,0 +1,18 @@ +pr: 104125 +summary: Disable machine learning on macOS x86_64 +area: Machine Learning +type: breaking +issues: [] +breaking: + title: Disable machine learning on macOS x86_64 + area: Packaging + details: The machine learning plugin is permanently disabled on macOS x86_64. + For the last three years Apple has been selling hardware based on the arm64 + architecture, and support will increasingly focus on this architecture in + the future. Changes to upstream dependencies of Elastic's machine learning + functionality have made it unviable for Elastic to continue to build machine + learning on macOS x86_64. + impact: To continue to use machine learning functionality on macOS please switch to + an arm64 machine (Apple silicon). Alternatively, it will still be possible to run + Elasticsearch with machine learning enabled in a Docker container on macOS x86_64. + notable: false diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java index 72e8805e96fc4..6aef618288fd2 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackSettings.java @@ -7,12 +7,16 @@ package org.elasticsearch.xpack.core; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.SettingsException; import org.elasticsearch.common.ssl.SslClientAuthenticationMode; import org.elasticsearch.common.ssl.SslVerificationMode; import org.elasticsearch.core.Strings; +import org.elasticsearch.plugins.Platforms; import org.elasticsearch.transport.RemoteClusterPortSettings; import org.elasticsearch.xpack.core.security.SecurityField; import org.elasticsearch.xpack.core.security.authc.support.Hasher; @@ -26,6 +30,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.function.Function; import javax.crypto.SecretKeyFactory; @@ -40,6 +45,8 @@ */ public class XPackSettings { + private static final Logger logger = LogManager.getLogger(XPackSettings.class); + private XPackSettings() { throw new IllegalStateException("Utility class should not be instantiated"); } @@ -76,10 +83,21 @@ public Iterator> settings() { /** Setting for enabling or disabling graph. Defaults to true. */ public static final Setting GRAPH_ENABLED = Setting.boolSetting("xpack.graph.enabled", true, Setting.Property.NodeScope); - /** Setting for enabling or disabling machine learning. Defaults to true. */ + public static final Set ML_NATIVE_CODE_PLATFORMS = Set.of("darwin-aarch64", "linux-aarch64", "linux-x86_64", "windows-x86_64"); + + /** Setting for enabling or disabling machine learning. 
Defaults to true on platforms that have the ML native code available. */ public static final Setting MACHINE_LEARNING_ENABLED = Setting.boolSetting( "xpack.ml.enabled", - true, + ML_NATIVE_CODE_PLATFORMS.contains(Platforms.PLATFORM_NAME), + enabled -> { + if (enabled && ML_NATIVE_CODE_PLATFORMS.contains(Platforms.PLATFORM_NAME) == false) { + SettingsException e = new SettingsException("xpack.ml.enabled cannot be set to [true] on [{}]", Platforms.PLATFORM_NAME); + // The exception doesn't get logged nicely on the console because it's thrown during initial plugin loading, + // so log separately here to make absolutely clear what happened + logger.fatal(e.getMessage()); + throw e; + } + }, Setting.Property.NodeScope ); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 1feb95661f33a..8363e0f5c19a1 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -32,7 +32,6 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; -import org.elasticsearch.common.logging.DeprecationCategory; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.IndexScopedSettings; @@ -69,7 +68,6 @@ import org.elasticsearch.plugins.ExtensiblePlugin; import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.PersistentTaskPlugin; -import org.elasticsearch.plugins.Platforms; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.plugins.SearchPlugin; import org.elasticsearch.plugins.ShutdownAwarePlugin; @@ -931,15 +929,6 @@ public Collection createComponents(PluginServices services) { return List.of(new JobManagerHolder(), new MachineLearningExtensionHolder()); } - if ("darwin-x86_64".equals(Platforms.PLATFORM_NAME)) { - String msg = "The machine learning plugin will be permanently disabled on macOS x86_64 in new minor versions released " - + "from December 2024 onwards. To continue to use machine learning functionality on macOS please switch to an arm64 " - + "machine (Apple silicon). 
Alternatively, it will still be possible to run Elasticsearch with machine learning " - + "enabled in a Docker container on macOS x86_64."; - logger.warn(msg); - deprecationLogger.warn(DeprecationCategory.PLUGINS, "ml-darwin-x86_64", msg); - } - machineLearningExtension.get().configure(environment.settings()); this.mlUpgradeModeActionFilter.set(new MlUpgradeModeActionFilter(clusterService)); From 56637285a8f2bacc88a12c7824b8b88d06752b07 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Fri, 29 Nov 2024 13:47:40 +1100 Subject: [PATCH 070/139] Implement CAS support in Azure test fixture (#117104) Closes ES-5680 --- .../azure/AzureBlobStoreRepositoryTests.java | 8 +- .../AzureStorageCleanupThirdPartyTests.java | 4 +- .../azure/AzureBlobContainer.java | 2 +- .../repositories/azure/AzureBlobStore.java | 26 +- .../azure/AzureBlobContainerStatsTests.java | 3 +- .../RepositoryAzureClientYamlTestSuiteIT.java | 4 +- .../test/repository_azure/20_repository.yml | 14 + .../java/fixture/azure/AzureHttpFixture.java | 15 +- .../java/fixture/azure/AzureHttpHandler.java | 333 ++++++++---- .../fixture/azure/MockAzureBlobStore.java | 484 ++++++++++++++++++ .../azure/AzureRepositoriesMeteringIT.java | 4 +- .../AzureSearchableSnapshotsIT.java | 4 +- .../AzureSnapshotBasedRecoveryIT.java | 4 +- .../AzureRepositoryAnalysisRestIT.java | 12 +- 14 files changed, 800 insertions(+), 117 deletions(-) create mode 100644 test/fixtures/azure-fixture/src/main/java/fixture/azure/MockAzureBlobStore.java diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java index bd21f208faac4..3fa4f7de7e717 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureBlobStoreRepositoryTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.azure; import fixture.azure.AzureHttpHandler; +import fixture.azure.MockAzureBlobStore; import com.azure.storage.common.policy.RequestRetryOptions; import com.azure.storage.common.policy.RetryPolicyType; @@ -184,7 +185,12 @@ long getUploadBlockSize() { @SuppressForbidden(reason = "this test uses a HttpHandler to emulate an Azure endpoint") private static class AzureBlobStoreHttpHandler extends AzureHttpHandler implements BlobStoreHttpHandler { AzureBlobStoreHttpHandler(final String account, final String container) { - super(account, container, null /* no auth header validation - sometimes it's omitted in these tests (TODO why?) */); + super( + account, + container, + null /* no auth header validation - sometimes it's omitted in these tests (TODO why?) 
*/, + MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE + ); } } diff --git a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java index 6d5c17c392141..40be0f8ca78c4 100644 --- a/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java +++ b/modules/repository-azure/src/internalClusterTest/java/org/elasticsearch/repositories/azure/AzureStorageCleanupThirdPartyTests.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.azure; import fixture.azure.AzureHttpFixture; +import fixture.azure.MockAzureBlobStore; import com.azure.core.exception.HttpResponseException; import com.azure.storage.blob.BlobContainerClient; @@ -60,7 +61,8 @@ public class AzureStorageCleanupThirdPartyTests extends AbstractThirdPartyReposi System.getProperty("test.azure.container"), System.getProperty("test.azure.tenant_id"), System.getProperty("test.azure.client_id"), - AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_ACCOUNT) + AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_ACCOUNT), + MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE ); @Override diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java index 52bc1ee1399d4..73936d82fc204 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobContainer.java @@ -180,7 +180,7 @@ protected String buildKey(String blobName) { } private boolean skipRegisterOperation(ActionListener listener) { - return skipCas(listener) || skipIfNotPrimaryOnlyLocationMode(listener); + return skipIfNotPrimaryOnlyLocationMode(listener); } private boolean skipIfNotPrimaryOnlyLocationMode(ActionListener listener) { diff --git a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java index 3c64bb9f3b830..b4567a92184fc 100644 --- a/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java +++ b/modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java @@ -40,6 +40,7 @@ import com.azure.storage.blob.models.ListBlobsOptions; import com.azure.storage.blob.options.BlobParallelUploadOptions; import com.azure.storage.blob.options.BlockBlobSimpleUploadOptions; +import com.azure.storage.blob.specialized.BlobLeaseClient; import com.azure.storage.blob.specialized.BlobLeaseClientBuilder; import com.azure.storage.blob.specialized.BlockBlobAsyncClient; @@ -1010,7 +1011,7 @@ private static BytesReference innerCompareAndExchangeRegister( } return currentValue; } finally { - leaseClient.releaseLease(); + bestEffortRelease(leaseClient); } } else { if (expected.length() == 0) { @@ -1020,6 +1021,29 @@ private static BytesReference innerCompareAndExchangeRegister( } } + /** + * Release the lease, ignoring conflicts due to expiry + * + * @see Outcomes of lease operations by lease state + * @param leaseClient The client for the lease + */ + private static void bestEffortRelease(BlobLeaseClient leaseClient) { + 
try { + leaseClient.releaseLease(); + } catch (BlobStorageException blobStorageException) { + if (blobStorageException.getStatusCode() == RestStatus.CONFLICT.getStatus()) { + // This is OK, we tried to release a lease that was expired/re-acquired + logger.debug( + "Ignored conflict on release: errorCode={}, message={}", + blobStorageException.getErrorCode(), + blobStorageException.getMessage() + ); + } else { + throw blobStorageException; + } + } + } + private static BytesReference downloadRegisterBlob( String containerPath, String blobKey, diff --git a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerStatsTests.java b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerStatsTests.java index 6730e5c3c81bd..812d519e60260 100644 --- a/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerStatsTests.java +++ b/modules/repository-azure/src/test/java/org/elasticsearch/repositories/azure/AzureBlobContainerStatsTests.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.azure; import fixture.azure.AzureHttpHandler; +import fixture.azure.MockAzureBlobStore; import org.elasticsearch.common.blobstore.OperationPurpose; import org.elasticsearch.common.bytes.BytesReference; @@ -26,7 +27,7 @@ public class AzureBlobContainerStatsTests extends AbstractAzureServerTestCase { @SuppressForbidden(reason = "use a http server") @Before public void configureAzureHandler() { - httpServer.createContext("/", new AzureHttpHandler(ACCOUNT, CONTAINER, null)); + httpServer.createContext("/", new AzureHttpHandler(ACCOUNT, CONTAINER, null, MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE)); } public void testOperationPurposeIsReflectedInBlobStoreStats() throws IOException { diff --git a/modules/repository-azure/src/yamlRestTest/java/org/elasticsearch/repositories/azure/RepositoryAzureClientYamlTestSuiteIT.java b/modules/repository-azure/src/yamlRestTest/java/org/elasticsearch/repositories/azure/RepositoryAzureClientYamlTestSuiteIT.java index 64dde0248ad2c..b24574da36825 100644 --- a/modules/repository-azure/src/yamlRestTest/java/org/elasticsearch/repositories/azure/RepositoryAzureClientYamlTestSuiteIT.java +++ b/modules/repository-azure/src/yamlRestTest/java/org/elasticsearch/repositories/azure/RepositoryAzureClientYamlTestSuiteIT.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.azure; import fixture.azure.AzureHttpFixture; +import fixture.azure.MockAzureBlobStore; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; @@ -47,7 +48,8 @@ public class RepositoryAzureClientYamlTestSuiteIT extends ESClientYamlSuiteTestC AZURE_TEST_CONTAINER, AZURE_TEST_TENANT_ID, AZURE_TEST_CLIENT_ID, - decideAuthHeaderPredicate() + decideAuthHeaderPredicate(), + MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE ); private static Predicate decideAuthHeaderPredicate() { diff --git a/modules/repository-azure/src/yamlRestTest/resources/rest-api-spec/test/repository_azure/20_repository.yml b/modules/repository-azure/src/yamlRestTest/resources/rest-api-spec/test/repository_azure/20_repository.yml index a4a7d0b22a0ed..968e93cf9fc55 100644 --- a/modules/repository-azure/src/yamlRestTest/resources/rest-api-spec/test/repository_azure/20_repository.yml +++ b/modules/repository-azure/src/yamlRestTest/resources/rest-api-spec/test/repository_azure/20_repository.yml @@ -193,6 +193,20 @@ setup: container: 
zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE client: integration_test +--- +"Register a read-only repository with a non existing container": + + - do: + catch: /repository_verification_exception/ + snapshot.create_repository: + repository: repository + body: + type: azure + settings: + container: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE + client: integration_test + readonly: true + --- "Register a repository with a non existing client": diff --git a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpFixture.java b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpFixture.java index 39105e0a27dc9..ab4d54f4fc451 100644 --- a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpFixture.java +++ b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpFixture.java @@ -45,6 +45,7 @@ public class AzureHttpFixture extends ExternalResource { private final String clientId; private final String tenantId; private final Predicate authHeaderPredicate; + private final MockAzureBlobStore.LeaseExpiryPredicate leaseExpiryPredicate; private HttpServer server; private HttpServer metadataServer; @@ -116,7 +117,8 @@ public AzureHttpFixture( String container, @Nullable String rawTenantId, @Nullable String rawClientId, - Predicate authHeaderPredicate + Predicate authHeaderPredicate, + MockAzureBlobStore.LeaseExpiryPredicate leaseExpiryPredicate ) { final var tenantId = Strings.hasText(rawTenantId) ? rawTenantId : null; final var clientId = Strings.hasText(rawClientId) ? rawClientId : null; @@ -135,6 +137,7 @@ public AzureHttpFixture( this.tenantId = tenantId; this.clientId = clientId; this.authHeaderPredicate = authHeaderPredicate; + this.leaseExpiryPredicate = leaseExpiryPredicate; } private String scheme() { @@ -193,7 +196,10 @@ protected void before() { } case HTTP -> { server = HttpServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0); - server.createContext("/" + account, new AzureHttpHandler(account, container, actualAuthHeaderPredicate)); + server.createContext( + "/" + account, + new AzureHttpHandler(account, container, actualAuthHeaderPredicate, leaseExpiryPredicate) + ); server.start(); oauthTokenServiceServer = HttpServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0); @@ -222,7 +228,10 @@ protected void before() { final var httpsServer = HttpsServer.create(new InetSocketAddress(InetAddress.getLoopbackAddress(), 0), 0); this.server = httpsServer; httpsServer.setHttpsConfigurator(new HttpsConfigurator(sslContext)); - httpsServer.createContext("/" + account, new AzureHttpHandler(account, container, actualAuthHeaderPredicate)); + httpsServer.createContext( + "/" + account, + new AzureHttpHandler(account, container, actualAuthHeaderPredicate, leaseExpiryPredicate) + ); httpsServer.start(); } { diff --git a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java index bbcfe1f75dc06..904f4581ad2c9 100644 --- a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java +++ b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java @@ -15,7 +15,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.Streams; import 
org.elasticsearch.common.regex.Regex; @@ -27,7 +26,6 @@ import org.elasticsearch.xcontent.XContentType; import java.io.BufferedReader; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; @@ -43,11 +41,11 @@ import java.util.Objects; import java.util.Set; import java.util.UUID; -import java.util.concurrent.ConcurrentHashMap; import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static fixture.azure.MockAzureBlobStore.failTestWithAssertionError; import static org.elasticsearch.repositories.azure.AzureFixtureHelper.assertValidBlockId; /** @@ -56,17 +54,29 @@ @SuppressForbidden(reason = "Uses a HttpServer to emulate an Azure endpoint") public class AzureHttpHandler implements HttpHandler { private static final Logger logger = LogManager.getLogger(AzureHttpHandler.class); + private static final Pattern RANGE_HEADER_PATTERN = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$"); + static final String X_MS_LEASE_ID = "x-ms-lease-id"; + static final String X_MS_PROPOSED_LEASE_ID = "x-ms-proposed-lease-id"; + static final String X_MS_LEASE_DURATION = "x-ms-lease-duration"; + static final String X_MS_LEASE_BREAK_PERIOD = "x-ms-lease-break-period"; + static final String X_MS_BLOB_TYPE = "x-ms-blob-type"; + static final String X_MS_BLOB_CONTENT_LENGTH = "x-ms-blob-content-length"; - private final Map blobs; private final String account; private final String container; private final Predicate authHeaderPredicate; - - public AzureHttpHandler(final String account, final String container, @Nullable Predicate authHeaderPredicate) { + private final MockAzureBlobStore mockAzureBlobStore; + + public AzureHttpHandler( + final String account, + final String container, + @Nullable Predicate authHeaderPredicate, + MockAzureBlobStore.LeaseExpiryPredicate leaseExpiryPredicate + ) { this.account = Objects.requireNonNull(account); this.container = Objects.requireNonNull(container); this.authHeaderPredicate = authHeaderPredicate; - this.blobs = new ConcurrentHashMap<>(); + this.mockAzureBlobStore = new MockAzureBlobStore(leaseExpiryPredicate); } private static List getAuthHeader(HttpExchange exchange) { @@ -134,7 +144,7 @@ public void handle(final HttpExchange exchange) throws IOException { final String blockId = params.get("blockid"); assert assertValidBlockId(blockId); - blobs.put(blockId, Streams.readFully(exchange.getRequestBody())); + mockAzureBlobStore.putBlock(blobPath(exchange), blockId, Streams.readFully(exchange.getRequestBody()), leaseId(exchange)); exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1); } else if (Regex.simpleMatch("PUT /" + account + "/" + container + "/*comp=blocklist*", request)) { @@ -145,83 +155,124 @@ public void handle(final HttpExchange exchange) throws IOException { .map(line -> line.substring(0, line.indexOf(""))) .toList(); - final ByteArrayOutputStream blob = new ByteArrayOutputStream(); - for (String blockId : blockIds) { - BytesReference block = blobs.remove(blockId); - assert block != null; - block.writeTo(blob); - } - blobs.put(exchange.getRequestURI().getPath(), new BytesArray(blob.toByteArray())); + mockAzureBlobStore.putBlockList(blobPath(exchange), blockIds, leaseId(exchange)); exchange.getResponseHeaders().add("x-ms-request-server-encrypted", "false"); exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1); + } else if (Regex.simpleMatch("PUT /" + account + "/" + container + "*comp=lease*", request)) { + // Lease 
Blob (https://learn.microsoft.com/en-us/rest/api/storageservices/lease-blob) + final String leaseAction = requireHeader(exchange, "x-ms-lease-action"); + + switch (leaseAction) { + case "acquire" -> { + final int leaseDurationSeconds = requireIntegerHeader(exchange, X_MS_LEASE_DURATION); + final String proposedLeaseId = exchange.getRequestHeaders().getFirst(X_MS_PROPOSED_LEASE_ID); + final String newLeaseId = mockAzureBlobStore.acquireLease( + blobPath(exchange), + leaseDurationSeconds, + proposedLeaseId + ); + exchange.getResponseHeaders().set(X_MS_LEASE_ID, newLeaseId); + exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1); + } + case "release" -> { + final String leaseId = requireHeader(exchange, X_MS_LEASE_ID); + mockAzureBlobStore.releaseLease(blobPath(exchange), leaseId); + exchange.sendResponseHeaders(RestStatus.OK.getStatus(), -1); + } + case "break" -> { + mockAzureBlobStore.breakLease(blobPath(exchange), getOptionalIntegerHeader(exchange, X_MS_LEASE_BREAK_PERIOD)); + exchange.sendResponseHeaders(RestStatus.ACCEPTED.getStatus(), -1); + } + case "renew", "change" -> { + failTestWithAssertionError("Attempt was made to use not-implemented lease action: " + leaseAction); + throw new MockAzureBlobStore.AzureBlobStoreError( + RestStatus.NOT_IMPLEMENTED, + "NotImplemented", + "Attempted to use unsupported lease API: " + leaseAction + ); + } + default -> { + failTestWithAssertionError("Unrecognized lease action: " + leaseAction); + throw new MockAzureBlobStore.BadRequestException( + "InvalidHeaderValue", + "Invalid x-ms-lease-action header: " + leaseAction + ); + } + } } else if (Regex.simpleMatch("PUT /" + account + "/" + container + "/*", request)) { // PUT Blob (see https://docs.microsoft.com/en-us/rest/api/storageservices/put-blob) + final String blobType = requireHeader(exchange, X_MS_BLOB_TYPE); final String ifNoneMatch = exchange.getRequestHeaders().getFirst("If-None-Match"); - if ("*".equals(ifNoneMatch)) { - if (blobs.putIfAbsent(exchange.getRequestURI().getPath(), Streams.readFully(exchange.getRequestBody())) != null) { - sendError(exchange, RestStatus.CONFLICT); - return; - } - } else { - blobs.put(exchange.getRequestURI().getPath(), Streams.readFully(exchange.getRequestBody())); - } + mockAzureBlobStore.putBlob( + blobPath(exchange), + Streams.readFully(exchange.getRequestBody()), + blobType, + ifNoneMatch, + leaseId(exchange) + ); exchange.getResponseHeaders().add("x-ms-request-server-encrypted", "false"); exchange.sendResponseHeaders(RestStatus.CREATED.getStatus(), -1); } else if (Regex.simpleMatch("HEAD /" + account + "/" + container + "/*", request)) { // Get Blob Properties (see https://docs.microsoft.com/en-us/rest/api/storageservices/get-blob-properties) - final BytesReference blob = blobs.get(exchange.getRequestURI().getPath()); - if (blob == null) { - sendError(exchange, RestStatus.NOT_FOUND); - return; - } - exchange.getResponseHeaders().add("x-ms-blob-content-length", String.valueOf(blob.length())); - exchange.getResponseHeaders().add("Content-Length", String.valueOf(blob.length())); - exchange.getResponseHeaders().add("x-ms-blob-type", "BlockBlob"); + final MockAzureBlobStore.AzureBlockBlob blob = mockAzureBlobStore.getBlob(blobPath(exchange), leaseId(exchange)); + + final Headers responseHeaders = exchange.getResponseHeaders(); + final BytesReference blobContents = blob.getContents(); + responseHeaders.add(X_MS_BLOB_CONTENT_LENGTH, String.valueOf(blobContents.length())); + responseHeaders.add("Content-Length", String.valueOf(blobContents.length())); + 
responseHeaders.add(X_MS_BLOB_TYPE, blob.type()); exchange.sendResponseHeaders(RestStatus.OK.getStatus(), -1); } else if (Regex.simpleMatch("GET /" + account + "/" + container + "/*", request)) { - // GET Object (https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html) - final BytesReference blob = blobs.get(exchange.getRequestURI().getPath()); - if (blob == null) { - sendError(exchange, RestStatus.NOT_FOUND); - return; - } + // Get Blob (https://learn.microsoft.com/en-us/rest/api/storageservices/get-blob) + final MockAzureBlobStore.AzureBlockBlob blob = mockAzureBlobStore.getBlob(blobPath(exchange), leaseId(exchange)); + final BytesReference responseContent; + final RestStatus successStatus; // see Constants.HeaderConstants.STORAGE_RANGE_HEADER final String range = exchange.getRequestHeaders().getFirst("x-ms-range"); - final Matcher matcher = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$").matcher(range); - if (matcher.matches() == false) { - throw new AssertionError("Range header does not match expected format: " + range); - } + if (range != null) { + final Matcher matcher = RANGE_HEADER_PATTERN.matcher(range); + if (matcher.matches() == false) { + throw new MockAzureBlobStore.BadRequestException( + "InvalidHeaderValue", + "Range header does not match expected format: " + range + ); + } - final long start = Long.parseLong(matcher.group(1)); - final long end = Long.parseLong(matcher.group(2)); + final long start = Long.parseLong(matcher.group(1)); + final long end = Long.parseLong(matcher.group(2)); - if (blob.length() <= start) { - exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); - exchange.sendResponseHeaders(RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus(), -1); - return; - } + final BytesReference blobContents = blob.getContents(); + if (blobContents.length() <= start) { + exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); + exchange.sendResponseHeaders(RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus(), -1); + return; + } - var responseBlob = blob.slice(Math.toIntExact(start), Math.toIntExact(Math.min(end - start + 1, blob.length() - start))); + responseContent = blobContents.slice( + Math.toIntExact(start), + Math.toIntExact(Math.min(end - start + 1, blobContents.length() - start)) + ); + successStatus = RestStatus.PARTIAL_CONTENT; + } else { + responseContent = blob.getContents(); + successStatus = RestStatus.OK; + } exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); - exchange.getResponseHeaders().add("x-ms-blob-content-length", String.valueOf(responseBlob.length())); - exchange.getResponseHeaders().add("x-ms-blob-type", "blockblob"); + exchange.getResponseHeaders().add(X_MS_BLOB_CONTENT_LENGTH, String.valueOf(responseContent.length())); + exchange.getResponseHeaders().add(X_MS_BLOB_TYPE, blob.type()); exchange.getResponseHeaders().add("ETag", "\"blockblob\""); - exchange.sendResponseHeaders(RestStatus.OK.getStatus(), responseBlob.length()); - responseBlob.writeTo(exchange.getResponseBody()); + exchange.sendResponseHeaders(successStatus.getStatus(), responseContent.length() == 0 ? 
-1 : responseContent.length());
+                responseContent.writeTo(exchange.getResponseBody());
             } else if (Regex.simpleMatch("DELETE /" + account + "/" + container + "/*", request)) {
                 // Delete Blob (https://docs.microsoft.com/en-us/rest/api/storageservices/delete-blob)
-                final boolean deleted = blobs.entrySet().removeIf(blob -> blob.getKey().startsWith(exchange.getRequestURI().getPath()));
-                if (deleted) {
-                    exchange.sendResponseHeaders(RestStatus.ACCEPTED.getStatus(), -1);
-                } else {
-                    exchange.sendResponseHeaders(RestStatus.NOT_FOUND.getStatus(), -1);
-                }
+                mockAzureBlobStore.deleteBlob(blobPath(exchange), leaseId(exchange));
+                exchange.sendResponseHeaders(RestStatus.ACCEPTED.getStatus(), -1);

             } else if (Regex.simpleMatch("GET /" + account + "/" + container + "?*restype=container*comp=list*", request)) {
                 // List Blobs (https://docs.microsoft.com/en-us/rest/api/storageservices/list-blobs)
@@ -239,11 +290,12 @@ public void handle(final HttpExchange exchange) throws IOException {
                     list.append("<Delimiter>").append(delimiter).append("</Delimiter>");
                 }
                 list.append("<Blobs>");
-                for (Map.Entry<String, BytesReference> blob : blobs.entrySet()) {
-                    if (prefix != null && blob.getKey().startsWith("/" + account + "/" + container + "/" + prefix) == false) {
-                        continue;
-                    }
-                    String blobPath = blob.getKey().replace("/" + account + "/" + container + "/", "");
+                final Map<String, MockAzureBlobStore.AzureBlockBlob> matchingBlobs = mockAzureBlobStore.listBlobs(
+                    prefix,
+                    leaseId(exchange)
+                );
+                for (Map.Entry<String, MockAzureBlobStore.AzureBlockBlob> blob : matchingBlobs.entrySet()) {
+                    final String blobPath = blob.getKey();
                     if (delimiter != null) {
                         int fromIndex = (prefix != null ? prefix.length() : 0);
                         int delimiterPosition = blobPath.indexOf(delimiter, fromIndex);
@@ -259,7 +311,7 @@ public void handle(final HttpExchange exchange) throws IOException {
                                     <Content-Length>%s</Content-Length>
                                     <BlobType>BlockBlob</BlobType>
                                 </Properties>
                             </Blob>
-                        """, blobPath, blob.getValue().length()));
+                        """, blobPath, blob.getValue().getContents().length()));
                 }
                 if (blobPrefixes.isEmpty() == false) {
                     blobPrefixes.forEach(p -> list.append("<BlobPrefix><Name>").append(p).append("</Name></BlobPrefix>"));
                 }
@@ -294,7 +346,8 @@ public void handle(final HttpExchange exchange) throws IOException {
                 }

                 // Process the deletion
-                if (blobs.remove("/" + account + toDelete) != null) {
+                try {
+                    mockAzureBlobStore.deleteBlob(toDelete, leaseId(exchange));
                     final String acceptedPart = Strings.format("""
                         --%s
                         Content-Type: application/http
                         Content-ID: %s

                         HTTP/1.1 202 Accepted
                         x-ms-delete-type-permanent: true
                         x-ms-request-id: %s
                         x-ms-version: 2018-11-09

                         """, responseBoundary, contentId, requestId).replaceAll("\n", "\r\n");
                     response.append(acceptedPart);
-                } else {
-                    final String notFoundBody = Strings.format(
+                } catch (MockAzureBlobStore.AzureBlobStoreError e) {
+                    final String errorResponseBody = Strings.format(
                         """
-                            <?xml version="1.0" encoding="utf-8"?><Error><Code>BlobNotFound</Code><Message>The specified blob does not exist.
+                            <?xml version="1.0" encoding="utf-8"?><Error><Code>%s</Code><Message>%s
                             RequestId:%s
                             Time:%s</Message></Error>""",
+                        e.getErrorCode(),
+                        e.getMessage(),
                         requestId,
                         DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now(ZoneId.of("UTC")))
                     );
-                    final String notFoundPart = Strings.format("""
-                        --%s
-                        Content-Type: application/http
-                        Content-ID: %s
-
-                        HTTP/1.1 404 The specified blob does not exist.
- x-ms-error-code: BlobNotFound - x-ms-request-id: %s - x-ms-version: 2018-11-09 - Content-Length: %d - Content-Type: application/xml - - %s - """, responseBoundary, contentId, requestId, notFoundBody.length(), notFoundBody) - .replaceAll("\n", "\r\n"); - response.append(notFoundPart); + final String errorResponsePart = Strings.format( + """ + --%s + Content-Type: application/http + Content-ID: %s + + HTTP/1.1 %s %s + x-ms-error-code: %s + x-ms-request-id: %s + x-ms-version: 2018-11-09 + Content-Length: %d + Content-Type: application/xml + + %s + """, + responseBoundary, + contentId, + e.getRestStatus().getStatus(), + e.getMessage(), + e.getErrorCode(), + requestId, + errorResponseBody.length(), + errorResponseBody + ).replaceAll("\n", "\r\n"); + response.append(errorResponsePart); } // Clear the state @@ -350,19 +414,18 @@ public void handle(final HttpExchange exchange) throws IOException { } contentId = line.split("\\s")[1]; } else if (Regex.simpleMatch("DELETE /" + container + "/*", line)) { - String blobName = RestUtils.decodeComponent(line.split("(\\s|\\?)")[1]); + final String path = RestUtils.decodeComponent(line.split("(\\s|\\?)")[1]); if (toDelete != null) { throw new IllegalStateException("Got multiple deletes in a single request?"); } - toDelete = blobName; + toDelete = stripPrefix("/" + container + "/", path); } else if (Regex.simpleMatch("DELETE /" + account + "/" + container + "/*", line)) { // possible alternative DELETE url, depending on which method is used in the batch client String path = RestUtils.decodeComponent(line.split("(\\s|\\?)")[1]); - String blobName = path.split(account)[1]; if (toDelete != null) { throw new IllegalStateException("Got multiple deletes in a single request?"); } - toDelete = blobName; + toDelete = stripPrefix("/" + account + "/" + container + "/", path); } } response.append("--").append(responseBoundary).append("--\r\n0\r\n"); @@ -372,20 +435,90 @@ public void handle(final HttpExchange exchange) throws IOException { logger.debug("--> Sending response:\n{}", response); exchange.getResponseBody().write(response.toString().getBytes(StandardCharsets.UTF_8)); } - } else { - logger.warn("--> Unrecognised request received: {}", request); - sendError(exchange, RestStatus.BAD_REQUEST); - } + } else if (Regex.simpleMatch("PUT /*/*/*master.dat", request) + && Regex.simpleMatch("PUT /" + account + "/" + container + "*", request) == false) { + // An attempt to put master.dat to a different container. This is probably + // org.elasticsearch.repositories.blobstore.BlobStoreRepository#startVerification + throw new MockAzureBlobStore.AzureBlobStoreError( + RestStatus.NOT_FOUND, + "ContainerNotFound", + "The specified container does not exist." + ); + } else if (Regex.simpleMatch("GET /*/*restype=container*comp=list*", request) + && Regex.simpleMatch("GET /" + account + "/" + container + "*", request) == false) { + // An attempt to list the contents of a different container. This is probably + // org.elasticsearch.repositories.blobstore.BlobStoreRepository#startVerification for a read-only + // repository + throw new MockAzureBlobStore.AzureBlobStoreError( + RestStatus.NOT_FOUND, + "ContainerNotFound", + "The specified container does not exist." 
+                );
+            } else {
+                final String message = "You sent a request that is not supported by AzureHttpHandler: " + request;
+                failTestWithAssertionError(message);
+                throw new MockAzureBlobStore.BadRequestException("UnrecognisedRequest", message);
+            }
+        } catch (MockAzureBlobStore.AzureBlobStoreError e) {
+            sendError(exchange, e);
+        } catch (Exception e) {
+            failTestWithAssertionError("Uncaught exception", e);
+            sendError(exchange, RestStatus.INTERNAL_SERVER_ERROR, "InternalError", e.getMessage());
         } finally {
             exchange.close();
         }
     }

+    private String requireHeader(HttpExchange exchange, String headerName) {
+        final String headerValue = exchange.getRequestHeaders().getFirst(headerName);
+        if (headerValue == null) {
+            throw new MockAzureBlobStore.BadRequestException("MissingRequiredHeader", "Missing " + headerName + " header");
+        }
+        return headerValue;
+    }
+
+    private int requireIntegerHeader(HttpExchange exchange, String headerName) {
+        final String headerValue = requireHeader(exchange, headerName);
+        try {
+            return Integer.parseInt(headerValue);
+        } catch (NumberFormatException e) {
+            throw new MockAzureBlobStore.BadRequestException("InvalidHeaderValue", "Invalid " + headerName + " header");
+        }
+    }
+
+    @Nullable
+    private Integer getOptionalIntegerHeader(HttpExchange exchange, String headerName) {
+        final String headerValue = exchange.getRequestHeaders().getFirst(headerName);
+        try {
+            return headerValue == null ? null : Integer.parseInt(headerValue);
+        } catch (NumberFormatException e) {
+            throw new MockAzureBlobStore.BadRequestException("InvalidHeaderValue", "Invalid " + headerName + " header");
+        }
+    }
+
+    @Nullable
+    private String leaseId(HttpExchange exchange) {
+        return exchange.getRequestHeaders().getFirst(X_MS_LEASE_ID);
+    }
+
+    private String blobPath(HttpExchange exchange) {
+        return stripPrefix("/" + account + "/" + container + "/", exchange.getRequestURI().getPath());
+    }
+
     public Map<String, BytesReference> blobs() {
-        return blobs;
+        return mockAzureBlobStore.blobs();
+    }
+
+    public static void sendError(HttpExchange exchange, MockAzureBlobStore.AzureBlobStoreError error) throws IOException {
+        sendError(exchange, error.getRestStatus(), error.getErrorCode(), error.getMessage());
     }

     public static void sendError(final HttpExchange exchange, final RestStatus status) throws IOException {
+        final String errorCode = toAzureErrorCode(status);
+        sendError(exchange, status, errorCode, status.toString());
+    }
+
+    public static void sendError(HttpExchange exchange, RestStatus restStatus, String errorCode, String errorMessage) throws IOException {
         final Headers headers = exchange.getResponseHeaders();
         headers.add("Content-Type", "application/xml");

@@ -396,20 +529,19 @@ public static void sendError(final HttpExchange exchange, final RestStatus statu
             headers.add("x-ms-request-id", requestId);
         }

-        final String errorCode = toAzureErrorCode(status);
         // see Constants.HeaderConstants.ERROR_CODE
         headers.add("x-ms-error-code", errorCode);

         if ("HEAD".equals(exchange.getRequestMethod())) {
-            exchange.sendResponseHeaders(status.getStatus(), -1L);
+            exchange.sendResponseHeaders(restStatus.getStatus(), -1L);
         } else {
             final byte[] response = (String.format(Locale.ROOT, """
                 <?xml version="1.0" encoding="UTF-8"?>
                 <Error>
                     <Code>%s</Code>
                     <Message>%s</Message>
                 </Error>
-                """, errorCode, status)).getBytes(StandardCharsets.UTF_8);
+                """, errorCode, errorMessage)).getBytes(StandardCharsets.UTF_8);
             exchange.sendResponseHeaders(restStatus.getStatus(), response.length);
             exchange.getResponseBody().write(response);
         }
     }
@@ -428,4 +560,9 @@ private static String toAzureErrorCode(final RestStatus status) {
             );
         };
     }
+
+    private String stripPrefix(String prefix, String toStrip) {
+        assert toStrip.startsWith(prefix);
+        return toStrip.substring(prefix.length());
+    }
 }
diff --git a/test/fixtures/azure-fixture/src/main/java/fixture/azure/MockAzureBlobStore.java b/test/fixtures/azure-fixture/src/main/java/fixture/azure/MockAzureBlobStore.java
new file mode 100644
index 0000000000000..c694c27c1293b
--- /dev/null
+++ b/test/fixtures/azure-fixture/src/main/java/fixture/azure/MockAzureBlobStore.java
@@ -0,0 +1,484 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package fixture.azure;
+
+import org.elasticsearch.ExceptionsHelper;
+import org.elasticsearch.common.bytes.BytesReference;
+import org.elasticsearch.common.bytes.CompositeBytesReference;
+import org.elasticsearch.common.util.Maps;
+import org.elasticsearch.core.Nullable;
+import org.elasticsearch.logging.LogManager;
+import org.elasticsearch.logging.Logger;
+import org.elasticsearch.rest.RestStatus;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.stream.Collectors;
+
+public class MockAzureBlobStore {
+
+    private static final Logger logger = LogManager.getLogger(MockAzureBlobStore.class);
+    private static final String BLOCK_BLOB_TYPE = "BlockBlob";
+    private static final String PAGE_BLOB_TYPE = "PageBlob";
+    private static final String APPEND_BLOB_TYPE = "AppendBlob";
+
+    private final LeaseExpiryPredicate leaseExpiryPredicate;
+    private final Map<String, AzureBlockBlob> blobs;
+
+    /**
+     * Provide the means of triggering lease expiration
+     *
+     * @param leaseExpiryPredicate A predicate that takes the active lease ID and the request's lease ID and returns true when the
+     *                             lease should be expired; it must not be null, pass {@link LeaseExpiryPredicate#NEVER_EXPIRE} to
+     *                             never expire leases
+     */
+    public MockAzureBlobStore(LeaseExpiryPredicate leaseExpiryPredicate) {
+        this.blobs = new ConcurrentHashMap<>();
+        this.leaseExpiryPredicate = Objects.requireNonNull(leaseExpiryPredicate);
+    }
+
+    public void putBlock(String path, String blockId, BytesReference content, @Nullable String leaseId) {
+        blobs.compute(path, (p, existing) -> {
+            if (existing != null) {
+                existing.putBlock(blockId, content, leaseId);
+                return existing;
+            } else {
+                final AzureBlockBlob azureBlockBlob = new AzureBlockBlob();
+                azureBlockBlob.putBlock(blockId, content, leaseId);
+                return azureBlockBlob;
+            }
+        });
+    }
+
+    public void putBlockList(String path, List<String> blockIds, @Nullable String leaseId) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        blob.putBlockList(blockIds, leaseId);
+    }
+
+    public void putBlob(String path, BytesReference contents, String blobType, @Nullable String ifNoneMatch, @Nullable String leaseId) {
+        blobs.compute(path, (p, existingValue) -> {
+            if (existingValue != null) {
+                existingValue.setContents(contents, leaseId, ifNoneMatch);
+                return existingValue;
+            } else {
+                validateBlobType(blobType);
+                final AzureBlockBlob newBlob = new AzureBlockBlob();
+                newBlob.setContents(contents, leaseId);
+                return newBlob;
+            }
+        });
+    }
+
+    private void validateBlobType(String blobType) {
+        if (BLOCK_BLOB_TYPE.equals(blobType)) {
+            return;
+        }
+        final String errorMessage;
+        if (PAGE_BLOB_TYPE.equals(blobType) || APPEND_BLOB_TYPE.equals(blobType)) {
+            errorMessage = "Only BlockBlob is supported. This is a limitation of the MockAzureBlobStore";
+        } else {
+            errorMessage = "Invalid blobType: " + blobType;
+        }
+        // Fail the test and respond with an error
+        failTestWithAssertionError(errorMessage);
+        throw new MockAzureBlobStore.BadRequestException("InvalidHeaderValue", errorMessage);
+    }
+
+    public AzureBlockBlob getBlob(String path, @Nullable String leaseId) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        // This is the public implementation of "get blob" which will 404 for uncommitted block blobs
+        if (blob.isCommitted() == false) {
+            throw new BlobNotFoundException();
+        }
+        blob.checkLeaseForRead(leaseId);
+        return blob;
+    }
+
+    public void deleteBlob(String path, @Nullable String leaseId) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        blob.checkLeaseForWrite(leaseId);
+        blobs.remove(path);
+    }
+
+    public Map<String, AzureBlockBlob> listBlobs(String prefix, @Nullable String leaseId) {
+        return blobs.entrySet().stream().filter(e -> {
+            if (prefix == null || e.getKey().startsWith(prefix)) {
+                return true;
+            }
+            return false;
+        })
+            .filter(e -> e.getValue().isCommitted())
+            .peek(e -> e.getValue().checkLeaseForRead(leaseId))
+            .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
+    }
+
+    public String acquireLease(String path, int leaseTimeSeconds, @Nullable String proposedLeaseId) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        return blob.acquireLease(proposedLeaseId, leaseTimeSeconds);
+    }
+
+    public void releaseLease(String path, @Nullable String leaseId) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        blob.releaseLease(leaseId);
+    }
+
+    public void breakLease(String path, @Nullable Integer leaseBreakPeriod) {
+        final AzureBlockBlob blob = getExistingBlob(path);
+        blob.breakLease(leaseBreakPeriod);
+    }
+
+    public Map<String, BytesReference> blobs() {
+        return Maps.transformValues(blobs, AzureBlockBlob::getContents);
+    }
+
+    private AzureBlockBlob getExistingBlob(String path) {
+        final AzureBlockBlob blob = blobs.get(path);
+        if (blob == null) {
+            throw new BlobNotFoundException();
+        }
+        return blob;
+    }
+
+    static void failTestWithAssertionError(String message) {
+        ExceptionsHelper.maybeDieOnAnotherThread(new AssertionError(message));
+    }
+
+    static void failTestWithAssertionError(String message, Throwable throwable) {
+        ExceptionsHelper.maybeDieOnAnotherThread(new AssertionError(message, throwable));
+    }
+
+    public class AzureBlockBlob {
+        private final Object writeLock = new Object();
+        private final Lease lease = new Lease();
+        private final Map<String, BytesReference> blocks;
+        private volatile BytesReference contents;
+
+        private AzureBlockBlob() {
+            this.blocks = new ConcurrentHashMap<>();
+        }
+
+        public void putBlock(String blockId, BytesReference content, @Nullable String leaseId) {
+            synchronized (writeLock) {
+                lease.checkLeaseForWrite(leaseId);
+                this.blocks.put(blockId, content);
+            }
+        }
+
+        public void putBlockList(List<String> blockIds, @Nullable String leaseId) throws BadRequestException {
+            synchronized (writeLock) {
+                lease.checkLeaseForWrite(leaseId);
+                final List<String> unresolvedBlocks = blockIds.stream().filter(bId -> blocks.containsKey(bId) == false).toList();
+                if (unresolvedBlocks.isEmpty() == false) {
+                    logger.warn("Block list contained non-existent block IDs: {}", unresolvedBlocks);
+                    throw new BadRequestException("InvalidBlockList", "The specified blocklist is invalid.");
+                }
+                final
BytesReference[] resolvedContents = blockIds.stream().map(blocks::get).toList().toArray(new BytesReference[0]); + contents = CompositeBytesReference.of(resolvedContents); + } + } + + private boolean matches(String ifNoneMatchHeaderValue) { + if (ifNoneMatchHeaderValue == null) { + return false; + } + // We only support * + if ("*".equals(ifNoneMatchHeaderValue)) { + return true; + } + // Fail the test, trigger an internal server error + failTestWithAssertionError("We've only implemented 'If-None-Match: *' in the MockAzureBlobStore"); + throw new AzureBlobStoreError( + RestStatus.INTERNAL_SERVER_ERROR, + "UnsupportedHeader", + "The test fixture only supports * for If-None-Match" + ); + } + + public synchronized void setContents(BytesReference contents, @Nullable String leaseId) { + synchronized (writeLock) { + lease.checkLeaseForWrite(leaseId); + this.contents = contents; + this.blocks.clear(); + } + } + + public void setContents(BytesReference contents, @Nullable String leaseId, @Nullable String ifNoneMatchHeaderValue) { + synchronized (writeLock) { + if (matches(ifNoneMatchHeaderValue)) { + throw new PreconditionFailedException( + "TargetConditionNotMet", + "The target condition specified using HTTP conditional header(s) is not met." + ); + } + setContents(contents, leaseId); + } + } + + /** + * Get the committed contents of the blob + * + * @return The last committed contents of the blob, or null if the blob is uncommitted + */ + @Nullable + public BytesReference getContents() { + return contents; + } + + public String type() { + return BLOCK_BLOB_TYPE; + } + + public boolean isCommitted() { + return contents != null; + } + + @Override + public String toString() { + return "MockAzureBlockBlob{" + "blocks=" + blocks + ", contents=" + contents + '}'; + } + + public String acquireLease(@Nullable String proposedLeaseId, int leaseTimeSeconds) { + synchronized (writeLock) { + return lease.acquire(proposedLeaseId, leaseTimeSeconds); + } + } + + public void releaseLease(String leaseId) { + synchronized (writeLock) { + lease.release(leaseId); + } + } + + public void breakLease(@Nullable Integer leaseBreakPeriod) { + synchronized (writeLock) { + lease.breakLease(leaseBreakPeriod); + } + } + + public void checkLeaseForRead(@Nullable String leaseId) { + lease.checkLeaseForRead(leaseId); + } + + public void checkLeaseForWrite(@Nullable String leaseId) { + lease.checkLeaseForWrite(leaseId); + } + } + + /** + * @see acquire/release rules + * @see read/write rules + */ + public class Lease { + + /** + * Minimal set of states, we don't support breaking/broken + */ + enum State { + Available, + Leased, + Expired, + Broken + } + + private String leaseId; + private State state = State.Available; + private int leaseDurationSeconds; + + public synchronized String acquire(@Nullable String proposedLeaseId, int leaseDurationSeconds) { + maybeExpire(proposedLeaseId); + switch (state) { + case Available, Expired, Broken -> { + final State prevState = state; + state = State.Leased; + leaseId = proposedLeaseId != null ? 
proposedLeaseId : UUID.randomUUID().toString();
+                    validateLeaseDuration(leaseDurationSeconds);
+                    this.leaseDurationSeconds = leaseDurationSeconds;
+                    logger.debug("Granting lease, prior state={}, leaseId={}", prevState, leaseId);
+                }
+                case Leased -> {
+                    if (leaseId.equals(proposedLeaseId) == false) {
+                        logger.debug("Mismatch on acquire - proposed leaseId: {}, active leaseId: {}", proposedLeaseId, leaseId);
+                        throw new ConflictException(
+                            "LeaseIdMismatchWithLeaseOperation",
+                            "The lease ID specified did not match the lease ID for the blob/container."
+                        );
+                    }
+                    validateLeaseDuration(leaseDurationSeconds);
+                }
+            }
+            return leaseId;
+        }
+
+        public synchronized void release(String requestLeaseId) {
+            switch (state) {
+                case Available -> throw new ConflictException(
+                    "LeaseNotPresentWithLeaseOperation",
+                    "There is currently no lease on the blob/container."
+                );
+                case Leased, Expired, Broken -> {
+                    if (leaseId.equals(requestLeaseId) == false) {
+                        logger.debug("Mismatch on release - submitted leaseId: {}, active leaseId: {}", requestLeaseId, this.leaseId);
+                        throw new ConflictException(
+                            "LeaseIdMismatchWithLeaseOperation",
+                            "The lease ID specified did not match the lease ID for the blob/container."
+                        );
+                    }
+                    state = State.Available;
+                    this.leaseId = null;
+                }
+            }
+        }
+
+        public synchronized void breakLease(Integer leaseBreakPeriod) {
+            // We haven't implemented the "Breaking" state so we don't support 'breaks' for non-infinite leases unless break-period is 0
+            if (leaseDurationSeconds != -1 && (leaseBreakPeriod == null || leaseBreakPeriod != 0)) {
+                failTestWithAssertionError(
+                    "MockAzureBlobStore only supports breaking non-infinite leases with 'x-ms-lease-break-period: 0'"
+                );
+            }
+            switch (state) {
+                case Available -> throw new ConflictException(
+                    "LeaseNotPresentWithLeaseOperation",
+                    "There is currently no lease on the blob/container."
+                );
+                case Leased, Expired, Broken -> state = State.Broken;
+            }
+        }
+
+        public synchronized void checkLeaseForWrite(@Nullable String requestLeaseId) {
+            maybeExpire(requestLeaseId);
+            switch (state) {
+                case Available, Expired, Broken -> {
+                    if (requestLeaseId != null) {
+                        throw new PreconditionFailedException(
+                            "LeaseLost",
+                            "A lease ID was specified, but the lease for the blob/container has expired."
+                        );
+                    }
+                }
+                case Leased -> {
+                    if (requestLeaseId == null) {
+                        throw new PreconditionFailedException(
+                            "LeaseIdMissing",
+                            "There is currently a lease on the blob/container and no lease ID was specified in the request."
+                        );
+                    }
+                    if (leaseId.equals(requestLeaseId) == false) {
+                        throw new ConflictException(
+                            "LeaseIdMismatchWithBlobOperation",
+                            "The lease ID specified did not match the lease ID for the blob."
+                        );
+                    }
+                }
+            }
+        }
+
+        public synchronized void checkLeaseForRead(@Nullable String requestLeaseId) {
+            maybeExpire(requestLeaseId);
+            switch (state) {
+                case Available, Expired, Broken -> {
+                    if (requestLeaseId != null) {
+                        throw new PreconditionFailedException(
+                            "LeaseLost",
+                            "A lease ID was specified, but the lease for the blob/container has expired."
+                        );
+                    }
+                }
+                case Leased -> {
+                    if (requestLeaseId != null && requestLeaseId.equals(leaseId) == false) {
+                        throw new ConflictException(
+                            "LeaseIdMismatchWithBlobOperation",
+                            "The lease ID specified did not match the lease ID for the blob."
+                        );
+                    }
+                }
+            }
+        }
+
+        /**
+         * If there's an active lease, ask the predicate if we should expire the existing lease
+         *
+         * @param requestLeaseId The lease ID provided in the request
+         */
+        private void maybeExpire(String requestLeaseId) {
+            if (state == State.Leased && leaseExpiryPredicate.shouldExpireLease(leaseId, requestLeaseId)) {
+                logger.debug("Expiring lease, id={}", leaseId);
+                state = State.Expired;
+            }
+        }
+
+        private void validateLeaseDuration(long leaseTimeSeconds) {
+            if (leaseTimeSeconds != -1 && (leaseTimeSeconds < 15 || leaseTimeSeconds > 60)) {
+                throw new BadRequestException(
+                    "InvalidHeaderValue",
+                    AzureHttpHandler.X_MS_LEASE_DURATION + " must be between 15 and 60 seconds (was " + leaseTimeSeconds + ")"
+                );
+            }
+        }
+    }
+
+    public static class AzureBlobStoreError extends RuntimeException {
+        private final RestStatus restStatus;
+        private final String errorCode;
+
+        public AzureBlobStoreError(RestStatus restStatus, String errorCode, String message) {
+            super(message);
+            this.restStatus = restStatus;
+            this.errorCode = errorCode;
+        }
+
+        public RestStatus getRestStatus() {
+            return restStatus;
+        }
+
+        public String getErrorCode() {
+            return errorCode;
+        }
+    }
+
+    public static class BlobNotFoundException extends AzureBlobStoreError {
+        public BlobNotFoundException() {
+            super(RestStatus.NOT_FOUND, "BlobNotFound", "The specified blob does not exist.");
+        }
+    }
+
+    public static class BadRequestException extends AzureBlobStoreError {
+        public BadRequestException(String errorCode, String message) {
+            super(RestStatus.BAD_REQUEST, errorCode, message);
+        }
+    }
+
+    public static class ConflictException extends AzureBlobStoreError {
+        public ConflictException(String errorCode, String message) {
+            super(RestStatus.CONFLICT, errorCode, message);
+        }
+    }
+
+    public static class PreconditionFailedException extends AzureBlobStoreError {
+        public PreconditionFailedException(String errorCode, String message) {
+            super(RestStatus.PRECONDITION_FAILED, errorCode, message);
+        }
+    }
+
+    public interface LeaseExpiryPredicate {
+
+        LeaseExpiryPredicate NEVER_EXPIRE = (activeLeaseId, requestLeaseId) -> false;
+
+        /**
+         * Should the lease be expired?
+ * + * @param activeLeaseId The current active lease ID + * @param requestLeaseId The request lease ID (if any) + * @return true to expire the lease, false otherwise + */ + boolean shouldExpireLease(String activeLeaseId, @Nullable String requestLeaseId); + } +} diff --git a/x-pack/plugin/repositories-metering-api/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/repositories/metering/azure/AzureRepositoriesMeteringIT.java b/x-pack/plugin/repositories-metering-api/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/repositories/metering/azure/AzureRepositoriesMeteringIT.java index 7029a38edcb5a..d21dc4b2982f1 100644 --- a/x-pack/plugin/repositories-metering-api/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/repositories/metering/azure/AzureRepositoriesMeteringIT.java +++ b/x-pack/plugin/repositories-metering-api/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/repositories/metering/azure/AzureRepositoriesMeteringIT.java @@ -7,6 +7,7 @@ package org.elasticsearch.xpack.repositories.metering.azure; import fixture.azure.AzureHttpFixture; +import fixture.azure.MockAzureBlobStore; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Booleans; @@ -37,7 +38,8 @@ public class AzureRepositoriesMeteringIT extends AbstractRepositoriesMeteringAPI AZURE_TEST_CONTAINER, System.getProperty("test.azure.tenant_id"), System.getProperty("test.azure.client_id"), - AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT) + AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT), + MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE ); private static TestTrustStore trustStore = new TestTrustStore( diff --git a/x-pack/plugin/searchable-snapshots/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/AzureSearchableSnapshotsIT.java b/x-pack/plugin/searchable-snapshots/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/AzureSearchableSnapshotsIT.java index 610b58453716c..f65db6dab1e68 100644 --- a/x-pack/plugin/searchable-snapshots/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/AzureSearchableSnapshotsIT.java +++ b/x-pack/plugin/searchable-snapshots/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/AzureSearchableSnapshotsIT.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.searchablesnapshots; import fixture.azure.AzureHttpFixture; +import fixture.azure.MockAzureBlobStore; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Booleans; @@ -38,7 +39,8 @@ public class AzureSearchableSnapshotsIT extends AbstractSearchableSnapshotsRestT AZURE_TEST_CONTAINER, System.getProperty("test.azure.tenant_id"), System.getProperty("test.azure.client_id"), - AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT) + AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT), + MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE ); private static TestTrustStore trustStore = new TestTrustStore( diff --git a/x-pack/plugin/snapshot-based-recoveries/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/AzureSnapshotBasedRecoveryIT.java b/x-pack/plugin/snapshot-based-recoveries/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/AzureSnapshotBasedRecoveryIT.java index 591d4582d5905..8142b40166840 100644 --- 
a/x-pack/plugin/snapshot-based-recoveries/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/AzureSnapshotBasedRecoveryIT.java
+++ b/x-pack/plugin/snapshot-based-recoveries/qa/azure/src/javaRestTest/java/org/elasticsearch/xpack/snapshotbasedrecoveries/recovery/AzureSnapshotBasedRecoveryIT.java
@@ -8,6 +8,7 @@
 package org.elasticsearch.xpack.snapshotbasedrecoveries.recovery;

 import fixture.azure.AzureHttpFixture;
+import fixture.azure.MockAzureBlobStore;

 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.core.Booleans;
@@ -37,7 +38,8 @@ public class AzureSnapshotBasedRecoveryIT extends AbstractSnapshotBasedRecoveryR
         AZURE_TEST_CONTAINER,
         System.getProperty("test.azure.tenant_id"),
         System.getProperty("test.azure.client_id"),
-        AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT)
+        AzureHttpFixture.sharedKeyForAccountPredicate(AZURE_TEST_ACCOUNT),
+        MockAzureBlobStore.LeaseExpiryPredicate.NEVER_EXPIRE
     );

     private static TestTrustStore trustStore = new TestTrustStore(
diff --git a/x-pack/plugin/snapshot-repo-test-kit/qa/azure/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/AzureRepositoryAnalysisRestIT.java b/x-pack/plugin/snapshot-repo-test-kit/qa/azure/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/AzureRepositoryAnalysisRestIT.java
index a9b8fe51c01cc..03906b3cf69da 100644
--- a/x-pack/plugin/snapshot-repo-test-kit/qa/azure/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/AzureRepositoryAnalysisRestIT.java
+++ b/x-pack/plugin/snapshot-repo-test-kit/qa/azure/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/AzureRepositoryAnalysisRestIT.java
@@ -25,6 +25,7 @@

 import java.io.IOException;
 import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
 import java.util.function.Predicate;

 import static org.hamcrest.Matchers.blankOrNullString;
@@ -49,7 +50,10 @@ public class AzureRepositoryAnalysisRestIT extends AbstractRepositoryAnalysisRes
         AZURE_TEST_CONTAINER,
         AZURE_TEST_TENANT_ID,
         AZURE_TEST_CLIENT_ID,
-        decideAuthHeaderPredicate()
+        decideAuthHeaderPredicate(),
+        // 5% of the time, in a contended lease scenario, expire the existing lease
+        (currentLeaseId, requestLeaseId) -> currentLeaseId.equals(requestLeaseId) == false
+            && ThreadLocalRandom.current().nextDouble() < 0.05
     );

     private static Predicate<String> decideAuthHeaderPredicate() {
@@ -78,12 +82,6 @@ private static Predicate<String> decideAuthHeaderPredicate() {
             () -> "ignored;DefaultEndpointsProtocol=http;BlobEndpoint=" + fixture.getAddress(),
             s -> USE_FIXTURE
         )
-        .apply(c -> {
-            if (USE_FIXTURE) {
-                // test fixture does not support CAS yet; TODO fix this
-                c.systemProperty("test.repository_test_kit.skip_cas", "true");
-            }
-        })
         .systemProperty(
             "tests.azure.credentials.disable_instance_discovery",
             () -> "true",

From 24bc505e28cadad4a3253a458ce6493a916b22e8 Mon Sep 17 00:00:00 2001
From: Yang Wang
Date: Fri, 29 Nov 2024 14:07:48 +1100
Subject: [PATCH 071/139] [Test] Increase test secret key length (#117675)

Running with FIPS approved mode requires secret keys to be at least
112 bits long.
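For context, a rough sketch of the length arithmetic involved (the
`fipsAcceptable` helper below is a made-up name for illustration, not part
of this change): BC-FIPS in approved-only mode rejects HMAC keys shorter
than 112 bits, and since these test keys are plain ASCII, every character
contributes 8 bits of key material.

    // Illustrative only: an 8-character randomIdentifier() value yields
    // 8 * 8 = 64 bits of key material, below the FIPS minimum, whereas a
    // 14-character secret reaches the 112-bit threshold.
    static boolean fipsAcceptable(String secretKey) {
        return secretKey.getBytes(java.nio.charset.StandardCharsets.UTF_8).length * 8 >= 112;
    }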
Relates: #117324 Resolves: #117596 Resolves: #117709 Resolves: #117710 Resolves: #117711 Resolves: #117712 --- .../RepositoryS3RestReloadCredentialsIT.java | 19 +++++++++++++------ muted-tests.yml | 2 -- .../fixture/aws/sts/AwsStsHttpHandler.java | 3 ++- .../fixture/aws/imds/Ec2ImdsHttpHandler.java | 3 ++- .../org/elasticsearch/test/ESTestCase.java | 7 +++++++ 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java index 430c0a1994967..1f09fa6b081b9 100644 --- a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RestReloadCredentialsIT.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.s3; import fixture.s3.S3HttpFixture; +import io.netty.handler.codec.http.HttpMethod; import org.elasticsearch.client.Request; import org.elasticsearch.client.ResponseException; @@ -61,8 +62,6 @@ protected String getTestRestCluster() { } public void testReloadCredentialsFromKeystore() throws IOException { - assumeFalse("doesn't work in a FIPS JVM, but that's ok", inFipsJvm()); - // Register repository (?verify=false because we don't have access to the blob store yet) final var repositoryName = randomIdentifier(); registerRepository( @@ -77,15 +76,16 @@ public void testReloadCredentialsFromKeystore() throws IOException { final var accessKey1 = randomIdentifier(); repositoryAccessKey = accessKey1; keystoreSettings.put("s3.client.default.access_key", accessKey1); - keystoreSettings.put("s3.client.default.secret_key", randomIdentifier()); + keystoreSettings.put("s3.client.default.secret_key", randomSecretKey()); cluster.updateStoredSecureSettings(); - assertOK(client().performRequest(new Request("POST", "/_nodes/reload_secure_settings"))); + + assertOK(client().performRequest(createReloadSecureSettingsRequest())); // Check access using initial credentials assertOK(client().performRequest(verifyRequest)); // Rotate credentials in blob store - final var accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomIdentifier); + final var accessKey2 = randomValueOtherThan(accessKey1, ESTestCase::randomSecretKey); repositoryAccessKey = accessKey2; // Ensure that initial credentials now invalid @@ -99,10 +99,17 @@ public void testReloadCredentialsFromKeystore() throws IOException { // Set up refreshed credentials keystoreSettings.put("s3.client.default.access_key", accessKey2); cluster.updateStoredSecureSettings(); - assertOK(client().performRequest(new Request("POST", "/_nodes/reload_secure_settings"))); + assertOK(client().performRequest(createReloadSecureSettingsRequest())); // Check access using refreshed credentials assertOK(client().performRequest(verifyRequest)); } + private Request createReloadSecureSettingsRequest() throws IOException { + return newXContentRequest( + HttpMethod.POST, + "/_nodes/reload_secure_settings", + (b, p) -> inFipsJvm() ? 
b.field("secure_settings_password", "keystore-password") : b + ); + } } diff --git a/muted-tests.yml b/muted-tests.yml index d703cfaa1b9aa..c3f67f97011ee 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -216,8 +216,6 @@ tests: - class: org.elasticsearch.reservedstate.service.FileSettingsServiceTests method: testStopWorksInMiddleOfProcessing issue: https://github.com/elastic/elasticsearch/issues/117591 -- class: org.elasticsearch.repositories.s3.RepositoryS3ClientYamlTestSuiteIT - issue: https://github.com/elastic/elasticsearch/issues/117596 - class: "org.elasticsearch.xpack.esql.qa.multi_node.EsqlSpecIT" method: "test {scoring.*}" issue: https://github.com/elastic/elasticsearch/issues/117641 diff --git a/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java index 84541f5e15211..ac3299f157485 100644 --- a/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java +++ b/test/fixtures/aws-sts-fixture/src/main/java/fixture/aws/sts/AwsStsHttpHandler.java @@ -28,6 +28,7 @@ import java.util.stream.Collectors; import static org.elasticsearch.test.ESTestCase.randomIdentifier; +import static org.elasticsearch.test.ESTestCase.randomSecretKey; /** * Minimal HTTP handler that emulates the AWS STS server @@ -102,7 +103,7 @@ public void handle(final HttpExchange exchange) throws IOException { ROLE_ARN, ROLE_NAME, sessionToken, - randomIdentifier(), + randomSecretKey(), ZonedDateTime.now().plusDays(1L).format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ssZ")), accessKey ).getBytes(StandardCharsets.UTF_8); diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java index a92f1bdc5f9ae..bc87eff592bec 100644 --- a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java @@ -28,6 +28,7 @@ import java.util.function.BiConsumer; import static org.elasticsearch.test.ESTestCase.randomIdentifier; +import static org.elasticsearch.test.ESTestCase.randomSecretKey; /** * Minimal HTTP handler that emulates the EC2 IMDS server @@ -84,7 +85,7 @@ public void handle(final HttpExchange exchange) throws IOException { accessKey, ZonedDateTime.now(Clock.systemUTC()).plusDays(1L).format(DateTimeFormatter.ISO_DATE_TIME), randomIdentifier(), - randomIdentifier(), + randomSecretKey(), sessionToken ).getBytes(StandardCharsets.UTF_8); exchange.getResponseHeaders().add("Content-Type", "application/json"); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 5b2beaee00bfe..d983fc854bdfd 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -1358,6 +1358,13 @@ public static String randomDateFormatterPattern() { return randomFrom(FormatNames.values()).getName(); } + /** + * Generate a random string of at least 112 bits to satisfy minimum entropy requirement when running in FIPS mode. 
+ */ + public static String randomSecretKey() { + return randomAlphaOfLengthBetween(14, 20); + } + /** * Randomly choose between {@link EsExecutors#DIRECT_EXECUTOR_SERVICE} (which does not fork), {@link ThreadPool#generic}, and one of the * other named threadpool executors. From 5935f766df80325f748c3193e13e6e74fb5c1f37 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 29 Nov 2024 17:44:27 +1100 Subject: [PATCH 072/139] Mute org.elasticsearch.xpack.inference.InferenceCrudIT testSupportedStream #117745 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index c3f67f97011ee..40d3dcf46e1b9 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -222,6 +222,9 @@ tests: - class: "org.elasticsearch.xpack.esql.qa.single_node.EsqlSpecIT" method: "test {scoring.*}" issue: https://github.com/elastic/elasticsearch/issues/117641 +- class: org.elasticsearch.xpack.inference.InferenceCrudIT + method: testSupportedStream + issue: https://github.com/elastic/elasticsearch/issues/117745 # Examples: # From 17d280363c62dc4d35c320246d36ec8cd14e4533 Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 29 Nov 2024 09:54:38 +0000 Subject: [PATCH 073/139] Add YAML test for status in indices stats (#116711) The feature added in #81954 lacks coverage in BwC situations. This commit adds a YAML test to address that. --- .../indices.stats/15_open_closed_state.yml | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.stats/15_open_closed_state.yml diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.stats/15_open_closed_state.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.stats/15_open_closed_state.yml new file mode 100644 index 0000000000000..94b6a3acc83a8 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.stats/15_open_closed_state.yml @@ -0,0 +1,22 @@ +--- +"Ensure index state is exposed": + - requires: + cluster_features: ["gte_v8.1.0"] + reason: index state added to stats in 8.1.0 + + - do: + indices.create: + index: openindex + - do: + indices.create: + index: closedindex + - do: + indices.close: + index: closedindex + - do: + indices.stats: + expand_wildcards: [open,closed] + forbid_closed_indices: false + + - match: { indices.openindex.status: open } + - match: { indices.closedindex.status: close } From c3f9e0172333b8edae525865c9d84b29a1c6ab8f Mon Sep 17 00:00:00 2001 From: David Turner Date: Fri, 29 Nov 2024 09:58:09 +0000 Subject: [PATCH 074/139] Migrate `repository-s3` YAML tests to Java REST tests (#117628) Today these YAML tests rely on a bunch of rather complex setup organised by Gradle, and contain lots of duplication and coincident strings, mostly because that was the only way to achieve what we wanted before we could orchestrate test clusters and fixtures directly from Java test suites. We're not actually running the YAML tests in ways that take advantage of their YAMLness (e.g. in mixed-version clusters, or from other client libraries). This commit replaces these tests with Java REST tests which enormously simplifies this area of code. 
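To make the new shape concrete, here is a hedged sketch of the pattern the
Java REST tests use (abbreviated from the AbstractRepositoryS3RestTestCase
added below; illustrative rather than a verbatim excerpt):

    // Each test registers its repository through the REST client instead of
    // relying on Gradle-provisioned YAML setup, and cleans up via Closeable:
    try (var ignored = new TestRepository(randomIdentifier(), getClientName(), getBucketName(), getBasePath()).register()) {
        // ... exercise snapshot/restore against the registered repository ...
    }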
Relates ES-9984 --- modules/repository-s3/build.gradle | 118 +----- .../s3/S3RepositoryThirdPartyTests.java | 7 +- .../s3/AbstractRepositoryS3RestTestCase.java | 383 ++++++++++++++++++ .../RepositoryS3BasicCredentialsRestIT.java | 65 +++ .../s3/RepositoryS3EcsCredentialsRestIT.java} | 44 +- .../RepositoryS3ImdsV1CredentialsRestIT.java | 73 ++++ ...ositoryS3MinioBasicCredentialsRestIT.java} | 44 +- .../RepositoryS3SessionCredentialsRestIT.java | 72 ++++ .../s3/RepositoryS3StsCredentialsRestIT.java} | 64 +-- .../repositories/s3/S3BlobStore.java | 2 +- .../repositories/s3/S3Service.java | 8 +- .../resources/aws-web-identity-token-file | 1 - .../s3/RepositoryS3ClientYamlTestSuiteIT.java | 57 +-- ...oryS3RegionalStsClientYamlTestSuiteIT.java | 12 +- .../20_repository_permanent_credentials.yml | 265 +----------- .../30_repository_temporary_credentials.yml | 278 ------------- .../40_repository_ec2_credentials.yml | 278 ------------- .../50_repository_ecs_credentials.yml | 278 ------------- .../60_repository_sts_credentials.yml | 279 ------------- .../fixtures/minio/MinioTestContainer.java | 12 +- .../main/java/fixture/s3/S3HttpFixture.java | 4 - .../local/AbstractLocalClusterFactory.java | 2 + .../minio/MinioSearchableSnapshotsIT.java | 7 +- .../MinioRepositoryAnalysisRestIT.java | 7 +- 24 files changed, 765 insertions(+), 1595 deletions(-) create mode 100644 modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/AbstractRepositoryS3RestTestCase.java create mode 100644 modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3BasicCredentialsRestIT.java rename modules/repository-s3/src/{yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java => javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java} (59%) create mode 100644 modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java rename modules/repository-s3/src/{yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioClientYamlTestSuiteIT.java => javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioBasicCredentialsRestIT.java} (50%) create mode 100644 modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3SessionCredentialsRestIT.java rename modules/repository-s3/src/{yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java => javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsCredentialsRestIT.java} (53%) delete mode 100644 modules/repository-s3/src/test/resources/aws-web-identity-token-file delete mode 100644 modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/30_repository_temporary_credentials.yml delete mode 100644 modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/40_repository_ec2_credentials.yml delete mode 100644 modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/50_repository_ecs_credentials.yml delete mode 100644 modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/60_repository_sts_credentials.yml diff --git a/modules/repository-s3/build.gradle b/modules/repository-s3/build.gradle index ed1777891f40d..2cfb5d23db4ff 100644 --- a/modules/repository-s3/build.gradle +++ b/modules/repository-s3/build.gradle @@ -43,19 +43,24 @@ dependencies { api 'javax.xml.bind:jaxb-api:2.2.2' testImplementation 
project(':test:fixtures:s3-fixture') - yamlRestTestImplementation project(":test:framework") - yamlRestTestImplementation project(':test:fixtures:s3-fixture') - yamlRestTestImplementation project(':test:fixtures:ec2-imds-fixture') - yamlRestTestImplementation project(':test:fixtures:aws-sts-fixture') - yamlRestTestImplementation project(':test:fixtures:minio-fixture') - internalClusterTestImplementation project(':test:fixtures:minio-fixture') - javaRestTestImplementation project(":test:framework") - javaRestTestImplementation project(':test:fixtures:s3-fixture') - javaRestTestImplementation project(':modules:repository-s3') + internalClusterTestImplementation project(':test:fixtures:minio-fixture') + internalClusterTestRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" + yamlRestTestImplementation project(':modules:repository-s3') + yamlRestTestImplementation project(':test:fixtures:s3-fixture') + yamlRestTestImplementation project(':test:fixtures:testcontainer-utils') + yamlRestTestImplementation project(':test:framework') yamlRestTestRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" - internalClusterTestRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" + + javaRestTestImplementation project(':modules:repository-s3') + javaRestTestImplementation project(':test:fixtures:aws-sts-fixture') + javaRestTestImplementation project(':test:fixtures:ec2-imds-fixture') + javaRestTestImplementation project(':test:fixtures:minio-fixture') + javaRestTestImplementation project(':test:fixtures:s3-fixture') + javaRestTestImplementation project(':test:fixtures:testcontainer-utils') + javaRestTestImplementation project(':test:framework') + javaRestTestRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}" } restResources { @@ -82,90 +87,25 @@ def testRepositoryCreds = tasks.register("testRepositoryCreds", Test) { testClassesDirs = sourceSets.test.output.classesDirs } -tasks.named('check').configure { - dependsOn(testRepositoryCreds) -} - tasks.named('test').configure { // this is tested explicitly in separate test tasks exclude '**/RepositoryCredentialsTests.class' } boolean useFixture = false - -// We test against two repositories, one which uses the usual two-part "permanent" credentials and -// the other which uses three-part "temporary" or "session" credentials. 
- String s3PermanentAccessKey = System.getenv("amazon_s3_access_key") String s3PermanentSecretKey = System.getenv("amazon_s3_secret_key") String s3PermanentBucket = System.getenv("amazon_s3_bucket") String s3PermanentBasePath = System.getenv("amazon_s3_base_path") -String s3TemporaryAccessKey = System.getenv("amazon_s3_access_key_temporary") -String s3TemporarySecretKey = System.getenv("amazon_s3_secret_key_temporary") -String s3TemporarySessionToken = System.getenv("amazon_s3_session_token_temporary") -String s3TemporaryBucket = System.getenv("amazon_s3_bucket_temporary") -String s3TemporaryBasePath = System.getenv("amazon_s3_base_path_temporary") - -String s3EC2Bucket = System.getenv("amazon_s3_bucket_ec2") -String s3EC2BasePath = System.getenv("amazon_s3_base_path_ec2") - -String s3ECSBucket = System.getenv("amazon_s3_bucket_ecs") -String s3ECSBasePath = System.getenv("amazon_s3_base_path_ecs") - -String s3STSBucket = System.getenv("amazon_s3_bucket_sts") -String s3STSBasePath = System.getenv("amazon_s3_base_path_sts") - -boolean s3DisableChunkedEncoding = buildParams.random.nextBoolean() - -// If all these variables are missing then we are testing against the internal fixture instead, which has the following -// credentials hard-coded in. +// If all these variables are missing then we are testing against the internal fixture instead, which has the following credentials hard-coded in. if (!s3PermanentAccessKey && !s3PermanentSecretKey && !s3PermanentBucket && !s3PermanentBasePath) { + useFixture = true s3PermanentAccessKey = 's3_test_access_key' s3PermanentSecretKey = 's3_test_secret_key' s3PermanentBucket = 'bucket' s3PermanentBasePath = 'base_path' - useFixture = true -} -if (!s3TemporaryAccessKey && !s3TemporarySecretKey && !s3TemporaryBucket && !s3TemporaryBasePath && !s3TemporarySessionToken) { - s3TemporaryAccessKey = 'session_token_access_key' - s3TemporarySecretKey = 'session_token_secret_key' - s3TemporaryBucket = 'session_token_bucket' - s3TemporaryBasePath = 'session_token_base_path' -} - -if (!s3EC2Bucket && !s3EC2BasePath && !s3ECSBucket && !s3ECSBasePath) { - s3EC2Bucket = 'ec2_bucket' - s3EC2BasePath = 'ec2_base_path' - s3ECSBucket = 'ecs_bucket' - s3ECSBasePath = 'ecs_base_path' -} - -if (!s3STSBucket && !s3STSBasePath) { - s3STSBucket = 'sts_bucket' - s3STSBasePath = 'sts_base_path' -} - -tasks.named("processYamlRestTestResources").configure { - from("src/test/resources") { - include "aws-web-identity-token-file" - } - Map expansions = [ - 'permanent_bucket' : s3PermanentBucket, - 'permanent_base_path' : s3PermanentBasePath + "_integration_tests", - 'temporary_bucket' : s3TemporaryBucket, - 'temporary_base_path' : s3TemporaryBasePath + "_integration_tests", - 'ec2_bucket' : s3EC2Bucket, - 'ec2_base_path' : s3EC2BasePath, - 'ecs_bucket' : s3ECSBucket, - 'ecs_base_path' : s3ECSBasePath, - 'sts_bucket' : s3STSBucket, - 'sts_base_path' : s3STSBasePath, - 'disable_chunked_encoding': s3DisableChunkedEncoding - ] - inputs.properties(expansions) - filter("tokens" : expansions.collectEntries {k, v -> [k, v.toString()]} /* must be a map of strings */, ReplaceTokens.class) } tasks.named("internalClusterTest").configure { @@ -175,22 +115,7 @@ tasks.named("internalClusterTest").configure { systemProperty 'es.insecure_network_trace_enabled', 'true' } -tasks.named("yamlRestTest").configure { - systemProperty("s3PermanentAccessKey", s3PermanentAccessKey) - systemProperty("s3PermanentSecretKey", s3PermanentSecretKey) - systemProperty("s3TemporaryAccessKey", s3TemporaryAccessKey) - 
systemProperty("s3TemporarySecretKey", s3TemporarySecretKey) - systemProperty("s3EC2AccessKey", s3PermanentAccessKey) - - // ideally we could resolve an env path in cluster config as resource similar to configuring a config file - // not sure how common this is, but it would be nice to support - File awsWebIdentityTokenExternalLocation = file('src/test/resources/aws-web-identity-token-file') - // The web identity token can be read only from the plugin config directory because of security restrictions - // Ideally we would create a symlink, but extraConfigFile doesn't support it - nonInputProperties.systemProperty("awsWebIdentityTokenExternalLocation", awsWebIdentityTokenExternalLocation.getAbsolutePath()) -} - -// 3rd Party Tests +// 3rd Party Tests, i.e. testing against a real S3 repository tasks.register("s3ThirdPartyTest", Test) { SourceSetContainer sourceSets = project.getExtensions().getByType(SourceSetContainer.class); SourceSet internalTestSourceSet = sourceSets.getByName(InternalClusterTestPlugin.SOURCE_SET_NAME) @@ -198,13 +123,13 @@ tasks.register("s3ThirdPartyTest", Test) { setClasspath(internalTestSourceSet.getRuntimeClasspath()) include '**/S3RepositoryThirdPartyTests.class' systemProperty("tests.use.fixture", Boolean.toString(useFixture)) - - // test container accesses ~/.testcontainers.properties read - systemProperty "tests.security.manager", "false" systemProperty 'test.s3.account', s3PermanentAccessKey systemProperty 'test.s3.key', s3PermanentSecretKey systemProperty 'test.s3.bucket', s3PermanentBucket nonInputProperties.systemProperty 'test.s3.base', s3PermanentBasePath + "_third_party_tests_" + buildParams.testSeed + + // test container accesses ~/.testcontainers.properties read + systemProperty "tests.security.manager", "false" } tasks.named("thirdPartyAudit").configure { @@ -241,5 +166,6 @@ tasks.named("thirdPartyAudit").configure { tasks.named("check").configure { dependsOn(tasks.withType(Test)) + dependsOn(testRepositoryCreds) } diff --git a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java index 3552cb8d9389a..4cebedebfba07 100644 --- a/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java +++ b/modules/repository-s3/src/internalClusterTest/java/org/elasticsearch/repositories/s3/S3RepositoryThirdPartyTests.java @@ -61,7 +61,12 @@ public class S3RepositoryThirdPartyTests extends AbstractThirdPartyRepositoryTes static final boolean USE_FIXTURE = Booleans.parseBoolean(System.getProperty("tests.use.fixture", "true")); @ClassRule - public static MinioTestContainer minio = new MinioTestContainer(USE_FIXTURE); + public static MinioTestContainer minio = new MinioTestContainer( + USE_FIXTURE, + System.getProperty("test.s3.account"), + System.getProperty("test.s3.key"), + System.getProperty("test.s3.bucket") + ); @Override protected Collection> getPlugins() { diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/AbstractRepositoryS3RestTestCase.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/AbstractRepositoryS3RestTestCase.java new file mode 100644 index 0000000000000..2199a64521759 --- /dev/null +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/AbstractRepositoryS3RestTestCase.java @@ -0,0 +1,383 @@ +/* + * Copyright 
Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories.s3; + +import io.netty.handler.codec.http.HttpMethod; + +import org.elasticsearch.client.Request; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.cluster.routing.Murmur3HashFunction; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.repositories.blobstore.BlobStoreRepository; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.rest.ESRestTestCase; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Set; +import java.util.function.UnaryOperator; +import java.util.stream.Collectors; + +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +public abstract class AbstractRepositoryS3RestTestCase extends ESRestTestCase { + + public record TestRepository(String repositoryName, String clientName, String bucketName, String basePath) { + + public Closeable register() throws IOException { + return register(UnaryOperator.identity()); + } + + public Closeable register(UnaryOperator settingsUnaryOperator) throws IOException { + assertOK(client().performRequest(getRegisterRequest(settingsUnaryOperator))); + return () -> assertOK(client().performRequest(new Request("DELETE", "/_snapshot/" + repositoryName()))); + } + + private Request getRegisterRequest(UnaryOperator settingsUnaryOperator) throws IOException { + return newXContentRequest( + HttpMethod.PUT, + "/_snapshot/" + repositoryName(), + (b, p) -> b.field("type", S3Repository.TYPE) + .startObject("settings") + .value( + settingsUnaryOperator.apply( + Settings.builder() + .put("bucket", bucketName()) + .put("base_path", basePath()) + .put("client", clientName()) + .put("canned_acl", "private") + .put("storage_class", "standard") + .put("disable_chunked_encoding", randomBoolean()) + .build() + ) + ) + .endObject() + ); + } + } + + protected abstract String getBucketName(); + + protected abstract String getBasePath(); + + protected abstract String getClientName(); + + protected static String getIdentifierPrefix(String testSuiteName) { + return testSuiteName + "-" + Integer.toString(Murmur3HashFunction.hash(testSuiteName + System.getProperty("tests.seed")), 16) + "-"; + } + + private TestRepository newTestRepository() { + return new TestRepository(randomIdentifier(), getClientName(), getBucketName(), getBasePath()); + } + + private static UnaryOperator readonlyOperator(Boolean readonly) { + return readonly == null + ? 
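
The getIdentifierPrefix helper above is what keeps concurrent CI runs from colliding in a shared bucket: every suite derives a stable prefix from its own name plus the tests.seed system property. A self-contained sketch of the same idea, with CRC32 standing in for the Murmur3HashFunction used in the patch:

import java.nio.charset.StandardCharsets;
import java.util.zip.CRC32;

final class TestPrefixes {
    // Derive a deterministic, seed-dependent prefix so that two runs of the
    // same suite against one bucket write to disjoint key spaces.
    static String identifierPrefix(String suiteName, String seed) {
        CRC32 crc = new CRC32();
        crc.update((suiteName + seed).getBytes(StandardCharsets.UTF_8));
        return suiteName + "-" + Long.toHexString(crc.getValue()) + "-";
    }
}
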
UnaryOperator.identity() + : s -> Settings.builder().put(s).put(BlobStoreRepository.READONLY_SETTING_KEY, readonly).build(); + } + + public void testGetRepository() throws IOException { + testGetRepository(null); + } + + public void testGetRepositoryReadonlyTrue() throws IOException { + testGetRepository(Boolean.TRUE); + } + + public void testGetRepositoryReadonlyFalse() throws IOException { + testGetRepository(Boolean.FALSE); + } + + private void testGetRepository(Boolean readonly) throws IOException { + final var repository = newTestRepository(); + try (var ignored = repository.register(readonlyOperator(readonly))) { + final var repositoryName = repository.repositoryName(); + final var responseObjectPath = assertOKAndCreateObjectPath( + client().performRequest(new Request("GET", "/_snapshot/" + repositoryName)) + ); + + assertEquals("s3", responseObjectPath.evaluate(repositoryName + ".type")); + assertNotNull(responseObjectPath.evaluate(repositoryName + ".settings")); + assertEquals(repository.bucketName(), responseObjectPath.evaluate(repositoryName + ".settings.bucket")); + assertEquals(repository.clientName(), responseObjectPath.evaluate(repositoryName + ".settings.client")); + assertEquals(repository.basePath(), responseObjectPath.evaluate(repositoryName + ".settings.base_path")); + assertEquals("private", responseObjectPath.evaluate(repositoryName + ".settings.canned_acl")); + assertEquals("standard", responseObjectPath.evaluate(repositoryName + ".settings.storage_class")); + assertNull(responseObjectPath.evaluate(repositoryName + ".settings.access_key")); + assertNull(responseObjectPath.evaluate(repositoryName + ".settings.secret_key")); + assertNull(responseObjectPath.evaluate(repositoryName + ".settings.session_token")); + + if (readonly == null) { + assertNull(responseObjectPath.evaluate(repositoryName + ".settings." + BlobStoreRepository.READONLY_SETTING_KEY)); + } else { + assertEquals( + Boolean.toString(readonly), + responseObjectPath.evaluate(repositoryName + ".settings." 
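
Note the lifecycle idiom running through these tests: register() performs the PUT and hands back a Closeable whose close() deletes the repository, so try-with-resources guarantees cleanup on every exit path, including assertion failures. A generic sketch of the pattern (Scoped and its names are hypothetical):

import java.io.Closeable;

final class Scoped {
    // Pair a setup action with its teardown; the caller's try-with-resources
    // block runs the teardown even when the body throws.
    static Closeable acquire(Runnable setup, Runnable teardown) {
        setup.run();
        return teardown::run;
    }

    public static void main(String[] args) throws Exception {
        try (Closeable ignored = acquire(
            () -> System.out.println("PUT /_snapshot/repo"),
            () -> System.out.println("DELETE /_snapshot/repo"))) {
            System.out.println("... assertions against the repository ...");
        }
    }
}
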
+ BlobStoreRepository.READONLY_SETTING_KEY) + ); + } + } + } + + public void testNonexistentBucket() throws Exception { + testNonexistentBucket(null); + } + + public void testNonexistentBucketReadonlyTrue() throws Exception { + testNonexistentBucket(Boolean.TRUE); + } + + public void testNonexistentBucketReadonlyFalse() throws Exception { + testNonexistentBucket(Boolean.FALSE); + } + + private void testNonexistentBucket(Boolean readonly) throws Exception { + final var repository = new TestRepository( + randomIdentifier(), + getClientName(), + randomValueOtherThan(getBucketName(), ESTestCase::randomIdentifier), + getBasePath() + ); + final var registerRequest = repository.getRegisterRequest(readonlyOperator(readonly)); + + final var responseException = expectThrows(ResponseException.class, () -> client().performRequest(registerRequest)); + assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), responseException.getResponse().getStatusLine().getStatusCode()); + assertThat( + responseException.getMessage(), + allOf(containsString("repository_verification_exception"), containsString("is not accessible on master node")) + ); + } + + public void testNonexistentClient() throws Exception { + testNonexistentClient(null); + } + + public void testNonexistentClientReadonlyTrue() throws Exception { + testNonexistentClient(Boolean.TRUE); + } + + public void testNonexistentClientReadonlyFalse() throws Exception { + testNonexistentClient(Boolean.FALSE); + } + + private void testNonexistentClient(Boolean readonly) throws Exception { + final var repository = new TestRepository( + randomIdentifier(), + randomValueOtherThanMany(c -> c.equals(getClientName()) || c.equals("default"), ESTestCase::randomIdentifier), + getBucketName(), + getBasePath() + ); + final var registerRequest = repository.getRegisterRequest(readonlyOperator(readonly)); + + final var responseException = expectThrows(ResponseException.class, () -> client().performRequest(registerRequest)); + assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), responseException.getResponse().getStatusLine().getStatusCode()); + assertThat( + responseException.getMessage(), + allOf( + containsString("repository_verification_exception"), + containsString("is not accessible on master node"), + containsString("illegal_argument_exception"), + containsString("Unknown s3 client name") + ) + ); + } + + public void testNonexistentSnapshot() throws Exception { + testNonexistentSnapshot(null); + } + + public void testNonexistentSnapshotReadonlyTrue() throws Exception { + testNonexistentSnapshot(Boolean.TRUE); + } + + public void testNonexistentSnapshotReadonlyFalse() throws Exception { + testNonexistentSnapshot(Boolean.FALSE); + } + + private void testNonexistentSnapshot(Boolean readonly) throws Exception { + final var repository = newTestRepository(); + try (var ignored = repository.register(readonlyOperator(readonly))) { + final var repositoryName = repository.repositoryName(); + + final var getSnapshotRequest = new Request("GET", "/_snapshot/" + repositoryName + "/" + randomIdentifier()); + final var getSnapshotException = expectThrows(ResponseException.class, () -> client().performRequest(getSnapshotRequest)); + assertEquals(RestStatus.NOT_FOUND.getStatus(), getSnapshotException.getResponse().getStatusLine().getStatusCode()); + assertThat(getSnapshotException.getMessage(), containsString("snapshot_missing_exception")); + + final var restoreRequest = new Request("POST", "/_snapshot/" + repositoryName + "/" + randomIdentifier() + "/_restore"); + if 
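
The failure-path tests above lean on expectThrows to capture the ResponseException and then assert on both the HTTP status and the exception markers in the response body. For readers unfamiliar with the helper, a simplified stand-in (the real one lives in org.elasticsearch.test.ESTestCase; this version is illustrative only):

final class Expect {
    interface ThrowingRunnable {
        void run() throws Throwable;
    }

    // Run the body and hand back the expected throwable, failing the test if
    // nothing is thrown or if something of the wrong type is thrown.
    static <T extends Throwable> T expectThrows(Class<T> type, ThrowingRunnable body) {
        try {
            body.run();
        } catch (Throwable t) {
            if (type.isInstance(t)) {
                return type.cast(t);
            }
            throw new AssertionError("unexpected exception type: " + t, t);
        }
        throw new AssertionError("expected " + type.getName() + " but nothing was thrown");
    }
}
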
(randomBoolean()) { + restoreRequest.addParameter("wait_for_completion", Boolean.toString(randomBoolean())); + } + final var restoreException = expectThrows(ResponseException.class, () -> client().performRequest(restoreRequest)); + assertEquals(RestStatus.INTERNAL_SERVER_ERROR.getStatus(), restoreException.getResponse().getStatusLine().getStatusCode()); + assertThat(restoreException.getMessage(), containsString("snapshot_restore_exception")); + + if (readonly != Boolean.TRUE) { + final var deleteRequest = new Request("DELETE", "/_snapshot/" + repositoryName + "/" + randomIdentifier()); + final var deleteException = expectThrows(ResponseException.class, () -> client().performRequest(deleteRequest)); + assertEquals(RestStatus.NOT_FOUND.getStatus(), deleteException.getResponse().getStatusLine().getStatusCode()); + assertThat(deleteException.getMessage(), containsString("snapshot_missing_exception")); + } + } + } + + public void testUsageStats() throws Exception { + testUsageStats(null); + } + + public void testUsageStatsReadonlyTrue() throws Exception { + testUsageStats(Boolean.TRUE); + } + + public void testUsageStatsReadonlyFalse() throws Exception { + testUsageStats(Boolean.FALSE); + } + + private void testUsageStats(Boolean readonly) throws Exception { + final var repository = newTestRepository(); + try (var ignored = repository.register(readonlyOperator(readonly))) { + final var responseObjectPath = assertOKAndCreateObjectPath(client().performRequest(new Request("GET", "/_cluster/stats"))); + assertThat(responseObjectPath.evaluate("repositories.s3.count"), equalTo(1)); + + if (readonly == Boolean.TRUE) { + assertThat(responseObjectPath.evaluate("repositories.s3.read_only"), equalTo(1)); + assertNull(responseObjectPath.evaluate("repositories.s3.read_write")); + } else { + assertNull(responseObjectPath.evaluate("repositories.s3.read_only")); + assertThat(responseObjectPath.evaluate("repositories.s3.read_write"), equalTo(1)); + } + } + } + + public void testSnapshotAndRestore() throws Exception { + final var repository = newTestRepository(); + try (var ignored = repository.register()) { + final var repositoryName = repository.repositoryName(); + final var indexName = randomIdentifier(); + final var snapshotsToDelete = new ArrayList(2); + + try { + indexDocuments(indexName, """ + {"index":{"_id":"1"}} + {"snapshot":"one"} + {"index":{"_id":"2"}} + {"snapshot":"one"} + {"index":{"_id":"3"}} + {"snapshot":"one"} + """, 3); + + // create the first snapshot + final var snapshot1Name = randomIdentifier(); + createSnapshot(repositoryName, snapshotsToDelete, snapshot1Name); + + // check the first snapshot's status + { + final var snapshotStatusResponse = assertOKAndCreateObjectPath( + client().performRequest(new Request("GET", "/_snapshot/" + repositoryName + "/" + snapshot1Name + "/_status")) + ); + assertEquals(snapshot1Name, snapshotStatusResponse.evaluate("snapshots.0.snapshot")); + assertEquals("SUCCESS", snapshotStatusResponse.evaluate("snapshots.0.state")); + } + + // add more documents to the index + indexDocuments(indexName, """ + {"index":{"_id":"4"}} + {"snapshot":"one"} + {"index":{"_id":"5"}} + {"snapshot":"one"} + {"index":{"_id":"6"}} + {"snapshot":"one"} + {"index":{"_id":"7"}} + {"snapshot":"one"} + """, 7); + + // create the second snapshot + final var snapshot2Name = randomValueOtherThan(snapshot1Name, ESTestCase::randomIdentifier); + createSnapshot(repositoryName, snapshotsToDelete, snapshot2Name); + + // list the snapshots + { + final var listSnapshotsResponse = 
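
The usage-stats assertions above encode a simple invariant: each registered repository counts once under its type, split by its readonly flag into read_only and read_write, and an empty bucket is omitted entirely rather than reported as zero. A sketch of that accounting (RepoUsageStats is hypothetical):

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

final class RepoUsageStats {
    record Repo(String type, boolean readonly) {}

    // Count repositories of the given type, grouped into the read_only and
    // read_write buckets; a bucket with no repositories has no entry at all.
    static Map<String, Long> countByMode(List<Repo> repos, String type) {
        return repos.stream()
            .filter(r -> r.type().equals(type))
            .collect(Collectors.groupingBy(
                r -> r.readonly() ? "read_only" : "read_write",
                Collectors.counting()
            ));
    }
}
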
assertOKAndCreateObjectPath( + client().performRequest( + new Request("GET", "/_snapshot/" + repositoryName + "/" + snapshot1Name + "," + snapshot2Name) + ) + ); + assertEquals(2, listSnapshotsResponse.evaluateArraySize("snapshots")); + assertEquals( + Set.of(snapshot1Name, snapshot2Name), + Set.of( + listSnapshotsResponse.evaluate("snapshots.0.snapshot"), + listSnapshotsResponse.evaluate("snapshots.1.snapshot") + ) + ); + assertEquals("SUCCESS", listSnapshotsResponse.evaluate("snapshots.0.state")); + assertEquals("SUCCESS", listSnapshotsResponse.evaluate("snapshots.1.state")); + } + + // delete and restore the index from snapshot 2 + deleteAndRestoreIndex(indexName, repositoryName, snapshot2Name, 7); + + // delete and restore the index from snapshot 1 + deleteAndRestoreIndex(indexName, repositoryName, snapshot1Name, 3); + } finally { + if (snapshotsToDelete.isEmpty() == false) { + assertOK( + client().performRequest( + new Request( + "DELETE", + "/_snapshot/" + repositoryName + "/" + snapshotsToDelete.stream().collect(Collectors.joining(",")) + ) + ) + ); + } + } + } + } + + private static void deleteAndRestoreIndex(String indexName, String repositoryName, String snapshot2Name, int expectedDocCount) + throws IOException { + assertOK(client().performRequest(new Request("DELETE", "/" + indexName))); + final var restoreRequest = new Request("POST", "/_snapshot/" + repositoryName + "/" + snapshot2Name + "/_restore"); + restoreRequest.addParameter("wait_for_completion", "true"); + assertOK(client().performRequest(restoreRequest)); + assertIndexDocCount(indexName, expectedDocCount); + } + + private static void indexDocuments(String indexName, String body, int expectedDocCount) throws IOException { + // create and populate an index + final var indexDocsRequest = new Request("POST", "/" + indexName + "/_bulk"); + indexDocsRequest.addParameter("refresh", "true"); + indexDocsRequest.setJsonEntity(body); + assertFalse(assertOKAndCreateObjectPath(client().performRequest(indexDocsRequest)).evaluate("errors")); + + // check the index contents + assertIndexDocCount(indexName, expectedDocCount); + } + + private static void createSnapshot(String repositoryName, ArrayList snapshotsToDelete, String snapshotName) throws IOException { + final var createSnapshotRequest = new Request("POST", "/_snapshot/" + repositoryName + "/" + snapshotName); + createSnapshotRequest.addParameter("wait_for_completion", "true"); + final var createSnapshotResponse = assertOKAndCreateObjectPath(client().performRequest(createSnapshotRequest)); + snapshotsToDelete.add(snapshotName); + assertEquals(snapshotName, createSnapshotResponse.evaluate("snapshot.snapshot")); + assertEquals("SUCCESS", createSnapshotResponse.evaluate("snapshot.state")); + assertThat(createSnapshotResponse.evaluate("snapshot.shards.failed"), equalTo(0)); + } + + private static void assertIndexDocCount(String indexName, int expectedCount) throws IOException { + assertThat( + assertOKAndCreateObjectPath(client().performRequest(new Request("GET", "/" + indexName + "/_count"))).evaluate("count"), + equalTo(expectedCount) + ); + } +} diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3BasicCredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3BasicCredentialsRestIT.java new file mode 100644 index 0000000000000..45844703683bb --- /dev/null +++ 
b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3BasicCredentialsRestIT.java @@ -0,0 +1,65 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories.s3; + +import fixture.s3.S3HttpFixture; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3BasicCredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3BasicCredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String ACCESS_KEY = PREFIX + "access-key"; + private static final String SECRET_KEY = PREFIX + "secret-key"; + private static final String CLIENT = "basic_credentials_client"; + + private static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, S3HttpFixture.fixedAccessKey(ACCESS_KEY)); + + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .module("repository-s3") + .keystore("s3.client." + CLIENT + ".access_key", ACCESS_KEY) + .keystore("s3.client." + CLIENT + ".secret_key", SECRET_KEY) + .setting("s3.client." 
+ CLIENT + ".endpoint", s3Fixture::getAddress) + .build(); + + @ClassRule + public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Override + protected String getBucketName() { + return BUCKET; + } + + @Override + protected String getBasePath() { + return BASE_PATH; + } + + @Override + protected String getClientName() { + return CLIENT; + } +} diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java similarity index 59% rename from modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java rename to modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java index bbd003f506ead..267ba6e6b3a13 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java @@ -13,18 +13,25 @@ import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; import org.elasticsearch.test.cluster.ElasticsearchCluster; -import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; import java.util.Set; -public class RepositoryS3EcsClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3EcsCredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3EcsCredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String CLIENT = "ecs_credentials_client"; private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); @@ -33,33 +40,34 @@ public class RepositoryS3EcsClientYamlTestSuiteIT extends AbstractRepositoryS3Cl Set.of("/ecs_credentials_endpoint") ); - private static final S3HttpFixture s3Fixture = new S3HttpFixture( - true, - "ecs_bucket", - "ecs_base_path", - dynamicS3Credentials::isAuthorized - ); + private static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, dynamicS3Credentials::isAuthorized); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .setting("s3.client.integration_test_ecs.endpoint", s3Fixture::getAddress) + .setting("s3.client." 
+ CLIENT + ".endpoint", s3Fixture::getAddress) .environment("AWS_CONTAINER_CREDENTIALS_FULL_URI", () -> ec2ImdsHttpFixture.getAddress() + "/ecs_credentials_endpoint") .build(); @ClassRule public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(ec2ImdsHttpFixture).around(cluster); - @ParametersFactory - public static Iterable parameters() throws Exception { - return createParameters(new String[] { "repository_s3/50_repository_ecs_credentials" }); + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); } - public RepositoryS3EcsClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { - super(testCandidate); + @Override + protected String getBucketName() { + return BUCKET; } @Override - protected String getTestRestCluster() { - return cluster.getHttpAddresses(); + protected String getBasePath() { + return BASE_PATH; + } + + @Override + protected String getClientName() { + return CLIENT; } } diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java new file mode 100644 index 0000000000000..de9c9b6ae0695 --- /dev/null +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.repositories.s3; + +import fixture.aws.imds.Ec2ImdsHttpFixture; +import fixture.s3.DynamicS3Credentials; +import fixture.s3.S3HttpFixture; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.util.Set; + +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3ImdsV1CredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3ImdsV1CredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String CLIENT = "imdsv1_credentials_client"; + + private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); + + private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( + dynamicS3Credentials::addValidCredentials, + Set.of() + ); + + private static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, dynamicS3Credentials::isAuthorized); + + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .module("repository-s3") + .setting("s3.client." + CLIENT + ".endpoint", s3Fixture::getAddress) + .systemProperty("com.amazonaws.sdk.ec2MetadataServiceEndpointOverride", ec2ImdsHttpFixture::getAddress) + .build(); + + @ClassRule + public static TestRule ruleChain = RuleChain.outerRule(ec2ImdsHttpFixture).around(s3Fixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Override + protected String getBucketName() { + return BUCKET; + } + + @Override + protected String getBasePath() { + return BASE_PATH; + } + + @Override + protected String getClientName() { + return CLIENT; + } +} diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioClientYamlTestSuiteIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioBasicCredentialsRestIT.java similarity index 50% rename from modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioClientYamlTestSuiteIT.java rename to modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioBasicCredentialsRestIT.java index d2b1413295ceb..93915e8491d5b 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3MinioBasicCredentialsRestIT.java @@ -9,44 +9,56 @@ package org.elasticsearch.repositories.s3; -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; import org.elasticsearch.test.cluster.ElasticsearchCluster; import 
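
The IMDSv1 suite above couples its two fixtures through DynamicS3Credentials: the IMDS fixture mints session credentials and registers them (addValidCredentials), and the S3 fixture accepts a request only if it was signed with a registered pair (isAuthorized). A simplified stand-in for that handshake, not the fixture's actual code:

import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

final class MintedCredentials {
    private final Set<String> validKeyPairs = ConcurrentHashMap.newKeySet();

    // Called by the metadata-service fixture whenever it hands out a new pair.
    void addValidCredentials(String accessKey, String sessionToken) {
        validKeyPairs.add(accessKey + ":" + sessionToken);
    }

    // Called by the S3 fixture for every incoming request.
    boolean isAuthorized(String accessKey, String sessionToken) {
        return validKeyPairs.contains(accessKey + ":" + sessionToken);
    }
}
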
org.elasticsearch.test.fixtures.minio.MinioTestContainer; import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; -import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; @ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) -public class RepositoryS3MinioClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3MinioBasicCredentialsRestIT extends AbstractRepositoryS3RestTestCase { - public static MinioTestContainer minio = new MinioTestContainer(); + private static final String PREFIX = getIdentifierPrefix("RepositoryS3MinioBasicCredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String ACCESS_KEY = PREFIX + "access-key"; + private static final String SECRET_KEY = PREFIX + "secret-key"; + private static final String CLIENT = "minio_client"; + + private static final MinioTestContainer minioFixture = new MinioTestContainer(true, ACCESS_KEY, SECRET_KEY, BUCKET); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .keystore("s3.client.integration_test_permanent.access_key", System.getProperty("s3PermanentAccessKey")) - .keystore("s3.client.integration_test_permanent.secret_key", System.getProperty("s3PermanentSecretKey")) - .setting("s3.client.integration_test_permanent.endpoint", () -> minio.getAddress()) + .keystore("s3.client." + CLIENT + ".access_key", ACCESS_KEY) + .keystore("s3.client." + CLIENT + ".secret_key", SECRET_KEY) + .setting("s3.client." + CLIENT + ".endpoint", minioFixture::getAddress) .build(); @ClassRule - public static TestRule ruleChain = RuleChain.outerRule(minio).around(cluster); + public static TestRule ruleChain = RuleChain.outerRule(minioFixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } - @ParametersFactory - public static Iterable parameters() throws Exception { - return createParameters(new String[] { "repository_s3/10_basic", "repository_s3/20_repository_permanent_credentials" }); + @Override + protected String getBucketName() { + return BUCKET; } - public RepositoryS3MinioClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { - super(testCandidate); + @Override + protected String getBasePath() { + return BASE_PATH; } @Override - protected String getTestRestCluster() { - return cluster.getHttpAddresses(); + protected String getClientName() { + return CLIENT; } } diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3SessionCredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3SessionCredentialsRestIT.java new file mode 100644 index 0000000000000..84a327ee131ae --- /dev/null +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3SessionCredentialsRestIT.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories.s3; + +import fixture.s3.S3HttpFixture; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3SessionCredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3SessionCredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String ACCESS_KEY = PREFIX + "access-key"; + private static final String SECRET_KEY = PREFIX + "secret-key"; + private static final String SESSION_TOKEN = PREFIX + "session-token"; + private static final String CLIENT = "session_credentials_client"; + + private static final S3HttpFixture s3Fixture = new S3HttpFixture( + true, + BUCKET, + BASE_PATH, + S3HttpFixture.fixedAccessKeyAndToken(ACCESS_KEY, SESSION_TOKEN) + ); + + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .module("repository-s3") + .keystore("s3.client." + CLIENT + ".access_key", ACCESS_KEY) + .keystore("s3.client." + CLIENT + ".secret_key", SECRET_KEY) + .keystore("s3.client." + CLIENT + ".session_token", SESSION_TOKEN) + .setting("s3.client." 
+ CLIENT + ".endpoint", s3Fixture::getAddress) + .build(); + + @ClassRule + public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Override + protected String getBucketName() { + return BUCKET; + } + + @Override + protected String getBasePath() { + return BASE_PATH; + } + + @Override + protected String getClientName() { + return CLIENT; + } +} diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsCredentialsRestIT.java similarity index 53% rename from modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java rename to modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsCredentialsRestIT.java index 7c4d719485113..de80e4179ef5e 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3StsCredentialsRestIT.java @@ -13,43 +13,53 @@ import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; -import com.carrotsearch.randomizedtesting.annotations.Name; -import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.cluster.util.resource.Resource; -import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; -public class RepositoryS3StsClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3StsCredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3StsCredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String CLIENT = "sts_credentials_client"; private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); - private static final S3HttpFixture s3HttpFixture = new S3HttpFixture( - true, - "sts_bucket", - "sts_base_path", - dynamicS3Credentials::isAuthorized - ); + private static final S3HttpFixture s3HttpFixture = new S3HttpFixture(true, BUCKET, BASE_PATH, dynamicS3Credentials::isAuthorized); - private static final AwsStsHttpFixture stsHttpFixture = new AwsStsHttpFixture(dynamicS3Credentials::addValidCredentials, """ + private static final String WEB_IDENTITY_TOKEN_FILE_CONTENTS = """ Atza|IQEBLjAsAhRFiXuWpUXuRvQ9PZL3GMFcYevydwIUFAHZwXZXXXXXXXXJnrulxKDHwy87oGKPznh0D6bEQZTSCzyoCtL_8S07pLpr0zMbn6w1lfVZKNTBdDans\ FBmtGnIsIapjI6xKR02Yc_2bQ8LZbUXSGm6Ry6_BG7PrtLZtj_dfCTj92xNGed-CrKqjG7nPBjNIL016GGvuS5gSvPRUxWES3VYfm1wl7WTI7jn-Pcb6M-buCgHhFO\ - 
zTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ"""); + zTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ"""; + + private static final AwsStsHttpFixture stsHttpFixture = new AwsStsHttpFixture( + dynamicS3Credentials::addValidCredentials, + WEB_IDENTITY_TOKEN_FILE_CONTENTS + ); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .setting("s3.client.integration_test_sts.endpoint", s3HttpFixture::getAddress) + .setting("s3.client." + CLIENT + ".endpoint", s3HttpFixture::getAddress) .systemProperty( "com.amazonaws.sdk.stsMetadataServiceEndpointOverride", () -> stsHttpFixture.getAddress() + "/assume-role-with-web-identity" ) - .configFile("repository-s3/aws-web-identity-token-file", Resource.fromClasspath("aws-web-identity-token-file")) - .environment("AWS_WEB_IDENTITY_TOKEN_FILE", System.getProperty("awsWebIdentityTokenExternalLocation")) - // // The AWS STS SDK requires the role and session names to be set. We can verify that they are sent to S3S in the - // // S3HttpFixtureWithSTS fixture + .configFile( + S3Service.CustomWebIdentityTokenCredentialsProvider.WEB_IDENTITY_TOKEN_FILE_LOCATION, + Resource.fromString(WEB_IDENTITY_TOKEN_FILE_CONTENTS) + ) + .environment("AWS_WEB_IDENTITY_TOKEN_FILE", S3Service.CustomWebIdentityTokenCredentialsProvider.WEB_IDENTITY_TOKEN_FILE_LOCATION) + // The AWS STS SDK requires the role and session names to be set. We can verify that they are sent to S3S in the + // S3HttpFixtureWithSTS fixture .environment("AWS_ROLE_ARN", "arn:aws:iam::123456789012:role/FederatedWebIdentityRole") .environment("AWS_ROLE_SESSION_NAME", "sts-fixture-test") .build(); @@ -57,17 +67,23 @@ public class RepositoryS3StsClientYamlTestSuiteIT extends AbstractRepositoryS3Cl @ClassRule public static TestRule ruleChain = RuleChain.outerRule(s3HttpFixture).around(stsHttpFixture).around(cluster); - @ParametersFactory - public static Iterable parameters() throws Exception { - return createParameters(new String[] { "repository_s3/60_repository_sts_credentials" }); + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Override + protected String getBucketName() { + return BUCKET; } - public RepositoryS3StsClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { - super(testCandidate); + @Override + protected String getBasePath() { + return BASE_PATH; } @Override - protected String getTestRestCluster() { - return cluster.getHttpAddresses(); + protected String getClientName() { + return CLIENT; } } diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobStore.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobStore.java index 5fb3254df819b..d08bd40275fec 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobStore.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3BlobStore.java @@ -450,7 +450,7 @@ private static DeleteObjectsRequest bulkDelete(OperationPurpose purpose, S3BlobS @Override public void close() throws IOException { - this.service.close(); + service.onBlobStoreClose(); } @Override diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Service.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Service.java index 1ebd6f920d518..1a66f5782fc03 100644 --- 
a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Service.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Service.java @@ -303,6 +303,10 @@ private synchronized void releaseCachedClients() { IdleConnectionReaper.shutdown(); } + public void onBlobStoreClose() { + releaseCachedClients(); + } + @Override public void close() throws IOException { releaseCachedClients(); @@ -345,6 +349,8 @@ static class CustomWebIdentityTokenCredentialsProvider implements AWSCredentials private static final String STS_HOSTNAME = "https://sts.amazonaws.com"; + static final String WEB_IDENTITY_TOKEN_FILE_LOCATION = "repository-s3/aws-web-identity-token-file"; + private STSAssumeRoleWithWebIdentitySessionCredentialsProvider credentialsProvider; private AWSSecurityTokenService stsClient; private String stsRegion; @@ -363,7 +369,7 @@ static class CustomWebIdentityTokenCredentialsProvider implements AWSCredentials } // Make sure that a readable symlink to the token file exists in the plugin config directory // AWS_WEB_IDENTITY_TOKEN_FILE exists but we only use Web Identity Tokens if a corresponding symlink exists and is readable - Path webIdentityTokenFileSymlink = environment.configFile().resolve("repository-s3/aws-web-identity-token-file"); + Path webIdentityTokenFileSymlink = environment.configFile().resolve(WEB_IDENTITY_TOKEN_FILE_LOCATION); if (Files.exists(webIdentityTokenFileSymlink) == false) { LOGGER.warn( "Cannot use AWS Web Identity Tokens: AWS_WEB_IDENTITY_TOKEN_FILE is defined but no corresponding symlink exists " diff --git a/modules/repository-s3/src/test/resources/aws-web-identity-token-file b/modules/repository-s3/src/test/resources/aws-web-identity-token-file deleted file mode 100644 index 15cb29eac2ff6..0000000000000 --- a/modules/repository-s3/src/test/resources/aws-web-identity-token-file +++ /dev/null @@ -1 +0,0 @@ -Atza|IQEBLjAsAhRFiXuWpUXuRvQ9PZL3GMFcYevydwIUFAHZwXZXXXXXXXXJnrulxKDHwy87oGKPznh0D6bEQZTSCzyoCtL_8S07pLpr0zMbn6w1lfVZKNTBdDansFBmtGnIsIapjI6xKR02Yc_2bQ8LZbUXSGm6Ry6_BG7PrtLZtj_dfCTj92xNGed-CrKqjG7nPBjNIL016GGvuS5gSvPRUxWES3VYfm1wl7WTI7jn-Pcb6M-buCgHhFOzTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java index a3b154b4bdfed..3d34934e54945 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ClientYamlTestSuiteIT.java @@ -9,8 +9,6 @@ package org.elasticsearch.repositories.s3; -import fixture.aws.imds.Ec2ImdsHttpFixture; -import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; import com.carrotsearch.randomizedtesting.annotations.Name; @@ -18,7 +16,6 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; -import org.elasticsearch.cluster.routing.Murmur3HashFunction; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; @@ -26,67 +23,33 @@ import org.junit.rules.RuleChain; import org.junit.rules.TestRule; -import 
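
The S3Service change above gives the web-identity token path a single named constant and keeps the existing guard: even when AWS_WEB_IDENTITY_TOKEN_FILE is set, tokens are only used if a readable symlink exists under the plugin's config directory. A sketch of that check in isolation (WebIdentityTokenCheck is hypothetical; the constant's value is taken from the patch):

import java.nio.file.Files;
import java.nio.file.Path;

final class WebIdentityTokenCheck {
    static final String WEB_IDENTITY_TOKEN_FILE_LOCATION = "repository-s3/aws-web-identity-token-file";

    // Web identity tokens are usable only when a readable symlink to the token
    // file exists under the plugin's config directory.
    static boolean webIdentityTokenUsable(Path configDir) {
        Path link = configDir.resolve(WEB_IDENTITY_TOKEN_FILE_LOCATION);
        return Files.exists(link) && Files.isReadable(link);
    }
}
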
java.util.Set; - @ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) @ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 public class RepositoryS3ClientYamlTestSuiteIT extends AbstractRepositoryS3ClientYamlTestSuiteIT { - private static final String HASHED_SEED = Integer.toString(Murmur3HashFunction.hash(System.getProperty("tests.seed"))); - private static final String TEMPORARY_SESSION_TOKEN = "session_token-" + HASHED_SEED; - - private static final S3HttpFixture s3Fixture = new S3HttpFixture(); - - private static final S3HttpFixture s3HttpFixtureWithSessionToken = new S3HttpFixture( - true, - "session_token_bucket", - "session_token_base_path_integration_tests", - S3HttpFixture.fixedAccessKeyAndToken(System.getProperty("s3TemporaryAccessKey"), TEMPORARY_SESSION_TOKEN) - ); - - private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); - - private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( - dynamicS3Credentials::addValidCredentials, - Set.of() - ); + private static final String ACCESS_KEY = "RepositoryS3ClientYamlTestSuiteIT-access-key"; + private static final String SECRET_KEY = "RepositoryS3ClientYamlTestSuiteIT-secret-key"; - private static final S3HttpFixture s3HttpFixtureWithImdsSessionToken = new S3HttpFixture( + private static final S3HttpFixture s3Fixture = new S3HttpFixture( true, - "ec2_bucket", - "ec2_base_path", - dynamicS3Credentials::isAuthorized + "bucket", + "base_path_integration_tests", + S3HttpFixture.fixedAccessKey(ACCESS_KEY) ); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .keystore("s3.client.integration_test_permanent.access_key", System.getProperty("s3PermanentAccessKey")) - .keystore("s3.client.integration_test_permanent.secret_key", System.getProperty("s3PermanentSecretKey")) - .keystore("s3.client.integration_test_temporary.access_key", System.getProperty("s3TemporaryAccessKey")) - .keystore("s3.client.integration_test_temporary.secret_key", System.getProperty("s3TemporarySecretKey")) - .keystore("s3.client.integration_test_temporary.session_token", TEMPORARY_SESSION_TOKEN) + .keystore("s3.client.integration_test_permanent.access_key", ACCESS_KEY) + .keystore("s3.client.integration_test_permanent.secret_key", SECRET_KEY) .setting("s3.client.integration_test_permanent.endpoint", s3Fixture::getAddress) - .setting("s3.client.integration_test_temporary.endpoint", s3HttpFixtureWithSessionToken::getAddress) - .setting("s3.client.integration_test_ec2.endpoint", s3HttpFixtureWithImdsSessionToken::getAddress) - .systemProperty("com.amazonaws.sdk.ec2MetadataServiceEndpointOverride", ec2ImdsHttpFixture::getAddress) .build(); @ClassRule - public static TestRule ruleChain = RuleChain.outerRule(s3Fixture) - .around(s3HttpFixtureWithSessionToken) - .around(s3HttpFixtureWithImdsSessionToken) - .around(ec2ImdsHttpFixture) - .around(cluster); + public static TestRule ruleChain = RuleChain.outerRule(s3Fixture).around(cluster); @ParametersFactory public static Iterable parameters() throws Exception { - return createParameters( - new String[] { - "repository_s3/10_basic", - "repository_s3/20_repository_permanent_credentials", - "repository_s3/30_repository_temporary_credentials", - "repository_s3/40_repository_ec2_credentials" } - ); + return createParameters(new String[] { "repository_s3/10_basic", "repository_s3/20_repository_permanent_credentials" }); } public 
RepositoryS3ClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RegionalStsClientYamlTestSuiteIT.java b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RegionalStsClientYamlTestSuiteIT.java index 2baba66a8a4d0..ac356083983eb 100644 --- a/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RegionalStsClientYamlTestSuiteIT.java +++ b/modules/repository-s3/src/yamlRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3RegionalStsClientYamlTestSuiteIT.java @@ -21,10 +21,11 @@ public class RepositoryS3RegionalStsClientYamlTestSuiteIT extends AbstractReposi @ClassRule public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .module("repository-s3") - .configFile("repository-s3/aws-web-identity-token-file", Resource.fromClasspath("aws-web-identity-token-file")) - .environment("AWS_WEB_IDENTITY_TOKEN_FILE", System.getProperty("awsWebIdentityTokenExternalLocation")) - // The AWS STS SDK requires the role and session names to be set. We can verify that they are sent to S3S in the - // S3HttpFixtureWithSTS fixture + .configFile(S3Service.CustomWebIdentityTokenCredentialsProvider.WEB_IDENTITY_TOKEN_FILE_LOCATION, Resource.fromString(""" + Atza|IQEBLjAsAhRFiXuWpUXuRvQ9PZL3GMFcYevydwIUFAHZwXZXXXXXXXXJnrulxKDHwy87oGKPznh0D6bEQZTSCzyoCtL_8S07pLpr0zMbn6w1lfVZKNTBdDans\ + FBmtGnIsIapjI6xKR02Yc_2bQ8LZbUXSGm6Ry6_BG7PrtLZtj_dfCTj92xNGed-CrKqjG7nPBjNIL016GGvuS5gSvPRUxWES3VYfm1wl7WTI7jn-Pcb6M-buCgHhFO\ + zTQxod27L9CqnOLio7N3gZAGpsp6n1-AJBOCJckcyXe2c6uD0srOJeZlKUm2eTDVMf8IehDVI0r1QOnTV6KzzAI3OY87Vd_cVMQ""")) + .environment("AWS_WEB_IDENTITY_TOKEN_FILE", S3Service.CustomWebIdentityTokenCredentialsProvider.WEB_IDENTITY_TOKEN_FILE_LOCATION) .environment("AWS_ROLE_ARN", "arn:aws:iam::123456789012:role/FederatedWebIdentityRole") .environment("AWS_ROLE_SESSION_NAME", "sts-fixture-test") .environment("AWS_STS_REGIONAL_ENDPOINTS", "regional") @@ -33,6 +34,9 @@ public class RepositoryS3RegionalStsClientYamlTestSuiteIT extends AbstractReposi @ParametersFactory public static Iterable parameters() throws Exception { + // Run just the basic sanity test to make sure ES starts up and loads the S3 repository with a regional endpoint without an error. + // It would be great to make actual requests against a test fixture, but setting the region means using a production endpoint. + // See #102230 for more details. return createParameters(new String[] { "repository_s3/10_basic" }); } diff --git a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/20_repository_permanent_credentials.yml b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/20_repository_permanent_credentials.yml index e88a0861ec01c..6f6fdaed8c666 100644 --- a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/20_repository_permanent_credentials.yml +++ b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/20_repository_permanent_credentials.yml @@ -10,12 +10,11 @@ setup: body: type: s3 settings: - bucket: @permanent_bucket@ + bucket: bucket client: integration_test_permanent - base_path: "@permanent_base_path@" + base_path: base_path_integration_tests canned_acl: private storage_class: standard - disable_chunked_encoding: @disable_chunked_encoding@ # Remove the snapshots, if a previous test failed to delete them. 
This is # useful for third party tests that runs the test against a real external service. @@ -40,9 +39,9 @@ setup: body: type: s3 settings: - bucket: @permanent_bucket@ + bucket: bucket client: integration_test_permanent - base_path: "@permanent_base_path@" + base_path: base_path_integration_tests endpoint: 127.0.0.1:5 canned_acl: private storage_class: standard @@ -55,9 +54,9 @@ setup: body: type: s3 settings: - bucket: @permanent_bucket@ + bucket: bucket client: integration_test_permanent - base_path: "@permanent_base_path@" + base_path: base_path_integration_tests endpoint: 127.0.0.1:5 canned_acl: private storage_class: standard @@ -106,258 +105,6 @@ setup: - match: { snapshot.include_global_state: true } - match: { snapshot.shards.failed: 0 } ---- -"Snapshot and Restore with repository-s3 using permanent credentials": - - # Get repository - - do: - snapshot.get_repository: - repository: repository_permanent - - - match: { repository_permanent.settings.bucket : @permanent_bucket@ } - - match: { repository_permanent.settings.client : "integration_test_permanent" } - - match: { repository_permanent.settings.base_path : "@permanent_base_path@" } - - match: { repository_permanent.settings.canned_acl : "private" } - - match: { repository_permanent.settings.storage_class : "standard" } - - is_false: repository_permanent.settings.access_key - - is_false: repository_permanent.settings.secret_key - - is_false: repository_permanent.settings.session_token - - # Index documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "1" - - snapshot: one - - index: - _index: docs - _id: "2" - - snapshot: one - - index: - _index: docs - _id: "3" - - snapshot: one - - - do: - count: - index: docs - - - match: {count: 3} - - # Create a first snapshot - - do: - snapshot.create: - repository: repository_permanent - snapshot: snapshot-one - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-one } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.include_global_state: true } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.status: - repository: repository_permanent - snapshot: snapshot-one - - - is_true: snapshots - - match: { snapshots.0.snapshot: snapshot-one } - - match: { snapshots.0.state : SUCCESS } - - # Index more documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "4" - - snapshot: two - - index: - _index: docs - _id: "5" - - snapshot: two - - index: - _index: docs - _id: "6" - - snapshot: two - - index: - _index: docs - _id: "7" - - snapshot: two - - - do: - count: - index: docs - - - match: {count: 7} - - # Create a second snapshot - - do: - snapshot.create: - repository: repository_permanent - snapshot: snapshot-two - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-two } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.get: - repository: repository_permanent - snapshot: snapshot-one,snapshot-two - - - is_true: snapshots - - match: { snapshots.0.state : SUCCESS } - - match: { snapshots.1.state : SUCCESS } - - # Delete the index - - do: - indices.delete: - index: docs - - # Restore the second snapshot - - do: - snapshot.restore: - repository: repository_permanent - snapshot: snapshot-two - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 7} - - # Delete the index again - - do: - indices.delete: - index: docs - - # Restore the first snapshot - - do: - snapshot.restore: - repository: 
repository_permanent - snapshot: snapshot-one - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 3} - - # Remove the snapshots - - do: - snapshot.delete: - repository: repository_permanent - snapshot: snapshot-two - - - do: - snapshot.delete: - repository: repository_permanent - snapshot: snapshot-one - ---- -"Register a repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_permanent - body: - type: s3 - settings: - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_permanent - ---- -"Register a repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_permanent - body: - type: s3 - settings: - bucket: repository_permanent - client: unknown - ---- -"Register a read-only repository with a non existing bucket": - -- do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_permanent - body: - type: s3 - settings: - readonly: true - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_permanent - ---- -"Register a read-only repository with a non existing client": - -- do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_permanent - body: - type: s3 - settings: - readonly: true - bucket: repository_permanent - client: unknown - ---- -"Get a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.get: - repository: repository_permanent - snapshot: missing - ---- -"Delete a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.delete: - repository: repository_permanent - snapshot: missing - ---- -"Restore a non existing snapshot": - - - do: - catch: /snapshot_restore_exception/ - snapshot.restore: - repository: repository_permanent - snapshot: missing - wait_for_completion: true - ---- -"Usage stats": - - requires: - cluster_features: - - repositories.supports_usage_stats - reason: requires this feature - - - do: - cluster.stats: {} - - - gte: { repositories.s3.count: 1 } - - gte: { repositories.s3.read_write: 1 } - --- teardown: diff --git a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/30_repository_temporary_credentials.yml b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/30_repository_temporary_credentials.yml deleted file mode 100644 index 501af980e17e3..0000000000000 --- a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/30_repository_temporary_credentials.yml +++ /dev/null @@ -1,278 +0,0 @@ -# Integration tests for repository-s3 - ---- -setup: - - # Register repository with temporary credentials - - do: - snapshot.create_repository: - repository: repository_temporary - body: - type: s3 - settings: - bucket: @temporary_bucket@ - client: integration_test_temporary - base_path: "@temporary_base_path@" - canned_acl: private - storage_class: standard - disable_chunked_encoding: @disable_chunked_encoding@ - ---- -"Snapshot and Restore with repository-s3 using temporary credentials": - - # Get repository - - do: - snapshot.get_repository: - repository: repository_temporary - - - match: { repository_temporary.settings.bucket : @temporary_bucket@ } - - match: { repository_temporary.settings.client : "integration_test_temporary" } - - match: { 
repository_temporary.settings.base_path : "@temporary_base_path@" } - - match: { repository_temporary.settings.canned_acl : "private" } - - match: { repository_temporary.settings.storage_class : "standard" } - - is_false: repository_temporary.settings.access_key - - is_false: repository_temporary.settings.secret_key - - is_false: repository_temporary.settings.session_token - - # Index documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "1" - - snapshot: one - - index: - _index: docs - _id: "2" - - snapshot: one - - index: - _index: docs - _id: "3" - - snapshot: one - - - do: - count: - index: docs - - - match: {count: 3} - - # Create a first snapshot - - do: - snapshot.create: - repository: repository_temporary - snapshot: snapshot-one - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-one } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.include_global_state: true } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.status: - repository: repository_temporary - snapshot: snapshot-one - - - is_true: snapshots - - match: { snapshots.0.snapshot: snapshot-one } - - match: { snapshots.0.state : SUCCESS } - - # Index more documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "4" - - snapshot: two - - index: - _index: docs - _id: "5" - - snapshot: two - - index: - _index: docs - _id: "6" - - snapshot: two - - index: - _index: docs - _id: "7" - - snapshot: two - - - do: - count: - index: docs - - - match: {count: 7} - - # Create a second snapshot - - do: - snapshot.create: - repository: repository_temporary - snapshot: snapshot-two - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-two } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.get: - repository: repository_temporary - snapshot: snapshot-one,snapshot-two - - - is_true: snapshots - - match: { snapshots.0.state : SUCCESS } - - match: { snapshots.1.state : SUCCESS } - - # Delete the index - - do: - indices.delete: - index: docs - - # Restore the second snapshot - - do: - snapshot.restore: - repository: repository_temporary - snapshot: snapshot-two - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 7} - - # Delete the index again - - do: - indices.delete: - index: docs - - # Restore the first snapshot - - do: - snapshot.restore: - repository: repository_temporary - snapshot: snapshot-one - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 3} - - # Remove the snapshots - - do: - snapshot.delete: - repository: repository_temporary - snapshot: snapshot-two - - - do: - snapshot.delete: - repository: repository_temporary - snapshot: snapshot-one - ---- -"Register a repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_temporary - body: - type: s3 - settings: - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_temporary - ---- -"Register a repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_temporary - body: - type: s3 - settings: - bucket: repository_temporary - client: unknown - ---- -"Register a read-only repository with a non existing bucket": - -- do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_temporary - body: - type: s3 - settings: 
- readonly: true - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_temporary - ---- -"Register a read-only repository with a non existing client": - -- do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_temporary - body: - type: s3 - settings: - readonly: true - bucket: repository_temporary - client: unknown - ---- -"Get a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.get: - repository: repository_temporary - snapshot: missing - ---- -"Delete a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.delete: - repository: repository_temporary - snapshot: missing - ---- -"Restore a non existing snapshot": - - - do: - catch: /snapshot_restore_exception/ - snapshot.restore: - repository: repository_temporary - snapshot: missing - wait_for_completion: true - ---- -"Usage stats": - - requires: - cluster_features: - - repositories.supports_usage_stats - reason: requires this feature - - - do: - cluster.stats: {} - - - gte: { repositories.s3.count: 1 } - - gte: { repositories.s3.read_write: 1 } - ---- -teardown: - - # Remove our repository - - do: - snapshot.delete_repository: - repository: repository_temporary diff --git a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/40_repository_ec2_credentials.yml b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/40_repository_ec2_credentials.yml deleted file mode 100644 index 129f0ba5d7588..0000000000000 --- a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/40_repository_ec2_credentials.yml +++ /dev/null @@ -1,278 +0,0 @@ -# Integration tests for repository-s3 - ---- -setup: - - # Register repository with ec2 credentials - - do: - snapshot.create_repository: - repository: repository_ec2 - body: - type: s3 - settings: - bucket: @ec2_bucket@ - client: integration_test_ec2 - base_path: "@ec2_base_path@" - canned_acl: private - storage_class: standard - disable_chunked_encoding: @disable_chunked_encoding@ - ---- -"Snapshot and Restore with repository-s3 using ec2 credentials": - - # Get repository - - do: - snapshot.get_repository: - repository: repository_ec2 - - - match: { repository_ec2.settings.bucket : @ec2_bucket@ } - - match: { repository_ec2.settings.client : "integration_test_ec2" } - - match: { repository_ec2.settings.base_path : "@ec2_base_path@" } - - match: { repository_ec2.settings.canned_acl : "private" } - - match: { repository_ec2.settings.storage_class : "standard" } - - is_false: repository_ec2.settings.access_key - - is_false: repository_ec2.settings.secret_key - - is_false: repository_ec2.settings.session_token - - # Index documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "1" - - snapshot: one - - index: - _index: docs - _id: "2" - - snapshot: one - - index: - _index: docs - _id: "3" - - snapshot: one - - - do: - count: - index: docs - - - match: {count: 3} - - # Create a first snapshot - - do: - snapshot.create: - repository: repository_ec2 - snapshot: snapshot-one - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-one } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.include_global_state: true } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.status: - repository: repository_ec2 - snapshot: snapshot-one - - - is_true: snapshots - - match: { snapshots.0.snapshot: snapshot-one } - - match: { snapshots.0.state : 
SUCCESS } - - # Index more documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "4" - - snapshot: two - - index: - _index: docs - _id: "5" - - snapshot: two - - index: - _index: docs - _id: "6" - - snapshot: two - - index: - _index: docs - _id: "7" - - snapshot: two - - - do: - count: - index: docs - - - match: {count: 7} - - # Create a second snapshot - - do: - snapshot.create: - repository: repository_ec2 - snapshot: snapshot-two - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-two } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.get: - repository: repository_ec2 - snapshot: snapshot-one,snapshot-two - - - is_true: snapshots - - match: { snapshots.0.state : SUCCESS } - - match: { snapshots.1.state : SUCCESS } - - # Delete the index - - do: - indices.delete: - index: docs - - # Restore the second snapshot - - do: - snapshot.restore: - repository: repository_ec2 - snapshot: snapshot-two - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 7} - - # Delete the index again - - do: - indices.delete: - index: docs - - # Restore the first snapshot - - do: - snapshot.restore: - repository: repository_ec2 - snapshot: snapshot-one - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 3} - - # Remove the snapshots - - do: - snapshot.delete: - repository: repository_ec2 - snapshot: snapshot-two - - - do: - snapshot.delete: - repository: repository_ec2 - snapshot: snapshot-one - ---- -"Register a repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_ec2 - body: - type: s3 - settings: - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_temporary - ---- -"Register a repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_ec2 - body: - type: s3 - settings: - bucket: repository_ec2 - client: unknown - ---- -"Register a read-only repository with a non existing bucket": - -- do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_ec2 - body: - type: s3 - settings: - readonly: true - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_temporary - ---- -"Register a read-only repository with a non existing client": - -- do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_ec2 - body: - type: s3 - settings: - readonly: true - bucket: repository_ec2 - client: unknown - ---- -"Get a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.get: - repository: repository_ec2 - snapshot: missing - ---- -"Delete a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.delete: - repository: repository_ec2 - snapshot: missing - ---- -"Restore a non existing snapshot": - - - do: - catch: /snapshot_restore_exception/ - snapshot.restore: - repository: repository_ec2 - snapshot: missing - wait_for_completion: true - ---- -"Usage stats": - - requires: - cluster_features: - - repositories.supports_usage_stats - reason: requires this feature - - - do: - cluster.stats: {} - - - gte: { repositories.s3.count: 1 } - - gte: { repositories.s3.read_write: 1 } - ---- -teardown: - - # Remove our repository - - do: - snapshot.delete_repository: - repository: repository_ec2 diff --git 
a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/50_repository_ecs_credentials.yml b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/50_repository_ecs_credentials.yml deleted file mode 100644 index de334b4b3df96..0000000000000 --- a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/50_repository_ecs_credentials.yml +++ /dev/null @@ -1,278 +0,0 @@ -# Integration tests for repository-s3 - ---- -setup: - - # Register repository with ecs credentials - - do: - snapshot.create_repository: - repository: repository_ecs - body: - type: s3 - settings: - bucket: @ecs_bucket@ - client: integration_test_ecs - base_path: "@ecs_base_path@" - canned_acl: private - storage_class: standard - disable_chunked_encoding: @disable_chunked_encoding@ - ---- -"Snapshot and Restore with repository-s3 using ecs credentials": - - # Get repository - - do: - snapshot.get_repository: - repository: repository_ecs - - - match: { repository_ecs.settings.bucket : @ecs_bucket@ } - - match: { repository_ecs.settings.client : "integration_test_ecs" } - - match: { repository_ecs.settings.base_path : "@ecs_base_path@" } - - match: { repository_ecs.settings.canned_acl : "private" } - - match: { repository_ecs.settings.storage_class : "standard" } - - is_false: repository_ecs.settings.access_key - - is_false: repository_ecs.settings.secret_key - - is_false: repository_ecs.settings.session_token - - # Index documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "1" - - snapshot: one - - index: - _index: docs - _id: "2" - - snapshot: one - - index: - _index: docs - _id: "3" - - snapshot: one - - - do: - count: - index: docs - - - match: {count: 3} - - # Create a first snapshot - - do: - snapshot.create: - repository: repository_ecs - snapshot: snapshot-one - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-one } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.include_global_state: true } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.status: - repository: repository_ecs - snapshot: snapshot-one - - - is_true: snapshots - - match: { snapshots.0.snapshot: snapshot-one } - - match: { snapshots.0.state : SUCCESS } - - # Index more documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: "4" - - snapshot: two - - index: - _index: docs - _id: "5" - - snapshot: two - - index: - _index: docs - _id: "6" - - snapshot: two - - index: - _index: docs - _id: "7" - - snapshot: two - - - do: - count: - index: docs - - - match: {count: 7} - - # Create a second snapshot - - do: - snapshot.create: - repository: repository_ecs - snapshot: snapshot-two - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-two } - - match: { snapshot.state : SUCCESS } - - match: { snapshot.shards.failed : 0 } - - - do: - snapshot.get: - repository: repository_ecs - snapshot: snapshot-one,snapshot-two - - - is_true: snapshots - - match: { snapshots.0.state : SUCCESS } - - match: { snapshots.1.state : SUCCESS } - - # Delete the index - - do: - indices.delete: - index: docs - - # Restore the second snapshot - - do: - snapshot.restore: - repository: repository_ecs - snapshot: snapshot-two - wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 7} - - # Delete the index again - - do: - indices.delete: - index: docs - - # Restore the first snapshot - - do: - snapshot.restore: - repository: repository_ecs - snapshot: snapshot-one - 
wait_for_completion: true - - - do: - count: - index: docs - - - match: {count: 3} - - # Remove the snapshots - - do: - snapshot.delete: - repository: repository_ecs - snapshot: snapshot-two - - - do: - snapshot.delete: - repository: repository_ecs - snapshot: snapshot-one - ---- -"Register a repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_ecs - body: - type: s3 - settings: - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_ecs - ---- -"Register a repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_ecs - body: - type: s3 - settings: - bucket: repository_ecs - client: unknown - ---- -"Register a read-only repository with a non existing bucket": - -- do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_ecs - body: - type: s3 - settings: - readonly: true - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_ecs - ---- -"Register a read-only repository with a non existing client": - -- do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_ecs - body: - type: s3 - settings: - readonly: true - bucket: repository_ecs - client: unknown - ---- -"Get a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.get: - repository: repository_ecs - snapshot: missing - ---- -"Delete a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.delete: - repository: repository_ecs - snapshot: missing - ---- -"Restore a non existing snapshot": - - - do: - catch: /snapshot_restore_exception/ - snapshot.restore: - repository: repository_ecs - snapshot: missing - wait_for_completion: true - ---- -"Usage stats": - - requires: - cluster_features: - - repositories.supports_usage_stats - reason: requires this feature - - - do: - cluster.stats: {} - - - gte: { repositories.s3.count: 1 } - - gte: { repositories.s3.read_write: 1 } - ---- -teardown: - - # Remove our repository - - do: - snapshot.delete_repository: - repository: repository_ecs diff --git a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/60_repository_sts_credentials.yml b/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/60_repository_sts_credentials.yml deleted file mode 100644 index 09a8526017960..0000000000000 --- a/modules/repository-s3/src/yamlRestTest/resources/rest-api-spec/test/repository_s3/60_repository_sts_credentials.yml +++ /dev/null @@ -1,279 +0,0 @@ -# Integration tests for repository-s3 - ---- -setup: - - # Register repository with sts credentials - - do: - snapshot.create_repository: - repository: repository_sts - body: - type: s3 - settings: - bucket: @sts_bucket@ - client: integration_test_sts - base_path: "@sts_base_path@" - canned_acl: private - storage_class: standard - disable_chunked_encoding: @disable_chunked_encoding@ - ---- -"Snapshot and Restore repository-s3 using sts credentials": - - # Get repository - - do: - snapshot.get_repository: - repository: repository_sts - - - match: { repository_sts.settings.bucket: @sts_bucket@ } - - match: { repository_sts.settings.client: "integration_test_sts" } - - match: { repository_sts.settings.base_path: "@sts_base_path@" } - - match: { repository_sts.settings.canned_acl: "private" } - - match: { 
repository_sts.settings.storage_class: "standard" } - - is_false: repository_sts.settings.access_key - - is_false: repository_sts.settings.secret_key - - is_false: repository_sts.settings.session_token - - # Index documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: 1 - - snapshot: one - - index: - _index: docs - _id: 2 - - snapshot: one - - index: - _index: docs - _id: 3 - - snapshot: one - - - do: - count: - index: docs - - - match: { count: 3 } - - # Create a first snapshot - - do: - snapshot.create: - repository: repository_sts - snapshot: snapshot-one - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-one } - - match: { snapshot.state: SUCCESS } - - match: { snapshot.include_global_state: true } - - match: { snapshot.shards.failed: 0 } - - - do: - snapshot.status: - repository: repository_sts - snapshot: snapshot-one - - - is_true: snapshots - - match: { snapshots.0.snapshot: snapshot-one } - - match: { snapshots.0.state: SUCCESS } - - # Index more documents - - do: - bulk: - refresh: true - body: - - index: - _index: docs - _id: 4 - - snapshot: two - - index: - _index: docs - _id: 5 - - snapshot: two - - index: - _index: docs - _id: 6 - - snapshot: two - - index: - _index: docs - _id: 7 - - snapshot: two - - - do: - count: - index: docs - - - match: { count: 7 } - - # Create a second snapshot - - do: - snapshot.create: - repository: repository_sts - snapshot: snapshot-two - wait_for_completion: true - - - match: { snapshot.snapshot: snapshot-two } - - match: { snapshot.state: SUCCESS } - - match: { snapshot.shards.failed: 0 } - - - do: - snapshot.get: - repository: repository_sts - snapshot: snapshot-one,snapshot-two - - - is_true: snapshots - - match: { snapshots.0.state: SUCCESS } - - match: { snapshots.1.state: SUCCESS } - - # Delete the index - - do: - indices.delete: - index: docs - - # Restore the second snapshot - - do: - snapshot.restore: - repository: repository_sts - snapshot: snapshot-two - wait_for_completion: true - - - do: - count: - index: docs - - - match: { count: 7 } - - # Delete the index again - - do: - indices.delete: - index: docs - - # Restore the first snapshot - - do: - snapshot.restore: - repository: repository_sts - snapshot: snapshot-one - wait_for_completion: true - - - do: - count: - index: docs - - - match: { count: 3 } - - # Remove the snapshots - - do: - snapshot.delete: - repository: repository_sts - snapshot: snapshot-two - - - do: - snapshot.delete: - repository: repository_sts - snapshot: snapshot-one - ---- - -"Register a repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_sts - body: - type: s3 - settings: - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_sts - ---- -"Register a repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - repository: repository_sts - body: - type: s3 - settings: - bucket: repository_sts - client: unknown - ---- -"Register a read-only repository with a non existing bucket": - - - do: - catch: /repository_verification_exception/ - snapshot.create_repository: - repository: repository_sts - body: - type: s3 - settings: - readonly: true - bucket: zHHkfSqlbnBsbpSgvCYtxrEfFLqghXtyPvvvKPNBnRCicNHQLE - client: integration_test_sts - ---- -"Register a read-only repository with a non existing client": - - - do: - catch: /illegal_argument_exception/ - snapshot.create_repository: - 
repository: repository_sts - body: - type: s3 - settings: - readonly: true - bucket: repository_sts - client: unknown - ---- -"Get a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.get: - repository: repository_sts - snapshot: missing - ---- -"Delete a non existing snapshot": - - - do: - catch: /snapshot_missing_exception/ - snapshot.delete: - repository: repository_sts - snapshot: missing - ---- -"Restore a non existing snapshot": - - - do: - catch: /snapshot_restore_exception/ - snapshot.restore: - repository: repository_sts - snapshot: missing - wait_for_completion: true - ---- -"Usage stats": - - requires: - cluster_features: - - repositories.supports_usage_stats - reason: requires this feature - - - do: - cluster.stats: {} - - - gte: { repositories.s3.count: 1 } - - gte: { repositories.s3.read_write: 1 } - ---- -teardown: - - # Remove our repository - - do: - snapshot.delete_repository: - repository: repository_sts diff --git a/test/fixtures/minio-fixture/src/main/java/org/elasticsearch/test/fixtures/minio/MinioTestContainer.java b/test/fixtures/minio-fixture/src/main/java/org/elasticsearch/test/fixtures/minio/MinioTestContainer.java index 285bbb91983cc..3ee18d71a5a79 100644 --- a/test/fixtures/minio-fixture/src/main/java/org/elasticsearch/test/fixtures/minio/MinioTestContainer.java +++ b/test/fixtures/minio-fixture/src/main/java/org/elasticsearch/test/fixtures/minio/MinioTestContainer.java @@ -18,17 +18,13 @@ public final class MinioTestContainer extends DockerEnvironmentAwareTestContaine public static final String DOCKER_BASE_IMAGE = "minio/minio:RELEASE.2021-03-01T04-20-55Z"; private final boolean enabled; - public MinioTestContainer() { - this(true); - } - - public MinioTestContainer(boolean enabled) { + public MinioTestContainer(boolean enabled, String accessKey, String secretKey, String bucketName) { super( new ImageFromDockerfile("es-minio-testfixture").withDockerfileFromBuilder( builder -> builder.from(DOCKER_BASE_IMAGE) - .env("MINIO_ACCESS_KEY", "s3_test_access_key") - .env("MINIO_SECRET_KEY", "s3_test_secret_key") - .run("mkdir -p /minio/data/bucket") + .env("MINIO_ACCESS_KEY", accessKey) + .env("MINIO_SECRET_KEY", secretKey) + .run("mkdir -p /minio/data/" + bucketName) .cmd("server", "/minio/data") .build() ) diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java index 36f8fedcb3335..ab70f043043cc 100644 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java +++ b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpFixture.java @@ -33,10 +33,6 @@ public class S3HttpFixture extends ExternalResource { private final String basePath; private final BiPredicate authorizationPredicate; - public S3HttpFixture() { - this(true); - } - public S3HttpFixture(boolean enabled) { this(enabled, "bucket", "base_path_integration_tests", fixedAccessKey("s3_test_access_key")); } diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java index 717cf96ad6a92..2dac2ee232aa5 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/local/AbstractLocalClusterFactory.java @@ -49,6 +49,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; 
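A minimal usage sketch of the parameterized Minio fixture introduced above. The values mirror the defaults that were hard-coded in the fixture before this change; a real suite may choose its own credentials and bucket name.

    MinioTestContainer minioFixture = new MinioTestContainer(
        true,                    // enabled
        "s3_test_access_key",    // becomes MINIO_ACCESS_KEY in the container
        "s3_test_secret_key",    // becomes MINIO_SECRET_KEY in the container
        "bucket"                 // directory created under /minio/data
    );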
+import java.util.Objects; import java.util.Properties; import java.util.Set; import java.util.UUID; @@ -473,6 +474,7 @@ private void createKeystore() { private void addKeystoreSettings() { spec.resolveKeystore().forEach((key, value) -> { + Objects.requireNonNull(value, "keystore setting for '" + key + "' may not be null"); String input = spec.getKeystorePassword() == null || spec.getKeystorePassword().isEmpty() ? value : spec.getKeystorePassword() + "\n" + value; diff --git a/x-pack/plugin/searchable-snapshots/qa/minio/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/minio/MinioSearchableSnapshotsIT.java b/x-pack/plugin/searchable-snapshots/qa/minio/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/minio/MinioSearchableSnapshotsIT.java index 5c2b19fe75a07..53f1a9a88e10e 100644 --- a/x-pack/plugin/searchable-snapshots/qa/minio/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/minio/MinioSearchableSnapshotsIT.java +++ b/x-pack/plugin/searchable-snapshots/qa/minio/src/javaRestTest/java/org/elasticsearch/xpack/searchablesnapshots/minio/MinioSearchableSnapshotsIT.java @@ -21,7 +21,12 @@ @ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) public class MinioSearchableSnapshotsIT extends AbstractSearchableSnapshotsRestTestCase { - public static final MinioTestContainer minioFixture = new MinioTestContainer(); + public static final MinioTestContainer minioFixture = new MinioTestContainer( + true, + "s3_test_access_key", + "s3_test_secret_key", + "bucket" + ); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .distribution(DistributionType.DEFAULT) diff --git a/x-pack/plugin/snapshot-repo-test-kit/qa/minio/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/MinioRepositoryAnalysisRestIT.java b/x-pack/plugin/snapshot-repo-test-kit/qa/minio/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/MinioRepositoryAnalysisRestIT.java index b0068bd7bfdaf..3b5edaf768057 100644 --- a/x-pack/plugin/snapshot-repo-test-kit/qa/minio/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/MinioRepositoryAnalysisRestIT.java +++ b/x-pack/plugin/snapshot-repo-test-kit/qa/minio/src/javaRestTest/java/org/elasticsearch/repositories/blobstore/testkit/analyze/MinioRepositoryAnalysisRestIT.java @@ -20,7 +20,12 @@ @ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) public class MinioRepositoryAnalysisRestIT extends AbstractRepositoryAnalysisRestTestCase { - public static final MinioTestContainer minioFixture = new MinioTestContainer(); + public static final MinioTestContainer minioFixture = new MinioTestContainer( + true, + "s3_test_access_key", + "s3_test_secret_key", + "bucket" + ); public static ElasticsearchCluster cluster = ElasticsearchCluster.local() .distribution(DistributionType.DEFAULT) From d729558529cafc80d705296328140b45830aa974 Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Fri, 29 Nov 2024 11:00:54 +0100 Subject: [PATCH 075/139] Correct categorization analyzer in ES|QL categorize (#117695) * Correct categorization analyzer in ES|QL categorize * close categorizer if constructing analyzer fails * Rename capability CATEGORIZE_V4 * add comments --- x-pack/plugin/esql/compute/build.gradle | 4 +- .../compute/src/main/java/module-info.java | 1 + .../aggregation/blockhash/BlockHash.java | 10 +- .../blockhash/CategorizeRawBlockHash.java | 34 ++--- .../operator/HashAggregationOperator.java | 6 
+- .../GroupingAggregatorFunctionTestCase.java | 4 +- .../blockhash/CategorizeBlockHashTests.java | 76 +++++++---- .../HashAggregationOperatorTests.java | 3 +- .../src/main/resources/categorize.csv-spec | 123 ++++++++++-------- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../AbstractPhysicalOperationProviders.java | 9 +- .../planner/EsPhysicalOperationProviders.java | 4 +- .../xpack/esql/plugin/ComputeService.java | 2 +- .../xpack/esql/analysis/VerifierTests.java | 6 +- .../optimizer/LogicalPlanOptimizerTests.java | 4 +- .../planner/LocalExecutionPlannerTests.java | 4 +- .../TestPhysicalOperationProviders.java | 20 ++- 17 files changed, 199 insertions(+), 113 deletions(-) diff --git a/x-pack/plugin/esql/compute/build.gradle b/x-pack/plugin/esql/compute/build.gradle index 609c778df5929..8e866cec3f421 100644 --- a/x-pack/plugin/esql/compute/build.gradle +++ b/x-pack/plugin/esql/compute/build.gradle @@ -11,11 +11,13 @@ base { dependencies { compileOnly project(':server') compileOnly project('ann') + compileOnly project(xpackModule('core')) compileOnly project(xpackModule('ml')) annotationProcessor project('gen') implementation 'com.carrotsearch:hppc:0.8.1' - testImplementation project(':test:framework') + testImplementation(project(':modules:analysis-common')) + testImplementation(project(':test:framework')) testImplementation(project(xpackModule('esql-core'))) testImplementation(project(xpackModule('core'))) testImplementation(project(xpackModule('ml'))) diff --git a/x-pack/plugin/esql/compute/src/main/java/module-info.java b/x-pack/plugin/esql/compute/src/main/java/module-info.java index 573d9e048a4d4..1b3253694b298 100644 --- a/x-pack/plugin/esql/compute/src/main/java/module-info.java +++ b/x-pack/plugin/esql/compute/src/main/java/module-info.java @@ -19,6 +19,7 @@ requires org.elasticsearch.ml; requires org.elasticsearch.tdigest; requires org.elasticsearch.geo; + requires org.elasticsearch.xcore; requires hppc; exports org.elasticsearch.compute; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java index ef0f3ceb112c4..ea76c3bd0a0aa 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java @@ -25,6 +25,7 @@ import org.elasticsearch.compute.data.Page; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.ReleasableIterator; +import org.elasticsearch.index.analysis.AnalysisRegistry; import java.util.Iterator; import java.util.List; @@ -169,14 +170,19 @@ public static BlockHash buildPackedValuesBlockHash(List groups, Block /** * Builds a BlockHash for the Categorize grouping function. */ - public static BlockHash buildCategorizeBlockHash(List groups, AggregatorMode aggregatorMode, BlockFactory blockFactory) { + public static BlockHash buildCategorizeBlockHash( + List groups, + AggregatorMode aggregatorMode, + BlockFactory blockFactory, + AnalysisRegistry analysisRegistry + ) { if (groups.size() != 1) { throw new IllegalArgumentException("only a single CATEGORIZE group can used"); } return aggregatorMode.isInputPartial() ? 
new CategorizedIntermediateBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial()) - : new CategorizeRawBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial()); + : new CategorizeRawBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial(), analysisRegistry); } /** diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java index 0d0a2fef2f82b..47dd7f650dffa 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java @@ -7,7 +7,6 @@ package org.elasticsearch.compute.aggregation.blockhash; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; import org.elasticsearch.compute.data.Block; @@ -19,13 +18,14 @@ import org.elasticsearch.compute.data.Page; import org.elasticsearch.core.Releasable; import org.elasticsearch.core.Releasables; -import org.elasticsearch.index.analysis.CharFilterFactory; -import org.elasticsearch.index.analysis.CustomAnalyzer; -import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; +import org.elasticsearch.index.analysis.AnalysisRegistry; +import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; +import java.io.IOException; +import java.util.List; + /** * BlockHash implementation for {@code Categorize} grouping function. *

@@ -33,19 +33,23 @@
 *
*/ public class CategorizeRawBlockHash extends AbstractCategorizeBlockHash { + private static final CategorizationAnalyzerConfig ANALYZER_CONFIG = CategorizationAnalyzerConfig.buildStandardCategorizationAnalyzer( + List.of() + ); + private final CategorizeEvaluator evaluator; - CategorizeRawBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial) { + CategorizeRawBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial, AnalysisRegistry analysisRegistry) { super(blockFactory, channel, outputPartial); - CategorizationAnalyzer analyzer = new CategorizationAnalyzer( - // TODO: should be the same analyzer as used in Production - new CustomAnalyzer( - TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new), - new CharFilterFactory[0], - new TokenFilterFactory[0] - ), - true - ); + + CategorizationAnalyzer analyzer; + try { + analyzer = new CategorizationAnalyzer(analysisRegistry, ANALYZER_CONFIG); + } catch (IOException e) { + categorizer.close(); + throw new RuntimeException(e); + } + this.evaluator = new CategorizeEvaluator(analyzer, categorizer, blockFactory); } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java index a69e8ca767014..6f8386ec08de1 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/HashAggregationOperator.java @@ -24,6 +24,7 @@ import org.elasticsearch.compute.data.Page; import org.elasticsearch.core.Releasables; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.xcontent.XContentBuilder; import java.io.IOException; @@ -42,14 +43,15 @@ public record HashAggregationOperatorFactory( List groups, AggregatorMode aggregatorMode, List aggregators, - int maxPageSize + int maxPageSize, + AnalysisRegistry analysisRegistry ) implements OperatorFactory { @Override public Operator get(DriverContext driverContext) { if (groups.stream().anyMatch(BlockHash.GroupSpec::isCategorize)) { return new HashAggregationOperator( aggregators, - () -> BlockHash.buildCategorizeBlockHash(groups, aggregatorMode, driverContext.blockFactory()), + () -> BlockHash.buildCategorizeBlockHash(groups, aggregatorMode, driverContext.blockFactory(), analysisRegistry), driverContext ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java index 1e97bdf5a2e79..58925a5ca36fc 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunctionTestCase.java @@ -54,7 +54,6 @@ import static org.elasticsearch.compute.data.BlockTestUtils.append; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.in; /** * Shared tests for testing grouped aggregations. 
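A minimal sketch of the updated entry point, with names taken from the BlockHash diff above; the registry must be non-null whenever a CATEGORIZE group is present:

    BlockHash hash = BlockHash.buildCategorizeBlockHash(
        groups,                        // exactly one CATEGORIZE group is allowed
        AggregatorMode.INITIAL,        // raw input selects CategorizeRawBlockHash
        driverContext.blockFactory(),
        analysisRegistry               // supplies the standard categorization analyzer
    );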
@@ -107,7 +106,8 @@ private Operator.OperatorFactory simpleWithMode( List.of(new BlockHash.GroupSpec(0, ElementType.LONG)), mode, List.of(supplier.groupingAggregatorFactory(mode)), - randomPageSize() + randomPageSize(), + null ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java index dd7a87dc4a574..8a3c723557151 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java @@ -8,8 +8,10 @@ package org.elasticsearch.compute.aggregation.blockhash; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.analysis.common.CommonAnalysisPlugin; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.MockBigArrays; @@ -35,7 +37,15 @@ import org.elasticsearch.compute.operator.LocalSourceOperator; import org.elasticsearch.compute.operator.PageConsumerOperator; import org.elasticsearch.core.Releasables; - +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; +import org.elasticsearch.index.analysis.AnalysisRegistry; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.plugins.scanners.StablePluginsRegistry; +import org.elasticsearch.xpack.ml.MachineLearning; +import org.junit.Before; + +import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -50,6 +60,19 @@ public class CategorizeBlockHashTests extends BlockHashTestCase { + private AnalysisRegistry analysisRegistry; + + @Before + private void initAnalysisRegistry() throws IOException { + analysisRegistry = new AnalysisModule( + TestEnvironment.newEnvironment( + Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build() + ), + List.of(new MachineLearning(Settings.EMPTY), new CommonAnalysisPlugin()), + new StablePluginsRegistry() + ).getAnalysisRegistry(); + } + public void testCategorizeRaw() { final Page page; boolean withNull = randomBoolean(); @@ -72,7 +95,7 @@ public void testCategorizeRaw() { page = new Page(builder.build()); } - try (BlockHash hash = new CategorizeRawBlockHash(0, blockFactory, true)) { + try (BlockHash hash = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry)) { hash.add(page, new GroupingAggregatorFunction.AddInput() { @Override public void add(int positionOffset, IntBlock groupIds) { @@ -145,8 +168,8 @@ public void testCategorizeIntermediate() { // Fill intermediatePages with the intermediate state from the raw hashes try ( - BlockHash rawHash1 = new CategorizeRawBlockHash(0, blockFactory, true); - BlockHash rawHash2 = new CategorizeRawBlockHash(0, blockFactory, true) + BlockHash rawHash1 = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry); + BlockHash rawHash2 = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry); ) { rawHash1.add(page1, new GroupingAggregatorFunction.AddInput() { @Override @@ -267,14 +290,16 @@ public void testCategorize_withDriver() { BytesRefVector.Builder 
textsBuilder = driverContext.blockFactory().newBytesRefVectorBuilder(10); LongVector.Builder countsBuilder = driverContext.blockFactory().newLongVectorBuilder(10) ) { - textsBuilder.appendBytesRef(new BytesRef("a")); - textsBuilder.appendBytesRef(new BytesRef("b")); + // Note that just using "a" or "aaa" doesn't work, because the ml_standard + // tokenizer drops numbers, including hexadecimal ones. + textsBuilder.appendBytesRef(new BytesRef("aaazz")); + textsBuilder.appendBytesRef(new BytesRef("bbbzz")); textsBuilder.appendBytesRef(new BytesRef("words words words goodbye jan")); textsBuilder.appendBytesRef(new BytesRef("words words words goodbye nik")); textsBuilder.appendBytesRef(new BytesRef("words words words goodbye tom")); textsBuilder.appendBytesRef(new BytesRef("words words words hello jan")); - textsBuilder.appendBytesRef(new BytesRef("c")); - textsBuilder.appendBytesRef(new BytesRef("d")); + textsBuilder.appendBytesRef(new BytesRef("ccczz")); + textsBuilder.appendBytesRef(new BytesRef("dddzz")); countsBuilder.appendLong(1); countsBuilder.appendLong(2); countsBuilder.appendLong(800); @@ -293,10 +318,10 @@ public void testCategorize_withDriver() { ) { textsBuilder.appendBytesRef(new BytesRef("words words words hello nik")); textsBuilder.appendBytesRef(new BytesRef("words words words hello nik")); - textsBuilder.appendBytesRef(new BytesRef("c")); + textsBuilder.appendBytesRef(new BytesRef("ccczz")); textsBuilder.appendBytesRef(new BytesRef("words words words goodbye chris")); - textsBuilder.appendBytesRef(new BytesRef("d")); - textsBuilder.appendBytesRef(new BytesRef("e")); + textsBuilder.appendBytesRef(new BytesRef("dddzz")); + textsBuilder.appendBytesRef(new BytesRef("eeezz")); countsBuilder.appendLong(9); countsBuilder.appendLong(90); countsBuilder.appendLong(3); @@ -320,7 +345,8 @@ public void testCategorize_withDriver() { new SumLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL), new MaxLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL) ), - 16 * 1024 + 16 * 1024, + analysisRegistry ).get(driverContext) ), new PageConsumerOperator(intermediateOutput::add), @@ -339,7 +365,8 @@ public void testCategorize_withDriver() { new SumLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL), new MaxLongAggregatorFunctionSupplier(List.of(1)).groupingAggregatorFactory(AggregatorMode.INITIAL) ), - 16 * 1024 + 16 * 1024, + analysisRegistry ).get(driverContext) ), new PageConsumerOperator(intermediateOutput::add), @@ -360,7 +387,8 @@ public void testCategorize_withDriver() { new SumLongAggregatorFunctionSupplier(List.of(1, 2)).groupingAggregatorFactory(AggregatorMode.FINAL), new MaxLongAggregatorFunctionSupplier(List.of(3, 4)).groupingAggregatorFactory(AggregatorMode.FINAL) ), - 16 * 1024 + 16 * 1024, + analysisRegistry ).get(driverContext) ), new PageConsumerOperator(finalOutput::add), @@ -385,15 +413,15 @@ public void testCategorize_withDriver() { sums, equalTo( Map.of( - ".*?a.*?", + ".*?aaazz.*?", 1L, - ".*?b.*?", + ".*?bbbzz.*?", 2L, - ".*?c.*?", + ".*?ccczz.*?", 33L, - ".*?d.*?", + ".*?dddzz.*?", 44L, - ".*?e.*?", + ".*?eeezz.*?", 5L, ".*?words.+?words.+?words.+?goodbye.*?", 8888L, @@ -406,15 +434,15 @@ public void testCategorize_withDriver() { maxs, equalTo( Map.of( - ".*?a.*?", + ".*?aaazz.*?", 1L, - ".*?b.*?", + ".*?bbbzz.*?", 2L, - ".*?c.*?", + ".*?ccczz.*?", 30L, - ".*?d.*?", + ".*?dddzz.*?", 40L, - ".*?e.*?", + ".*?eeezz.*?", 5L, 
".*?words.+?words.+?words.+?goodbye.*?", 8000L, diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java index b2f4ad594936e..953c7d1c313f1 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/HashAggregationOperatorTests.java @@ -59,7 +59,8 @@ protected Operator.OperatorFactory simpleWithMode(AggregatorMode mode) { new SumLongAggregatorFunctionSupplier(sumChannels).groupingAggregatorFactory(mode), new MaxLongAggregatorFunctionSupplier(maxChannels).groupingAggregatorFactory(mode) ), - randomPageSize() + randomPageSize(), + null ); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index 547c430ed7518..e45b10d1aa122 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,5 +1,5 @@ standard aggs -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS count=COUNT(), @@ -17,7 +17,7 @@ count:long | sum:long | avg:double | count_distinct:long | category:keyw ; values aggs -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS values=MV_SORT(VALUES(message)), @@ -33,7 +33,7 @@ values:keyword | top ; mv -required_capability: categorize_v3 +required_capability: categorize_v4 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) @@ -48,7 +48,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; row mv -required_capability: categorize_v3 +required_capability: categorize_v4 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) @@ -60,8 +60,20 @@ COUNT():long | VALUES(str):keyword | category:keyword 1 | [a, b, c] | .*?disconnected.*? ; +skips stopwords +required_capability: categorize_v4 + +ROW message = ["Mon Tue connected to a", "Jul Aug connected to b September ", "UTC connected GMT to c UTC"] + | STATS COUNT() BY category=CATEGORIZE(message) + | SORT category +; + +COUNT():long | category:keyword + 3 | .*?connected.+?to.*? 
+; + with multiple indices -required_capability: categorize_v3 +required_capability: categorize_v4 required_capability: union_types FROM sample_data* @@ -76,7 +88,7 @@ COUNT():long | category:keyword ; mv with many values -required_capability: categorize_v3 +required_capability: categorize_v4 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) @@ -93,7 +105,7 @@ COUNT():long | category:keyword ; mv with many values and SUM -required_capability: categorize_v3 +required_capability: categorize_v4 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -108,7 +120,7 @@ SUM(languages):long | category:keyword ; mv with many values and nulls and SUM -required_capability: categorize_v3 +required_capability: categorize_v4 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -122,7 +134,7 @@ SUM(languages):long | category:keyword ; mv via eval -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") @@ -138,7 +150,7 @@ COUNT():long | category:keyword ; mv via eval const -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -152,7 +164,7 @@ COUNT():long | category:keyword ; mv via eval const without aliases -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -166,7 +178,7 @@ COUNT():long | CATEGORIZE(message):keyword ; mv const in parameter -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -179,7 +191,7 @@ COUNT():long | c:keyword ; agg alias shadowing -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -194,7 +206,7 @@ c:keyword ; chained aggregations using categorize -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -203,13 +215,13 @@ FROM sample_data ; COUNT():long | category:keyword - 1 | .*?\.\*\?Connected\.\+\?to\.\*\?.*? - 1 | .*?\.\*\?Connection\.\+\?error\.\*\?.*? - 1 | .*?\.\*\?Disconnected\.\*\?.*? + 1 | .*?Connected.+?to.*? + 1 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? ; stats without aggs -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS BY category=CATEGORIZE(message) @@ -223,7 +235,7 @@ category:keyword ; text field -required_capability: categorize_v3 +required_capability: categorize_v4 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) @@ -231,14 +243,17 @@ FROM hosts ; COUNT():long | category:keyword - 2 | .*?DB.+?servers.*? 2 | .*?Gateway.+?instances.*? 5 | .*?Kubernetes.+?cluster.*? + 2 | .*?servers.*? 1 | null + +// Note: DB is removed from "DB servers", because the ml_standard +// tokenizer drops numbers, including hexadecimal ones. 
; on TO_UPPER -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) @@ -252,7 +267,7 @@ COUNT():long | category:keyword ; on CONCAT -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) @@ -266,7 +281,7 @@ COUNT():long | category:keyword ; on CONCAT with unicode -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) @@ -274,13 +289,13 @@ FROM sample_data ; COUNT():long | category:keyword - 3 | .*?Connected.+?to.+?👍🏽😊.*? - 3 | .*?Connection.+?error.+?👍🏽😊.*? - 1 | .*?Disconnected.+?👍🏽😊.*? + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? ; on REVERSE(CONCAT()) -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) @@ -288,13 +303,13 @@ FROM sample_data ; COUNT():long | category:keyword - 1 | .*?😊👍🏽.+?detcennocsiD.*? - 3 | .*?😊👍🏽.+?ot.+?detcennoC.*? - 3 | .*?😊👍🏽.+?rorre.+?noitcennoC.*? + 1 | .*?detcennocsiD.*? + 3 | .*?ot.+?detcennoC.*? + 3 | .*?rorre.+?noitcennoC.*? ; and then TO_LOWER -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -309,7 +324,7 @@ COUNT():long | category:keyword ; on const empty string -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") @@ -321,7 +336,7 @@ COUNT():long | category:keyword ; on const empty string from eval -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL x = "" @@ -334,7 +349,7 @@ COUNT():long | category:keyword ; on null -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL x = null @@ -347,7 +362,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; on null string -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL x = null::string @@ -360,7 +375,7 @@ COUNT():long | category:keyword ; filtering out all data -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" @@ -372,7 +387,7 @@ COUNT():long | category:keyword ; filtering out all data with constant -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -383,7 +398,7 @@ COUNT():long | category:keyword ; drop output columns -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) @@ -398,7 +413,7 @@ x:integer ; category value processing -required_capability: categorize_v3 +required_capability: categorize_v4 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -412,21 +427,21 @@ COUNT():long | category:keyword ; row aliases -required_capability: categorize_v3 +required_capability: categorize_v4 -ROW message = "connected to a" +ROW message = "connected to xyz" | EVAL x = message | STATS COUNT() BY category=CATEGORIZE(x) | EVAL y = category | SORT y ; -COUNT():long | category:keyword | y:keyword - 1 | .*?connected.+?to.+?a.*? | .*?connected.+?to.+?a.*? 
+COUNT():long | category:keyword | y:keyword + 1 | .*?connected.+?to.+?xyz.*? | .*?connected.+?to.+?xyz.*? ; from aliases -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL x = message @@ -442,9 +457,9 @@ COUNT():long | category:keyword | y:keyword ; row aliases with keep -required_capability: categorize_v3 +required_capability: categorize_v4 -ROW message = "connected to a" +ROW message = "connected to xyz" | EVAL x = message | KEEP x | STATS COUNT() BY category=CATEGORIZE(x) @@ -454,11 +469,11 @@ ROW message = "connected to a" ; COUNT():long | y:keyword - 1 | .*?connected.+?to.+?a.*? + 1 | .*?connected.+?to.+?xyz.*? ; from aliases with keep -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | EVAL x = message @@ -476,9 +491,9 @@ COUNT():long | y:keyword ; row rename -required_capability: categorize_v3 +required_capability: categorize_v4 -ROW message = "connected to a" +ROW message = "connected to xyz" | RENAME message as x | STATS COUNT() BY category=CATEGORIZE(x) | RENAME category as y @@ -486,11 +501,11 @@ ROW message = "connected to a" ; COUNT():long | y:keyword - 1 | .*?connected.+?to.+?a.*? + 1 | .*?connected.+?to.+?xyz.*? ; from rename -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | RENAME message as x @@ -506,7 +521,7 @@ COUNT():long | y:keyword ; row drop -required_capability: categorize_v3 +required_capability: categorize_v4 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -519,7 +534,7 @@ c:long ; from drop -required_capability: categorize_v3 +required_capability: categorize_v4 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 77a3e2840977f..373be23cdf847 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -403,7 +403,7 @@ public enum Cap { /** * Supported the text categorization function "CATEGORIZE". 
*/ - CATEGORIZE_V3(Build.current().isSnapshot()), + CATEGORIZE_V4(Build.current().isSnapshot()), /** * QSTR function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java index a7418654f6b0e..69e2d1c45aa3c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java @@ -18,6 +18,7 @@ import org.elasticsearch.compute.operator.EvalOperator; import org.elasticsearch.compute.operator.HashAggregationOperator.HashAggregationOperatorFactory; import org.elasticsearch.compute.operator.Operator; +import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.core.InvalidArgumentException; import org.elasticsearch.xpack.esql.core.expression.Alias; @@ -46,6 +47,11 @@ public abstract class AbstractPhysicalOperationProviders implements PhysicalOperationProviders { private final AggregateMapper aggregateMapper = new AggregateMapper(); + private final AnalysisRegistry analysisRegistry; + + AbstractPhysicalOperationProviders(AnalysisRegistry analysisRegistry) { + this.analysisRegistry = analysisRegistry; + } @Override public final PhysicalOperation groupingPhysicalOperation( @@ -173,7 +179,8 @@ else if (aggregatorMode.isOutputPartial()) { groupSpecs.stream().map(GroupSpec::toHashGroupSpec).toList(), aggregatorMode, aggregatorFactories, - context.pageSize(aggregateExec.estimatedRowSize()) + context.pageSize(aggregateExec.estimatedRowSize()), + analysisRegistry ); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java index 15f5b6579098d..7bf7d0e2d08eb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/EsPhysicalOperationProviders.java @@ -34,6 +34,7 @@ import org.elasticsearch.compute.operator.SourceOperator; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -98,7 +99,8 @@ public interface ShardContext extends org.elasticsearch.compute.lucene.ShardCont private final List shardContexts; - public EsPhysicalOperationProviders(List shardContexts) { + public EsPhysicalOperationProviders(List shardContexts, AnalysisRegistry analysisRegistry) { + super(analysisRegistry); this.shardContexts = shardContexts; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 73266551f169c..b06dd3cdb64d3 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -452,7 +452,7 @@ void runCompute(CancellableTask task, ComputeContext 
context, PhysicalPlan plan, context.exchangeSink(), enrichLookupService, lookupFromIndexService, - new EsPhysicalOperationProviders(contexts) + new EsPhysicalOperationProviders(contexts, searchService.getIndicesService().getAnalysis()) ); LOGGER.debug("Received physical plan:\n{}", plan); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index dd14e8dd82123..d4fca2a0a2540 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -1846,7 +1846,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1875,7 +1875,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1890,7 +1890,7 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index e98f2b88b33c9..57d0c7432f97b 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1212,7 +1212,7 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] */ public void testCombineProjectionWithCategorizeGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); var plan = plan(""" from test @@ -3949,7 +3949,7 @@ public void testNestedExpressionsInGroups() { * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..] 
*/ public void testNestedExpressionsInGroupsWithCategorize() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V3.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); var plan = optimizedPlan(""" from test diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java index ff9e45a9f9233..5d8da21c6faad 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlannerTests.java @@ -156,7 +156,7 @@ private Configuration config() { randomZone(), randomLocale(random()), "test_user", - "test_cluser", + "test_cluster", pragmas, EsqlPlugin.QUERY_RESULT_TRUNCATION_MAX_SIZE.getDefault(null), EsqlPlugin.QUERY_RESULT_TRUNCATION_DEFAULT_SIZE.getDefault(null), @@ -187,7 +187,7 @@ private EsPhysicalOperationProviders esPhysicalOperationProviders() throws IOExc ); } releasables.add(searcher); - return new EsPhysicalOperationProviders(shardContexts); + return new EsPhysicalOperationProviders(shardContexts, null); } private IndexReader reader() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java index c811643c8daea..e91fc6e49312d 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/planner/TestPhysicalOperationProviders.java @@ -8,7 +8,9 @@ package org.elasticsearch.xpack.esql.planner; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.analysis.common.CommonAnalysisPlugin; import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.compute.Describable; import org.elasticsearch.compute.aggregation.GroupingAggregator; @@ -28,7 +30,11 @@ import org.elasticsearch.compute.operator.OrdinalsGroupingOperator; import org.elasticsearch.compute.operator.SourceOperator; import org.elasticsearch.compute.operator.SourceOperator.SourceOperatorFactory; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.TestEnvironment; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.plugins.scanners.StablePluginsRegistry; import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.TestBlockFactory; import org.elasticsearch.xpack.esql.core.expression.Attribute; @@ -39,7 +45,9 @@ import org.elasticsearch.xpack.esql.plan.physical.FieldExtractExec; import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.LocalExecutionPlannerContext; import org.elasticsearch.xpack.esql.planner.LocalExecutionPlanner.PhysicalOperation; +import org.elasticsearch.xpack.ml.MachineLearning; +import java.io.IOException; import java.util.List; import java.util.Random; import java.util.function.Function; @@ -48,6 +56,7 @@ import static com.carrotsearch.randomizedtesting.generators.RandomNumbers.randomIntBetween; import static java.util.stream.Collectors.joining; +import static 
org.apache.lucene.tests.util.LuceneTestCase.createTempDir; import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.DOC_VALUES; import static org.elasticsearch.index.mapper.MappedFieldType.FieldExtractPreference.NONE; @@ -56,7 +65,16 @@ public class TestPhysicalOperationProviders extends AbstractPhysicalOperationPro private final Page testData; private final List columnNames; - public TestPhysicalOperationProviders(Page testData, List columnNames) { + public TestPhysicalOperationProviders(Page testData, List columnNames) throws IOException { + super( + new AnalysisModule( + TestEnvironment.newEnvironment( + Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build() + ), + List.of(new MachineLearning(Settings.EMPTY), new CommonAnalysisPlugin()), + new StablePluginsRegistry() + ).getAnalysisRegistry() + ); this.testData = testData; this.columnNames = columnNames; } From 2226d6cbfa434206826207da46e95969fc77776c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 29 Nov 2024 11:24:28 +0100 Subject: [PATCH 076/139] Add _field_names disabling to archival index tests (#117703) Disabling the "_field_names" field in mappings was possible until 8.x and now issues a deprecation warning. We need to maintain the ability to read these mappings for archival indices, so this change adds this case to one of the index mappings in tests and checks for the corresponding deprecation warning. --- .../test/java/org/elasticsearch/oldrepos/OldMappingsIT.java | 6 +++++- .../test/resources/org/elasticsearch/oldrepos/custom.json | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java index 67dbdec6b8399..95bc92d4f185a 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldMappingsIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; import org.elasticsearch.client.RestClient; import org.elasticsearch.client.WarningsHandler; @@ -166,7 +167,10 @@ public void setupIndex() throws IOException { createRestoreRequest.addParameter("wait_for_completion", "true"); createRestoreRequest.setJsonEntity("{\"indices\":\"" + indices.stream().collect(Collectors.joining(",")) + "\"}"); createRestoreRequest.setOptions(RequestOptions.DEFAULT.toBuilder().setWarningsHandler(WarningsHandler.PERMISSIVE)); - assertOK(client().performRequest(createRestoreRequest)); + Response response = client().performRequest(createRestoreRequest); + // check deprecation warning for "_field_names" disabling + assertTrue(response.getWarnings().stream().filter(s -> s.contains("Disabling _field_names is not necessary")).count() > 0); + assertOK(response); } private Request createIndex(String indexName, String file) throws IOException { diff --git a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json index ae52ccbcce330..ad1c6b0dc59ae 100644 ---
a/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json +++ b/x-pack/qa/repository-old-versions/src/test/resources/org/elasticsearch/oldrepos/custom.json @@ -1,4 +1,7 @@ "_default_": { + "_field_names": { + "enabled": false + }, "properties": { "apache2": { "properties": { From b7c38a1451d13fa7402ff7055231451f43ac3ac6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Fri, 29 Nov 2024 21:54:34 +1100 Subject: [PATCH 077/139] Mute org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT test {scoring.QstrWithFieldAndScoringSortedEval} #117751 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 40d3dcf46e1b9..96631d15f374f 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -225,6 +225,9 @@ tests: - class: org.elasticsearch.xpack.inference.InferenceCrudIT method: testSupportedStream issue: https://github.com/elastic/elasticsearch/issues/117745 +- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT + method: test {scoring.QstrWithFieldAndScoringSortedEval} + issue: https://github.com/elastic/elasticsearch/issues/117751 # Examples: # From 045f6a31f994f51d87a217be60251e060132c8a1 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Fri, 29 Nov 2024 11:55:51 +0100 Subject: [PATCH 078/139] Add INDEX_REFRESH_BLOCK (#117543) This change adds a new ClusterBlockLevel called REFRESH. This level is used in a new IndexMetadata.INDEX_REFRESH_BLOCK which is automatically added to new indices that are created from an empty store, with replicas, and only on serverless deployments that have a feature flag enabled. This block is also only added when all nodes of a cluster are on a recent enough transport version. If for some reason the new ClusterBlock is sent over the wire to a node with an old transport version, the REFRESH cluster block level will be removed from the set of blocked levels. In the future, the REFRESH cluster block will be used: to block refreshes on shards until an unpromotable shard is started, and to allow skipping shards when searching. Relates ES-10131 --- .../org/elasticsearch/TransportVersions.java | 1 + .../cluster/block/ClusterBlock.java | 24 +++++- .../cluster/block/ClusterBlockLevel.java | 3 +- .../cluster/metadata/IndexMetadata.java | 9 ++ .../metadata/MetadataCreateIndexService.java | 54 ++++++++++++ .../cluster/ClusterStateTests.java | 18 ++-- .../cluster/block/ClusterBlockTests.java | 49 +++++++++-- .../MetadataCreateIndexServiceTests.java | 86 ++++++++++++++++++- 8 files changed, 228 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index a1315ccf66701..b38a285907937 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -211,6 +211,7 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_REMOVE_NODE_LEVEL_PLAN = def(8_800_00_0); public static final TransportVersion LOGSDB_TELEMETRY_CUSTOM_CUTOFF_DATE = def(8_801_00_0); public static final TransportVersion SOURCE_MODE_TELEMETRY = def(8_802_00_0); + public static final TransportVersion NEW_REFRESH_CLUSTER_BLOCK = def(8_803_00_0); /* * STOP! READ THIS FIRST!
No, really, diff --git a/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlock.java b/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlock.java index 4e47925d383c2..25c6a1ff5b67f 100644 --- a/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlock.java +++ b/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlock.java @@ -9,6 +9,7 @@ package org.elasticsearch.cluster.block; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -21,6 +22,7 @@ import java.util.EnumSet; import java.util.Locale; import java.util.Objects; +import java.util.function.Predicate; public class ClusterBlock implements Writeable, ToXContentFragment { @@ -142,7 +144,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeVInt(id); out.writeOptionalString(uuid); out.writeString(description); - out.writeEnumSet(levels); + if (out.getTransportVersion().onOrAfter(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK)) { + out.writeEnumSet(levels); + } else { + // do not send ClusterBlockLevel.REFRESH to old nodes + out.writeEnumSet(filterLevels(levels, level -> ClusterBlockLevel.REFRESH.equals(level) == false)); + } out.writeBoolean(retryable); out.writeBoolean(disableStatePersistence); RestStatus.writeTo(out, status); @@ -185,4 +192,19 @@ public int hashCode() { public boolean isAllowReleaseResources() { return allowReleaseResources; } + + static EnumSet filterLevels(EnumSet levels, Predicate predicate) { + assert levels != null; + int size = levels.size(); + if (size == 0 || (size == 1 && predicate.test(levels.iterator().next()))) { + return levels; + } + var filteredLevels = EnumSet.noneOf(ClusterBlockLevel.class); + for (ClusterBlockLevel level : levels) { + if (predicate.test(level)) { + filteredLevels.add(level); + } + } + return filteredLevels; + } } diff --git a/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlockLevel.java b/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlockLevel.java index f6330fb18e5e6..262044b091ac7 100644 --- a/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlockLevel.java +++ b/server/src/main/java/org/elasticsearch/cluster/block/ClusterBlockLevel.java @@ -15,7 +15,8 @@ public enum ClusterBlockLevel { READ, WRITE, METADATA_READ, - METADATA_WRITE; + METADATA_WRITE, + REFRESH; public static final EnumSet ALL = EnumSet.allOf(ClusterBlockLevel.class); public static final EnumSet READ_WRITE = EnumSet.of(READ, WRITE); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java index 6456240c2317e..b7c1ee5fbad96 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java @@ -140,6 +140,15 @@ public class IndexMetadata implements Diffable, ToXContentFragmen RestStatus.TOO_MANY_REQUESTS, EnumSet.of(ClusterBlockLevel.WRITE) ); + public static final ClusterBlock INDEX_REFRESH_BLOCK = new ClusterBlock( + 14, + "index refresh blocked, waiting for shard(s) to be started", + true, + false, + false, + RestStatus.REQUEST_TIMEOUT, + EnumSet.of(ClusterBlockLevel.REFRESH) + ); // 'event.ingested' (part of Elastic Common Schema) range is tracked in cluster state, along with @timestamp public static final String EVENT_INGESTED_FIELD_NAME = "event.ingested"; 
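// A minimal sketch, not part of this patch: it illustrates the wire-compatibility behavior described in the
// commit message, using only APIs introduced or exercised in this series (ClusterBlock#writeTo above, the
// stream utilities from ClusterBlockTests below). The block id and level set here are illustrative; a real
// caller would live in a test method that declares IOException.
ClusterBlock block = new ClusterBlock(123, "example block", true, false, false, RestStatus.REQUEST_TIMEOUT, EnumSet.allOf(ClusterBlockLevel.class));
try (BytesStreamOutput out = new BytesStreamOutput()) {
    // Target a node that predates NEW_REFRESH_CLUSTER_BLOCK; writeTo filters REFRESH out of the level set.
    out.setTransportVersion(TransportVersionUtils.getPreviousVersion(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK));
    block.writeTo(out);
    StreamInput in = out.bytes().streamInput();
    in.setTransportVersion(out.getTransportVersion());
    ClusterBlock received = new ClusterBlock(in);
    assert received.levels().contains(ClusterBlockLevel.REFRESH) == false; // REFRESH was dropped on the wire
    assert received.levels().contains(ClusterBlockLevel.WRITE);            // the remaining levels survive
}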
diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java index 1f014a526b9a6..52e4d75ac5116 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexService.java @@ -28,6 +28,7 @@ import org.elasticsearch.cluster.ClusterStateUpdateTask; import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.RoutingTable; @@ -127,6 +128,16 @@ public class MetadataCreateIndexService { public static final int MAX_INDEX_NAME_BYTES = 255; + /** + * Name of the setting used to allow blocking refreshes on newly created indices. + */ + public static final String USE_INDEX_REFRESH_BLOCK_SETTING_NAME = "stateless.indices.use_refresh_block_upon_index_creation"; + + @FunctionalInterface + interface ClusterBlocksTransformer { + void apply(ClusterBlocks.Builder clusterBlocks, IndexMetadata indexMetadata, TransportVersion minClusterTransportVersion); + } + private final Settings settings; private final ClusterService clusterService; private final IndicesService indicesService; @@ -139,6 +150,7 @@ public class MetadataCreateIndexService { private final boolean forbidPrivateIndexSettings; private final Set indexSettingProviders; private final ThreadPool threadPool; + private final ClusterBlocksTransformer blocksTransformerUponIndexCreation; public MetadataCreateIndexService( final Settings settings, @@ -166,6 +178,7 @@ public MetadataCreateIndexService( this.shardLimitValidator = shardLimitValidator; this.indexSettingProviders = indexSettingProviders.getIndexSettingProviders(); this.threadPool = threadPool; + this.blocksTransformerUponIndexCreation = createClusterBlocksTransformerForIndexCreation(settings); } /** @@ -540,8 +553,10 @@ private ClusterState applyCreateIndexWithTemporaryService( currentState, indexMetadata, metadataTransformer, + blocksTransformerUponIndexCreation, allocationService.getShardRoutingRoleStrategy() ); + assert assertHasRefreshBlock(indexMetadata, updated, updated.getMinTransportVersion()); if (request.performReroute()) { updated = allocationService.reroute(updated, "index [" + indexMetadata.getIndex().getName() + "] created", rerouteListener); } @@ -1294,6 +1309,7 @@ static ClusterState clusterStateCreateIndex( ClusterState currentState, IndexMetadata indexMetadata, BiConsumer metadataTransformer, + ClusterBlocksTransformer blocksTransformer, ShardRoutingRoleStrategy shardRoutingRoleStrategy ) { final Metadata newMetadata; @@ -1307,6 +1323,9 @@ static ClusterState clusterStateCreateIndex( var blocksBuilder = ClusterBlocks.builder().blocks(currentState.blocks()); blocksBuilder.updateBlocks(indexMetadata); + if (blocksTransformer != null) { + blocksTransformer.apply(blocksBuilder, indexMetadata, currentState.getMinTransportVersion()); + } var routingTableBuilder = RoutingTable.builder(shardRoutingRoleStrategy, currentState.routingTable()) .addAsNew(newMetadata.index(indexMetadata.getIndex().getName())); @@ -1745,4 +1764,39 @@ public static void validateStoreTypeSetting(Settings indexSettings) { ); } } + + private static boolean useRefreshBlock(Settings settings) { + return 
DiscoveryNode.isStateless(settings) && settings.getAsBoolean(USE_INDEX_REFRESH_BLOCK_SETTING_NAME, false); + } + + static ClusterBlocksTransformer createClusterBlocksTransformerForIndexCreation(Settings settings) { + if (useRefreshBlock(settings) == false) { + return (clusterBlocks, indexMetadata, minClusterTransportVersion) -> {}; + } + logger.debug("applying refresh block on index creation"); + return (clusterBlocks, indexMetadata, minClusterTransportVersion) -> { + if (applyRefreshBlock(indexMetadata, minClusterTransportVersion)) { + // Applies the INDEX_REFRESH_BLOCK to the index. This block will remain in cluster state until an unpromotable shard is + // started or a configurable delay has elapsed. + clusterBlocks.addIndexBlock(indexMetadata.getIndex().getName(), IndexMetadata.INDEX_REFRESH_BLOCK); + } + }; + } + + private static boolean applyRefreshBlock(IndexMetadata indexMetadata, TransportVersion minClusterTransportVersion) { + return 0 < indexMetadata.getNumberOfReplicas() // index has replicas + && indexMetadata.getResizeSourceIndex() == null // index is not a split/shrink index + && indexMetadata.getInSyncAllocationIds().values().stream().allMatch(Set::isEmpty) // index is a new index + && minClusterTransportVersion.onOrAfter(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK); + } + + private boolean assertHasRefreshBlock(IndexMetadata indexMetadata, ClusterState clusterState, TransportVersion minTransportVersion) { + var hasRefreshBlock = clusterState.blocks().hasIndexBlock(indexMetadata.getIndex().getName(), IndexMetadata.INDEX_REFRESH_BLOCK); + if (useRefreshBlock(settings) == false || applyRefreshBlock(indexMetadata, minTransportVersion) == false) { + assert hasRefreshBlock == false : indexMetadata.getIndex(); + } else { + assert hasRefreshBlock : indexMetadata.getIndex(); + } + return true; + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/ClusterStateTests.java b/server/src/test/java/org/elasticsearch/cluster/ClusterStateTests.java index 9613086aa9f57..668aea70c23f2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/ClusterStateTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/ClusterStateTests.java @@ -167,7 +167,8 @@ public void testToXContent() throws IOException { "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } }, @@ -180,7 +181,8 @@ public void testToXContent() throws IOException { "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } } @@ -440,7 +442,8 @@ public void testToXContent_FlatSettingTrue_ReduceMappingFalse() throws IOExcepti "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } }, @@ -453,7 +456,8 @@ public void testToXContent_FlatSettingTrue_ReduceMappingFalse() throws IOExcepti "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } } @@ -712,7 +716,8 @@ public void testToXContent_FlatSettingFalse_ReduceMappingTrue() throws IOExcepti "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } }, @@ -725,7 +730,8 @@ public void testToXContent_FlatSettingFalse_ReduceMappingTrue() throws IOExcepti "read", "write", "metadata_read", - "metadata_write" + "metadata_write", + "refresh" ] } } diff --git a/server/src/test/java/org/elasticsearch/cluster/block/ClusterBlockTests.java b/server/src/test/java/org/elasticsearch/cluster/block/ClusterBlockTests.java index 311f2ec36af5c..0237fff8fdda5 100644 ---
a/server/src/test/java/org/elasticsearch/cluster/block/ClusterBlockTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/block/ClusterBlockTests.java @@ -10,19 +10,22 @@ package org.elasticsearch.cluster.block; import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ESTestCase; -import java.util.Arrays; import java.util.Collections; -import java.util.List; +import java.util.EnumSet; import java.util.Map; import static java.util.EnumSet.copyOf; +import static org.elasticsearch.test.TransportVersionUtils.getFirstVersion; +import static org.elasticsearch.test.TransportVersionUtils.getPreviousVersion; import static org.elasticsearch.test.TransportVersionUtils.randomVersion; +import static org.elasticsearch.test.TransportVersionUtils.randomVersionBetween; import static org.hamcrest.CoreMatchers.endsWith; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.not; @@ -36,7 +39,7 @@ public void testSerialization() throws Exception { int iterations = randomIntBetween(5, 20); for (int i = 0; i < iterations; i++) { TransportVersion version = randomVersion(random()); - ClusterBlock clusterBlock = randomClusterBlock(); + ClusterBlock clusterBlock = randomClusterBlock(version); BytesStreamOutput out = new BytesStreamOutput(); out.setTransportVersion(version); @@ -50,13 +53,41 @@ public void testSerialization() throws Exception { } } + public void testSerializationBwc() throws Exception { + var out = new BytesStreamOutput(); + out.setTransportVersion( + randomVersionBetween(random(), getFirstVersion(), getPreviousVersion(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK)) + ); + + var clusterBlock = randomClusterBlock(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK); + clusterBlock.writeTo(out); + + var in = out.bytes().streamInput(); + in.setTransportVersion(randomVersion()); + + assertClusterBlockEquals( + new ClusterBlock( + clusterBlock.id(), + clusterBlock.uuid(), + clusterBlock.description(), + clusterBlock.retryable(), + clusterBlock.disableStatePersistence(), + clusterBlock.isAllowReleaseResources(), + clusterBlock.status(), + // ClusterBlockLevel.REFRESH should not be sent over the wire to nodes with version < NEW_REFRESH_CLUSTER_BLOCK + ClusterBlock.filterLevels(clusterBlock.levels(), level -> ClusterBlockLevel.REFRESH.equals(level) == false) + ), + new ClusterBlock(in) + ); + } + public void testToStringDanglingComma() { - final ClusterBlock clusterBlock = randomClusterBlock(); + final ClusterBlock clusterBlock = randomClusterBlock(randomVersion(random())); assertThat(clusterBlock.toString(), not(endsWith(","))); } public void testGlobalBlocksCheckedIfNoIndicesSpecified() { - ClusterBlock globalBlock = randomClusterBlock(); + ClusterBlock globalBlock = randomClusterBlock(randomVersion(random())); ClusterBlocks clusterBlocks = new ClusterBlocks(Collections.singleton(globalBlock), Map.of()); ClusterBlockException exception = clusterBlocks.indicesBlockedException(randomFrom(globalBlock.levels()), new String[0]); assertNotNull(exception); @@ -113,9 +144,13 @@ public void testGetIndexBlockWithId() { assertThat(builder.build().getIndexBlockWithId("index", randomValueOtherThan(blockId, ESTestCase::randomInt)), nullValue()); } - private static ClusterBlock randomClusterBlock() { + private static ClusterBlock 
randomClusterBlock(TransportVersion version) { final String uuid = randomBoolean() ? UUIDs.randomBase64UUID() : null; - final List levels = Arrays.asList(ClusterBlockLevel.values()); + final EnumSet levels = ClusterBlock.filterLevels( + EnumSet.allOf(ClusterBlockLevel.class), + // Filter out ClusterBlockLevel.REFRESH for versions < TransportVersions.NEW_REFRESH_CLUSTER_BLOCK + level -> ClusterBlockLevel.REFRESH.equals(level) == false || version.onOrAfter(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK) + ); return new ClusterBlock( randomInt(), uuid, diff --git a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java index 96a74d2e23aad..1876a1f2da556 100644 --- a/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/metadata/MetadataCreateIndexServiceTests.java @@ -36,6 +36,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.MaxRetryAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.version.CompatibilityVersions; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.IndexScopedSettings; @@ -66,6 +67,7 @@ import org.elasticsearch.snapshots.EmptySnapshotsInfoService; import org.elasticsearch.test.ClusterServiceUtils; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TransportVersionUtils; import org.elasticsearch.test.gateway.TestGatewayAllocator; import org.elasticsearch.test.index.IndexVersionUtils; import org.elasticsearch.threadpool.TestThreadPool; @@ -105,6 +107,8 @@ import static org.elasticsearch.cluster.metadata.MetadataCreateIndexService.resolveAndValidateAliases; import static org.elasticsearch.index.IndexSettings.INDEX_SOFT_DELETES_SETTING; import static org.elasticsearch.indices.ShardLimitValidatorTests.createTestShardLimitService; +import static org.hamcrest.Matchers.aMapWithSize; +import static org.hamcrest.Matchers.anEmptyMap; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.endsWith; import static org.hamcrest.Matchers.equalTo; @@ -1133,7 +1137,7 @@ public void testClusterStateCreateIndexThrowsWriteIndexValidationException() thr assertThat( expectThrows( IllegalStateException.class, - () -> clusterStateCreateIndex(currentClusterState, newIndex, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) + () -> clusterStateCreateIndex(currentClusterState, newIndex, null, null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY) ).getMessage(), startsWith("alias [alias1] has more than one write index [") ); @@ -1153,6 +1157,7 @@ public void testClusterStateCreateIndex() { currentClusterState, newIndexMetadata, null, + null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY ); assertThat(updatedClusterState.blocks().getIndexBlockWithId("test", INDEX_READ_ONLY_BLOCK.id()), is(INDEX_READ_ONLY_BLOCK)); @@ -1198,6 +1203,7 @@ public void testClusterStateCreateIndexWithMetadataTransaction() { currentClusterState, newIndexMetadata, metadataTransformer, + null, TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY ); assertTrue(updatedClusterState.metadata().findAllAliases(new String[] { "my-index" }).containsKey("my-index")); @@ -1547,6 +1553,84 @@ public void testDeprecateSimpleFS() { 
); } + public void testClusterStateCreateIndexWithClusterBlockTransformer() { + { + var emptyClusterState = ClusterState.builder(ClusterState.EMPTY_STATE).build(); + var updatedClusterState = clusterStateCreateIndex( + emptyClusterState, + IndexMetadata.builder("test") + .settings(settings(IndexVersion.current())) + .numberOfShards(1) + .numberOfReplicas(randomIntBetween(1, 3)) + .build(), + null, + MetadataCreateIndexService.createClusterBlocksTransformerForIndexCreation(Settings.EMPTY), + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + ); + assertThat(updatedClusterState.blocks().indices(), is(anEmptyMap())); + assertThat(updatedClusterState.blocks().hasIndexBlock("test", IndexMetadata.INDEX_REFRESH_BLOCK), is(false)); + assertThat(updatedClusterState.routingTable().index("test"), is(notNullValue())); + } + { + var minTransportVersion = TransportVersionUtils.randomCompatibleVersion(random()); + var emptyClusterState = ClusterState.builder(ClusterState.EMPTY_STATE) + .nodes(DiscoveryNodes.builder().add(DiscoveryNodeUtils.create("_node_id")).build()) + .putCompatibilityVersions("_node_id", new CompatibilityVersions(minTransportVersion, Map.of())) + .build(); + var settings = Settings.builder() + .put(DiscoveryNode.STATELESS_ENABLED_SETTING_NAME, true) + .put(MetadataCreateIndexService.USE_INDEX_REFRESH_BLOCK_SETTING_NAME, true) + .build(); + int nbReplicas = randomIntBetween(0, 1); + var updatedClusterState = clusterStateCreateIndex( + emptyClusterState, + IndexMetadata.builder("test") + .settings(settings(IndexVersion.current())) + .numberOfShards(1) + .numberOfReplicas(nbReplicas) + .build() + .withTimestampRanges(IndexLongFieldRange.UNKNOWN, IndexLongFieldRange.UNKNOWN, minTransportVersion), + null, + MetadataCreateIndexService.createClusterBlocksTransformerForIndexCreation(settings), + TestShardRoutingRoleStrategies.DEFAULT_ROLE_ONLY + ); + + var expectRefreshBlock = 0 < nbReplicas && minTransportVersion.onOrAfter(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK); + assertThat(updatedClusterState.blocks().indices(), is(aMapWithSize(expectRefreshBlock ? 
1 : 0))); + assertThat(updatedClusterState.blocks().hasIndexBlock("test", IndexMetadata.INDEX_REFRESH_BLOCK), is(expectRefreshBlock)); + assertThat(updatedClusterState.routingTable().index("test"), is(notNullValue())); + } + } + + public void testCreateClusterBlocksTransformerForIndexCreation() { + boolean isStateless = randomBoolean(); + boolean useRefreshBlock = randomBoolean(); + var minTransportVersion = TransportVersionUtils.randomCompatibleVersion(random()); + + var applier = MetadataCreateIndexService.createClusterBlocksTransformerForIndexCreation( + Settings.builder() + .put(DiscoveryNode.STATELESS_ENABLED_SETTING_NAME, isStateless) + .put(MetadataCreateIndexService.USE_INDEX_REFRESH_BLOCK_SETTING_NAME, useRefreshBlock) + .build() + ); + assertThat(applier, notNullValue()); + + var blocks = ClusterBlocks.builder().blocks(ClusterState.EMPTY_STATE.blocks()); + applier.apply( + blocks, + IndexMetadata.builder("test") + .settings(settings(IndexVersion.current())) + .numberOfShards(1) + .numberOfReplicas(randomIntBetween(1, 3)) + .build(), + minTransportVersion + ); + assertThat( + blocks.hasIndexBlock("test", IndexMetadata.INDEX_REFRESH_BLOCK), + is(isStateless && useRefreshBlock && minTransportVersion.onOrAfter(TransportVersions.NEW_REFRESH_CLUSTER_BLOCK)) + ); + } + private IndexTemplateMetadata addMatchingTemplate(Consumer configurator) { IndexTemplateMetadata.Builder builder = templateMetadataBuilder("template1", "te*"); configurator.accept(builder); From ad83d9b35ddc01229a5b2b5de21b122f9d1b2106 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Fri, 29 Nov 2024 14:50:01 +0200 Subject: [PATCH 079/139] Updating retriever-examples documentation to run validation tests on the provided snippets (#116643) --- docs/reference/search/rrf.asciidoc | 98 +- .../retrievers-examples.asciidoc | 1270 ++++++++++++++--- 2 files changed, 1149 insertions(+), 219 deletions(-) diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index edd3b67e3de04..a942c0162a80a 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -105,7 +105,7 @@ The `rrf` retriever does not currently support: * <> Using unsupported features as part of a search with an `rrf` retriever results in an exception. -+ + IMPORTANT: It is best to avoid providing a <> as part of the request, as RRF creates one internally that is shared by all sub-retrievers to ensure consistent results. @@ -703,3 +703,99 @@ So for the same params as above, we would now have: * `from=0, size=2` would return [`1`, `5`] with ranks `[1, 2]` * `from=2, size=2` would return an empty result set as it would fall outside the available `rank_window_size` results. + +==== Aggregations in RRF + +The `rrf` retriever supports aggregations from all specified sub-retrievers. 
Important notes about aggregations: + +* They operate on the complete result set from all sub-retrievers +* They are not limited by the `rank_window_size` parameter +* They process the union of all matching documents + +For example, consider the following document set: +[source,js] +---- +{ + "_id": 1, "termA": "foo", + "_id": 2, "termA": "foo", "termB": "bar", + "_id": 3, "termA": "aardvark", "termB": "bar", + "_id": 4, "termA": "foo", "termB": "bar" +} +---- +// NOTCONSOLE + +Perform a term aggregation on the `termA` field using an `rrf` retriever: +[source,js] +---- +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "term": { + "termB": "bar" + } + } + } + }, + { + "standard": { + "query": { + "match_all": { } + } + } + } + ], + "rank_window_size": 1 + } + }, + "size": 1, + "aggs": { + "termA_agg": { + "terms": { + "field": "termA" + } + } + } +} +---- +// NOTCONSOLE + +The aggregation results will include *all* matching documents, regardless of `rank_window_size`. +[source, js] +---- +{ + "foo": 3, + "aardvark": 1 +} + +---- +// NOTCONSOLE + +==== Highlighting in RRF + +Using the `rrf` retriever, you can add <> to show relevant text snippets in your search results. Highlighted snippets are computed based +on the matching text queries defined on the sub-retrievers. + +IMPORTANT: Highlighting on vector fields, using either the `knn` retriever or a `knn` query, is not supported. + +A more specific example of highlighting in RRF can also be found in the <> page. + +==== Inner hits in RRF + +The `rrf` retriever supports <> functionality, allowing you to retrieve +related nested or parent/child documents alongside your main search results. Inner hits can be +specified as part of any nested sub-retriever and will be propagated to the top-level parent +retriever. Note that the inner hit computation will take place only at the end of the `rrf` retriever's +evaluation on the top matching documents, and not as part of the query execution of the nested +sub-retrievers. + +[IMPORTANT] +==== +When defining multiple `inner_hits` sections across sub-retrievers: + +* Each `inner_hits` section must have a unique name +* Names must be unique across all sub-retrievers in the search request +==== diff --git a/docs/reference/search/search-your-data/retrievers-examples.asciidoc b/docs/reference/search/search-your-data/retrievers-examples.asciidoc index 8cd1a4bf5ce98..ad1cc32dcee01 100644 --- a/docs/reference/search/search-your-data/retrievers-examples.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-examples.asciidoc @@ -1,31 +1,16 @@ [[retrievers-examples]] -=== Retrievers examples Learn how to combine different retrievers in these hands-on examples. -To demonstrate the full functionality of retrievers, these examples require access to a <> set up using the <>. + +=== Retrievers examples [discrete] [[retrievers-examples-setup]] ==== Add example data -To begin with, we'll set up the necessary services and have them in place for later use. - -[source,js] ----- -// Setup rerank task stored as `my-rerank-model` -PUT _inference/rerank/my-rerank-model -{ - "service": "cohere", - "service_settings": { - "model_id": "rerank-english-v3.0", - "api_key": "{{COHERE_API_KEY}}" - } -} ----- -//NOTCONSOLE +To begin with, let's create the `retrievers_example` index, and add some documents to it. -Now that we have our reranking service in place, lets create the `retrievers_example` index, and add some documents to it.
-[source,js] +[source,console] ---- PUT retrievers_example { @@ -49,11 +34,7 @@ PUT retrievers_example } } } ----- -//NOTCONSOLE -[source,js] ----- POST /retrievers_example/_doc/1 { "vector": [0.23, 0.67, 0.89], @@ -94,10 +75,12 @@ POST /retrievers_example/_doc/5 "topic": ["documentation", "observability", "elastic"] } +POST /retrievers_example/_refresh + ---- -//NOTCONSOLE +// TESTSETUP -Now that we also have our documents in place, let's try to run some queries using retrievers. +Now that we have our documents in place, let's try to run some queries using retrievers. [discrete] [[retrievers-examples-combining-standard-knn-retrievers-with-rrf]] @@ -112,170 +95,272 @@ To implement this in the retriever framework, we start with the top-level elemen retriever. This retriever operates on top of two other retrievers: a `knn` retriever and a `standard` retriever. Our query structure would look like this: -[source,js] +[source,console] ---- GET /retrievers_example/_search { - "retriever":{ - "rrf": { - "retrievers":[ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "_source": ["text", "topic"] + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false +} +---- +// TEST + +This returns the following response based on the final rrf score for each result. + +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334 + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334 + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25 + } + ] + } } ---- -//NOTCONSOLE +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== [discrete] [[retrievers-examples-collapsing-retriever-results]] ==== Example: Grouping results by year with `collapse` In our result set, we have many documents with the same `year` value. We can clean this -up using the `collapse` parameter with our retriever. This enables grouping results by -any field and returns only the highest-scoring document from each group. In this example +up using the `collapse` parameter with our retriever. This, as with the standard <> feature, +enables grouping results by any field and returns only the highest-scoring document from each group. In this example we'll collapse our results based on the `year` field. 
-[source,js] +[source,console] ---- GET /retrievers_example/_search { - "retriever":{ - "rrf": { - "retrievers":[ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "collapse": { - "field": "year", - "inner_hits": { - "name": "topic related documents", - "_source": ["text", "year"] - } - }, - "_source": ["text", "topic"] + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "collapse": { + "field": "year", + "inner_hits": { + "name": "topic related documents", + "_source": [ + "year" + ] + } + }, + "_source": false } ---- -//NOTCONSOLE +// TEST[continued] -[discrete] -[[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] -==== Example: Rerank results of an RRF retriever +This returns the following response with collapsed results. -Previously, we used a `text_similarity_reranker` retriever within an `rrf` retriever. -Because retrievers support full composability, we can also rerank the results of an -`rrf` retriever. Let's apply this to our first example. - -[source,js] +.Example response +[%collapsible] +============== +[source,console-result] ---- -GET retrievers_example/_search { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "rrf": { - "retrievers": [ - { - "standard":{ - "query":{ - "query_string":{ - "query": "(information retrieval) OR (artificial intelligence)", - "default_field": "text" - } - } - } - }, - { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - } - ], - "rank_window_size": 10, - "rank_constant": 1 - } - }, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" 
- } - }, - "_source": ["text", "topic"] + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "fields": { + "year": [ + 2024 + ] + }, + "inner_hits": { + "topic related documents": { + "hits": { + "total": { + "value": 2, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "_source": { + "year": 2024 + } + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25, + "_source": { + "year": 2024 + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "fields": { + "year": [ + 2023 + ] + }, + "inner_hits": { + "topic related documents": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "_source": { + "year": 2023 + } + } + ] + } + } + } + } + ] + } } - ---- -//NOTCONSOLE +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== [discrete] -[[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] -==== Example: RRF with semantic reranker +[[retrievers-examples-highlighting-retriever-results]] +==== Example: Highlighting results based on nested sub-retrievers -For this example, we'll replace our semantic query with the `my-rerank-model` -reranker we previously configured. Since this is a reranker, it needs an initial pool of -documents to work with. In this case, we'll filter for documents about `ai` topics. +Highlighting is now also available for nested sub-retrievers matches. For example, consider the same +`rrf` retriever as above, with a `knn` and `standard` retriever as its sub-retrievers. We can specify a `highlight` +section, as defined in <> documentation, and compute highlights for the top results. -[source,js] +[source,console] ---- GET /retrievers_example/_search { "retriever": { "rrf": { "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, { "knn": { "field": "vector", @@ -287,21 +372,221 @@ GET /retrievers_example/_search "k": 3, "num_candidates": 5 } - }, + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "highlight": { + "fields": { + "text": { + "fragment_size": 150, + "number_of_fragments": 3 + } + } + }, + "_source": false +} +---- +// TEST[continued] + +This would highlight the `text` field, based on the matches produced by the `standard` retriever. The highlighted snippets +would then be included in the response as usual, i.e. under each search hit. 
+ +.Example response +[%collapsible] +============== +[source,console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.8333334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.8333334, + "highlight": { + "text": [ + "Large language models are revolutionizing information retrieval by boosting search precision, deepening contextual understanding, and reshaping user experiences" + ] + } + }, + { + "_index": "retrievers_example", + "_id": "2", + "_score": 0.8333334, + "highlight": { + "text": [ + "Artificial intelligence is transforming medicine, from advancing diagnostics and tailoring treatment plans to empowering predictive patient care for improved" + ] + } + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.25 + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== + +[discrete] +[[retrievers-examples-inner-hits-retriever-results]] +==== Example: Computing inner hits from nested sub-retrievers + +We can also define `inner_hits` to be computed on any of the sub-retrievers, and propagate those computations to the top +level compound retriever. For example, let's create a new index with a `knn` field, nested under the `nested_field` field, +and index a couple of documents. + +[source,console] +---- +PUT retrievers_example_nested +{ + "mappings": { + "properties": { + "nested_field": { + "type": "nested", + "properties": { + "paragraph_id": { + "type": "keyword" + }, + "nested_vector": { + "type": "dense_vector", + "dims": 3, + "similarity": "l2_norm", + "index": true + } + } + }, + "topic": { + "type": "keyword" + } + } + } +} + +POST /retrievers_example_nested/_doc/1 +{ + "nested_field": [ + { + "paragraph_id": "1a", + "nested_vector": [ + -1.12, + -0.59, + 0.78 + ] + }, + { + "paragraph_id": "1b", + "nested_vector": [ + -0.12, + 1.56, + 0.42 + ] + }, + { + "paragraph_id": "1c", + "nested_vector": [ + 1, + -1, + 0 + ] + } + ], + "topic": [ + "ai" + ] +} + +POST /retrievers_example_nested/_doc/2 +{ + "nested_field": [ + { + "paragraph_id": "2a", + "nested_vector": [ + 0.23, + 1.24, + 0.65 + ] + } + ], + "topic": [ + "information_retrieval" + ] +} + +POST /retrievers_example_nested/_doc/3 +{ + "topic": [ + "ai" + ] +} + +POST /retrievers_example_nested/_refresh +---- +// TEST[continued] + +Now we can run an `rrf` retriever query and also compute <> for the `nested_field.nested_vector` +field, based on the `knn` query specified. + +[source,console] +---- +GET /retrievers_example_nested/_search +{ + "retriever": { + "rrf": { + "retrievers": [ { - "text_similarity_reranker": { - "retriever": { - "standard": { + "standard": { + "query": { + "nested": { + "path": "nested_field", + "inner_hits": { + "name": "nested_vector", + "_source": false, + "fields": [ + "nested_field.paragraph_id" + ] + }, "query": { - "term": { - "topic": "ai" + "knn": { + "field": "nested_field.nested_vector", + "query_vector": [ + 1, + 0, + 0.5 + ], + "k": 10 } } } - }, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "Can I use generative AI to identify user intent and improve search relevance?" 
+ } + } + }, + { + "standard": { + "query": { + "term": { + "topic": "ai" + } + } } } ], @@ -310,64 +595,184 @@ GET /retrievers_example/_search } }, "_source": [ - "text", "topic" ] } ---- -//NOTCONSOLE - -[discrete] -[[retrievers-examples-chaining-text-similarity-reranker-retrievers]] -==== Example: Chaining multiple semantic rerankers +// TEST[continued] -Full composability means we can chain together multiple retrievers of the same type. For instance, imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. +This would propagate the `inner_hits` defined for the `knn` query to the `rrf` retriever, and compute inner hits for `rrf`'s top results. -[source,js] +.Example response +[%collapsible] +============== +[source,console-result] ---- -GET retrievers_example/_search { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "text_similarity_reranker": { - "retriever": { - "knn": { - "field": "vector", - "query_vector": [ - 0.23, - 0.67, - 0.89 - ], - "k": 3, - "num_candidates": 5 - } - }, - "rank_window_size": 100, - "field": "text", - "inference_id": "my-rerank-model", - "inference_text": "What are the state of the art applications of AI in information retrieval?" - } - }, - "rank_window_size": 10, - "field": "text", - "inference_id": "my-other-more-expensive-rerank-model", - "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" - } - }, - "_source": [ - "text", - "topic" - ] + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 1.0, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "1", + "_score": 1.0, + "_source": { + "topic": [ + "ai" + ] + }, + "inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 3, + "relation": "eq" + }, + "max_score": 0.44353113, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 2 + }, + "_score": 0.44353113, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1c" + ] + } + ] + } + }, + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 1 + }, + "_score": 0.26567122, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1b" + ] + } + ] + } + }, + { + "_index": "retrievers_example_nested", + "_id": "1", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 0.18478848, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "1a" + ] + } + ] + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example_nested", + "_id": "2", + "_score": 0.33333334, + "_source": { + "topic": [ + "information_retrieval" + ] + }, + "inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.32002488, + "hits": [ + { + "_index": "retrievers_example_nested", + "_id": "2", + "_nested": { + "field": "nested_field", + "offset": 0 + }, + "_score": 0.32002488, + "fields": { + "nested_field": [ + { + "paragraph_id": [ + "2a" + ] + } + ] + } + } + ] + } + } + } + }, + { + "_index": "retrievers_example_nested", + "_id": "3", + "_score": 0.33333334, + "_source": { + "topic": [ + "ai" + ] + }, + 
"inner_hits": { + "nested_vector": { + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + } + } + ] + } } ---- -//NOTCONSOLE +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== - -Note that our example applies two reranking steps. First, we rerank the top 100 -documents from the `knn` search using the `my-rerank-model` reranker. Then we -pick the top 10 results and rerank them using the more fine-grained -`my-other-more-expensive-rerank-model`. +Note: if using more than one `inner_hits` we need to provide custom names for each `inner_hits` so that they +are unique across all retrievers within the request. [discrete] [[retrievers-examples-rrf-and-aggregations]] @@ -380,7 +785,7 @@ the `terms` aggregation for the `topic` field will include all results, not just from the 2 nested retrievers, i.e. all documents whose `year` field is greater than 2023, and whose `topic` field matches the term `elastic`. -[source,js] +[source,console] ---- GET retrievers_example/_search { @@ -412,10 +817,7 @@ GET retrievers_example/_search "rank_constant": 1 } }, - "_source": [ - "text", - "topic" - ], + "_source": false, "aggs": { "topics": { "terms": { @@ -425,4 +827,436 @@ GET retrievers_example/_search } } ---- -//NOTCONSOLE +// TEST[continued] + +.Example response +[%collapsible] +============== +[source, console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 4, + "relation": "eq" + }, + "max_score": 0.5833334, + "hits": [ + { + "_index": "retrievers_example", + "_id": "5", + "_score": 0.5833334 + }, + { + "_index": "retrievers_example", + "_id": "1", + "_score": 0.5 + }, + { + "_index": "retrievers_example", + "_id": "4", + "_score": 0.5 + }, + { + "_index": "retrievers_example", + "_id": "3", + "_score": 0.33333334 + } + ] + }, + "aggregations": { + "topics": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "ai", + "doc_count": 3 + }, + { + "key": "elastic", + "doc_count": 2 + }, + { + "key": "assistant", + "doc_count": 1 + }, + { + "key": "documentation", + "doc_count": 1 + }, + { + "key": "information_retrieval", + "doc_count": 1 + }, + { + "key": "llm", + "doc_count": 1 + }, + { + "key": "observability", + "doc_count": 1 + }, + { + "key": "security", + "doc_count": 1 + } + ] + } + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +============== + +[discrete] +[[retrievers-examples-explain-multiple-rrf]] +==== Example: Explainability with multiple retrievers + +By adding `explain: true` to the request, each retriever will now provide a detailed explanation of all the steps +and calculations required to compute the final score. Composability is fully supported in the context of `explain`, and +each retriever will provide its own explanation, as shown in the example below. 
+ +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "term": { + "topic": "elastic" + } + } + } + }, + { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false, + "size": 1, + "explain": true +} +---- +// TEST[continued] + +The output of which, albeit a bit verbose, will provide all the necessary info to assist in debugging and reason with ranking. + +.Example response +[%collapsible] +============== +[source, console-result] +---- +{ + "took": 42, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 5, + "relation": "eq" + }, + "max_score": 0.5, + "hits": [ + { + "_shard": "[retrievers_example][0]", + "_node": "jnrdZFKS3abUgWVsVdj2Vg", + "_index": "retrievers_example", + "_id": "1", + "_score": 0.5, + "_explanation": { + "value": 0.5, + "description": "rrf score: [0.5] computed for initial ranks [0, 1] with rankConstant: [1] as sum of [1 / (rank + rankConstant)] for each query", + "details": [ + { + "value": 0.0, + "description": "rrf score: [0], result not found in query at index [0]", + "details": [] + }, + { + "value": 1, + "description": "rrf score: [0.5], for rank [1] in query at index [1] computed as [1 / (1 + 1)], for matching query with score", + "details": [ + { + "value": 0.8333334, + "description": "rrf score: [0.8333334] computed for initial ranks [2, 1] with rankConstant: [1] as sum of [1 / (rank + rankConstant)] for each query", + "details": [ + { + "value": 2, + "description": "rrf score: [0.33333334], for rank [2] in query at index [0] computed as [1 / (2 + 1)], for matching query with score", + "details": [ + { + "value": 2.8129659, + "description": "sum of:", + "details": [ + { + "value": 1.4064829, + "description": "weight(text:information in 0) [PerFieldSimilarity], result of:", + "details": [ + *** + ] + }, + { + "value": 1.4064829, + "description": "weight(text:retrieval in 0) [PerFieldSimilarity], result of:", + "details": [ + *** + ] + } + ] + } + ] + }, + { + "value": 1, + "description": "rrf score: [0.5], for rank [1] in query at index [1] computed as [1 / (1 + 1)], for matching query with score", + "details": [ + { + "value": 1, + "description": "doc [0] with an original score of [1.0] is at rank [1] from the following source queries.", + "details": [ + { + "value": 1.0, + "description": "found vector with calculated similarity: 1.0", + "details": [] + } + ] + } + ] + } + ] + } + ] + } + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 42/"took": $body.took/] +// TESTRESPONSE[s/\.\.\./$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.0.details.0/] +// TESTRESPONSE[s/\*\*\*/$body.hits.hits.0._explanation.details.1.details.0.details.0.details.0.details.1.details.0/] +// TESTRESPONSE[s/jnrdZFKS3abUgWVsVdj2Vg/$body.hits.hits.0._node/] +============== + +[discrete] +[[retrievers-examples-text-similarity-reranker-on-top-of-rrf]] +==== Example: Rerank results of an RRF retriever + +To demonstrate the full functionality of retrievers, the following examples also 
require access to a <> set up using the <>. + +In this example we'll set up a reranking service and use it with the `text_similarity_reranker` retriever to rerank our top results. + +[source,console] +---- +PUT _inference/rerank/my-rerank-model +{ + "service": "cohere", + "service_settings": { + "model_id": "rerank-english-v3.0", + "api_key": "{{COHERE_API_KEY}}" + } +} +---- +// TEST[skip: no_access_to_ml] + +Let's start by reranking the results of the `rrf` retriever in our previous example. + +[source,console] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "(information retrieval) OR (artificial intelligence)", + "default_field": "text" + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" + } + }, + "_source": false +} + +---- +// TEST[skip: no_access_to_ml] + +[discrete] +[[retrievers-examples-rrf-ranking-on-text-similarity-reranker-results]] +==== Example: RRF with semantic reranker + +For this example, we'll replace the rrf's `standard` retriever with the `text_similarity_reranker` retriever, using the +`my-rerank-model` reranker we previously configured. Since this is a reranker, it needs an initial pool of +documents to work with. In this case, we'll rerank the top `rank_window_size` documents matching the `ai` topic. + +[source,console] +---- +GET /retrievers_example/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "term": { + "topic": "ai" + } + } + } + }, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "Can I use generative AI to identify user intent and improve search relevance?" + } + } + ], + "rank_window_size": 10, + "rank_constant": 1 + } + }, + "_source": false +} +---- +// TEST[skip: no_access_to_ml] + +[discrete] +[[retrievers-examples-chaining-text-similarity-reranker-retrievers]] +==== Example: Chaining multiple semantic rerankers + +Full composability means we can chain together multiple retrievers of the same type. For instance, +imagine we have a computationally expensive reranker that's specialized for AI content. We can rerank the results of a `text_similarity_reranker` using another `text_similarity_reranker` retriever. Each reranker can operate on different fields and/or use different inference services. + +[source,console] +---- +GET retrievers_example/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "text_similarity_reranker": { + "retriever": { + "knn": { + "field": "vector", + "query_vector": [ + 0.23, + 0.67, + 0.89 + ], + "k": 3, + "num_candidates": 5 + } + }, + "rank_window_size": 100, + "field": "text", + "inference_id": "my-rerank-model", + "inference_text": "What are the state of the art applications of AI in information retrieval?" 
+ } + }, + "rank_window_size": 10, + "field": "text", + "inference_id": "my-other-more-expensive-rerank-model", + "inference_text": "Applications of Large Language Models in technology and their impact on user satisfaction" + } + }, + "_source": false +} +---- +// TEST[skip: no_access_to_ml] + +Note that our example applies two reranking steps. First, we rerank the top 100 +documents from the `knn` search using the `my-rerank-model` reranker. Then we +pick the top 10 results and rerank them using the more fine-grained +`my-other-more-expensive-rerank-model`. From 6417e0912f2876c00f4e3b970af84875f23cd943 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Fri, 29 Nov 2024 14:53:20 +0200 Subject: [PATCH 080/139] CrossClusterIT testCancel failure (#117750) Investigate and fix test failure --- docs/changelog/117750.yaml | 6 ++++++ .../java/org/elasticsearch/search/ccs/CrossClusterIT.java | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 docs/changelog/117750.yaml diff --git a/docs/changelog/117750.yaml b/docs/changelog/117750.yaml new file mode 100644 index 0000000000000..3ba3f1693f4df --- /dev/null +++ b/docs/changelog/117750.yaml @@ -0,0 +1,6 @@ +pr: 117750 +summary: '`CrossClusterIT` `testCancel` failure' +area: Search +type: bug +issues: + - 108061 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterIT.java index 5d2d5c917415a..cb4d0681cdb23 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CrossClusterIT.java @@ -63,6 +63,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -189,7 +190,6 @@ public void testProxyConnectionDisconnect() throws Exception { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/108061") public void testCancel() throws Exception { assertAcked(client(LOCAL_CLUSTER).admin().indices().prepareCreate("demo")); indexDocs(client(LOCAL_CLUSTER), "demo"); @@ -307,7 +307,7 @@ public void testCancel() throws Exception { } }); - RuntimeException e = expectThrows(RuntimeException.class, () -> queryFuture.result()); + ExecutionException e = expectThrows(ExecutionException.class, () -> queryFuture.result()); assertNotNull(e); assertNotNull(e.getCause()); Throwable t = ExceptionsHelper.unwrap(e, TaskCancelledException.class); From e19f2b7fbb908228a9b53821e275b8ccb58e7029 Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Fri, 29 Nov 2024 17:22:37 +0400 Subject: [PATCH 081/139] Remove unsupported async_search parameters from rest-api-spec (#117626) --- .../rest-api-spec/api/async_search.submit.json | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/async_search.submit.json b/rest-api-spec/src/main/resources/rest-api-spec/api/async_search.submit.json index a7a7ebe838eab..3de0dec85f547 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/async_search.submit.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/async_search.submit.json @@ -43,11 +43,6 @@ "description":"Control whether the response should be stored in the cluster if it completed within the provided 
[wait_for_completion] time (default: false)", "default":false }, - "keep_alive": { - "type": "time", - "description": "Update the time interval in which the results (partial or final) for this search will be available", - "default": "5d" - }, "batched_reduce_size":{ "type":"number", "description":"The number of shard results that should be reduced at once on the coordinating node. This value should be used as the granularity at which progress results will be made available.", @@ -131,11 +126,6 @@ "type":"string", "description":"Specify the node or shard the operation should be performed on (default: random)" }, - "pre_filter_shard_size":{ - "type":"number", - "default": 1, - "description":"Cannot be changed: this is to enforce the execution of a pre-filter roundtrip to retrieve statistics from each shard so that the ones that surely don’t hold any document matching the query get skipped." - }, "rest_total_hits_as_int":{ "type":"boolean", "description":"Indicates whether hits.total should be rendered as an integer or an object in the rest search response", From 60ce74a7870a9e050ddac64900c3b35682e8e355 Mon Sep 17 00:00:00 2001 From: Tommaso Teofili Date: Fri, 29 Nov 2024 15:38:12 +0100 Subject: [PATCH 082/139] mute csv test for scoring in esql for mixed cluster (#117767) --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 96631d15f374f..f5f6b84ab8639 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -222,6 +222,9 @@ tests: - class: "org.elasticsearch.xpack.esql.qa.single_node.EsqlSpecIT" method: "test {scoring.*}" issue: https://github.com/elastic/elasticsearch/issues/117641 +- class: "org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT" + method: "test {scoring.*}" + issue: https://github.com/elastic/elasticsearch/issues/117641 - class: org.elasticsearch.xpack.inference.InferenceCrudIT method: testSupportedStream issue: https://github.com/elastic/elasticsearch/issues/117745 From 5f045c05811ffd30f480d08403e3139c9686d97b Mon Sep 17 00:00:00 2001 From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com> Date: Fri, 29 Nov 2024 16:20:39 +0100 Subject: [PATCH 083/139] One Categorize BlockHash (#117723) * Move all categorize blockhash code to one "CategorizeBlockHash". 
* close resources in case of failure --- .../AbstractCategorizeBlockHash.java | 132 -------- .../aggregation/blockhash/BlockHash.java | 4 +- .../blockhash/CategorizeBlockHash.java | 309 ++++++++++++++++++ .../blockhash/CategorizeRawBlockHash.java | 147 --------- .../CategorizedIntermediateBlockHash.java | 92 ------ .../blockhash/CategorizeBlockHashTests.java | 8 +- .../function/grouping/Categorize.java | 6 +- 7 files changed, 315 insertions(+), 383 deletions(-) delete mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java create mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java delete mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java delete mode 100644 x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java deleted file mode 100644 index 0e89d77820883..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/AbstractCategorizeBlockHash.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.compute.aggregation.blockhash; - -import org.apache.lucene.util.BytesRefBuilder; -import org.elasticsearch.common.io.stream.BytesStreamOutput; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.BitArray; -import org.elasticsearch.common.util.BytesRefHash; -import org.elasticsearch.compute.aggregation.SeenGroupIds; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.IntVector; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.core.ReleasableIterator; -import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash; -import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary; -import org.elasticsearch.xpack.ml.aggs.categorization.SerializableTokenListCategory; -import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; - -import java.io.IOException; - -/** - * Base BlockHash implementation for {@code Categorize} grouping function. - */ -public abstract class AbstractCategorizeBlockHash extends BlockHash { - protected static final int NULL_ORD = 0; - - // TODO: this should probably also take an emitBatchSize - private final int channel; - private final boolean outputPartial; - protected final TokenListCategorizer.CloseableTokenListCategorizer categorizer; - - /** - * Store whether we've seen any {@code null} values. - *
<p>
- * Null gets the {@link #NULL_ORD} ord.
- * </p>
- */ - protected boolean seenNull = false; - - AbstractCategorizeBlockHash(BlockFactory blockFactory, int channel, boolean outputPartial) { - super(blockFactory); - this.channel = channel; - this.outputPartial = outputPartial; - this.categorizer = new TokenListCategorizer.CloseableTokenListCategorizer( - new CategorizationBytesRefHash(new BytesRefHash(2048, blockFactory.bigArrays())), - CategorizationPartOfSpeechDictionary.getInstance(), - 0.70f - ); - } - - protected int channel() { - return channel; - } - - @Override - public Block[] getKeys() { - return new Block[] { outputPartial ? buildIntermediateBlock() : buildFinalBlock() }; - } - - @Override - public IntVector nonEmpty() { - return IntVector.range(seenNull ? 0 : 1, categorizer.getCategoryCount() + 1, blockFactory); - } - - @Override - public BitArray seenGroupIds(BigArrays bigArrays) { - return new SeenGroupIds.Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays); - } - - @Override - public final ReleasableIterator lookup(Page page, ByteSizeValue targetBlockSize) { - throw new UnsupportedOperationException(); - } - - /** - * Serializes the intermediate state into a single BytesRef block, or an empty Null block if there are no categories. - */ - private Block buildIntermediateBlock() { - if (categorizer.getCategoryCount() == 0) { - return blockFactory.newConstantNullBlock(seenNull ? 1 : 0); - } - try (BytesStreamOutput out = new BytesStreamOutput()) { - // TODO be more careful here. - out.writeBoolean(seenNull); - out.writeVInt(categorizer.getCategoryCount()); - for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { - category.writeTo(out); - } - // We're returning a block with N positions just because the Page must have all blocks with the same position count! - int positionCount = categorizer.getCategoryCount() + (seenNull ? 1 : 0); - return blockFactory.newConstantBytesRefBlockWith(out.bytes().toBytesRef(), positionCount); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private Block buildFinalBlock() { - BytesRefBuilder scratch = new BytesRefBuilder(); - - if (seenNull) { - try (BytesRefBlock.Builder result = blockFactory.newBytesRefBlockBuilder(categorizer.getCategoryCount())) { - result.appendNull(); - for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { - scratch.copyChars(category.getRegex()); - result.appendBytesRef(scratch.get()); - scratch.clear(); - } - return result.build(); - } - } - - try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) { - for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { - scratch.copyChars(category.getRegex()); - result.appendBytesRef(scratch.get()); - scratch.clear(); - } - return result.build().asBlock(); - } - } -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java index ea76c3bd0a0aa..30afa7ae3128d 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/BlockHash.java @@ -180,9 +180,7 @@ public static BlockHash buildCategorizeBlockHash( throw new IllegalArgumentException("only a single CATEGORIZE group can used"); } - return aggregatorMode.isInputPartial() - ? 
new CategorizedIntermediateBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial()) - : new CategorizeRawBlockHash(groups.get(0).channel, blockFactory, aggregatorMode.isOutputPartial(), analysisRegistry); + return new CategorizeBlockHash(blockFactory, groups.get(0).channel, aggregatorMode, analysisRegistry); } /** diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java new file mode 100644 index 0000000000000..35c6faf84e623 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHash.java @@ -0,0 +1,309 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.compute.aggregation.blockhash; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.BitArray; +import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.compute.aggregation.AggregatorMode; +import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; +import org.elasticsearch.compute.aggregation.SeenGroupIds; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.IntVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.ReleasableIterator; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.index.analysis.AnalysisRegistry; +import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash; +import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary; +import org.elasticsearch.xpack.ml.aggs.categorization.SerializableTokenListCategory; +import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; +import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Base BlockHash implementation for {@code Categorize} grouping function. 
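+ * Depending on the {@link AggregatorMode}, it either categorizes raw input values directly or merges
+ * serialized categorizer states produced on other nodes.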
+ */ +public class CategorizeBlockHash extends BlockHash { + + private static final CategorizationAnalyzerConfig ANALYZER_CONFIG = CategorizationAnalyzerConfig.buildStandardCategorizationAnalyzer( + List.of() + ); + private static final int NULL_ORD = 0; + + // TODO: this should probably also take an emitBatchSize + private final int channel; + private final AggregatorMode aggregatorMode; + private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; + + private final CategorizeEvaluator evaluator; + + /** + * Store whether we've seen any {@code null} values. + *
<p>
+ * Null gets the {@link #NULL_ORD} ord.
+ * </p>
+ */ + private boolean seenNull = false; + + CategorizeBlockHash(BlockFactory blockFactory, int channel, AggregatorMode aggregatorMode, AnalysisRegistry analysisRegistry) { + super(blockFactory); + + this.channel = channel; + this.aggregatorMode = aggregatorMode; + + this.categorizer = new TokenListCategorizer.CloseableTokenListCategorizer( + new CategorizationBytesRefHash(new BytesRefHash(2048, blockFactory.bigArrays())), + CategorizationPartOfSpeechDictionary.getInstance(), + 0.70f + ); + + if (aggregatorMode.isInputPartial() == false) { + CategorizationAnalyzer analyzer; + try { + Objects.requireNonNull(analysisRegistry); + analyzer = new CategorizationAnalyzer(analysisRegistry, ANALYZER_CONFIG); + } catch (Exception e) { + categorizer.close(); + throw new RuntimeException(e); + } + this.evaluator = new CategorizeEvaluator(analyzer); + } else { + this.evaluator = null; + } + } + + @Override + public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { + if (aggregatorMode.isInputPartial() == false) { + addInitial(page, addInput); + } else { + addIntermediate(page, addInput); + } + } + + @Override + public Block[] getKeys() { + return new Block[] { aggregatorMode.isOutputPartial() ? buildIntermediateBlock() : buildFinalBlock() }; + } + + @Override + public IntVector nonEmpty() { + return IntVector.range(seenNull ? 0 : 1, categorizer.getCategoryCount() + 1, blockFactory); + } + + @Override + public BitArray seenGroupIds(BigArrays bigArrays) { + return new SeenGroupIds.Range(seenNull ? 0 : 1, Math.toIntExact(categorizer.getCategoryCount() + 1)).seenGroupIds(bigArrays); + } + + @Override + public final ReleasableIterator lookup(Page page, ByteSizeValue targetBlockSize) { + throw new UnsupportedOperationException(); + } + + @Override + public void close() { + Releasables.close(evaluator, categorizer); + } + + /** + * Adds initial (raw) input to the state. + */ + private void addInitial(Page page, GroupingAggregatorFunction.AddInput addInput) { + try (IntBlock result = (IntBlock) evaluator.eval(page.getBlock(channel))) { + addInput.add(0, result); + } + } + + /** + * Adds intermediate state to the state. + */ + private void addIntermediate(Page page, GroupingAggregatorFunction.AddInput addInput) { + if (page.getPositionCount() == 0) { + return; + } + BytesRefBlock categorizerState = page.getBlock(channel); + if (categorizerState.areAllValuesNull()) { + seenNull = true; + try (var newIds = blockFactory.newConstantIntVector(NULL_ORD, 1)) { + addInput.add(0, newIds); + } + return; + } + + Map idMap = readIntermediate(categorizerState.getBytesRef(0, new BytesRef())); + try (IntBlock.Builder newIdsBuilder = blockFactory.newIntBlockBuilder(idMap.size())) { + int fromId = idMap.containsKey(0) ? 0 : 1; + int toId = fromId + idMap.size(); + for (int i = fromId; i < toId; i++) { + newIdsBuilder.appendInt(idMap.get(i)); + } + try (IntBlock newIds = newIdsBuilder.build()) { + addInput.add(0, newIds); + } + } + } + + /** + * Read intermediate state from a block. + * + * @return a map from the old category id to the new one. The old ids go from 0 to {@code size - 1}. 
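+ * The remapping is needed because {@code categorizer.mergeWireCategory} may assign an incoming category a
+ * different id locally.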
+ */ + private Map readIntermediate(BytesRef bytes) { + Map idMap = new HashMap<>(); + try (StreamInput in = new BytesArray(bytes).streamInput()) { + if (in.readBoolean()) { + seenNull = true; + idMap.put(NULL_ORD, NULL_ORD); + } + int count = in.readVInt(); + for (int oldCategoryId = 0; oldCategoryId < count; oldCategoryId++) { + int newCategoryId = categorizer.mergeWireCategory(new SerializableTokenListCategory(in)).getId(); + // +1 because the 0 ordinal is reserved for null + idMap.put(oldCategoryId + 1, newCategoryId + 1); + } + return idMap; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + /** + * Serializes the intermediate state into a single BytesRef block, or an empty Null block if there are no categories. + */ + private Block buildIntermediateBlock() { + if (categorizer.getCategoryCount() == 0) { + return blockFactory.newConstantNullBlock(seenNull ? 1 : 0); + } + try (BytesStreamOutput out = new BytesStreamOutput()) { + out.writeBoolean(seenNull); + out.writeVInt(categorizer.getCategoryCount()); + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + category.writeTo(out); + } + // We're returning a block with N positions just because the Page must have all blocks with the same position count! + int positionCount = categorizer.getCategoryCount() + (seenNull ? 1 : 0); + return blockFactory.newConstantBytesRefBlockWith(out.bytes().toBytesRef(), positionCount); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private Block buildFinalBlock() { + BytesRefBuilder scratch = new BytesRefBuilder(); + + if (seenNull) { + try (BytesRefBlock.Builder result = blockFactory.newBytesRefBlockBuilder(categorizer.getCategoryCount())) { + result.appendNull(); + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + scratch.copyChars(category.getRegex()); + result.appendBytesRef(scratch.get()); + scratch.clear(); + } + return result.build(); + } + } + + try (BytesRefVector.Builder result = blockFactory.newBytesRefVectorBuilder(categorizer.getCategoryCount())) { + for (SerializableTokenListCategory category : categorizer.toCategoriesById()) { + scratch.copyChars(category.getRegex()); + result.appendBytesRef(scratch.get()); + scratch.clear(); + } + return result.build().asBlock(); + } + } + + /** + * Similar implementation to an Evaluator. 
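+ * It maps each input value to a category ordinal, reserving the ordinal {@code 0} for {@code null} values.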
+ */ + private final class CategorizeEvaluator implements Releasable { + private final CategorizationAnalyzer analyzer; + + CategorizeEvaluator(CategorizationAnalyzer analyzer) { + this.analyzer = analyzer; + } + + Block eval(BytesRefBlock vBlock) { + BytesRefVector vVector = vBlock.asVector(); + if (vVector == null) { + return eval(vBlock.getPositionCount(), vBlock); + } + IntVector vector = eval(vBlock.getPositionCount(), vVector); + return vector.asBlock(); + } + + IntBlock eval(int positionCount, BytesRefBlock vBlock) { + try (IntBlock.Builder result = blockFactory.newIntBlockBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + if (vBlock.isNull(p)) { + seenNull = true; + result.appendInt(NULL_ORD); + continue; + } + int first = vBlock.getFirstValueIndex(p); + int count = vBlock.getValueCount(p); + if (count == 1) { + result.appendInt(process(vBlock.getBytesRef(first, vScratch))); + continue; + } + int end = first + count; + result.beginPositionEntry(); + for (int i = first; i < end; i++) { + result.appendInt(process(vBlock.getBytesRef(i, vScratch))); + } + result.endPositionEntry(); + } + return result.build(); + } + } + + IntVector eval(int positionCount, BytesRefVector vVector) { + try (IntVector.FixedBuilder result = blockFactory.newIntVectorFixedBuilder(positionCount)) { + BytesRef vScratch = new BytesRef(); + for (int p = 0; p < positionCount; p++) { + result.appendInt(p, process(vVector.getBytesRef(p, vScratch))); + } + return result.build(); + } + } + + int process(BytesRef v) { + var category = categorizer.computeCategory(v.utf8ToString(), analyzer); + if (category == null) { + seenNull = true; + return NULL_ORD; + } + return category.getId() + 1; + } + + @Override + public void close() { + analyzer.close(); + } + } +} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java deleted file mode 100644 index 47dd7f650dffa..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeRawBlockHash.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -package org.elasticsearch.compute.aggregation.blockhash; - -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; -import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.BytesRefVector; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.IntVector; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.core.Releasable; -import org.elasticsearch.core.Releasables; -import org.elasticsearch.index.analysis.AnalysisRegistry; -import org.elasticsearch.xpack.core.ml.job.config.CategorizationAnalyzerConfig; -import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer; -import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer; - -import java.io.IOException; -import java.util.List; - -/** - * BlockHash implementation for {@code Categorize} grouping function. - *
<p>
- * This implementation expects rows, and can't deserialize intermediate states coming from other nodes.
- * </p>
- */ -public class CategorizeRawBlockHash extends AbstractCategorizeBlockHash { - private static final CategorizationAnalyzerConfig ANALYZER_CONFIG = CategorizationAnalyzerConfig.buildStandardCategorizationAnalyzer( - List.of() - ); - - private final CategorizeEvaluator evaluator; - - CategorizeRawBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial, AnalysisRegistry analysisRegistry) { - super(blockFactory, channel, outputPartial); - - CategorizationAnalyzer analyzer; - try { - analyzer = new CategorizationAnalyzer(analysisRegistry, ANALYZER_CONFIG); - } catch (IOException e) { - categorizer.close(); - throw new RuntimeException(e); - } - - this.evaluator = new CategorizeEvaluator(analyzer, categorizer, blockFactory); - } - - @Override - public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { - try (IntBlock result = (IntBlock) evaluator.eval(page.getBlock(channel()))) { - addInput.add(0, result); - } - } - - @Override - public void close() { - evaluator.close(); - } - - /** - * Similar implementation to an Evaluator. - */ - public final class CategorizeEvaluator implements Releasable { - private final CategorizationAnalyzer analyzer; - - private final TokenListCategorizer.CloseableTokenListCategorizer categorizer; - - private final BlockFactory blockFactory; - - public CategorizeEvaluator( - CategorizationAnalyzer analyzer, - TokenListCategorizer.CloseableTokenListCategorizer categorizer, - BlockFactory blockFactory - ) { - this.analyzer = analyzer; - this.categorizer = categorizer; - this.blockFactory = blockFactory; - } - - public Block eval(BytesRefBlock vBlock) { - BytesRefVector vVector = vBlock.asVector(); - if (vVector == null) { - return eval(vBlock.getPositionCount(), vBlock); - } - IntVector vector = eval(vBlock.getPositionCount(), vVector); - return vector.asBlock(); - } - - public IntBlock eval(int positionCount, BytesRefBlock vBlock) { - try (IntBlock.Builder result = blockFactory.newIntBlockBuilder(positionCount)) { - BytesRef vScratch = new BytesRef(); - for (int p = 0; p < positionCount; p++) { - if (vBlock.isNull(p)) { - seenNull = true; - result.appendInt(NULL_ORD); - continue; - } - int first = vBlock.getFirstValueIndex(p); - int count = vBlock.getValueCount(p); - if (count == 1) { - result.appendInt(process(vBlock.getBytesRef(first, vScratch))); - continue; - } - int end = first + count; - result.beginPositionEntry(); - for (int i = first; i < end; i++) { - result.appendInt(process(vBlock.getBytesRef(i, vScratch))); - } - result.endPositionEntry(); - } - return result.build(); - } - } - - public IntVector eval(int positionCount, BytesRefVector vVector) { - try (IntVector.FixedBuilder result = blockFactory.newIntVectorFixedBuilder(positionCount)) { - BytesRef vScratch = new BytesRef(); - for (int p = 0; p < positionCount; p++) { - result.appendInt(p, process(vVector.getBytesRef(p, vScratch))); - } - return result.build(); - } - } - - private int process(BytesRef v) { - var category = categorizer.computeCategory(v.utf8ToString(), analyzer); - if (category == null) { - seenNull = true; - return NULL_ORD; - } - return category.getId() + 1; - } - - @Override - public void close() { - Releasables.closeExpectNoException(analyzer, categorizer); - } - } -} diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java deleted file mode 
100644 index c774d3b26049d..0000000000000 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/blockhash/CategorizedIntermediateBlockHash.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.compute.aggregation.blockhash; - -import org.apache.lucene.util.BytesRef; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.compute.aggregation.GroupingAggregatorFunction; -import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BytesRefBlock; -import org.elasticsearch.compute.data.IntBlock; -import org.elasticsearch.compute.data.Page; -import org.elasticsearch.xpack.ml.aggs.categorization.SerializableTokenListCategory; - -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; - -/** - * BlockHash implementation for {@code Categorize} grouping function. - *
<p>
- * This implementation expects a single intermediate state in a block, as generated by {@link AbstractCategorizeBlockHash}.
- * </p>
- */ -public class CategorizedIntermediateBlockHash extends AbstractCategorizeBlockHash { - - CategorizedIntermediateBlockHash(int channel, BlockFactory blockFactory, boolean outputPartial) { - super(blockFactory, channel, outputPartial); - } - - @Override - public void add(Page page, GroupingAggregatorFunction.AddInput addInput) { - if (page.getPositionCount() == 0) { - // No categories - return; - } - BytesRefBlock categorizerState = page.getBlock(channel()); - if (categorizerState.areAllValuesNull()) { - seenNull = true; - try (var newIds = blockFactory.newConstantIntVector(NULL_ORD, 1)) { - addInput.add(0, newIds); - } - return; - } - - Map idMap = readIntermediate(categorizerState.getBytesRef(0, new BytesRef())); - try (IntBlock.Builder newIdsBuilder = blockFactory.newIntBlockBuilder(idMap.size())) { - int fromId = idMap.containsKey(0) ? 0 : 1; - int toId = fromId + idMap.size(); - for (int i = fromId; i < toId; i++) { - newIdsBuilder.appendInt(idMap.get(i)); - } - try (IntBlock newIds = newIdsBuilder.build()) { - addInput.add(0, newIds); - } - } - } - - /** - * Read intermediate state from a block. - * - * @return a map from the old category id to the new one. The old ids go from 0 to {@code size - 1}. - */ - private Map readIntermediate(BytesRef bytes) { - Map idMap = new HashMap<>(); - try (StreamInput in = new BytesArray(bytes).streamInput()) { - if (in.readBoolean()) { - seenNull = true; - idMap.put(NULL_ORD, NULL_ORD); - } - int count = in.readVInt(); - for (int oldCategoryId = 0; oldCategoryId < count; oldCategoryId++) { - int newCategoryId = categorizer.mergeWireCategory(new SerializableTokenListCategory(in)).getId(); - // +1 because the 0 ordinal is reserved for null - idMap.put(oldCategoryId + 1, newCategoryId + 1); - } - return idMap; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public void close() { - categorizer.close(); - } -} diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java index 8a3c723557151..3c47e85a4a9c8 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/aggregation/blockhash/CategorizeBlockHashTests.java @@ -95,7 +95,7 @@ public void testCategorizeRaw() { page = new Page(builder.build()); } - try (BlockHash hash = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry)) { + try (BlockHash hash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry)) { hash.add(page, new GroupingAggregatorFunction.AddInput() { @Override public void add(int positionOffset, IntBlock groupIds) { @@ -168,8 +168,8 @@ public void testCategorizeIntermediate() { // Fill intermediatePages with the intermediate state from the raw hashes try ( - BlockHash rawHash1 = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry); - BlockHash rawHash2 = new CategorizeRawBlockHash(0, blockFactory, true, analysisRegistry); + BlockHash rawHash1 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry); + BlockHash rawHash2 = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INITIAL, analysisRegistry); ) { rawHash1.add(page1, new GroupingAggregatorFunction.AddInput() { @Override @@ -226,7 +226,7 @@ public void close() { 
page2.releaseBlocks(); } - try (BlockHash intermediateHash = new CategorizedIntermediateBlockHash(0, blockFactory, true)) { + try (BlockHash intermediateHash = new CategorizeBlockHash(blockFactory, 0, AggregatorMode.INTERMEDIATE, null)) { intermediateHash.add(intermediatePage1, new GroupingAggregatorFunction.AddInput() { @Override public void add(int positionOffset, IntBlock groupIds) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 31b603ecef889..63b5073c2217a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -32,12 +32,8 @@ * This function has no evaluators, as it works like an aggregation (Accumulates values, stores intermediate states, etc). *
</p>
 * <p>
- * For the implementation, see:
+ * For the implementation, see {@link org.elasticsearch.compute.aggregation.blockhash.CategorizeBlockHash}
 * </p>
- * <ul>
- *     <li>{@link org.elasticsearch.compute.aggregation.blockhash.CategorizedIntermediateBlockHash}</li>
- *     <li>{@link org.elasticsearch.compute.aggregation.blockhash.CategorizeRawBlockHash}</li>
- * </ul>
*/ public class Categorize extends GroupingFunction implements Validatable { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( From 64107e0a0b032c0ee1ed319f0d6bfefce23def9a Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Fri, 29 Nov 2024 17:34:05 +0100 Subject: [PATCH 084/139] Compute output of LookupJoinExec dynamically (#117763) LookupJoinExec should not assume its output but instead compute it from - Its input fields from the left - The fields added from the lookup index Currently, LookupJoinExec's output is determined when the logical plan is mapped to a physical one, and thereafter the output cannot be changed anymore. This makes it impossible to have late materialization of fields from the left hand side via field extractions, because we are forced to extract *all* fields before the LookupJoinExec, otherwise we do not achieve the prescribed output. Avoid that by tracking only which fields the LookupJoinExec will add from the lookup index instead of tracking the whole output (that was only correct for the logical plan). **Note:** While this PR is a refactoring for the current functionality, it should unblock @craigtaverner 's ongoing work related to field extractions and getting multiple LOOKUP JOIN queries to work correctly without adding hacks. --- .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 +- .../src/main/resources/lookup-join.csv-spec | 10 +-- .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../optimizer/PhysicalOptimizerRules.java | 32 --------- .../physical/local/InsertFieldExtraction.java | 4 +- .../xpack/esql/plan/logical/join/Join.java | 31 +++++---- .../esql/plan/physical/LookupJoinExec.java | 65 ++++++++----------- .../esql/planner/LocalExecutionPlanner.java | 6 +- .../esql/planner/mapper/LocalMapper.java | 10 +-- .../xpack/esql/planner/mapper/Mapper.java | 10 +-- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 11 files changed, 60 insertions(+), 116 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 8f4522573f880..af5eadc7358a2 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -47,7 +47,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V2; +import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC; @@ -125,7 +125,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", 
testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V2.capabilityName())); + assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName())); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 11786fb905c60..5de353978b307 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -5,7 +5,7 @@ //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) basicOnTheDataNode-Ignore -required_capability: join_lookup_v2 +required_capability: join_lookup_v3 FROM employees | EVAL language_code = languages @@ -22,7 +22,7 @@ emp_no:integer | language_code:integer | language_name:keyword ; basicRow-Ignore -required_capability: join_lookup +required_capability: join_lookup_v3 ROW language_code = 1 | LOOKUP JOIN languages_lookup ON language_code @@ -33,7 +33,7 @@ language_code:keyword | language_name:keyword ; basicOnTheCoordinator -required_capability: join_lookup_v2 +required_capability: join_lookup_v3 FROM employees | SORT emp_no @@ -51,7 +51,7 @@ emp_no:integer | language_code:integer | language_name:keyword //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) subsequentEvalOnTheDataNode-Ignore -required_capability: join_lookup_v2 +required_capability: join_lookup_v3 FROM employees | EVAL language_code = languages @@ -69,7 +69,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x ; subsequentEvalOnTheCoordinator -required_capability: join_lookup_v2 +required_capability: join_lookup_v3 FROM employees | SORT emp_no diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 373be23cdf847..dc3329a906741 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -521,7 +521,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP_V2(Build.current().isSnapshot()), + JOIN_LOOKUP_V3(Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java index 482a89b50c865..ee192c2420da8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/PhysicalOptimizerRules.java @@ -7,8 +7,6 @@ package org.elasticsearch.xpack.esql.optimizer; -import org.elasticsearch.xpack.esql.core.expression.Expression; -import org.elasticsearch.xpack.esql.core.util.ReflectionUtils; import 
org.elasticsearch.xpack.esql.optimizer.rules.logical.OptimizerRules.TransformDirection; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; @@ -62,34 +60,4 @@ public final PhysicalPlan apply(PhysicalPlan plan) { protected abstract PhysicalPlan rule(SubPlan plan); } - - public abstract static class OptimizerExpressionRule extends Rule { - - private final TransformDirection direction; - // overriding type token which returns the correct class but does an uncheck cast to LogicalPlan due to its generic bound - // a proper solution is to wrap the Expression rule into a Plan rule but that would affect the rule declaration - // so instead this is hacked here - private final Class expressionTypeToken = ReflectionUtils.detectSuperTypeForRuleLike(getClass()); - - public OptimizerExpressionRule(TransformDirection direction) { - this.direction = direction; - } - - @Override - public final PhysicalPlan apply(PhysicalPlan plan) { - return direction == TransformDirection.DOWN - ? plan.transformExpressionsDown(expressionTypeToken, this::rule) - : plan.transformExpressionsUp(expressionTypeToken, this::rule); - } - - protected PhysicalPlan rule(PhysicalPlan plan) { - return plan; - } - - protected abstract Expression rule(E e); - - public Class expressionToken() { - return expressionTypeToken; - } - } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index 72573821dfeb8..cafe3726f92ac 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -104,15 +104,15 @@ private static Set missingAttributes(PhysicalPlan p) { var missing = new LinkedHashSet(); var inputSet = p.inputSet(); - // FIXME: the extractors should work on the right side as well + // TODO: We need to extract whatever fields are missing from the left hand side. 
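+ // For now, all fields needed on the left are expected to have been extracted before the join.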
// skip the lookup join since the right side is always materialized and a projection if (p instanceof LookupJoinExec join) { - // collect fields used in the join condition return Collections.emptySet(); } var input = inputSet; // collect field attributes used inside expressions + // TODO: Rather than going over all expressions manually, this should just call .references() p.forEachExpression(TypedAttribute.class, f -> { if (f instanceof FieldAttribute || f instanceof MetadataAttribute) { if (input.contains(f) == false) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java index dd6b3ea3455f7..6af29fb23b3bb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java @@ -11,7 +11,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.expression.NamedExpression; +import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -23,12 +23,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT; -import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.RIGHT; public class Join extends BinaryPlan { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Join", Join::new); @@ -100,6 +97,19 @@ public List output() { return lazyOutput; } + public List rightOutputFields() { + AttributeSet leftInputs = left().outputSet(); + + List rightOutputFields = new ArrayList<>(); + for (Attribute attr : output()) { + if (leftInputs.contains(attr) == false) { + rightOutputFields.add(attr); + } + } + + return rightOutputFields; + } + /** * Combine the two lists of attributes into one. * In case of (name) conflicts, specify which sides wins, that is overrides the other column - the left or the right. 
@@ -108,18 +118,11 @@ public static List computeOutput(List leftOutput, List output; // TODO: make the other side nullable - Set matchFieldNames = config.matchFields().stream().map(NamedExpression::name).collect(Collectors.toSet()); if (LEFT.equals(joinType)) { - // right side becomes nullable and overrides left except for match fields, which we preserve from the left - List rightOutputWithoutMatchFields = rightOutput.stream() - .filter(attr -> matchFieldNames.contains(attr.name()) == false) - .toList(); + // right side becomes nullable and overrides left except for join keys, which we preserve from the left + AttributeSet rightKeys = new AttributeSet(config.rightFields()); + List rightOutputWithoutMatchFields = rightOutput.stream().filter(attr -> rightKeys.contains(attr) == false).toList(); output = mergeOutputAttributes(rightOutputWithoutMatchFields, leftOutput); - } else if (RIGHT.equals(joinType)) { - List leftOutputWithoutMatchFields = leftOutput.stream() - .filter(attr -> matchFieldNames.contains(attr.name()) == false) - .toList(); - output = mergeOutputAttributes(leftOutputWithoutMatchFields, rightOutput); } else { throw new IllegalArgumentException(joinType.joinName() + " unsupported"); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java index e01451ceaecac..2d3caa27da4cd 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Comparator; import java.util.List; import java.util.Objects; @@ -30,43 +29,43 @@ public class LookupJoinExec extends BinaryExec implements EstimatesRowSize { LookupJoinExec::new ); - private final List matchFields; private final List leftFields; private final List rightFields; - private final List output; - private List lazyAddedFields; + /** + * These cannot be computed from the left + right outputs, because + * {@link org.elasticsearch.xpack.esql.optimizer.rules.physical.local.ReplaceSourceAttributes} will replace the {@link EsSourceExec} on + * the right hand side by a {@link EsQueryExec}, and thus lose the information of which fields we'll get from the lookup index. 
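+ * Tracking only the added fields (rather than the whole output) lets {@code output()} be computed lazily
+ * from the left hand side's output.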
+ */ + private final List addedFields; + private List lazyOutput; public LookupJoinExec( Source source, PhysicalPlan left, PhysicalPlan lookup, - List matchFields, List leftFields, List rightFields, - List output + List addedFields ) { super(source, left, lookup); - this.matchFields = matchFields; this.leftFields = leftFields; this.rightFields = rightFields; - this.output = output; + this.addedFields = addedFields; } private LookupJoinExec(StreamInput in) throws IOException { super(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(PhysicalPlan.class), in.readNamedWriteable(PhysicalPlan.class)); - this.matchFields = in.readNamedWriteableCollectionAsList(Attribute.class); this.leftFields = in.readNamedWriteableCollectionAsList(Attribute.class); this.rightFields = in.readNamedWriteableCollectionAsList(Attribute.class); - this.output = in.readNamedWriteableCollectionAsList(Attribute.class); + this.addedFields = in.readNamedWriteableCollectionAsList(Attribute.class); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeNamedWriteableCollection(matchFields); out.writeNamedWriteableCollection(leftFields); out.writeNamedWriteableCollection(rightFields); - out.writeNamedWriteableCollection(output); + out.writeNamedWriteableCollection(addedFields); } @Override @@ -78,10 +77,6 @@ public PhysicalPlan lookup() { return right(); } - public List matchFields() { - return matchFields; - } - public List leftFields() { return leftFields; } @@ -91,29 +86,26 @@ public List rightFields() { } public List addedFields() { - if (lazyAddedFields == null) { - AttributeSet set = outputSet(); - set.removeAll(left().output()); - for (Attribute m : matchFields) { - set.removeIf(a -> a.name().equals(m.name())); + return addedFields; + } + + @Override + public List output() { + if (lazyOutput == null) { + lazyOutput = new ArrayList<>(left().output()); + for (Attribute attr : addedFields) { + lazyOutput.add(attr); } - lazyAddedFields = new ArrayList<>(set); - lazyAddedFields.sort(Comparator.comparing(Attribute::name)); } - return lazyAddedFields; + return lazyOutput; } @Override public PhysicalPlan estimateRowSize(State state) { - state.add(false, output); + state.add(false, output()); return this; } - @Override - public List output() { - return output; - } - @Override public AttributeSet inputSet() { // TODO: this is a hack since the right side is always materialized - instead this should @@ -129,12 +121,12 @@ protected AttributeSet computeReferences() { @Override public LookupJoinExec replaceChildren(PhysicalPlan left, PhysicalPlan right) { - return new LookupJoinExec(source(), left, right, matchFields, leftFields, rightFields, output); + return new LookupJoinExec(source(), left, right, leftFields, rightFields, addedFields); } @Override protected NodeInfo info() { - return NodeInfo.create(this, LookupJoinExec::new, left(), right(), matchFields, leftFields, rightFields, output); + return NodeInfo.create(this, LookupJoinExec::new, left(), right(), leftFields, rightFields, addedFields); } @Override @@ -148,15 +140,12 @@ public boolean equals(Object o) { if (super.equals(o) == false) { return false; } - LookupJoinExec hash = (LookupJoinExec) o; - return matchFields.equals(hash.matchFields) - && leftFields.equals(hash.leftFields) - && rightFields.equals(hash.rightFields) - && output.equals(hash.output); + LookupJoinExec other = (LookupJoinExec) o; + return leftFields.equals(other.leftFields) && rightFields.equals(other.rightFields) && 
addedFields.equals(other.addedFields); } @Override public int hashCode() { - return Objects.hash(super.hashCode(), matchFields, leftFields, rightFields, output); + return Objects.hash(super.hashCode(), leftFields, rightFields, addedFields); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index 1ffc652e54337..a8afaa4d8119b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -583,8 +583,8 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan if (localSourceExec.indexMode() != IndexMode.LOOKUP) { throw new IllegalArgumentException("can't plan [" + join + "]"); } - List matchFields = new ArrayList<>(join.matchFields().size()); - for (Attribute m : join.matchFields()) { + List matchFields = new ArrayList<>(join.leftFields().size()); + for (Attribute m : join.leftFields()) { Layout.ChannelAndType t = source.layout.get(m.id()); if (t == null) { throw new IllegalArgumentException("can't plan [" + join + "][" + m + "]"); @@ -604,7 +604,7 @@ private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlan lookupFromIndexService, matchFields.getFirst().type(), localSourceExec.index().name(), - join.matchFields().getFirst().name(), + join.leftFields().getFirst().name(), join.addedFields().stream().map(f -> (NamedExpression) f).toList(), join.source() ), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java index fc52f2d5a9d23..f95ae0e0783e5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/LocalMapper.java @@ -120,15 +120,7 @@ private PhysicalPlan mapBinary(BinaryPlan binary) { ); } if (right instanceof EsSourceExec source && source.indexMode() == IndexMode.LOOKUP) { - return new LookupJoinExec( - join.source(), - left, - right, - config.matchFields(), - config.leftFields(), - config.rightFields(), - join.output() - ); + return new LookupJoinExec(join.source(), left, right, config.leftFields(), config.rightFields(), join.rightOutputFields()); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java index 23e6f4fb91d18..8a4325ed84b2a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/mapper/Mapper.java @@ -207,15 +207,7 @@ private PhysicalPlan mapBinary(BinaryPlan bp) { if (right instanceof FragmentExec fragment && fragment.fragment() instanceof EsRelation relation && relation.indexMode() == IndexMode.LOOKUP) { - return new LookupJoinExec( - join.source(), - left, - right, - config.matchFields(), - config.leftFields(), - config.rightFields(), - join.output() - ); + return new LookupJoinExec(join.source(), left, right, config.leftFields(), config.rightFields(), join.rightOutputFields()); } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index 6763988eac638..df974a88a4c57 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -263,7 +263,7 @@ public final void test() throws Throwable { ); assumeFalse( "lookup join disabled for csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V2.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V3.capabilityName()) ); if (Build.current().isSnapshot()) { assertThat( From 39481e912f10f9ce4ca85176b1bee9a9b97c43f6 Mon Sep 17 00:00:00 2001 From: Mikhail Berezovskiy Date: Fri, 29 Nov 2024 09:40:31 -0800 Subject: [PATCH 085/139] trash derived buffers (#117744) --- .../transport/netty4/NettyAllocator.java | 43 -- .../transport/netty4/TrashingByteBuf.java | 536 ++++++++++++++++++ .../transport/netty4/NettyAllocatorTests.java | 1 - 3 files changed, 536 insertions(+), 44 deletions(-) create mode 100644 modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/TrashingByteBuf.java diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java index 1eb7e13889338..e8bd5514947d6 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/NettyAllocator.java @@ -362,49 +362,6 @@ public ByteBufAllocator getDelegate() { } } - static class TrashingByteBuf extends WrappedByteBuf { - - private boolean trashed = false; - - protected TrashingByteBuf(ByteBuf buf) { - super(buf); - } - - @Override - public boolean release() { - if (refCnt() == 1) { - // see [NOTE on racy trashContent() calls] - trashContent(); - } - return super.release(); - } - - @Override - public boolean release(int decrement) { - if (refCnt() == decrement && refCnt() > 0) { - // see [NOTE on racy trashContent() calls] - trashContent(); - } - return super.release(decrement); - } - - // [NOTE on racy trashContent() calls]: We trash the buffer content _before_ reducing the ref - // count to zero, which looks racy because in principle a concurrent caller could come along - // and successfully retain() this buffer to keep it alive after it's been trashed. Such a - // caller would sometimes get an IllegalReferenceCountException ofc but that's something it - // could handle - see for instance org.elasticsearch.transport.netty4.Netty4Utils.ByteBufRefCounted.tryIncRef. - // Yet in practice this should never happen, we only ever retain() these buffers while we - // know them to be alive (i.e. via RefCounted#mustIncRef or its moral equivalents) so it'd - // be a bug for a caller to retain() a buffer whose ref count is heading to zero and whose - // contents we've already decided to trash. 
- private void trashContent() { - if (trashed == false) { - trashed = true; - TrashingByteBufAllocator.trashBuffer(buf); - } - } - } - static class TrashingCompositeByteBuf extends CompositeByteBuf { TrashingCompositeByteBuf(ByteBufAllocator alloc, boolean direct, int maxNumComponents) { diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/TrashingByteBuf.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/TrashingByteBuf.java new file mode 100644 index 0000000000000..ead0d595f0105 --- /dev/null +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/TrashingByteBuf.java @@ -0,0 +1,536 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.transport.netty4; + +import io.netty.buffer.ByteBuf; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +class TrashingByteBuf extends WrappedByteBuf { + + private boolean trashed = false; + + protected TrashingByteBuf(ByteBuf buf) { + super(buf); + } + + static TrashingByteBuf newBuf(ByteBuf buf) { + return new TrashingByteBuf(buf); + } + + @Override + public boolean release() { + if (refCnt() == 1) { + // see [NOTE on racy trashContent() calls] + trashContent(); + } + return super.release(); + } + + @Override + public boolean release(int decrement) { + if (refCnt() == decrement && refCnt() > 0) { + // see [NOTE on racy trashContent() calls] + trashContent(); + } + return super.release(decrement); + } + + // [NOTE on racy trashContent() calls]: We trash the buffer content _before_ reducing the ref + // count to zero, which looks racy because in principle a concurrent caller could come along + // and successfully retain() this buffer to keep it alive after it's been trashed. Such a + // caller would sometimes get an IllegalReferenceCountException ofc but that's something it + // could handle - see for instance org.elasticsearch.transport.netty4.Netty4Utils.ByteBufRefCounted.tryIncRef. + // Yet in practice this should never happen, we only ever retain() these buffers while we + // know them to be alive (i.e. via RefCounted#mustIncRef or its moral equivalents) so it'd + // be a bug for a caller to retain() a buffer whose ref count is heading to zero and whose + // contents we've already decided to trash. 
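+    // Illustrative sketch only (not part of the original patch): how the two release()
+    // overrides above behave for a buffer that currently has two live references:
+    //
+    //   ByteBuf buf = TrashingByteBuf.newBuf(allocator.buffer(16)); // refCnt == 1
+    //   buf.retain();                                               // refCnt == 2
+    //   buf.release(1); // refCnt (2) != decrement (1), so no trashing; refCnt drops to 1
+    //   buf.release();  // refCnt == 1, so trashContent() runs, then the buffer is freed
+    //
+    // "Trashing" overwrites the backing bytes via NettyAllocator.TrashingByteBufAllocator
+    // .trashBuffer(buf), so a use-after-release bug reads garbage instead of stale data.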
+ private void trashContent() { + if (trashed == false) { + trashed = true; + NettyAllocator.TrashingByteBufAllocator.trashBuffer(buf); + } + } + + @Override + public ByteBuf capacity(int newCapacity) { + super.capacity(newCapacity); + return this; + } + + @Override + public ByteBuf order(ByteOrder endianness) { + return newBuf(super.order(endianness)); + } + + @Override + public ByteBuf asReadOnly() { + return newBuf(super.asReadOnly()); + } + + @Override + public ByteBuf setIndex(int readerIndex, int writerIndex) { + super.setIndex(readerIndex, writerIndex); + return this; + } + + @Override + public ByteBuf discardReadBytes() { + super.discardReadBytes(); + return this; + } + + @Override + public ByteBuf discardSomeReadBytes() { + super.discardSomeReadBytes(); + return this; + } + + @Override + public ByteBuf ensureWritable(int minWritableBytes) { + super.ensureWritable(minWritableBytes); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst) { + super.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int length) { + super.getBytes(index, dst, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuf dst, int dstIndex, int length) { + super.getBytes(index, dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, byte[] dst) { + super.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, byte[] dst, int dstIndex, int length) { + super.getBytes(index, dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf getBytes(int index, ByteBuffer dst) { + super.getBytes(index, dst); + return this; + } + + @Override + public ByteBuf getBytes(int index, OutputStream out, int length) throws IOException { + super.getBytes(index, out, length); + return this; + } + + @Override + public ByteBuf setBoolean(int index, boolean value) { + super.setBoolean(index, value); + return this; + } + + @Override + public ByteBuf setByte(int index, int value) { + super.setByte(index, value); + return this; + } + + @Override + public ByteBuf setShort(int index, int value) { + super.setShort(index, value); + return this; + } + + @Override + public ByteBuf setShortLE(int index, int value) { + super.setShortLE(index, value); + return this; + } + + @Override + public ByteBuf setMedium(int index, int value) { + super.setMedium(index, value); + return this; + } + + @Override + public ByteBuf setMediumLE(int index, int value) { + super.setMediumLE(index, value); + return this; + } + + @Override + public ByteBuf setInt(int index, int value) { + super.setInt(index, value); + return this; + } + + @Override + public ByteBuf setIntLE(int index, int value) { + super.setIntLE(index, value); + return this; + } + + @Override + public ByteBuf setLong(int index, long value) { + super.setLong(index, value); + return this; + } + + @Override + public ByteBuf setLongLE(int index, long value) { + super.setLongLE(index, value); + return this; + } + + @Override + public ByteBuf setChar(int index, int value) { + super.setChar(index, value); + return this; + } + + @Override + public ByteBuf setFloat(int index, float value) { + super.setFloat(index, value); + return this; + } + + @Override + public ByteBuf setDouble(int index, double value) { + super.setDouble(index, value); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src) { + super.setBytes(index, src); + return this; + } + + @Override + public ByteBuf 
setBytes(int index, ByteBuf src, int length) { + super.setBytes(index, src, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuf src, int srcIndex, int length) { + super.setBytes(index, src, srcIndex, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, byte[] src) { + super.setBytes(index, src); + return this; + } + + @Override + public ByteBuf setBytes(int index, byte[] src, int srcIndex, int length) { + super.setBytes(index, src, srcIndex, length); + return this; + } + + @Override + public ByteBuf setBytes(int index, ByteBuffer src) { + super.setBytes(index, src); + return this; + } + + @Override + public ByteBuf readBytes(int length) { + return newBuf(super.readBytes(length)); + } + + @Override + public ByteBuf readSlice(int length) { + return newBuf(super.readSlice(length)); + } + + @Override + public ByteBuf readRetainedSlice(int length) { + return newBuf(super.readRetainedSlice(length)); + } + + @Override + public ByteBuf readBytes(ByteBuf dst) { + super.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int length) { + super.readBytes(dst, length); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuf dst, int dstIndex, int length) { + super.readBytes(dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf readBytes(byte[] dst) { + super.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(ByteBuffer dst) { + super.readBytes(dst); + return this; + } + + @Override + public ByteBuf readBytes(byte[] dst, int dstIndex, int length) { + super.readBytes(dst, dstIndex, length); + return this; + } + + @Override + public ByteBuf readBytes(OutputStream out, int length) throws IOException { + super.readBytes(out, length); + return this; + } + + @Override + public ByteBuf skipBytes(int length) { + super.skipBytes(length); + return this; + } + + @Override + public ByteBuf writeBoolean(boolean value) { + super.writeBoolean(value); + return this; + } + + @Override + public ByteBuf writeByte(int value) { + super.writeByte(value); + return this; + } + + @Override + public ByteBuf writeShort(int value) { + super.writeShort(value); + return this; + } + + @Override + public ByteBuf writeShortLE(int value) { + super.writeShortLE(value); + return this; + } + + @Override + public ByteBuf writeMedium(int value) { + super.writeMedium(value); + return this; + } + + @Override + public ByteBuf writeMediumLE(int value) { + super.writeMediumLE(value); + return this; + } + + @Override + public ByteBuf writeInt(int value) { + super.writeInt(value); + return this; + + } + + @Override + public ByteBuf writeIntLE(int value) { + super.writeIntLE(value); + return this; + } + + @Override + public ByteBuf writeLong(long value) { + super.writeLong(value); + return this; + } + + @Override + public ByteBuf writeLongLE(long value) { + super.writeLongLE(value); + return this; + } + + @Override + public ByteBuf writeChar(int value) { + super.writeChar(value); + return this; + } + + @Override + public ByteBuf writeFloat(float value) { + super.writeFloat(value); + return this; + } + + @Override + public ByteBuf writeDouble(double value) { + super.writeDouble(value); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src) { + super.writeBytes(src); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int length) { + super.writeBytes(src, length); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuf src, int srcIndex, int 
length) { + super.writeBytes(src, srcIndex, length); + return this; + } + + @Override + public ByteBuf writeBytes(byte[] src) { + super.writeBytes(src); + return this; + } + + @Override + public ByteBuf writeBytes(byte[] src, int srcIndex, int length) { + super.writeBytes(src, srcIndex, length); + return this; + } + + @Override + public ByteBuf writeBytes(ByteBuffer src) { + super.writeBytes(src); + return this; + } + + @Override + public ByteBuf writeZero(int length) { + super.writeZero(length); + return this; + } + + @Override + public ByteBuf copy() { + return newBuf(super.copy()); + } + + @Override + public ByteBuf copy(int index, int length) { + return newBuf(super.copy(index, length)); + } + + @Override + public ByteBuf slice() { + return newBuf(super.slice()); + } + + @Override + public ByteBuf retainedSlice() { + return newBuf(super.retainedSlice()); + } + + @Override + public ByteBuf slice(int index, int length) { + return newBuf(super.slice(index, length)); + } + + @Override + public ByteBuf retainedSlice(int index, int length) { + return newBuf(super.retainedSlice(index, length)); + } + + @Override + public ByteBuf duplicate() { + return newBuf(super.duplicate()); + } + + @Override + public ByteBuf retainedDuplicate() { + return newBuf(super.retainedDuplicate()); + } + + @Override + public ByteBuf retain(int increment) { + super.retain(increment); + return this; + } + + @Override + public ByteBuf touch(Object hint) { + super.touch(hint); + return this; + } + + @Override + public ByteBuf retain() { + super.retain(); + return this; + } + + @Override + public ByteBuf touch() { + super.touch(); + return this; + } + + @Override + public ByteBuf setFloatLE(int index, float value) { + return super.setFloatLE(index, value); + } + + @Override + public ByteBuf setDoubleLE(int index, double value) { + super.setDoubleLE(index, value); + return this; + } + + @Override + public ByteBuf writeFloatLE(float value) { + super.writeFloatLE(value); + return this; + } + + @Override + public ByteBuf writeDoubleLE(double value) { + super.writeDoubleLE(value); + return this; + } + + @Override + public ByteBuf asByteBuf() { + return this; + } +} diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java index a76eb9fa4875b..b9e9b667e72fe 100644 --- a/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/transport/netty4/NettyAllocatorTests.java @@ -20,7 +20,6 @@ import java.nio.ByteBuffer; import java.util.List; -import static org.elasticsearch.transport.netty4.NettyAllocator.TrashingByteBuf; import static org.elasticsearch.transport.netty4.NettyAllocator.TrashingByteBufAllocator; public class NettyAllocatorTests extends ESTestCase { From 0b764adbc19a99ee14d88b96f5f99002fabc19cb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Sat, 30 Nov 2024 08:29:46 +1100 Subject: [PATCH 086/139] Mute org.elasticsearch.search.ccs.CrossClusterIT testCancel #108061 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index f5f6b84ab8639..b82e95ea26890 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -231,6 +231,9 @@ tests: - class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT method: test {scoring.QstrWithFieldAndScoringSortedEval} 
issue: https://github.com/elastic/elasticsearch/issues/117751 +- class: org.elasticsearch.search.ccs.CrossClusterIT + method: testCancel + issue: https://github.com/elastic/elasticsearch/issues/108061 # Examples: # From c74c06daee0583562c82597b19178268b9f415e5 Mon Sep 17 00:00:00 2001 From: Nick Tindall Date: Sat, 30 Nov 2024 11:33:20 +1100 Subject: [PATCH 087/139] Deduplicate Range header parsing (#117304) --- ...CloudStorageBlobContainerRetriesTests.java | 7 +-- .../java/fixture/azure/AzureHttpHandler.java | 23 ++++---- .../gcs/GoogleCloudStorageHttpHandler.java | 17 +++--- test/fixtures/s3-fixture/build.gradle | 2 +- .../main/java/fixture/s3/S3HttpHandler.java | 20 +++---- .../src/main/java/fixture/url/URLFixture.java | 16 +++--- .../AbstractBlobContainerRetriesTestCase.java | 24 ++++----- .../test/fixture/HttpHeaderParser.java | 42 +++++++++++++++ .../http/HttpHeaderParserTests.java | 53 +++++++++++++++++++ 9 files changed, 141 insertions(+), 63 deletions(-) create mode 100644 test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java create mode 100644 test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java diff --git a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java index 110c31b212ea1..a53ec71f66376 100644 --- a/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java +++ b/modules/repository-gcs/src/test/java/org/elasticsearch/repositories/gcs/GoogleCloudStorageBlobContainerRetriesTests.java @@ -41,6 +41,7 @@ import org.elasticsearch.repositories.blobstore.ESMockAPIBasedRepositoryIntegTestCase; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.rest.RestUtils; +import org.elasticsearch.test.fixture.HttpHeaderParser; import org.threeten.bp.Duration; import java.io.IOException; @@ -177,9 +178,9 @@ public void testReadLargeBlobWithRetries() throws Exception { httpServer.createContext(downloadStorageEndpoint(blobContainer, "large_blob_retries"), exchange -> { Streams.readFully(exchange.getRequestBody()); exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); - final Tuple range = getRange(exchange); - final int offset = Math.toIntExact(range.v1()); - final byte[] chunk = Arrays.copyOfRange(bytes, offset, Math.toIntExact(Math.min(range.v2() + 1, bytes.length))); + final HttpHeaderParser.Range range = getRange(exchange); + final int offset = Math.toIntExact(range.start()); + final byte[] chunk = Arrays.copyOfRange(bytes, offset, Math.toIntExact(Math.min(range.end() + 1, bytes.length))); exchange.sendResponseHeaders(RestStatus.OK.getStatus(), chunk.length); if (randomBoolean() && countDown.decrementAndGet() >= 0) { exchange.getResponseBody().write(chunk, 0, chunk.length - 1); diff --git a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java index 904f4581ad2c9..cb7c700376a1a 100644 --- a/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java +++ b/test/fixtures/azure-fixture/src/main/java/fixture/azure/AzureHttpHandler.java @@ -22,6 +22,7 @@ import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.rest.RestUtils; +import 
org.elasticsearch.test.fixture.HttpHeaderParser; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; @@ -42,8 +43,6 @@ import java.util.Set; import java.util.UUID; import java.util.function.Predicate; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import static fixture.azure.MockAzureBlobStore.failTestWithAssertionError; import static org.elasticsearch.repositories.azure.AzureFixtureHelper.assertValidBlockId; @@ -54,7 +53,6 @@ @SuppressForbidden(reason = "Uses a HttpServer to emulate an Azure endpoint") public class AzureHttpHandler implements HttpHandler { private static final Logger logger = LogManager.getLogger(AzureHttpHandler.class); - private static final Pattern RANGE_HEADER_PATTERN = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$"); static final String X_MS_LEASE_ID = "x-ms-lease-id"; static final String X_MS_PROPOSED_LEASE_ID = "x-ms-proposed-lease-id"; static final String X_MS_LEASE_DURATION = "x-ms-lease-duration"; @@ -232,29 +230,26 @@ public void handle(final HttpExchange exchange) throws IOException { final BytesReference responseContent; final RestStatus successStatus; // see Constants.HeaderConstants.STORAGE_RANGE_HEADER - final String range = exchange.getRequestHeaders().getFirst("x-ms-range"); - if (range != null) { - final Matcher matcher = RANGE_HEADER_PATTERN.matcher(range); - if (matcher.matches() == false) { + final String rangeHeader = exchange.getRequestHeaders().getFirst("x-ms-range"); + if (rangeHeader != null) { + final HttpHeaderParser.Range range = HttpHeaderParser.parseRangeHeader(rangeHeader); + if (range == null) { throw new MockAzureBlobStore.BadRequestException( "InvalidHeaderValue", - "Range header does not match expected format: " + range + "Range header does not match expected format: " + rangeHeader ); } - final long start = Long.parseLong(matcher.group(1)); - final long end = Long.parseLong(matcher.group(2)); - final BytesReference blobContents = blob.getContents(); - if (blobContents.length() <= start) { + if (blobContents.length() <= range.start()) { exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); exchange.sendResponseHeaders(RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus(), -1); return; } responseContent = blobContents.slice( - Math.toIntExact(start), - Math.toIntExact(Math.min(end - start + 1, blobContents.length() - start)) + Math.toIntExact(range.start()), + Math.toIntExact(Math.min(range.end() - range.start() + 1, blobContents.length() - range.start())) ); successStatus = RestStatus.PARTIAL_CONTENT; } else { diff --git a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java index 51e3185623360..f6b52a32a9a1d 100644 --- a/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java +++ b/test/fixtures/gcs-fixture/src/main/java/fixture/gcs/GoogleCloudStorageHttpHandler.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.Tuple; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.rest.RestUtils; +import org.elasticsearch.test.fixture.HttpHeaderParser; import java.io.BufferedReader; import java.io.IOException; @@ -58,8 +59,6 @@ public class GoogleCloudStorageHttpHandler implements HttpHandler { private static final Logger logger = LogManager.getLogger(GoogleCloudStorageHttpHandler.class); - private static final Pattern RANGE_MATCHER = Pattern.compile("bytes=([0-9]*)-([0-9]*)"); - private 
final ConcurrentMap blobs; private final String bucket; @@ -131,19 +130,19 @@ public void handle(final HttpExchange exchange) throws IOException { // Download Object https://cloud.google.com/storage/docs/request-body BytesReference blob = blobs.get(exchange.getRequestURI().getPath().replace("/download/storage/v1/b/" + bucket + "/o/", "")); if (blob != null) { - final String range = exchange.getRequestHeaders().getFirst("Range"); + final String rangeHeader = exchange.getRequestHeaders().getFirst("Range"); final long offset; final long end; - if (range == null) { + if (rangeHeader == null) { offset = 0L; end = blob.length() - 1; } else { - Matcher matcher = RANGE_MATCHER.matcher(range); - if (matcher.find() == false) { - throw new AssertionError("Range bytes header does not match expected format: " + range); + final HttpHeaderParser.Range range = HttpHeaderParser.parseRangeHeader(rangeHeader); + if (range == null) { + throw new AssertionError("Range bytes header does not match expected format: " + rangeHeader); } - offset = Long.parseLong(matcher.group(1)); - end = Long.parseLong(matcher.group(2)); + offset = range.start(); + end = range.end(); } if (offset >= blob.length()) { diff --git a/test/fixtures/s3-fixture/build.gradle b/test/fixtures/s3-fixture/build.gradle index d628800497293..e4c35464608a8 100644 --- a/test/fixtures/s3-fixture/build.gradle +++ b/test/fixtures/s3-fixture/build.gradle @@ -15,5 +15,5 @@ dependencies { api("junit:junit:${versions.junit}") { transitive = false } - testImplementation project(':test:framework') + implementation project(':test:framework') } diff --git a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java index 56d3454aa5544..bfc0428731c56 100644 --- a/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java +++ b/test/fixtures/s3-fixture/src/main/java/fixture/s3/S3HttpHandler.java @@ -28,6 +28,7 @@ import org.elasticsearch.logging.Logger; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.rest.RestUtils; +import org.elasticsearch.test.fixture.HttpHeaderParser; import java.io.IOException; import java.io.InputStreamReader; @@ -269,8 +270,8 @@ public void handle(final HttpExchange exchange) throws IOException { exchange.sendResponseHeaders(RestStatus.NOT_FOUND.getStatus(), -1); return; } - final String range = exchange.getRequestHeaders().getFirst("Range"); - if (range == null) { + final String rangeHeader = exchange.getRequestHeaders().getFirst("Range"); + if (rangeHeader == null) { exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); exchange.sendResponseHeaders(RestStatus.OK.getStatus(), blob.length()); blob.writeTo(exchange.getResponseBody()); @@ -281,17 +282,12 @@ public void handle(final HttpExchange exchange) throws IOException { // requests with a header value like "Range: bytes=start-end" where both {@code start} and {@code end} are always defined // (sometimes to very high value for {@code end}). It would be too tedious to fully support the RFC so S3HttpHandler only // supports when both {@code start} and {@code end} are defined to match the SDK behavior. 
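+            // Worked example (illustrative values, not part of the original change): a header of
+            // "Range: bytes=0-499" selects the first 500 bytes of the blob; start == 0 and
+            // end == 499 are both inclusive, so the response body length is end - start + 1.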
- final Matcher matcher = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$").matcher(range); - if (matcher.matches() == false) { - throw new AssertionError("Bytes range does not match expected pattern: " + range); - } - var groupStart = matcher.group(1); - var groupEnd = matcher.group(2); - if (groupStart == null || groupEnd == null) { - throw new AssertionError("Bytes range does not match expected pattern: " + range); + final HttpHeaderParser.Range range = HttpHeaderParser.parseRangeHeader(rangeHeader); + if (range == null) { + throw new AssertionError("Bytes range does not match expected pattern: " + rangeHeader); } - long start = Long.parseLong(groupStart); - long end = Long.parseLong(groupEnd); + long start = range.start(); + long end = range.end(); if (end < start) { exchange.getResponseHeaders().add("Content-Type", "application/octet-stream"); exchange.sendResponseHeaders(RestStatus.OK.getStatus(), blob.length()); diff --git a/test/fixtures/url-fixture/src/main/java/fixture/url/URLFixture.java b/test/fixtures/url-fixture/src/main/java/fixture/url/URLFixture.java index 4c3159fc3c849..860f6ff141689 100644 --- a/test/fixtures/url-fixture/src/main/java/fixture/url/URLFixture.java +++ b/test/fixtures/url-fixture/src/main/java/fixture/url/URLFixture.java @@ -10,6 +10,7 @@ import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.fixture.AbstractHttpFixture; +import org.elasticsearch.test.fixture.HttpHeaderParser; import org.junit.rules.TemporaryFolder; import org.junit.rules.TestRule; @@ -21,15 +22,12 @@ import java.nio.file.Path; import java.util.HashMap; import java.util.Map; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * This {@link URLFixture} exposes a filesystem directory over HTTP. It is used in repository-url * integration tests to expose a directory created by a regular FS repository. 
*/ public class URLFixture extends AbstractHttpFixture implements TestRule { - private static final Pattern RANGE_PATTERN = Pattern.compile("bytes=(\\d+)-(\\d+)$"); private final TemporaryFolder temporaryFolder; private Path repositoryDir; @@ -60,19 +58,19 @@ private AbstractHttpFixture.Response handleGetRequest(Request request) throws IO if (normalizedPath.startsWith(normalizedRepositoryDir)) { if (Files.exists(normalizedPath) && Files.isReadable(normalizedPath) && Files.isRegularFile(normalizedPath)) { - final String range = request.getHeader("Range"); + final String rangeHeader = request.getHeader("Range"); final Map headers = new HashMap<>(contentType("application/octet-stream")); - if (range == null) { + if (rangeHeader == null) { byte[] content = Files.readAllBytes(normalizedPath); headers.put("Content-Length", String.valueOf(content.length)); return new Response(RestStatus.OK.getStatus(), headers, content); } else { - final Matcher matcher = RANGE_PATTERN.matcher(range); - if (matcher.matches() == false) { + final HttpHeaderParser.Range range = HttpHeaderParser.parseRangeHeader(rangeHeader); + if (range == null) { return new Response(RestStatus.REQUESTED_RANGE_NOT_SATISFIED.getStatus(), TEXT_PLAIN_CONTENT_TYPE, EMPTY_BYTE); } else { - long start = Long.parseLong(matcher.group(1)); - long end = Long.parseLong(matcher.group(2)); + long start = range.start(); + long end = range.end(); long rangeLength = end - start + 1; final long fileSize = Files.size(normalizedPath); if (start >= fileSize || start > end || rangeLength > fileSize) { diff --git a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java index 12094b31a049d..17768c54b2eaf 100644 --- a/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/repositories/blobstore/AbstractBlobContainerRetriesTestCase.java @@ -23,9 +23,9 @@ import org.elasticsearch.core.Nullable; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.core.TimeValue; -import org.elasticsearch.core.Tuple; import org.elasticsearch.mocksocket.MockHttpServer; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.fixture.HttpHeaderParser; import org.junit.After; import org.junit.Before; @@ -40,8 +40,6 @@ import java.util.OptionalInt; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import static org.elasticsearch.repositories.blobstore.BlobStoreTestUtil.randomPurpose; import static org.elasticsearch.test.NeverMatcher.never; @@ -371,28 +369,24 @@ protected static byte[] randomBlobContent(int minSize) { return randomByteArrayOfLength(randomIntBetween(minSize, frequently() ? 
512 : 1 << 20)); // rarely up to 1mb } - private static final Pattern RANGE_PATTERN = Pattern.compile("^bytes=([0-9]+)-([0-9]+)$"); - - protected static Tuple getRange(HttpExchange exchange) { + protected static HttpHeaderParser.Range getRange(HttpExchange exchange) { final String rangeHeader = exchange.getRequestHeaders().getFirst("Range"); if (rangeHeader == null) { - return Tuple.tuple(0L, MAX_RANGE_VAL); + return new HttpHeaderParser.Range(0L, MAX_RANGE_VAL); } - final Matcher matcher = RANGE_PATTERN.matcher(rangeHeader); - assertTrue(rangeHeader + " matches expected pattern", matcher.matches()); - long rangeStart = Long.parseLong(matcher.group(1)); - long rangeEnd = Long.parseLong(matcher.group(2)); - assertThat(rangeStart, lessThanOrEqualTo(rangeEnd)); - return Tuple.tuple(rangeStart, rangeEnd); + final HttpHeaderParser.Range range = HttpHeaderParser.parseRangeHeader(rangeHeader); + assertNotNull(rangeHeader + " matches expected pattern", range); + assertThat(range.start(), lessThanOrEqualTo(range.end())); + return range; } protected static int getRangeStart(HttpExchange exchange) { - return Math.toIntExact(getRange(exchange).v1()); + return Math.toIntExact(getRange(exchange).start()); } protected static OptionalInt getRangeEnd(HttpExchange exchange) { - final long rangeEnd = getRange(exchange).v2(); + final long rangeEnd = getRange(exchange).end(); if (rangeEnd == MAX_RANGE_VAL) { return OptionalInt.empty(); } diff --git a/test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java b/test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java new file mode 100644 index 0000000000000..7018e5e259584 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/fixture/HttpHeaderParser.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.test.fixture; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public enum HttpHeaderParser { + ; + + private static final Pattern RANGE_HEADER_PATTERN = Pattern.compile("bytes=([0-9]+)-([0-9]+)"); + + /** + * Parse a "Range" header + * + * Note: only a single bounded range is supported (e.g. 
Range: bytes={range_start}-{range_end}) + * + * @see MDN: Range header + * @param rangeHeaderValue The header value as a string + * @return a {@link Range} instance representing the parsed value, or null if the header is malformed + */ + public static Range parseRangeHeader(String rangeHeaderValue) { + final Matcher matcher = RANGE_HEADER_PATTERN.matcher(rangeHeaderValue); + if (matcher.matches()) { + try { + return new Range(Long.parseLong(matcher.group(1)), Long.parseLong(matcher.group(2))); + } catch (NumberFormatException e) { + return null; + } + } + return null; + } + + public record Range(long start, long end) {} +} diff --git a/test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java b/test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java new file mode 100644 index 0000000000000..e025e7770ea4c --- /dev/null +++ b/test/framework/src/test/java/org/elasticsearch/http/HttpHeaderParserTests.java @@ -0,0 +1,53 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.http; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.fixture.HttpHeaderParser; + +import java.math.BigInteger; + +public class HttpHeaderParserTests extends ESTestCase { + + public void testParseRangeHeader() { + final long start = randomLongBetween(0, 10_000); + final long end = randomLongBetween(start, start + 10_000); + assertEquals(new HttpHeaderParser.Range(start, end), HttpHeaderParser.parseRangeHeader("bytes=" + start + "-" + end)); + } + + public void testParseRangeHeaderInvalidLong() { + final BigInteger longOverflow = BigInteger.valueOf(Long.MAX_VALUE).add(BigInteger.ONE).add(randomBigInteger()); + assertNull(HttpHeaderParser.parseRangeHeader("bytes=123-" + longOverflow)); + assertNull(HttpHeaderParser.parseRangeHeader("bytes=" + longOverflow + "-123")); + } + + public void testParseRangeHeaderMultipleRangesNotMatched() { + assertNull( + HttpHeaderParser.parseRangeHeader( + Strings.format( + "bytes=%d-%d,%d-%d", + randomIntBetween(0, 99), + randomIntBetween(100, 199), + randomIntBetween(200, 299), + randomIntBetween(300, 399) + ) + ) + ); + } + + public void testParseRangeHeaderEndlessRangeNotMatched() { + assertNull(HttpHeaderParser.parseRangeHeader(Strings.format("bytes=%d-", randomLongBetween(0, Long.MAX_VALUE)))); + } + + public void testParseRangeHeaderSuffixLengthNotMatched() { + assertNull(HttpHeaderParser.parseRangeHeader(Strings.format("bytes=-%d", randomLongBetween(0, Long.MAX_VALUE)))); + } +} From c77f09e436563fa312db791a9ea4c8ac5d97a623 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lorenzo=20Dematt=C3=A9?= Date: Sat, 30 Nov 2024 09:38:40 +0100 Subject: [PATCH 088/139] [Entitlements] Refactor InstrumenterImpl tests (#117688) Following up https://github.com/elastic/elasticsearch/pull/117332#discussion_r1856803255, I refactored `InstrumenterImpl` tests, splitting them into 2 suites: - `SyntheticInstrumenterImplTests`, which tests the mechanics of instrumentation using ad-hoc test cases. 
This should see little change now that we have our Instrumenter working as intended - `InstrumenterImplTests`, which is back to its original intent to make sure (1) the right arguments make it all the way to the check methods, and (2) if the check method throws, that exception correctly bubbles up through the instrumented method. The PR also includes a little change to `InstrumenterImpl` construction to clean it up a bit and make it more testable. --- .../impl/InstrumentationServiceImpl.java | 28 +- .../impl/InstrumenterImpl.java | 61 +-- .../impl/InstrumentationServiceImplTests.java | 42 +- .../impl/InstrumenterTests.java | 378 ++++------------- .../impl/SyntheticInstrumenterTests.java | 383 ++++++++++++++++++ .../instrumentation/impl/TestException.java | 12 + .../instrumentation/impl/TestLoader.java | 20 + .../instrumentation/impl/TestMethodUtils.java | 81 ++++ .../EntitlementInitialization.java | 8 +- .../{CheckerMethod.java => CheckMethod.java} | 4 +- .../InstrumentationService.java | 10 +- 11 files changed, 646 insertions(+), 381 deletions(-) create mode 100644 libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/SyntheticInstrumenterTests.java create mode 100644 libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestException.java create mode 100644 libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestLoader.java create mode 100644 libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestMethodUtils.java rename libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/{CheckerMethod.java => CheckMethod.java} (82%) diff --git a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java index 16bd04e60c5e3..9e23d2c0412c3 100644 --- a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java +++ b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImpl.java @@ -9,7 +9,7 @@ package org.elasticsearch.entitlement.instrumentation.impl; -import org.elasticsearch.entitlement.instrumentation.CheckerMethod; +import org.elasticsearch.entitlement.instrumentation.CheckMethod; import org.elasticsearch.entitlement.instrumentation.InstrumentationService; import org.elasticsearch.entitlement.instrumentation.Instrumenter; import org.elasticsearch.entitlement.instrumentation.MethodKey; @@ -20,37 +20,23 @@ import org.objectweb.asm.Type; import java.io.IOException; -import java.lang.reflect.Method; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.stream.Stream; public class InstrumentationServiceImpl implements InstrumentationService { @Override - public Instrumenter newInstrumenter(String classNameSuffix, Map instrumentationMethods) { - return new InstrumenterImpl(classNameSuffix, instrumentationMethods); - } - - /** - * @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline - */ - public MethodKey methodKeyForTarget(Method targetMethod) { - Type actualType = Type.getMethodType(Type.getMethodDescriptor(targetMethod)); - return new MethodKey( - 
Type.getInternalName(targetMethod.getDeclaringClass()), - targetMethod.getName(), - Stream.of(actualType.getArgumentTypes()).map(Type::getInternalName).toList() - ); + public Instrumenter newInstrumenter(Map checkMethods) { + return InstrumenterImpl.create(checkMethods); } @Override - public Map lookupMethodsToInstrument(String entitlementCheckerClassName) throws ClassNotFoundException, + public Map lookupMethodsToInstrument(String entitlementCheckerClassName) throws ClassNotFoundException, IOException { - var methodsToInstrument = new HashMap(); + var methodsToInstrument = new HashMap(); var checkerClass = Class.forName(entitlementCheckerClassName); var classFileInfo = InstrumenterImpl.getClassFileInfo(checkerClass); ClassReader reader = new ClassReader(classFileInfo.bytecodes()); @@ -69,9 +55,9 @@ public MethodVisitor visitMethod( var methodToInstrument = parseCheckerMethodSignature(checkerMethodName, checkerMethodArgumentTypes); var checkerParameterDescriptors = Arrays.stream(checkerMethodArgumentTypes).map(Type::getDescriptor).toList(); - var checkerMethod = new CheckerMethod(Type.getInternalName(checkerClass), checkerMethodName, checkerParameterDescriptors); + var checkMethod = new CheckMethod(Type.getInternalName(checkerClass), checkerMethodName, checkerParameterDescriptors); - methodsToInstrument.put(methodToInstrument, checkerMethod); + methodsToInstrument.put(methodToInstrument, checkMethod); return mv; } diff --git a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java index 4d762dc997383..57e30c01c5c28 100644 --- a/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java +++ b/libs/entitlement/asm-provider/src/main/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterImpl.java @@ -9,7 +9,7 @@ package org.elasticsearch.entitlement.instrumentation.impl; -import org.elasticsearch.entitlement.instrumentation.CheckerMethod; +import org.elasticsearch.entitlement.instrumentation.CheckMethod; import org.elasticsearch.entitlement.instrumentation.Instrumenter; import org.elasticsearch.entitlement.instrumentation.MethodKey; import org.objectweb.asm.AnnotationVisitor; @@ -37,9 +37,28 @@ public class InstrumenterImpl implements Instrumenter { - private static final String checkerClassDescriptor; - private static final String handleClass; - static { + private final String getCheckerClassMethodDescriptor; + private final String handleClass; + + /** + * To avoid class name collisions during testing without an agent to replace classes in-place. 
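+     * For example (hypothetical value): a test may pass a suffix such as "_NEW" and define the
+     * instrumented bytecode under that new name, so the original and the instrumented class can
+     * be loaded side by side in the same class loader.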
+ */ + private final String classNameSuffix; + private final Map checkMethods; + + InstrumenterImpl( + String handleClass, + String getCheckerClassMethodDescriptor, + String classNameSuffix, + Map checkMethods + ) { + this.handleClass = handleClass; + this.getCheckerClassMethodDescriptor = getCheckerClassMethodDescriptor; + this.classNameSuffix = classNameSuffix; + this.checkMethods = checkMethods; + } + + static String getCheckerClassName() { int javaVersion = Runtime.version().feature(); final String classNamePrefix; if (javaVersion >= 23) { @@ -47,20 +66,14 @@ public class InstrumenterImpl implements Instrumenter { } else { classNamePrefix = ""; } - String checkerClass = "org/elasticsearch/entitlement/bridge/" + classNamePrefix + "EntitlementChecker"; - handleClass = checkerClass + "Handle"; - checkerClassDescriptor = Type.getObjectType(checkerClass).getDescriptor(); + return "org/elasticsearch/entitlement/bridge/" + classNamePrefix + "EntitlementChecker"; } - /** - * To avoid class name collisions during testing without an agent to replace classes in-place. - */ - private final String classNameSuffix; - private final Map instrumentationMethods; - - public InstrumenterImpl(String classNameSuffix, Map instrumentationMethods) { - this.classNameSuffix = classNameSuffix; - this.instrumentationMethods = instrumentationMethods; + public static InstrumenterImpl create(Map checkMethods) { + String checkerClass = getCheckerClassName(); + String handleClass = checkerClass + "Handle"; + String getCheckerClassMethodDescriptor = Type.getMethodDescriptor(Type.getObjectType(checkerClass)); + return new InstrumenterImpl(handleClass, getCheckerClassMethodDescriptor, "", checkMethods); } public ClassFileInfo instrumentClassFile(Class clazz) throws IOException { @@ -156,7 +169,7 @@ public MethodVisitor visitMethod(int access, String name, String descriptor, Str boolean isStatic = (access & ACC_STATIC) != 0; boolean isCtor = "".equals(name); var key = new MethodKey(className, name, Stream.of(Type.getArgumentTypes(descriptor)).map(Type::getInternalName).toList()); - var instrumentationMethod = instrumentationMethods.get(key); + var instrumentationMethod = checkMethods.get(key); if (instrumentationMethod != null) { // LOGGER.debug("Will instrument method {}", key); return new EntitlementMethodVisitor(Opcodes.ASM9, mv, isStatic, isCtor, descriptor, instrumentationMethod); @@ -190,7 +203,7 @@ class EntitlementMethodVisitor extends MethodVisitor { private final boolean instrumentedMethodIsStatic; private final boolean instrumentedMethodIsCtor; private final String instrumentedMethodDescriptor; - private final CheckerMethod instrumentationMethod; + private final CheckMethod checkMethod; private boolean hasCallerSensitiveAnnotation = false; EntitlementMethodVisitor( @@ -199,13 +212,13 @@ class EntitlementMethodVisitor extends MethodVisitor { boolean instrumentedMethodIsStatic, boolean instrumentedMethodIsCtor, String instrumentedMethodDescriptor, - CheckerMethod instrumentationMethod + CheckMethod checkMethod ) { super(api, methodVisitor); this.instrumentedMethodIsStatic = instrumentedMethodIsStatic; this.instrumentedMethodIsCtor = instrumentedMethodIsCtor; this.instrumentedMethodDescriptor = instrumentedMethodDescriptor; - this.instrumentationMethod = instrumentationMethod; + this.checkMethod = checkMethod; } @Override @@ -278,11 +291,11 @@ private void forwardIncomingArguments() { private void invokeInstrumentationMethod() { mv.visitMethodInsn( INVOKEINTERFACE, - instrumentationMethod.className(), - 
instrumentationMethod.methodName(), + checkMethod.className(), + checkMethod.methodName(), Type.getMethodDescriptor( Type.VOID_TYPE, - instrumentationMethod.parameterDescriptors().stream().map(Type::getType).toArray(Type[]::new) + checkMethod.parameterDescriptors().stream().map(Type::getType).toArray(Type[]::new) ), true ); @@ -290,7 +303,7 @@ private void invokeInstrumentationMethod() { } protected void pushEntitlementChecker(MethodVisitor mv) { - mv.visitMethodInsn(INVOKESTATIC, handleClass, "instance", "()" + checkerClassDescriptor, false); + mv.visitMethodInsn(INVOKESTATIC, handleClass, "instance", getCheckerClassMethodDescriptor, false); } public record ClassFileInfo(String fileName, byte[] bytecodes) {} diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java index 5eee0bf27d1df..9ccb72637d463 100644 --- a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests.java @@ -9,7 +9,7 @@ package org.elasticsearch.entitlement.instrumentation.impl; -import org.elasticsearch.entitlement.instrumentation.CheckerMethod; +import org.elasticsearch.entitlement.instrumentation.CheckMethod; import org.elasticsearch.entitlement.instrumentation.InstrumentationService; import org.elasticsearch.entitlement.instrumentation.MethodKey; import org.elasticsearch.test.ESTestCase; @@ -52,15 +52,15 @@ interface TestCheckerCtors { } public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundException { - Map methodsMap = instrumentationService.lookupMethodsToInstrument(TestChecker.class.getName()); + Map checkMethods = instrumentationService.lookupMethodsToInstrument(TestChecker.class.getName()); - assertThat(methodsMap, aMapWithSize(3)); + assertThat(checkMethods, aMapWithSize(3)); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo(new MethodKey("org/example/TestTargetClass", "staticMethod", List.of("I", "java/lang/String", "java/lang/Object"))), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestChecker", "check$org_example_TestTargetClass$staticMethod", List.of("Ljava/lang/Class;", "I", "Ljava/lang/String;", "Ljava/lang/Object;") @@ -69,7 +69,7 @@ public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundE ) ); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo( new MethodKey( @@ -79,7 +79,7 @@ public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundE ) ), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestChecker", "check$$instanceMethodNoArgs", List.of( @@ -91,7 +91,7 @@ public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundE ) ); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo( new MethodKey( @@ -101,7 +101,7 @@ public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundE ) ), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestChecker", "check$$instanceMethodWithArgs", List.of( @@ -117,15 
+117,15 @@ public void testInstrumentationTargetLookup() throws IOException, ClassNotFoundE } public void testInstrumentationTargetLookupWithOverloads() throws IOException, ClassNotFoundException { - Map methodsMap = instrumentationService.lookupMethodsToInstrument(TestCheckerOverloads.class.getName()); + Map checkMethods = instrumentationService.lookupMethodsToInstrument(TestCheckerOverloads.class.getName()); - assertThat(methodsMap, aMapWithSize(2)); + assertThat(checkMethods, aMapWithSize(2)); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo(new MethodKey("org/example/TestTargetClass", "staticMethodWithOverload", List.of("I", "java/lang/String"))), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerOverloads", "check$org_example_TestTargetClass$staticMethodWithOverload", List.of("Ljava/lang/Class;", "I", "Ljava/lang/String;") @@ -134,11 +134,11 @@ public void testInstrumentationTargetLookupWithOverloads() throws IOException, C ) ); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo(new MethodKey("org/example/TestTargetClass", "staticMethodWithOverload", List.of("I", "I"))), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerOverloads", "check$org_example_TestTargetClass$staticMethodWithOverload", List.of("Ljava/lang/Class;", "I", "I") @@ -149,15 +149,15 @@ public void testInstrumentationTargetLookupWithOverloads() throws IOException, C } public void testInstrumentationTargetLookupWithCtors() throws IOException, ClassNotFoundException { - Map methodsMap = instrumentationService.lookupMethodsToInstrument(TestCheckerCtors.class.getName()); + Map checkMethods = instrumentationService.lookupMethodsToInstrument(TestCheckerCtors.class.getName()); - assertThat(methodsMap, aMapWithSize(2)); + assertThat(checkMethods, aMapWithSize(2)); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo(new MethodKey("org/example/TestTargetClass", "", List.of("I", "java/lang/String"))), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerCtors", "check$org_example_TestTargetClass$", List.of("Ljava/lang/Class;", "I", "Ljava/lang/String;") @@ -166,11 +166,11 @@ public void testInstrumentationTargetLookupWithCtors() throws IOException, Class ) ); assertThat( - methodsMap, + checkMethods, hasEntry( equalTo(new MethodKey("org/example/TestTargetClass", "", List.of())), equalTo( - new CheckerMethod( + new CheckMethod( "org/elasticsearch/entitlement/instrumentation/impl/InstrumentationServiceImplTests$TestCheckerCtors", "check$org_example_TestTargetClass$", List.of("Ljava/lang/Class;") diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java index 40f0162d2eaa2..c8e1b26d1fc52 100644 --- a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/InstrumenterTests.java @@ -9,10 +9,8 @@ package org.elasticsearch.entitlement.instrumentation.impl; -import org.elasticsearch.common.Strings; import org.elasticsearch.entitlement.bridge.EntitlementChecker; -import 
org.elasticsearch.entitlement.instrumentation.CheckerMethod; -import org.elasticsearch.entitlement.instrumentation.InstrumentationService; +import org.elasticsearch.entitlement.instrumentation.CheckMethod; import org.elasticsearch.entitlement.instrumentation.MethodKey; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -23,16 +21,21 @@ import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; +import java.net.MalformedURLException; +import java.net.URI; import java.net.URL; import java.net.URLStreamHandlerFactory; -import java.util.Arrays; import java.util.List; import java.util.Map; import static org.elasticsearch.entitlement.instrumentation.impl.ASMUtils.bytecode2text; -import static org.elasticsearch.entitlement.instrumentation.impl.InstrumenterImpl.getClassFileInfo; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.callStaticMethod; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.getCheckMethod; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.methodKeyForConstructor; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.methodKeyForTarget; +import static org.hamcrest.Matchers.arrayContaining; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; -import static org.hamcrest.Matchers.startsWith; import static org.objectweb.asm.Opcodes.INVOKESTATIC; /** @@ -42,7 +45,6 @@ */ @ESTestCase.WithoutSecurityManager public class InstrumenterTests extends ESTestCase { - final InstrumentationService instrumentationService = new InstrumentationServiceImpl(); static volatile TestEntitlementChecker testChecker; @@ -59,12 +61,7 @@ public void initialize() { * Contains all the virtual methods from {@link ClassToInstrument}, * allowing this test to call them on the dynamically loaded instrumented class. */ - public interface Testable { - // This method is here to demonstrate Instrumenter does not get confused by overloads - void someMethod(int arg); - - void someMethod(int arg, String anotherArg); - } + public interface Testable {} /** * This is a placeholder for real class library methods. @@ -78,41 +75,24 @@ public static class ClassToInstrument implements Testable { public ClassToInstrument() {} - public ClassToInstrument(int arg) {} + // URLClassLoader ctor + public ClassToInstrument(URL[] urls) {} public static void systemExit(int status) { assertEquals(123, status); } - - public static void anotherSystemExit(int status) { - assertEquals(123, status); - } - - public void someMethod(int arg) {} - - public void someMethod(int arg, String anotherArg) {} - - public static void someStaticMethod(int arg) {} - - public static void someStaticMethod(int arg, String anotherArg) {} } - static final class TestException extends RuntimeException {} + private static final String SAMPLE_NAME = "TEST"; - /** - * Interface to test specific, "synthetic" cases (e.g. overloaded methods, overloaded constructors, etc.) 
that - * may be not present/may be difficult to find or not clear in the production EntitlementChecker interface - */ - public interface MockEntitlementChecker extends EntitlementChecker { - void checkSomeStaticMethod(Class clazz, int arg); - - void checkSomeStaticMethod(Class clazz, int arg, String anotherArg); - - void checkSomeInstanceMethod(Class clazz, Testable that, int arg, String anotherArg); + private static final URL SAMPLE_URL = createSampleUrl(); - void checkCtor(Class clazz); - - void checkCtor(Class clazz, int arg); + private static URL createSampleUrl() { + try { + return URI.create("file:/test/example").toURL(); + } catch (MalformedURLException e) { + return null; + } } /** @@ -122,7 +102,7 @@ public interface MockEntitlementChecker extends EntitlementChecker { * just to demonstrate that the injected bytecodes succeed in calling these methods. * It also asserts that the arguments are correct. */ - public static class TestEntitlementChecker implements MockEntitlementChecker { + public static class TestEntitlementChecker implements EntitlementChecker { /** * This allows us to test that the instrumentation is correct in both cases: * if the check throws, and if it doesn't. @@ -130,104 +110,84 @@ public static class TestEntitlementChecker implements MockEntitlementChecker { volatile boolean isActive; int checkSystemExitCallCount = 0; - int checkSomeStaticMethodIntCallCount = 0; - int checkSomeStaticMethodIntStringCallCount = 0; - int checkSomeInstanceMethodCallCount = 0; - - int checkCtorCallCount = 0; - int checkCtorIntCallCount = 0; + int checkURLClassLoaderCallCount = 0; @Override public void check$java_lang_System$exit(Class callerClass, int status) { checkSystemExitCallCount++; - assertSame(InstrumenterTests.class, callerClass); + assertSame(TestMethodUtils.class, callerClass); assertEquals(123, status); throwIfActive(); } @Override - public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls) {} - - @Override - public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent) {} - - @Override - public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory) {} - - @Override - public void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent) {} - - @Override - public void check$java_net_URLClassLoader$( - Class callerClass, - String name, - URL[] urls, - ClassLoader parent, - URLStreamHandlerFactory factory - ) {} - - private void throwIfActive() { - if (isActive) { - throw new TestException(); - } - } - - @Override - public void checkSomeStaticMethod(Class callerClass, int arg) { - checkSomeStaticMethodIntCallCount++; + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls) { + checkURLClassLoaderCallCount++; assertSame(InstrumenterTests.class, callerClass); - assertEquals(123, arg); + assertThat(urls, arrayContaining(SAMPLE_URL)); throwIfActive(); } @Override - public void checkSomeStaticMethod(Class callerClass, int arg, String anotherArg) { - checkSomeStaticMethodIntStringCallCount++; + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent) { + checkURLClassLoaderCallCount++; assertSame(InstrumenterTests.class, callerClass); - assertEquals(123, arg); - assertEquals("abc", anotherArg); + assertThat(urls, arrayContaining(SAMPLE_URL)); + assertThat(parent, equalTo(ClassLoader.getSystemClassLoader())); throwIfActive(); } @Override - public void 
checkSomeInstanceMethod(Class callerClass, Testable that, int arg, String anotherArg) { - checkSomeInstanceMethodCallCount++; + public void check$java_net_URLClassLoader$(Class callerClass, URL[] urls, ClassLoader parent, URLStreamHandlerFactory factory) { + checkURLClassLoaderCallCount++; assertSame(InstrumenterTests.class, callerClass); - assertThat( - that.getClass().getName(), - startsWith("org.elasticsearch.entitlement.instrumentation.impl.InstrumenterTests$ClassToInstrument") - ); - assertEquals(123, arg); - assertEquals("def", anotherArg); + assertThat(urls, arrayContaining(SAMPLE_URL)); + assertThat(parent, equalTo(ClassLoader.getSystemClassLoader())); throwIfActive(); } @Override - public void checkCtor(Class callerClass) { - checkCtorCallCount++; + public void check$java_net_URLClassLoader$(Class callerClass, String name, URL[] urls, ClassLoader parent) { + checkURLClassLoaderCallCount++; assertSame(InstrumenterTests.class, callerClass); + assertThat(name, equalTo(SAMPLE_NAME)); + assertThat(urls, arrayContaining(SAMPLE_URL)); + assertThat(parent, equalTo(ClassLoader.getSystemClassLoader())); throwIfActive(); } @Override - public void checkCtor(Class callerClass, int arg) { - checkCtorIntCallCount++; + public void check$java_net_URLClassLoader$( + Class callerClass, + String name, + URL[] urls, + ClassLoader parent, + URLStreamHandlerFactory factory + ) { + checkURLClassLoaderCallCount++; assertSame(InstrumenterTests.class, callerClass); - assertEquals(123, arg); + assertThat(name, equalTo(SAMPLE_NAME)); + assertThat(urls, arrayContaining(SAMPLE_URL)); + assertThat(parent, equalTo(ClassLoader.getSystemClassLoader())); throwIfActive(); } + + private void throwIfActive() { + if (isActive) { + throw new TestException(); + } + } } - public void testClassIsInstrumented() throws Exception { + public void testSystemExitIsInstrumented() throws Exception { var classToInstrument = ClassToInstrument.class; - CheckerMethod checkerMethod = getCheckerMethod(EntitlementChecker.class, "check$java_lang_System$exit", Class.class, int.class); - Map methods = Map.of( - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("systemExit", int.class)), - checkerMethod + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("systemExit", int.class)), + getCheckMethod(EntitlementChecker.class, "check$java_lang_System$exit", Class.class, int.class) ); - var instrumenter = createInstrumenter(methods); + var instrumenter = createInstrumenter(checkMethods); byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); @@ -251,86 +211,15 @@ public void testClassIsInstrumented() throws Exception { assertThrows(TestException.class, () -> callStaticMethod(newClass, "systemExit", 123)); } - public void testClassIsNotInstrumentedTwice() throws Exception { - var classToInstrument = ClassToInstrument.class; - - CheckerMethod checkerMethod = getCheckerMethod(EntitlementChecker.class, "check$java_lang_System$exit", Class.class, int.class); - Map methods = Map.of( - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("systemExit", int.class)), - checkerMethod - ); - - var instrumenter = createInstrumenter(methods); - - InstrumenterImpl.ClassFileInfo initial = getClassFileInfo(classToInstrument); - var internalClassName = Type.getInternalName(classToInstrument); - - byte[] instrumentedBytecode = instrumenter.instrumentClass(internalClassName, initial.bytecodes()); - byte[] instrumentedTwiceBytecode = 
instrumenter.instrumentClass(internalClassName, instrumentedBytecode); - - logger.trace(() -> Strings.format("Bytecode after 1st instrumentation:\n%s", bytecode2text(instrumentedBytecode))); - logger.trace(() -> Strings.format("Bytecode after 2nd instrumentation:\n%s", bytecode2text(instrumentedTwiceBytecode))); - - Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( - classToInstrument.getName() + "_NEW_NEW", - instrumentedTwiceBytecode - ); - - getTestEntitlementChecker().isActive = true; - getTestEntitlementChecker().checkSystemExitCallCount = 0; - - assertThrows(TestException.class, () -> callStaticMethod(newClass, "systemExit", 123)); - assertEquals(1, getTestEntitlementChecker().checkSystemExitCallCount); - } - - public void testClassAllMethodsAreInstrumentedFirstPass() throws Exception { + public void testURLClassLoaderIsInstrumented() throws Exception { var classToInstrument = ClassToInstrument.class; - CheckerMethod checkerMethod = getCheckerMethod(EntitlementChecker.class, "check$java_lang_System$exit", Class.class, int.class); - Map methods = Map.of( - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("systemExit", int.class)), - checkerMethod, - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("anotherSystemExit", int.class)), - checkerMethod + Map checkMethods = Map.of( + methodKeyForConstructor(classToInstrument, List.of(Type.getInternalName(URL[].class))), + getCheckMethod(EntitlementChecker.class, "check$java_net_URLClassLoader$", Class.class, URL[].class) ); - var instrumenter = createInstrumenter(methods); - - InstrumenterImpl.ClassFileInfo initial = getClassFileInfo(classToInstrument); - var internalClassName = Type.getInternalName(classToInstrument); - - byte[] instrumentedBytecode = instrumenter.instrumentClass(internalClassName, initial.bytecodes()); - byte[] instrumentedTwiceBytecode = instrumenter.instrumentClass(internalClassName, instrumentedBytecode); - - logger.trace(() -> Strings.format("Bytecode after 1st instrumentation:\n%s", bytecode2text(instrumentedBytecode))); - logger.trace(() -> Strings.format("Bytecode after 2nd instrumentation:\n%s", bytecode2text(instrumentedTwiceBytecode))); - - Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( - classToInstrument.getName() + "_NEW_NEW", - instrumentedTwiceBytecode - ); - - getTestEntitlementChecker().isActive = true; - getTestEntitlementChecker().checkSystemExitCallCount = 0; - - assertThrows(TestException.class, () -> callStaticMethod(newClass, "systemExit", 123)); - assertEquals(1, getTestEntitlementChecker().checkSystemExitCallCount); - - assertThrows(TestException.class, () -> callStaticMethod(newClass, "anotherSystemExit", 123)); - assertEquals(2, getTestEntitlementChecker().checkSystemExitCallCount); - } - - public void testInstrumenterWorksWithOverloads() throws Exception { - var classToInstrument = ClassToInstrument.class; - - Map methods = Map.of( - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class)), - getCheckerMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class), - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class, String.class)), - getCheckerMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class, String.class) - ); - - var instrumenter = createInstrumenter(methods); + var instrumenter = createInstrumenter(checkMethods); byte[] 
newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); @@ -343,80 +232,19 @@ public void testInstrumenterWorksWithOverloads() throws Exception { newBytecode ); - getTestEntitlementChecker().isActive = true; - - // After checking is activated, everything should throw - assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123)); - assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123, "abc")); - - assertEquals(1, getTestEntitlementChecker().checkSomeStaticMethodIntCallCount); - assertEquals(1, getTestEntitlementChecker().checkSomeStaticMethodIntStringCallCount); - } - - public void testInstrumenterWorksWithInstanceMethodsAndOverloads() throws Exception { - var classToInstrument = ClassToInstrument.class; - - Map methods = Map.of( - instrumentationService.methodKeyForTarget(classToInstrument.getMethod("someMethod", int.class, String.class)), - getCheckerMethod(MockEntitlementChecker.class, "checkSomeInstanceMethod", Class.class, Testable.class, int.class, String.class) - ); - - var instrumenter = createInstrumenter(methods); - - byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); - - if (logger.isTraceEnabled()) { - logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); - } + getTestEntitlementChecker().isActive = false; - Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( - classToInstrument.getName() + "_NEW", - newBytecode - ); + // Before checking is active, nothing should throw + newClass.getConstructor(URL[].class).newInstance((Object) new URL[] { SAMPLE_URL }); getTestEntitlementChecker().isActive = true; - Testable testTargetClass = (Testable) (newClass.getConstructor().newInstance()); - - // This overload is not instrumented, so it will not throw - testTargetClass.someMethod(123); - assertThrows(TestException.class, () -> testTargetClass.someMethod(123, "def")); - - assertEquals(1, getTestEntitlementChecker().checkSomeInstanceMethodCallCount); - } - - public void testInstrumenterWorksWithConstructors() throws Exception { - var classToInstrument = ClassToInstrument.class; - - Map methods = Map.of( - new MethodKey(classToInstrument.getName().replace('.', '/'), "", List.of()), - getCheckerMethod(MockEntitlementChecker.class, "checkCtor", Class.class), - new MethodKey(classToInstrument.getName().replace('.', '/'), "", List.of("I")), - getCheckerMethod(MockEntitlementChecker.class, "checkCtor", Class.class, int.class) - ); - - var instrumenter = createInstrumenter(methods); - - byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); - - if (logger.isTraceEnabled()) { - logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); - } - - Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( - classToInstrument.getName() + "_NEW", - newBytecode + // After checking is activated, everything should throw + var exception = assertThrows( + InvocationTargetException.class, + () -> newClass.getConstructor(URL[].class).newInstance((Object) new URL[] { SAMPLE_URL }) ); - - getTestEntitlementChecker().isActive = true; - - var ex = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor().newInstance()); - assertThat(ex.getCause(), instanceOf(TestException.class)); - var ex2 = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor(int.class).newInstance(123)); - assertThat(ex2.getCause(), 
instanceOf(TestException.class)); - - assertEquals(1, getTestEntitlementChecker().checkCtorCallCount); - assertEquals(1, getTestEntitlementChecker().checkCtorIntCallCount); + assertThat(exception.getCause(), instanceOf(TestException.class)); } /** This test doesn't replace classToInstrument in-place but instead loads a separate @@ -425,9 +253,10 @@ public void testInstrumenterWorksWithConstructors() throws Exception { * MethodKey and instrumentationMethod with slightly different signatures (using the common interface * Testable) which is not what would happen when it's run by the agent. */ - private InstrumenterImpl createInstrumenter(Map methods) throws NoSuchMethodException { + private InstrumenterImpl createInstrumenter(Map checkMethods) throws NoSuchMethodException { Method getter = InstrumenterTests.class.getMethod("getTestEntitlementChecker"); - return new InstrumenterImpl("_NEW", methods) { + + return new InstrumenterImpl(null, null, "_NEW", checkMethods) { /** * We're not testing the bridge library here. * Just call our own getter instead. @@ -445,58 +274,5 @@ protected void pushEntitlementChecker(MethodVisitor mv) { }; } - private static CheckerMethod getCheckerMethod(Class clazz, String methodName, Class... parameterTypes) - throws NoSuchMethodException { - var method = clazz.getMethod(methodName, parameterTypes); - return new CheckerMethod( - Type.getInternalName(clazz), - method.getName(), - Arrays.stream(Type.getArgumentTypes(method)).map(Type::getDescriptor).toList() - ); - } - - /** - * Calling a static method of a dynamically loaded class is significantly more cumbersome - * than calling a virtual method. - */ - private static void callStaticMethod(Class c, String methodName, int arg) throws NoSuchMethodException, IllegalAccessException { - try { - c.getMethod(methodName, int.class).invoke(null, arg); - } catch (InvocationTargetException e) { - Throwable cause = e.getCause(); - if (cause instanceof TestException n) { - // Sometimes we're expecting this one! - throw n; - } else { - throw new AssertionError(cause); - } - } - } - - private static void callStaticMethod(Class c, String methodName, int arg1, String arg2) throws NoSuchMethodException, - IllegalAccessException { - try { - c.getMethod(methodName, int.class, String.class).invoke(null, arg1, arg2); - } catch (InvocationTargetException e) { - Throwable cause = e.getCause(); - if (cause instanceof TestException n) { - // Sometimes we're expecting this one! - throw n; - } else { - throw new AssertionError(cause); - } - } - } - - static class TestLoader extends ClassLoader { - TestLoader(ClassLoader parent) { - super(parent); - } - - public Class defineClassFromBytes(String name, byte[] bytes) { - return defineClass(name, bytes, 0, bytes.length); - } - } - private static final Logger logger = LogManager.getLogger(InstrumenterTests.class); } diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/SyntheticInstrumenterTests.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/SyntheticInstrumenterTests.java new file mode 100644 index 0000000000000..8e0409971ba61 --- /dev/null +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/SyntheticInstrumenterTests.java @@ -0,0 +1,383 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.entitlement.instrumentation.impl; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.entitlement.instrumentation.CheckMethod; +import org.elasticsearch.entitlement.instrumentation.MethodKey; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.test.ESTestCase; +import org.objectweb.asm.Type; + +import java.lang.reflect.InvocationTargetException; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.entitlement.instrumentation.impl.ASMUtils.bytecode2text; +import static org.elasticsearch.entitlement.instrumentation.impl.InstrumenterImpl.getClassFileInfo; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.callStaticMethod; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.getCheckMethod; +import static org.elasticsearch.entitlement.instrumentation.impl.TestMethodUtils.methodKeyForTarget; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.startsWith; + +/** + * This tests {@link InstrumenterImpl} with some ad-hoc instrumented method and checker methods, to allow us to check + * some ad-hoc test cases (e.g. overloaded methods, overloaded targets, multiple instrumentation, etc.) + */ +@ESTestCase.WithoutSecurityManager +public class SyntheticInstrumenterTests extends ESTestCase { + private static final Logger logger = LogManager.getLogger(SyntheticInstrumenterTests.class); + + /** + * Contains all the virtual methods from {@link TestClassToInstrument}, + * allowing this test to call them on the dynamically loaded instrumented class. + */ + public interface Testable { + // This method is here to demonstrate Instrumenter does not get confused by overloads + void someMethod(int arg); + + void someMethod(int arg, String anotherArg); + } + + /** + * This is a placeholder for real class library methods. + * Without the java agent, we can't instrument the real methods, so we instrument this instead. + *
<p>
+ * Methods of this class must have the same signature and the same static/virtual condition as the corresponding real method. + * They should assert that the arguments came through correctly. + * They must not throw {@link TestException}. + */ + public static class TestClassToInstrument implements Testable { + + public TestClassToInstrument() {} + + public TestClassToInstrument(int arg) {} + + public void someMethod(int arg) {} + + public void someMethod(int arg, String anotherArg) {} + + public static void someStaticMethod(int arg) {} + + public static void someStaticMethod(int arg, String anotherArg) {} + + public static void anotherStaticMethod(int arg) {} + } + + /** + * Interface to test specific, "synthetic" cases (e.g. overloaded methods, overloaded constructors, etc.) that + * may be not present/may be difficult to find or not clear in the production EntitlementChecker interface + */ + public interface MockEntitlementChecker { + void checkSomeStaticMethod(Class clazz, int arg); + + void checkSomeStaticMethod(Class clazz, int arg, String anotherArg); + + void checkSomeInstanceMethod(Class clazz, Testable that, int arg, String anotherArg); + + void checkCtor(Class clazz); + + void checkCtor(Class clazz, int arg); + } + + public static class TestEntitlementCheckerHolder { + static TestEntitlementChecker checkerInstance = new TestEntitlementChecker(); + + public static MockEntitlementChecker instance() { + return checkerInstance; + } + } + + public static class TestEntitlementChecker implements MockEntitlementChecker { + /** + * This allows us to test that the instrumentation is correct in both cases: + * if the check throws, and if it doesn't. + */ + volatile boolean isActive; + + int checkSomeStaticMethodIntCallCount = 0; + int checkSomeStaticMethodIntStringCallCount = 0; + int checkSomeInstanceMethodCallCount = 0; + + int checkCtorCallCount = 0; + int checkCtorIntCallCount = 0; + + private void throwIfActive() { + if (isActive) { + throw new TestException(); + } + } + + @Override + public void checkSomeStaticMethod(Class callerClass, int arg) { + checkSomeStaticMethodIntCallCount++; + assertSame(TestMethodUtils.class, callerClass); + assertEquals(123, arg); + throwIfActive(); + } + + @Override + public void checkSomeStaticMethod(Class callerClass, int arg, String anotherArg) { + checkSomeStaticMethodIntStringCallCount++; + assertSame(TestMethodUtils.class, callerClass); + assertEquals(123, arg); + assertEquals("abc", anotherArg); + throwIfActive(); + } + + @Override + public void checkSomeInstanceMethod(Class callerClass, Testable that, int arg, String anotherArg) { + checkSomeInstanceMethodCallCount++; + assertSame(SyntheticInstrumenterTests.class, callerClass); + assertThat( + that.getClass().getName(), + startsWith("org.elasticsearch.entitlement.instrumentation.impl.SyntheticInstrumenterTests$TestClassToInstrument") + ); + assertEquals(123, arg); + assertEquals("def", anotherArg); + throwIfActive(); + } + + @Override + public void checkCtor(Class callerClass) { + checkCtorCallCount++; + assertSame(SyntheticInstrumenterTests.class, callerClass); + throwIfActive(); + } + + @Override + public void checkCtor(Class callerClass, int arg) { + checkCtorIntCallCount++; + assertSame(SyntheticInstrumenterTests.class, callerClass); + assertEquals(123, arg); + throwIfActive(); + } + } + + public void testClassIsInstrumented() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + CheckMethod checkMethod = getCheckMethod(MockEntitlementChecker.class, 
"checkSomeStaticMethod", Class.class, int.class); + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class)), + checkMethod + ); + + var instrumenter = createInstrumenter(checkMethods); + + byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); + + if (logger.isTraceEnabled()) { + logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); + } + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW", + newBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = false; + + // Before checking is active, nothing should throw + callStaticMethod(newClass, "someStaticMethod", 123); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + + // After checking is activated, everything should throw + assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123)); + } + + public void testClassIsNotInstrumentedTwice() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + CheckMethod checkMethod = getCheckMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class); + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class)), + checkMethod + ); + + var instrumenter = createInstrumenter(checkMethods); + + InstrumenterImpl.ClassFileInfo initial = getClassFileInfo(classToInstrument); + var internalClassName = Type.getInternalName(classToInstrument); + + byte[] instrumentedBytecode = instrumenter.instrumentClass(internalClassName, initial.bytecodes()); + byte[] instrumentedTwiceBytecode = instrumenter.instrumentClass(internalClassName, instrumentedBytecode); + + logger.trace(() -> Strings.format("Bytecode after 1st instrumentation:\n%s", bytecode2text(instrumentedBytecode))); + logger.trace(() -> Strings.format("Bytecode after 2nd instrumentation:\n%s", bytecode2text(instrumentedTwiceBytecode))); + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW_NEW", + instrumentedTwiceBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount = 0; + + assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123)); + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount); + } + + public void testClassAllMethodsAreInstrumentedFirstPass() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + CheckMethod checkMethod = getCheckMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class); + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class)), + checkMethod, + methodKeyForTarget(classToInstrument.getMethod("anotherStaticMethod", int.class)), + checkMethod + ); + + var instrumenter = createInstrumenter(checkMethods); + + InstrumenterImpl.ClassFileInfo initial = getClassFileInfo(classToInstrument); + var internalClassName = Type.getInternalName(classToInstrument); + + byte[] instrumentedBytecode = instrumenter.instrumentClass(internalClassName, initial.bytecodes()); + byte[] instrumentedTwiceBytecode = instrumenter.instrumentClass(internalClassName, instrumentedBytecode); + + logger.trace(() -> Strings.format("Bytecode after 1st instrumentation:\n%s", 
bytecode2text(instrumentedBytecode))); + logger.trace(() -> Strings.format("Bytecode after 2nd instrumentation:\n%s", bytecode2text(instrumentedTwiceBytecode))); + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW_NEW", + instrumentedTwiceBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount = 0; + + assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123)); + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount); + + assertThrows(TestException.class, () -> callStaticMethod(newClass, "anotherStaticMethod", 123)); + assertEquals(2, TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount); + } + + public void testInstrumenterWorksWithOverloads() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class)), + getCheckMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class), + methodKeyForTarget(classToInstrument.getMethod("someStaticMethod", int.class, String.class)), + getCheckMethod(MockEntitlementChecker.class, "checkSomeStaticMethod", Class.class, int.class, String.class) + ); + + var instrumenter = createInstrumenter(checkMethods); + + byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); + + if (logger.isTraceEnabled()) { + logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); + } + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW", + newBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount = 0; + TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntStringCallCount = 0; + + // After checking is activated, everything should throw + assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123)); + assertThrows(TestException.class, () -> callStaticMethod(newClass, "someStaticMethod", 123, "abc")); + + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntCallCount); + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkSomeStaticMethodIntStringCallCount); + } + + public void testInstrumenterWorksWithInstanceMethodsAndOverloads() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + Map checkMethods = Map.of( + methodKeyForTarget(classToInstrument.getMethod("someMethod", int.class, String.class)), + getCheckMethod(MockEntitlementChecker.class, "checkSomeInstanceMethod", Class.class, Testable.class, int.class, String.class) + ); + + var instrumenter = createInstrumenter(checkMethods); + + byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); + + if (logger.isTraceEnabled()) { + logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); + } + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW", + newBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + TestEntitlementCheckerHolder.checkerInstance.checkSomeInstanceMethodCallCount = 0; + + Testable 
testTargetClass = (Testable) (newClass.getConstructor().newInstance()); + + // This overload is not instrumented, so it will not throw + testTargetClass.someMethod(123); + assertThrows(TestException.class, () -> testTargetClass.someMethod(123, "def")); + + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkSomeInstanceMethodCallCount); + } + + public void testInstrumenterWorksWithConstructors() throws Exception { + var classToInstrument = TestClassToInstrument.class; + + Map checkMethods = Map.of( + new MethodKey(classToInstrument.getName().replace('.', '/'), "", List.of()), + getCheckMethod(MockEntitlementChecker.class, "checkCtor", Class.class), + new MethodKey(classToInstrument.getName().replace('.', '/'), "", List.of("I")), + getCheckMethod(MockEntitlementChecker.class, "checkCtor", Class.class, int.class) + ); + + var instrumenter = createInstrumenter(checkMethods); + + byte[] newBytecode = instrumenter.instrumentClassFile(classToInstrument).bytecodes(); + + if (logger.isTraceEnabled()) { + logger.trace("Bytecode after instrumentation:\n{}", bytecode2text(newBytecode)); + } + + Class newClass = new TestLoader(Testable.class.getClassLoader()).defineClassFromBytes( + classToInstrument.getName() + "_NEW", + newBytecode + ); + + TestEntitlementCheckerHolder.checkerInstance.isActive = true; + + var ex = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor().newInstance()); + assertThat(ex.getCause(), instanceOf(TestException.class)); + var ex2 = assertThrows(InvocationTargetException.class, () -> newClass.getConstructor(int.class).newInstance(123)); + assertThat(ex2.getCause(), instanceOf(TestException.class)); + + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkCtorCallCount); + assertEquals(1, TestEntitlementCheckerHolder.checkerInstance.checkCtorIntCallCount); + } + + /** This test doesn't replace classToInstrument in-place but instead loads a separate + * class with the same class name plus a "_NEW" suffix (classToInstrument.class.getName() + "_NEW") + * that contains the instrumentation. Because of this, we need to configure the Transformer to use a + * MethodKey and instrumentationMethod with slightly different signatures (using the common interface + * Testable) which is not what would happen when it's run by the agent. + */ + private InstrumenterImpl createInstrumenter(Map checkMethods) { + String checkerClass = Type.getInternalName(SyntheticInstrumenterTests.MockEntitlementChecker.class); + String handleClass = Type.getInternalName(SyntheticInstrumenterTests.TestEntitlementCheckerHolder.class); + String getCheckerClassMethodDescriptor = Type.getMethodDescriptor(Type.getObjectType(checkerClass)); + + return new InstrumenterImpl(handleClass, getCheckerClassMethodDescriptor, "_NEW", checkMethods); + } +} diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestException.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestException.java new file mode 100644 index 0000000000000..5e308e5bd4a98 --- /dev/null +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestException.java @@ -0,0 +1,12 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.entitlement.instrumentation.impl; + +final class TestException extends RuntimeException {} diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestLoader.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestLoader.java new file mode 100644 index 0000000000000..9eb8e9328ecba --- /dev/null +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestLoader.java @@ -0,0 +1,20 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.entitlement.instrumentation.impl; + +class TestLoader extends ClassLoader { + TestLoader(ClassLoader parent) { + super(parent); + } + + public Class defineClassFromBytes(String name, byte[] bytes) { + return defineClass(name, bytes, 0, bytes.length); + } +} diff --git a/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestMethodUtils.java b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestMethodUtils.java new file mode 100644 index 0000000000000..de7822fea926e --- /dev/null +++ b/libs/entitlement/asm-provider/src/test/java/org/elasticsearch/entitlement/instrumentation/impl/TestMethodUtils.java @@ -0,0 +1,81 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */
+
+package org.elasticsearch.entitlement.instrumentation.impl;
+
+import org.elasticsearch.entitlement.instrumentation.CheckMethod;
+import org.elasticsearch.entitlement.instrumentation.MethodKey;
+import org.objectweb.asm.Type;
+
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+class TestMethodUtils {
+
+    /**
+     * @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
+     */
+    static MethodKey methodKeyForTarget(Method targetMethod) {
+        Type actualType = Type.getMethodType(Type.getMethodDescriptor(targetMethod));
+        return new MethodKey(
+            Type.getInternalName(targetMethod.getDeclaringClass()),
+            targetMethod.getName(),
+            Stream.of(actualType.getArgumentTypes()).map(Type::getInternalName).toList()
+        );
+    }
+
+    static MethodKey methodKeyForConstructor(Class<?> classToInstrument, List<String> params) {
+        return new MethodKey(classToInstrument.getName().replace('.', '/'), "<init>", params);
+    }
+
+    static CheckMethod getCheckMethod(Class<?> clazz, String methodName, Class<?>... parameterTypes) throws NoSuchMethodException {
+        var method = clazz.getMethod(methodName, parameterTypes);
+        return new CheckMethod(
+            Type.getInternalName(clazz),
+            method.getName(),
+            Arrays.stream(Type.getArgumentTypes(method)).map(Type::getDescriptor).toList()
+        );
+    }
+
+    /**
+     * Calling a static method of a dynamically loaded class is significantly more cumbersome
+     * than calling a virtual method.
+     */
+    static void callStaticMethod(Class<?> c, String methodName, int arg) throws NoSuchMethodException, IllegalAccessException {
+        try {
+            c.getMethod(methodName, int.class).invoke(null, arg);
+        } catch (InvocationTargetException e) {
+            Throwable cause = e.getCause();
+            if (cause instanceof TestException n) {
+                // Sometimes we're expecting this one!
+                throw n;
+            } else {
+                throw new AssertionError(cause);
+            }
+        }
+    }
+
+    static void callStaticMethod(Class<?> c, String methodName, int arg1, String arg2) throws NoSuchMethodException,
+        IllegalAccessException {
+        try {
+            c.getMethod(methodName, int.class, String.class).invoke(null, arg1, arg2);
+        } catch (InvocationTargetException e) {
+            Throwable cause = e.getCause();
+            if (cause instanceof TestException n) {
+                // Sometimes we're expecting this one!
+                throw n;
+            } else {
+                throw new AssertionError(cause);
+            }
+        }
+    }
+}
diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java
index 1f87e067e04f1..0ffab5f93969f 100644
--- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java
+++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/initialization/EntitlementInitialization.java
@@ -13,7 +13,7 @@
 import org.elasticsearch.core.internal.provider.ProviderLocator;
 import org.elasticsearch.entitlement.bootstrap.EntitlementBootstrap;
 import org.elasticsearch.entitlement.bridge.EntitlementChecker;
-import org.elasticsearch.entitlement.instrumentation.CheckerMethod;
+import org.elasticsearch.entitlement.instrumentation.CheckMethod;
 import org.elasticsearch.entitlement.instrumentation.InstrumentationService;
 import org.elasticsearch.entitlement.instrumentation.MethodKey;
 import org.elasticsearch.entitlement.instrumentation.Transformer;
@@ -63,13 +63,13 @@ public static EntitlementChecker checker() {
     public static void initialize(Instrumentation inst) throws Exception {
         manager = initChecker();

-        Map<MethodKey, CheckerMethod> methodMap = INSTRUMENTER_FACTORY.lookupMethodsToInstrument(
+        Map<MethodKey, CheckMethod> checkMethods = INSTRUMENTER_FACTORY.lookupMethodsToInstrument(
             "org.elasticsearch.entitlement.bridge.EntitlementChecker"
         );

-        var classesToTransform = methodMap.keySet().stream().map(MethodKey::className).collect(Collectors.toSet());
+        var classesToTransform = checkMethods.keySet().stream().map(MethodKey::className).collect(Collectors.toSet());

-        inst.addTransformer(new Transformer(INSTRUMENTER_FACTORY.newInstrumenter("", methodMap), classesToTransform), true);
+        inst.addTransformer(new Transformer(INSTRUMENTER_FACTORY.newInstrumenter(checkMethods), classesToTransform), true);
         // TODO: should we limit this array somehow?
         var classesToRetransform = classesToTransform.stream().map(EntitlementInitialization::internalNameToClass).toArray(Class[]::new);
         inst.retransformClasses(classesToRetransform);
diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckerMethod.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckMethod.java
similarity index 82%
rename from libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckerMethod.java
rename to libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckMethod.java
index c20a75a61a608..384d455c7a34b 100644
--- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckerMethod.java
+++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/CheckMethod.java
@@ -12,7 +12,7 @@
 import java.util.List;

 /**
- * A structure to use as a representation of the checker method the instrumentation will inject.
+ * A structure to use as a representation of the checkXxx method the instrumentation will inject.
  *
  * @param className the "internal name" of the class: includes the package info, but with periods replaced by slashes
  * @param methodName the checker method name
  * @param parameterDescriptors a list of (type descriptors)
  * for methodName parameters.
  */
-public record CheckerMethod(String className, String methodName, List<String> parameterDescriptors) {}
+public record CheckMethod(String className, String methodName, List<String> parameterDescriptors) {}
diff --git a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/InstrumentationService.java b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/InstrumentationService.java
index 12316bfb043c5..d0331d756d2b2 100644
--- a/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/InstrumentationService.java
+++ b/libs/entitlement/src/main/java/org/elasticsearch/entitlement/instrumentation/InstrumentationService.java
@@ -10,19 +10,13 @@
 package org.elasticsearch.entitlement.instrumentation;

 import java.io.IOException;
-import java.lang.reflect.Method;
 import java.util.Map;

 /**
  * The SPI service entry point for instrumentation.
  */
 public interface InstrumentationService {
-    Instrumenter newInstrumenter(String classNameSuffix, Map<MethodKey, CheckerMethod> instrumentationMethods);
+    Instrumenter newInstrumenter(Map<MethodKey, CheckMethod> checkMethods);

-    /**
-     * @return a {@link MethodKey} suitable for looking up the given {@code targetMethod} in the entitlements trampoline
-     */
-    MethodKey methodKeyForTarget(Method targetMethod);
-
-    Map<MethodKey, CheckerMethod> lookupMethodsToInstrument(String entitlementCheckerClassName) throws ClassNotFoundException, IOException;
+    Map<MethodKey, CheckMethod> lookupMethodsToInstrument(String entitlementCheckerClassName) throws ClassNotFoundException, IOException;
 }

From deb838c027ecd83bc34fd487566571c61bfcd8be Mon Sep 17 00:00:00 2001
From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com>
Date: Sun, 1 Dec 2024 01:25:39 +1100
Subject: [PATCH 089/139] Mute
 org.elasticsearch.xpack.esql.action.CrossClustersCancellationIT testCancel
 #117568

---
 muted-tests.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/muted-tests.yml b/muted-tests.yml
index b82e95ea26890..d5e2dbd84cb4a 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -234,6 +234,9 @@ tests:
 - class: org.elasticsearch.search.ccs.CrossClusterIT
   method: testCancel
   issue: https://github.com/elastic/elasticsearch/issues/108061
+- class: org.elasticsearch.xpack.esql.action.CrossClustersCancellationIT
+  method: testCancel
+  issue: https://github.com/elastic/elasticsearch/issues/117568

 # Examples:
 #

From 31cb0f658a8b3239bb38dd190e1efeb79062b2f9 Mon Sep 17 00:00:00 2001
From: Rene Groeschke
Date: Sat, 30 Nov 2024 23:32:18 +0100
Subject: [PATCH 090/139] [Build] Replace usage of deprecated develocity
 system prop (#117793)

see https://buildkite.com/elastic/elasticsearch-intake/builds/13680#019374ed-096e-4965-8651-1b3fd26dd9c2/79-392

---
 .buildkite/pipelines/intake.template.yml    | 16 ++++++++--------
 .buildkite/pipelines/intake.yml             | 16 ++++++++--------
 .../pipelines/lucene-snapshot/run-tests.yml | 16 ++++++++--------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/.buildkite/pipelines/intake.template.yml b/.buildkite/pipelines/intake.template.yml
index 57412bbe908bc..9d7cf3c7e0083 100644
--- a/.buildkite/pipelines/intake.template.yml
+++ b/.buildkite/pipelines/intake.template.yml
@@ -1,6 +1,6 @@
 steps:
   - label: sanity-check
-    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files precommit
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints precommit
     timeout_in_minutes: 300
     agents:
       provider: gcp
@@ 
-9,7 +9,7 @@ steps: buildDirectory: /dev/shm/bk - wait - label: part1 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart1 timeout_in_minutes: 300 agents: provider: gcp @@ -17,7 +17,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part2 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart2 timeout_in_minutes: 300 agents: provider: gcp @@ -25,7 +25,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part3 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart3 timeout_in_minutes: 300 agents: provider: gcp @@ -33,7 +33,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part4 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart4 timeout_in_minutes: 300 agents: provider: gcp @@ -41,7 +41,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part5 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart5 timeout_in_minutes: 300 agents: provider: gcp @@ -51,7 +51,7 @@ steps: - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files v$$BWC_VERSION#bwcTest + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints v$$BWC_VERSION#bwcTest timeout_in_minutes: 300 matrix: setup: @@ -64,7 +64,7 @@ steps: env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkRestCompat + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkRestCompat timeout_in_minutes: 300 agents: provider: gcp diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml index 5be5990cfb203..6c8b8edfcbac1 100644 --- a/.buildkite/pipelines/intake.yml +++ b/.buildkite/pipelines/intake.yml 
@@ -1,7 +1,7 @@ # This file is auto-generated. See .buildkite/pipelines/intake.template.yml steps: - label: sanity-check - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files precommit + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints precommit timeout_in_minutes: 300 agents: provider: gcp @@ -10,7 +10,7 @@ steps: buildDirectory: /dev/shm/bk - wait - label: part1 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart1 timeout_in_minutes: 300 agents: provider: gcp @@ -18,7 +18,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part2 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart2 timeout_in_minutes: 300 agents: provider: gcp @@ -26,7 +26,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part3 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart3 timeout_in_minutes: 300 agents: provider: gcp @@ -34,7 +34,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part4 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart4 timeout_in_minutes: 300 agents: provider: gcp @@ -42,7 +42,7 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk - label: part5 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart5 timeout_in_minutes: 300 agents: provider: gcp @@ -52,7 +52,7 @@ steps: - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files v$$BWC_VERSION#bwcTest + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints v$$BWC_VERSION#bwcTest timeout_in_minutes: 300 matrix: setup: @@ -65,7 +65,7 @@ steps: env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true 
-Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkRestCompat + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkRestCompat timeout_in_minutes: 300 agents: provider: gcp diff --git a/.buildkite/pipelines/lucene-snapshot/run-tests.yml b/.buildkite/pipelines/lucene-snapshot/run-tests.yml index f7293e051467c..ddc63419a2e2f 100644 --- a/.buildkite/pipelines/lucene-snapshot/run-tests.yml +++ b/.buildkite/pipelines/lucene-snapshot/run-tests.yml @@ -1,6 +1,6 @@ steps: - label: sanity-check - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files precommit + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints precommit timeout_in_minutes: 300 agents: provider: gcp @@ -9,7 +9,7 @@ steps: buildDirectory: /dev/shm/bk - wait: null - label: part1 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart1 timeout_in_minutes: 300 agents: provider: gcp @@ -17,7 +17,7 @@ steps: machineType: custom-32-98304 buildDirectory: /dev/shm/bk - label: part2 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart2 timeout_in_minutes: 300 agents: provider: gcp @@ -25,7 +25,7 @@ steps: machineType: custom-32-98304 buildDirectory: /dev/shm/bk - label: part3 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart3 timeout_in_minutes: 300 agents: provider: gcp @@ -33,7 +33,7 @@ steps: machineType: custom-32-98304 buildDirectory: /dev/shm/bk - label: part4 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart4 timeout_in_minutes: 300 agents: provider: gcp @@ -41,7 +41,7 @@ steps: machineType: custom-32-98304 buildDirectory: /dev/shm/bk - label: part5 - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkPart5 timeout_in_minutes: 300 agents: provider: gcp @@ -51,7 +51,7 @@ steps: - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" - command: .ci/scripts/run-gradle.sh 
-Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files v$$BWC_VERSION#bwcTest
+      command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints v$$BWC_VERSION#bwcTest
       timeout_in_minutes: 300
       matrix:
         setup:
@@ -66,7 +66,7 @@ steps:
       env:
         BWC_VERSION: "{{matrix.BWC_VERSION}}"
   - label: rest-compat
-    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkRestCompat
+    command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-file-fingerprints checkRestCompat
     timeout_in_minutes: 300
     agents:
       provider: gcp

From bda415b7fdf4a73091a198339e5f1660c1378029 Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Sat, 30 Nov 2024 20:09:08 -0800
Subject: [PATCH 091/139] Fix CCS cancellation test (#117790)

We should have checked that all drivers were canceled, not cancellable
(which is always true), before unblocking the compute tasks.

Closes #117568

---
 muted-tests.yml                              |  3 ---
 .../action/CrossClustersCancellationIT.java  | 29 ++++++++++---------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/muted-tests.yml b/muted-tests.yml
index d5e2dbd84cb4a..b82e95ea26890 100644
--- a/muted-tests.yml
+++ b/muted-tests.yml
@@ -234,9 +234,6 @@ tests:
 - class: org.elasticsearch.search.ccs.CrossClusterIT
   method: testCancel
   issue: https://github.com/elastic/elasticsearch/issues/108061
-- class: org.elasticsearch.xpack.esql.action.CrossClustersCancellationIT
-  method: testCancel
-  issue: https://github.com/elastic/elasticsearch/issues/117568

 # Examples:
 #

diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java
index c426e0f528eab..5ffc92636b272 100644
--- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java
+++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java
@@ -179,19 +179,22 @@ public void testCancel() throws Exception {
         });
         var cancelRequest = new CancelTasksRequest().setTargetTaskId(rootTasks.get(0).taskId()).setReason("proxy timeout");
         client().execute(TransportCancelTasksAction.TYPE, cancelRequest);
-        assertBusy(() -> {
-            List<TaskInfo> drivers = client(REMOTE_CLUSTER).admin()
-                .cluster()
-                .prepareListTasks()
-                .setActions(DriverTaskRunner.ACTION_NAME)
-                .get()
-                .getTasks();
-            assertThat(drivers.size(), greaterThanOrEqualTo(1));
-            for (TaskInfo driver : drivers) {
-                assertTrue(driver.cancellable());
-            }
-        });
-        PauseFieldPlugin.allowEmitting.countDown();
+        try {
+            assertBusy(() -> {
+                List<TaskInfo> drivers = client(REMOTE_CLUSTER).admin()
+                    .cluster()
+                    .prepareListTasks()
+                    .setActions(DriverTaskRunner.ACTION_NAME)
+                    .get()
+                    .getTasks();
+                assertThat(drivers.size(), greaterThanOrEqualTo(1));
+                for (TaskInfo driver : drivers) {
+                    assertTrue(driver.cancelled());
+                }
+            });
+        } finally {
+            PauseFieldPlugin.allowEmitting.countDown();
+        }
         Exception error = expectThrows(Exception.class, requestFuture::actionGet);
         assertThat(error.getMessage(), containsString("proxy timeout"));
     }
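Note on the fix above: TaskInfo.cancellable() reports only whether a task type supports cancellation, which is true for every ES|QL driver task, so the old assertion could never fail. TaskInfo.cancelled() reflects whether a cancel request has actually been registered against the running task, which is the state the test has to wait for before unblocking the paused compute tasks. A minimal illustration of the distinction (names follow the diff above; the surrounding test scaffolding is assumed):

    for (TaskInfo driver : drivers) {
        // Static property of the task type: always true for driver tasks, so it proves nothing here.
        assertTrue(driver.cancellable());
        // Runtime state: becomes true only once the cancel request has reached this task.
        assertTrue(driver.cancelled());
    }

The try/finally also matters: allowEmitting.countDown() must run even when assertBusy times out, otherwise the paused drivers never complete and the test hangs instead of failing cleanly.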
Sun, 1 Dec 2024 10:10:56 +0100
Subject: [PATCH 092/139] Lazy compute description in
 ReplicationRequest.createTask (#117783)

These can at times be quite long strings, no need to materialize unless
requested. This is showing up as needlessly allocating O(GB) of heap in some
benchmarks during indexing.
---
 .../action/support/replication/ReplicationRequest.java | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java
index 530f22f4bed53..debc64914a171 100644
--- a/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java
+++ b/server/src/main/java/org/elasticsearch/action/support/replication/ReplicationRequest.java
@@ -210,7 +210,12 @@ public void writeThin(StreamOutput out) throws IOException {
 
     @Override
     public Task createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
-        return new ReplicationTask(id, type, action, getDescription(), parentTaskId, headers);
+        return new ReplicationTask(id, type, action, "", parentTaskId, headers) {
+            @Override
+            public String getDescription() {
+                return ReplicationRequest.this.getDescription();
+            }
+        };
     }
 
     @Override

From 3e7159d9e97e2d1645e5d5bc56fb98c653186b9f Mon Sep 17 00:00:00 2001
From: Rene Groeschke
Date: Sun, 1 Dec 2024 21:33:27 +0100
Subject: [PATCH 093/139] [Build] Fix cacheability of discovery-azure-classic
 (#117806)

Also update cache validation scripts
---
 .buildkite/scripts/gradle-build-cache-validation.sh | 7 +++----
 plugins/discovery-azure-classic/build.gradle        | 3 ++-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.buildkite/scripts/gradle-build-cache-validation.sh b/.buildkite/scripts/gradle-build-cache-validation.sh
index 75dc9b264b8bc..3c5021e436e4a 100755
--- a/.buildkite/scripts/gradle-build-cache-validation.sh
+++ b/.buildkite/scripts/gradle-build-cache-validation.sh
@@ -2,18 +2,17 @@
 set -euo pipefail
 
-VALIDATION_SCRIPTS_VERSION=2.5.1
+VALIDATION_SCRIPTS_VERSION=2.7.1
 GRADLE_ENTERPRISE_ACCESS_KEY=$(vault kv get -field=value secret/ci/elastic-elasticsearch/gradle-enterprise-api-key)
 export GRADLE_ENTERPRISE_ACCESS_KEY
-
-curl -s -L -O https://github.com/gradle/gradle-enterprise-build-validation-scripts/releases/download/v$VALIDATION_SCRIPTS_VERSION/gradle-enterprise-gradle-build-validation-$VALIDATION_SCRIPTS_VERSION.zip && unzip -q -o gradle-enterprise-gradle-build-validation-$VALIDATION_SCRIPTS_VERSION.zip
+curl -s -L -O https://github.com/gradle/gradle-enterprise-build-validation-scripts/releases/download/v$VALIDATION_SCRIPTS_VERSION/develocity-gradle-build-validation-$VALIDATION_SCRIPTS_VERSION.zip && unzip -q -o develocity-gradle-build-validation-$VALIDATION_SCRIPTS_VERSION.zip
 
 # Create a temporary file
 tmpOutputFile=$(mktemp)
 trap "rm $tmpOutputFile" EXIT
 
 set +e
-gradle-enterprise-gradle-build-validation/03-validate-local-build-caching-different-locations.sh -r https://github.com/elastic/elasticsearch.git -b $BUILDKITE_BRANCH --gradle-enterprise-server https://gradle-enterprise.elastic.co -t precommit --fail-if-not-fully-cacheable | tee $tmpOutputFile
+develocity-gradle-build-validation/03-validate-local-build-caching-different-locations.sh -r https://github.com/elastic/elasticsearch.git -b $BUILDKITE_BRANCH --develocity-server https://gradle-enterprise.elastic.co -t precommit --fail-if-not-fully-cacheable | tee $tmpOutputFile
 
 # Capture the return value
retval=$? set -e diff --git a/plugins/discovery-azure-classic/build.gradle b/plugins/discovery-azure-classic/build.gradle index 3ec2ec531ae92..9549236775bfe 100644 --- a/plugins/discovery-azure-classic/build.gradle +++ b/plugins/discovery-azure-classic/build.gradle @@ -65,9 +65,10 @@ TaskProvider createKey = tasks.register("createKey", LoggedExec) { outputs.file(keystore).withPropertyName('keystoreFile') executable = "${buildParams.runtimeJavaHome.get()}/bin/keytool" getStandardInput().set('FirstName LastName\nUnit\nOrganization\nCity\nState\nNL\nyes\n\n') + String keystorePath = projectDir.toPath().relativize(keystore.toPath()).toString() args '-genkey', '-alias', 'test-node', - '-keystore', keystore, + '-keystore', keystorePath, '-keyalg', 'RSA', '-keysize', '2048', '-validity', '712', From 3cfb649661438f002816b1c9bbd17d78c14827a6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Mon, 2 Dec 2024 17:29:57 +1100 Subject: [PATCH 094/139] Mute org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT test {p0=search.highlight/50_synthetic_source/text multi unified from vectors} #117815 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index b82e95ea26890..8d64e1557ca19 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -234,6 +234,9 @@ tests: - class: org.elasticsearch.search.ccs.CrossClusterIT method: testCancel issue: https://github.com/elastic/elasticsearch/issues/108061 +- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT + method: test {p0=search.highlight/50_synthetic_source/text multi unified from vectors} + issue: https://github.com/elastic/elasticsearch/issues/117815 # Examples: # From 2b7adcd89dfb31411f68dda211f689d42b979af8 Mon Sep 17 00:00:00 2001 From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:58:25 +0200 Subject: [PATCH 095/139] Add debug logging for doc parsing exceptions (#117768) --- .../index/mapper/DocumentMapper.java | 23 ++++++++++++++++--- .../index/mapper/MapperService.java | 9 +++++++- .../index/mapper/DocumentMapperTests.java | 3 ++- .../index/mapper/DocumentParserTests.java | 3 ++- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 10484a1c26098..1c9321737ab5f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -9,7 +9,9 @@ package org.elasticsearch.index.mapper; +import org.apache.logging.log4j.Logger; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSortConfig; @@ -25,6 +27,7 @@ public class DocumentMapper { private final DocumentParser documentParser; private final MapperMetrics mapperMetrics; private final IndexVersion indexVersion; + private final Logger logger; static final NodeFeature INDEX_SORTING_ON_NESTED = new NodeFeature("mapper.index_sorting_on_nested"); @@ -44,7 +47,8 @@ public static DocumentMapper createEmpty(MapperService mapperService) { mapping, mapping.toCompressedXContent(), IndexVersion.current(), - mapperService.getMapperMetrics() + mapperService.getMapperMetrics(), + mapperService.index().getName() 
); } @@ -53,7 +57,8 @@ public static DocumentMapper createEmpty(MapperService mapperService) { Mapping mapping, CompressedXContent source, IndexVersion version, - MapperMetrics mapperMetrics + MapperMetrics mapperMetrics, + String indexName ) { this.documentParser = documentParser; this.type = mapping.getRoot().fullPath(); @@ -61,11 +66,18 @@ public static DocumentMapper createEmpty(MapperService mapperService) { this.mappingSource = source; this.mapperMetrics = mapperMetrics; this.indexVersion = version; + this.logger = Loggers.getLogger(getClass(), indexName); assert mapping.toCompressedXContent().equals(source) || isSyntheticSourceMalformed(source, version) : "provided source [" + source + "] differs from mapping [" + mapping.toCompressedXContent() + "]"; } + private void maybeLogDebug(Exception ex) { + if (logger.isDebugEnabled()) { + logger.debug("Error while parsing document: " + ex.getMessage(), ex); + } + } + /** * Indexes built at v.8.7 were missing an explicit entry for synthetic_source. * This got restored in v.8.10 to avoid confusion. The change is only restricted to mapping printout, it has no @@ -110,7 +122,12 @@ public MappingLookup mappers() { } public ParsedDocument parse(SourceToParse source) throws DocumentParsingException { - return documentParser.parseDocument(source, mappingLookup); + try { + return documentParser.parseDocument(source, mappingLookup); + } catch (Exception e) { + maybeLogDebug(e); + throw e; + } } public void validate(IndexSettings settings, boolean checkLimits) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 7f952153c6453..1673b1719d8bf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -585,7 +585,14 @@ private DocumentMapper doMerge(String type, MergeReason reason, Map Date: Mon, 2 Dec 2024 08:11:09 +0000 Subject: [PATCH 096/139] Revert "(+Doc) Link split-brain wiki (#108914)" This reverts commit 12aab083301958ddfbeec9ee09d333da8278fd2c. --- docs/reference/modules/discovery/voting.asciidoc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/reference/modules/discovery/voting.asciidoc b/docs/reference/modules/discovery/voting.asciidoc index 9e483d5883017..04cae9d02ab66 100644 --- a/docs/reference/modules/discovery/voting.asciidoc +++ b/docs/reference/modules/discovery/voting.asciidoc @@ -63,8 +63,7 @@ departed nodes from the voting configuration manually. Use the of resilience. No matter how it is configured, Elasticsearch will not suffer from a -"{wikipedia}/Split-brain_(computing)[split-brain]" inconsistency. -The `cluster.auto_shrink_voting_configuration` +"split-brain" inconsistency. The `cluster.auto_shrink_voting_configuration` setting affects only its availability in the event of the failure of some of its nodes and the administrative tasks that must be performed as nodes join and leave the cluster. From 9dcd9751f481952f5f08332b15aed31179af324d Mon Sep 17 00:00:00 2001 From: David Turner Date: Mon, 2 Dec 2024 09:01:48 +0000 Subject: [PATCH 097/139] Add IMDSv2 support to `repository-s3` (#117748) The version of the AWS Java SDK we use already magically switches to IMDSv2 if available, but today we cannot claim to support IMDSv2 in Elasticsearch since we have no tests demonstrating that the magic really works for us. 
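
For context, the handshake being tested: IMDSv2 replaces plain metadata GETs
with a two-step exchange, where a PUT to /latest/api/token returns a session
token and every subsequent metadata GET must present that token in the
X-aws-ec2-metadata-token header; the fixture added below answers 401 when the
token is missing and 403 when it is not one it issued. A minimal client-side
sketch of that flow using only java.net.http follows; the class name and the
hard-coded endpoint are illustrative assumptions, not code from this change:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class ImdsV2HandshakeSketch {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        // Standard IMDS address; only reachable from inside EC2 (or a local test fixture).
        String imds = "http://169.254.169.254";

        // Step 1: obtain a session token. Real IMDSv2 requires the TTL header on this call.
        HttpRequest tokenRequest = HttpRequest.newBuilder(URI.create(imds + "/latest/api/token"))
            .header("X-aws-ec2-metadata-token-ttl-seconds", "21600")
            .PUT(HttpRequest.BodyPublishers.noBody())
            .build();
        String token = client.send(tokenRequest, HttpResponse.BodyHandlers.ofString()).body();

        // Step 2: present the token on every metadata request, e.g. to list instance roles.
        HttpRequest credentialsRequest = HttpRequest.newBuilder(
            URI.create(imds + "/latest/meta-data/iam/security-credentials/")
        ).header("X-aws-ec2-metadata-token", token).GET().build();
        System.out.println(client.send(credentialsRequest, HttpResponse.BodyHandlers.ofString()).body());
    }
}
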
In particular, this sort of thing often risks falling foul of some restrictions imposed by the security manager (if not now then maybe in some future release). This commit adds proper support for IMDSv2 by enhancing the test suite to add the missing coverage to avoid any risk of breaking this magical SDK behaviour in future. Closes #105135 Closes ES-9984 --- docs/changelog/117748.yaml | 6 ++ .../snapshot-restore/repository-s3.asciidoc | 42 ++++++----- .../s3/RepositoryS3EcsCredentialsRestIT.java | 2 + .../RepositoryS3ImdsV1CredentialsRestIT.java | 2 + .../RepositoryS3ImdsV2CredentialsRestIT.java | 75 +++++++++++++++++++ .../fixture/aws/imds/Ec2ImdsHttpFixture.java | 10 ++- .../fixture/aws/imds/Ec2ImdsHttpHandler.java | 35 ++++++++- .../java/fixture/aws/imds/Ec2ImdsVersion.java | 26 +++++++ .../aws/imds/Ec2ImdsHttpHandlerTests.java | 67 +++++++++++++++-- 9 files changed, 236 insertions(+), 29 deletions(-) create mode 100644 docs/changelog/117748.yaml create mode 100644 modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV2CredentialsRestIT.java create mode 100644 test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsVersion.java diff --git a/docs/changelog/117748.yaml b/docs/changelog/117748.yaml new file mode 100644 index 0000000000000..615adbae07ad7 --- /dev/null +++ b/docs/changelog/117748.yaml @@ -0,0 +1,6 @@ +pr: 117748 +summary: Add IMDSv2 support to `repository-s3` +area: Snapshot/Restore +type: enhancement +issues: + - 105135 diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc index 1b08a802a444f..9b71fe9220385 100644 --- a/docs/reference/snapshot-restore/repository-s3.asciidoc +++ b/docs/reference/snapshot-restore/repository-s3.asciidoc @@ -38,7 +38,8 @@ PUT _snapshot/my_s3_repository The client that you use to connect to S3 has a number of settings available. The settings have the form `s3.client.CLIENT_NAME.SETTING_NAME`. By default, `s3` repositories use a client named `default`, but this can be modified using -the <> `client`. For example: +the <> `client`. For example, to +use a client named `my-alternate-client`, register the repository as follows: [source,console] ---- @@ -69,10 +70,19 @@ bin/elasticsearch-keystore add s3.client.default.secret_key bin/elasticsearch-keystore add s3.client.default.session_token ---- -If instead you want to use the instance role or container role to access S3 -then you should leave these settings unset. You can switch from using specific -credentials back to the default of using the instance role or container role by -removing these settings from the keystore as follows: +If you do not configure these settings then {es} will attempt to automatically +obtain credentials from the environment in which it is running: + +* Nodes running on an instance in AWS EC2 will attempt to use the EC2 Instance + Metadata Service (IMDS) to obtain instance role credentials. {es} supports + both IMDS version 1 and IMDS version 2. + +* Nodes running in a container in AWS ECS and AWS EKS will attempt to obtain + container role credentials similarly. 
+ +You can switch from using specific credentials back to the default of using the +instance role or container role by removing these settings from the keystore as +follows: [source,sh] ---- @@ -82,20 +92,14 @@ bin/elasticsearch-keystore remove s3.client.default.secret_key bin/elasticsearch-keystore remove s3.client.default.session_token ---- -*All* client secure settings of this repository type are -{ref}/secure-settings.html#reloadable-secure-settings[reloadable]. -You can define these settings before the node is started, -or call the <> -after the settings are defined to apply them to a running node. - -After you reload the settings, the internal `s3` clients, used to transfer the snapshot -contents, will utilize the latest settings from the keystore. Any existing `s3` -repositories, as well as any newly created ones, will pick up the new values -stored in the keystore. - -NOTE: In-progress snapshot/restore tasks will not be preempted by a *reload* of -the client's secure settings. The task will complete using the client as it was -built when the operation started. +Define the relevant secure settings in each node's keystore before starting the +node. The secure settings described here are all +{ref}/secure-settings.html#reloadable-secure-settings[reloadable] so you may +update the keystore contents on each node while the node is running and then +call the <> to apply the updated settings to the nodes in the cluster. After this API +completes, {es} will use the updated setting values for all future snapshot +operations, but ongoing operations may continue to use older setting values. The following list contains the available client settings. Those that must be stored in the keystore are marked as "secure" and are *reloadable*; the other diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java index 267ba6e6b3a13..a79ae4de7cc66 100644 --- a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3EcsCredentialsRestIT.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.s3; import fixture.aws.imds.Ec2ImdsHttpFixture; +import fixture.aws.imds.Ec2ImdsVersion; import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; @@ -36,6 +37,7 @@ public class RepositoryS3EcsCredentialsRestIT extends AbstractRepositoryS3RestTe private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( + Ec2ImdsVersion.V1, dynamicS3Credentials::addValidCredentials, Set.of("/ecs_credentials_endpoint") ); diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java index de9c9b6ae0695..ead91981b3fa8 100644 --- a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV1CredentialsRestIT.java @@ -10,6 +10,7 @@ package org.elasticsearch.repositories.s3; import fixture.aws.imds.Ec2ImdsHttpFixture; 
+import fixture.aws.imds.Ec2ImdsVersion; import fixture.s3.DynamicS3Credentials; import fixture.s3.S3HttpFixture; @@ -36,6 +37,7 @@ public class RepositoryS3ImdsV1CredentialsRestIT extends AbstractRepositoryS3Res private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( + Ec2ImdsVersion.V1, dynamicS3Credentials::addValidCredentials, Set.of() ); diff --git a/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV2CredentialsRestIT.java b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV2CredentialsRestIT.java new file mode 100644 index 0000000000000..67adb096bd1ba --- /dev/null +++ b/modules/repository-s3/src/javaRestTest/java/org/elasticsearch/repositories/s3/RepositoryS3ImdsV2CredentialsRestIT.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.repositories.s3; + +import fixture.aws.imds.Ec2ImdsHttpFixture; +import fixture.aws.imds.Ec2ImdsVersion; +import fixture.s3.DynamicS3Credentials; +import fixture.s3.S3HttpFixture; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.fixtures.testcontainers.TestContainersThreadFilter; +import org.junit.ClassRule; +import org.junit.rules.RuleChain; +import org.junit.rules.TestRule; + +import java.util.Set; + +@ThreadLeakFilters(filters = { TestContainersThreadFilter.class }) +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) // https://github.com/elastic/elasticsearch/issues/102482 +public class RepositoryS3ImdsV2CredentialsRestIT extends AbstractRepositoryS3RestTestCase { + + private static final String PREFIX = getIdentifierPrefix("RepositoryS3ImdsV2CredentialsRestIT"); + private static final String BUCKET = PREFIX + "bucket"; + private static final String BASE_PATH = PREFIX + "base_path"; + private static final String CLIENT = "imdsv2_credentials_client"; + + private static final DynamicS3Credentials dynamicS3Credentials = new DynamicS3Credentials(); + + private static final Ec2ImdsHttpFixture ec2ImdsHttpFixture = new Ec2ImdsHttpFixture( + Ec2ImdsVersion.V2, + dynamicS3Credentials::addValidCredentials, + Set.of() + ); + + private static final S3HttpFixture s3Fixture = new S3HttpFixture(true, BUCKET, BASE_PATH, dynamicS3Credentials::isAuthorized); + + public static ElasticsearchCluster cluster = ElasticsearchCluster.local() + .module("repository-s3") + .setting("s3.client." 
+ CLIENT + ".endpoint", s3Fixture::getAddress) + .systemProperty("com.amazonaws.sdk.ec2MetadataServiceEndpointOverride", ec2ImdsHttpFixture::getAddress) + .build(); + + @ClassRule + public static TestRule ruleChain = RuleChain.outerRule(ec2ImdsHttpFixture).around(s3Fixture).around(cluster); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + + @Override + protected String getBucketName() { + return BUCKET; + } + + @Override + protected String getBasePath() { + return BASE_PATH; + } + + @Override + protected String getClientName() { + return CLIENT; + } +} diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java index 13d36c6fc4812..c63c65a750d7c 100644 --- a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpFixture.java @@ -24,16 +24,22 @@ public class Ec2ImdsHttpFixture extends ExternalResource { private HttpServer server; + private final Ec2ImdsVersion ec2ImdsVersion; private final BiConsumer newCredentialsConsumer; private final Set alternativeCredentialsEndpoints; - public Ec2ImdsHttpFixture(BiConsumer newCredentialsConsumer, Set alternativeCredentialsEndpoints) { + public Ec2ImdsHttpFixture( + Ec2ImdsVersion ec2ImdsVersion, + BiConsumer newCredentialsConsumer, + Set alternativeCredentialsEndpoints + ) { + this.ec2ImdsVersion = Objects.requireNonNull(ec2ImdsVersion); this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); this.alternativeCredentialsEndpoints = Objects.requireNonNull(alternativeCredentialsEndpoints); } protected HttpHandler createHandler() { - return new Ec2ImdsHttpHandler(newCredentialsConsumer, alternativeCredentialsEndpoints); + return new Ec2ImdsHttpHandler(ec2ImdsVersion, newCredentialsConsumer, alternativeCredentialsEndpoints); } public String getAddress() { diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java index bc87eff592bec..281465b96de05 100644 --- a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsHttpHandler.java @@ -38,10 +38,18 @@ public class Ec2ImdsHttpHandler implements HttpHandler { private static final String IMDS_SECURITY_CREDENTIALS_PATH = "/latest/meta-data/iam/security-credentials/"; + private final Ec2ImdsVersion ec2ImdsVersion; + private final Set validImdsTokens = ConcurrentCollections.newConcurrentSet(); + private final BiConsumer newCredentialsConsumer; private final Set validCredentialsEndpoints = ConcurrentCollections.newConcurrentSet(); - public Ec2ImdsHttpHandler(BiConsumer newCredentialsConsumer, Collection alternativeCredentialsEndpoints) { + public Ec2ImdsHttpHandler( + Ec2ImdsVersion ec2ImdsVersion, + BiConsumer newCredentialsConsumer, + Collection alternativeCredentialsEndpoints + ) { + this.ec2ImdsVersion = Objects.requireNonNull(ec2ImdsVersion); this.newCredentialsConsumer = Objects.requireNonNull(newCredentialsConsumer); this.validCredentialsEndpoints.addAll(alternativeCredentialsEndpoints); } @@ -55,11 +63,32 @@ public void handle(final HttpExchange exchange) throws IOException { final var requestMethod = exchange.getRequestMethod(); if ("PUT".equals(requestMethod) && 
"/latest/api/token".equals(path)) { - // Reject IMDSv2 probe - exchange.sendResponseHeaders(RestStatus.METHOD_NOT_ALLOWED.getStatus(), -1); + switch (ec2ImdsVersion) { + case V1 -> exchange.sendResponseHeaders(RestStatus.METHOD_NOT_ALLOWED.getStatus(), -1); + case V2 -> { + final var token = randomSecretKey(); + validImdsTokens.add(token); + final var responseBody = token.getBytes(StandardCharsets.UTF_8); + exchange.getResponseHeaders().add("Content-Type", "text/plain"); + exchange.sendResponseHeaders(RestStatus.OK.getStatus(), responseBody.length); + exchange.getResponseBody().write(responseBody); + } + } return; } + if (ec2ImdsVersion == Ec2ImdsVersion.V2) { + final var token = exchange.getRequestHeaders().getFirst("X-aws-ec2-metadata-token"); + if (token == null) { + exchange.sendResponseHeaders(RestStatus.UNAUTHORIZED.getStatus(), -1); + return; + } + if (validImdsTokens.contains(token) == false) { + exchange.sendResponseHeaders(RestStatus.FORBIDDEN.getStatus(), -1); + return; + } + } + if ("GET".equals(requestMethod)) { if (path.equals(IMDS_SECURITY_CREDENTIALS_PATH)) { final var profileName = randomIdentifier(); diff --git a/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsVersion.java b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsVersion.java new file mode 100644 index 0000000000000..7ed028c374cc7 --- /dev/null +++ b/test/fixtures/ec2-imds-fixture/src/main/java/fixture/aws/imds/Ec2ImdsVersion.java @@ -0,0 +1,26 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package fixture.aws.imds; + +/** + * Represents the IMDS protocol version simulated by the {@link Ec2ImdsHttpHandler}. + */ +public enum Ec2ImdsVersion { + /** + * Classic V1 behavior: plain {@code GET} requests, no tokens. + */ + V1, + + /** + * Newer V2 behavior: {@code GET} requests must include a {@code X-aws-ec2-metadata-token} header providing a token previously obtained + * by calling {@code PUT /latest/api/token}. 
+ */ + V2 +} diff --git a/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java b/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java index 369b0ef449b2f..bb613395a0fba 100644 --- a/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java +++ b/test/fixtures/ec2-imds-fixture/src/test/java/fixture/aws/imds/Ec2ImdsHttpHandlerTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.core.Nullable; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentType; @@ -29,6 +30,7 @@ import java.net.InetSocketAddress; import java.net.URI; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Set; @@ -36,17 +38,19 @@ public class Ec2ImdsHttpHandlerTests extends ESTestCase { + private static final String SECURITY_CREDENTIALS_URI = "/latest/meta-data/iam/security-credentials/"; + public void testImdsV1() throws IOException { final Map generatedCredentials = new HashMap<>(); - final var handler = new Ec2ImdsHttpHandler(generatedCredentials::put, Set.of()); + final var handler = new Ec2ImdsHttpHandler(Ec2ImdsVersion.V1, generatedCredentials::put, Set.of()); - final var roleResponse = handleRequest(handler, "GET", "/latest/meta-data/iam/security-credentials/"); + final var roleResponse = handleRequest(handler, "GET", SECURITY_CREDENTIALS_URI); assertEquals(RestStatus.OK, roleResponse.status()); final var profileName = roleResponse.body().utf8ToString(); assertTrue(Strings.hasText(profileName)); - final var credentialsResponse = handleRequest(handler, "GET", "/latest/meta-data/iam/security-credentials/" + profileName); + final var credentialsResponse = handleRequest(handler, "GET", SECURITY_CREDENTIALS_URI + profileName); assertEquals(RestStatus.OK, credentialsResponse.status()); assertThat(generatedCredentials, aMapWithSize(1)); @@ -62,14 +66,67 @@ public void testImdsV1() throws IOException { public void testImdsV2Disabled() { assertEquals( RestStatus.METHOD_NOT_ALLOWED, - handleRequest(new Ec2ImdsHttpHandler((accessKey, sessionToken) -> fail(), Set.of()), "PUT", "/latest/api/token").status() + handleRequest( + new Ec2ImdsHttpHandler(Ec2ImdsVersion.V1, (accessKey, sessionToken) -> fail(), Set.of()), + "PUT", + "/latest/api/token" + ).status() ); } + public void testImdsV2() throws IOException { + final Map generatedCredentials = new HashMap<>(); + + final var handler = new Ec2ImdsHttpHandler(Ec2ImdsVersion.V2, generatedCredentials::put, Set.of()); + + final var tokenResponse = handleRequest(handler, "PUT", "/latest/api/token"); + assertEquals(RestStatus.OK, tokenResponse.status()); + final var token = tokenResponse.body().utf8ToString(); + + final var roleResponse = checkImdsV2GetRequest(handler, SECURITY_CREDENTIALS_URI, token); + assertEquals(RestStatus.OK, roleResponse.status()); + final var profileName = roleResponse.body().utf8ToString(); + assertTrue(Strings.hasText(profileName)); + + final var credentialsResponse = checkImdsV2GetRequest(handler, SECURITY_CREDENTIALS_URI + profileName, token); + assertEquals(RestStatus.OK, credentialsResponse.status()); + + assertThat(generatedCredentials, aMapWithSize(1)); + final var accessKey = generatedCredentials.keySet().iterator().next(); + final var sessionToken = 
generatedCredentials.values().iterator().next(); + + final var responseMap = XContentHelper.convertToMap(XContentType.JSON.xContent(), credentialsResponse.body().streamInput(), false); + assertEquals(Set.of("AccessKeyId", "Expiration", "RoleArn", "SecretAccessKey", "Token"), responseMap.keySet()); + assertEquals(accessKey, responseMap.get("AccessKeyId")); + assertEquals(sessionToken, responseMap.get("Token")); + } + private record TestHttpResponse(RestStatus status, BytesReference body) {} + private static TestHttpResponse checkImdsV2GetRequest(Ec2ImdsHttpHandler handler, String uri, String token) { + final var unauthorizedResponse = handleRequest(handler, "GET", uri, null); + assertEquals(RestStatus.UNAUTHORIZED, unauthorizedResponse.status()); + + final var forbiddenResponse = handleRequest(handler, "GET", uri, randomValueOtherThan(token, ESTestCase::randomSecretKey)); + assertEquals(RestStatus.FORBIDDEN, forbiddenResponse.status()); + + return handleRequest(handler, "GET", uri, token); + } + private static TestHttpResponse handleRequest(Ec2ImdsHttpHandler handler, String method, String uri) { - final var httpExchange = new TestHttpExchange(method, uri, BytesArray.EMPTY, TestHttpExchange.EMPTY_HEADERS); + return handleRequest(handler, method, uri, null); + } + + private static TestHttpResponse handleRequest(Ec2ImdsHttpHandler handler, String method, String uri, @Nullable String token) { + final Headers headers; + if (token == null) { + headers = TestHttpExchange.EMPTY_HEADERS; + } else { + headers = new Headers(); + headers.put("X-aws-ec2-metadata-token", List.of(token)); + } + + final var httpExchange = new TestHttpExchange(method, uri, BytesArray.EMPTY, headers); try { handler.handle(httpExchange); } catch (IOException e) { From d2a4c70ca1f85e408efdc572ed4dda847733b0be Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Mon, 2 Dec 2024 11:16:38 +0200 Subject: [PATCH 098/139] Search Queries in parallel - part 3 (#117149) Update IT tests grouping assertResponses --- .../elasticsearch/aliases/IndexAliasesIT.java | 74 ++----- .../fetch/subphase/MatchedQueriesIT.java | 109 ++++------ .../highlight/HighlighterSearchIT.java | 193 +++++++----------- .../search/functionscore/QueryRescorerIT.java | 74 +++---- .../search/query/MultiMatchQueryIT.java | 109 +++------- 5 files changed, 175 insertions(+), 384 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/aliases/IndexAliasesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/aliases/IndexAliasesIT.java index b70da34c8fe3f..309bf69f00be0 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/aliases/IndexAliasesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/aliases/IndexAliasesIT.java @@ -65,6 +65,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.emptyArray; @@ -262,27 +263,16 @@ public void testSearchingFilteringAliasesSingleIndex() throws Exception { .setRefreshPolicy(RefreshPolicy.IMMEDIATE) ).actionGet(); - logger.info("--> checking single filtering alias search"); - assertResponse( + assertResponses( + searchResponse 
-> assertHits(searchResponse.getHits(), "1"), prepareSearch("foos").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1") - ); - - logger.info("--> checking single filtering alias wildcard search"); - assertResponse( - prepareSearch("fo*").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1") + prepareSearch("fo*").setQuery(QueryBuilders.matchAllQuery()) ); - assertResponse( + assertResponses( + searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3"), prepareSearch("tests").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3") - ); - - logger.info("--> checking single filtering alias search with sort"); - assertResponse( - prepareSearch("tests").setQuery(QueryBuilders.matchAllQuery()).addSort("_index", SortOrder.ASC), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3") + prepareSearch("tests").setQuery(QueryBuilders.matchAllQuery()).addSort("_index", SortOrder.ASC) ); logger.info("--> checking single filtering alias search with global facets"); @@ -323,28 +313,12 @@ public void testSearchingFilteringAliasesSingleIndex() throws Exception { searchResponse -> assertHits(searchResponse.getHits(), "1", "2") ); - logger.info("--> checking single non-filtering alias search"); - assertResponse( + assertResponses( + searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3", "4"), prepareSearch("alias1").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3", "4") - ); - - logger.info("--> checking non-filtering alias and filtering alias search"); - assertResponse( prepareSearch("alias1", "foos").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3", "4") - ); - - logger.info("--> checking index and filtering alias search"); - assertResponse( prepareSearch("test", "foos").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3", "4") - ); - - logger.info("--> checking index and alias wildcard search"); - assertResponse( - prepareSearch("te*", "fo*").setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3", "4") + prepareSearch("te*", "fo*").setQuery(QueryBuilders.matchAllQuery()) ); } @@ -506,11 +480,11 @@ public void testSearchingFilteringAliasesMultipleIndices() throws Exception { prepareSearch("filter23", "filter13").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "21", "31", "13", "33") ); - assertResponse( + assertResponses( + searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(4L)), prepareSearch("filter23", "filter13").setSize(0).setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(4L)) + prepareSearch("filter13", "filter1").setSize(0).setQuery(QueryBuilders.matchAllQuery()) ); - assertResponse( prepareSearch("filter23", "filter1").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "21", "31", "11", "12", "13") @@ -519,16 +493,10 @@ public void testSearchingFilteringAliasesMultipleIndices() throws Exception { prepareSearch("filter23", "filter1").setSize(0).setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), 
equalTo(5L)) ); - assertResponse( prepareSearch("filter13", "filter1").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "11", "12", "13", "33") ); - assertResponse( - prepareSearch("filter13", "filter1").setSize(0).setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(4L)) - ); - assertResponse( prepareSearch("filter13", "filter1", "filter23").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "11", "12", "13", "21", "31", "33") @@ -537,7 +505,6 @@ public void testSearchingFilteringAliasesMultipleIndices() throws Exception { prepareSearch("filter13", "filter1", "filter23").setSize(0).setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(6L)) ); - assertResponse( prepareSearch("filter23", "filter13", "test2").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "21", "22", "23", "31", "13", "33") @@ -546,7 +513,6 @@ public void testSearchingFilteringAliasesMultipleIndices() throws Exception { prepareSearch("filter23", "filter13", "test2").setSize(0).setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(6L)) ); - assertResponse( prepareSearch("filter23", "filter13", "test1", "test2").setQuery(QueryBuilders.matchAllQuery()), searchResponse -> assertHits(searchResponse.getHits(), "11", "12", "13", "21", "22", "23", "31", "33") @@ -1325,17 +1291,13 @@ public void testIndexingAndQueryingHiddenAliases() throws Exception { searchResponse -> assertHits(searchResponse.getHits(), "2", "3") ); - // Ensure that all docs can be gotten through the alias - assertResponse( + assertResponses( + searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3"), + // Ensure that all docs can be gotten through the alias prepareSearch(alias).setQuery(QueryBuilders.matchAllQuery()), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3") - ); - - // And querying using a wildcard with indices options set to expand hidden - assertResponse( + // And querying using a wildcard with indices options set to expand hidden prepareSearch("alias*").setQuery(QueryBuilders.matchAllQuery()) - .setIndicesOptions(IndicesOptions.fromOptions(false, false, true, false, true, true, true, false, false)), - searchResponse -> assertHits(searchResponse.getHits(), "1", "2", "3") + .setIndicesOptions(IndicesOptions.fromOptions(false, false, true, false, true, true, true, false, false)) ); // And that querying the alias with a wildcard and no expand options fails diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/MatchedQueriesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/MatchedQueriesIT.java index c796522eda0e8..b0faeeb295e33 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/MatchedQueriesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/MatchedQueriesIT.java @@ -33,6 +33,7 @@ import static org.elasticsearch.index.query.QueryBuilders.wrapperQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import 
static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.hasKey; @@ -105,54 +106,32 @@ public void testSimpleMatchedQueryFromTopLevelFilter() throws Exception { prepareIndex("test").setId("3").setSource("name", "test").get(); refresh(); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 3L); + for (SearchHit hit : response.getHits()) { + if (hit.getId().equals("1")) { + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); + assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); + } else if (hit.getId().equals("2") || hit.getId().equals("3")) { + assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + } else { + fail("Unexpected document returned with id " + hit.getId()); + } + } + }, prepareSearch().setQuery(matchAllQuery()) .setPostFilter( boolQuery().should(termQuery("name", "test").queryName("name")).should(termQuery("title", "title1").queryName("title")) ), - response -> { - assertHitCount(response, 3L); - for (SearchHit hit : response.getHits()) { - if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); - assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); - } else if (hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - } else { - fail("Unexpected document returned with id " + hit.getId()); - } - } - } - ); - - assertResponse( prepareSearch().setQuery(matchAllQuery()) .setPostFilter( boolQuery().should(termQuery("name", "test").queryName("name")).should(termQuery("title", "title1").queryName("title")) - ), - response -> { - assertHitCount(response, 3L); - for (SearchHit hit : response.getHits()) { - if (hit.getId().equals("1")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); - assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); - } else if (hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(1)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - } else { - fail("Unexpected document returned with id " + hit.getId()); - } - } - } + ) ); } @@ -165,43 +144,25 @@ public void testSimpleMatchedQueryFromTopLevelFilterAndFilteredQuery() throws Ex prepareIndex("test").setId("3").setSource("name", "test", "title", "title3").get(); refresh(); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 3L); + for (SearchHit hit : response.getHits()) { + if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) { 
+ assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); + assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); + assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); + assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); + } else { + fail("Unexpected document returned with id " + hit.getId()); + } + } + }, prepareSearch().setQuery( boolQuery().must(matchAllQuery()).filter(termsQuery("title", "title1", "title2", "title3").queryName("title")) ).setPostFilter(termQuery("name", "test").queryName("name")), - response -> { - assertHitCount(response, 3L); - for (SearchHit hit : response.getHits()) { - if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); - assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); - } else { - fail("Unexpected document returned with id " + hit.getId()); - } - } - } - ); - - assertResponse( prepareSearch().setQuery(termsQuery("title", "title1", "title2", "title3").queryName("title")) - .setPostFilter(matchQuery("name", "test").queryName("name")), - response -> { - assertHitCount(response, 3L); - for (SearchHit hit : response.getHits()) { - if (hit.getId().equals("1") || hit.getId().equals("2") || hit.getId().equals("3")) { - assertThat(hit.getMatchedQueriesAndScores().size(), equalTo(2)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("name")); - assertThat(hit.getMatchedQueryScore("name"), greaterThan(0f)); - assertThat(hit.getMatchedQueriesAndScores(), hasKey("title")); - assertThat(hit.getMatchedQueryScore("title"), greaterThan(0f)); - } else { - fail("Unexpected document returned with id " + hit.getId()); - } - } - } + .setPostFilter(matchQuery("name", "test").queryName("name")) ); } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 0805d0f366b0f..36580ebda8aee 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -97,6 +97,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNotHighlighted; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.containsString; @@ -596,40 +597,24 @@ public void testSourceLookupHighlightingUsingPostingsHighlighter() throws Except } indexRandom(true, indexRequestBuilders); - assertResponse( + assertResponses(response -> { + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight( + response, + i, + "title", + 0, + equalTo("This is a test on the highlighting bug present in elasticsearch. 
Hopefully it works.") + ); + assertHighlight(response, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); + } + }, prepareSearch().setQuery(matchQuery("title", "bug")) // asking for the whole field to be highlighted .highlighter(new HighlightBuilder().field("title", -1, 0)), - response -> { - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight( - response, - i, - "title", - 0, - equalTo("This is a test on the highlighting bug present in elasticsearch. Hopefully it works.") - ); - assertHighlight(response, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); - } - } - ); - - assertResponse( prepareSearch().setQuery(matchQuery("title", "bug")) // sentences will be generated out of each value - .highlighter(new HighlightBuilder().field("title")), - response -> { - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight( - response, - i, - "title", - 0, - equalTo("This is a test on the highlighting bug present in elasticsearch. Hopefully it works.") - ); - assertHighlight(response, i, "title", 1, 2, equalTo("This is the second bug to perform highlighting on.")); - } - } + .highlighter(new HighlightBuilder().field("title")) ); assertResponse( @@ -792,27 +777,31 @@ public void testPlainHighlighterOrder() throws Exception { refresh(); { - // fragments should be in order of appearance by default - SearchSourceBuilder source = searchSource().query(matchQuery("field1", "brown dog")) - .highlighter(highlight().highlighterType("plain").field("field1").preTags("").postTags("").fragmentSize(25)); - - assertResponse(prepareSearch("test").setSource(source), response -> { - - assertHighlight(response, 0, "field1", 0, 3, equalTo("The quick brown fox")); - assertHighlight(response, 0, "field1", 1, 3, equalTo(" jumps over the lazy brown dog")); - assertHighlight(response, 0, "field1", 2, 3, equalTo(" dog doesn't care")); - }); - // lets be explicit about the order - source = searchSource().query(matchQuery("field1", "brown dog")) - .highlighter( - highlight().highlighterType("plain").field("field1").order("none").preTags("").postTags("").fragmentSize(25) - ); - - assertResponse(prepareSearch("test").setSource(source), response -> { + assertResponses(response -> { assertHighlight(response, 0, "field1", 0, 3, equalTo("The quick brown fox")); assertHighlight(response, 0, "field1", 1, 3, equalTo(" jumps over the lazy brown dog")); assertHighlight(response, 0, "field1", 2, 3, equalTo(" dog doesn't care")); - }); + }, + // fragments should be in order of appearance by default + prepareSearch("test").setSource( + searchSource().query(matchQuery("field1", "brown dog")) + .highlighter( + highlight().highlighterType("plain").field("field1").preTags("").postTags("").fragmentSize(25) + ) + ), + // lets be explicit about the order + prepareSearch("test").setSource( + searchSource().query(matchQuery("field1", "brown dog")) + .highlighter( + highlight().highlighterType("plain") + .field("field1") + .order("none") + .preTags("") + .postTags("") + .fragmentSize(25) + ) + ) + ); } { // order by score @@ -1701,42 +1690,26 @@ public void testDisableFastVectorHighlighter() throws Exception { } ); - // Using plain highlighter instead of FVH - assertResponse( + assertResponses(response -> { + for (int i = 0; i < indexRequestBuilders.length; i++) { + assertHighlight( + response, + i, + "title", + 0, + 1, + equalTo("This is a test for the workaround for the fast vector highlighting SOLR-3724") + ); + } + }, + // Using plain highlighter 
instead of FVH prepareSearch().setQuery(matchPhraseQuery("title", "test for the workaround")) .highlighter(new HighlightBuilder().field("title", 50, 1, 10).highlighterType("plain")), - response -> { - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight( - response, - i, - "title", - 0, - 1, - equalTo("This is a test for the workaround for the fast vector highlighting SOLR-3724") - ); - } - } - ); - - // Using plain highlighter instead of FVH on the field level - assertResponse( + // Using plain highlighter instead of FVH on the field level prepareSearch().setQuery(matchPhraseQuery("title", "test for the workaround")) .highlighter( new HighlightBuilder().field(new HighlightBuilder.Field("title").highlighterType("plain")).highlighterType("plain") - ), - response -> { - for (int i = 0; i < indexRequestBuilders.length; i++) { - assertHighlight( - response, - i, - "title", - 0, - 1, - equalTo("This is a test for the workaround for the fast vector highlighting SOLR-3724") - ); - } - } + ) ); } @@ -1826,44 +1799,29 @@ public void testPlainHighlightDifferentFragmenter() throws Exception { .get(); refresh(); - assertResponse( + assertResponses(response -> { + assertHighlight(response, 0, "tags", 0, equalTo("this is a really long tag i would like to highlight")); + assertHighlight( + response, + 0, + "tags", + 1, + 2, + equalTo("here is another one that is very long tag and has the tag token near the end") + ); + }, prepareSearch("test").setQuery(QueryBuilders.matchPhraseQuery("tags", "long tag")) .highlighter( new HighlightBuilder().field( new HighlightBuilder.Field("tags").highlighterType("plain").fragmentSize(-1).numOfFragments(2).fragmenter("simple") ) ), - response -> { - assertHighlight(response, 0, "tags", 0, equalTo("this is a really long tag i would like to highlight")); - assertHighlight( - response, - 0, - "tags", - 1, - 2, - equalTo("here is another one that is very long tag and has the tag token near the end") - ); - } - ); - - assertResponse( prepareSearch("test").setQuery(QueryBuilders.matchPhraseQuery("tags", "long tag")) .highlighter( new HighlightBuilder().field( new Field("tags").highlighterType("plain").fragmentSize(-1).numOfFragments(2).fragmenter("span") ) - ), - response -> { - assertHighlight(response, 0, "tags", 0, equalTo("this is a really long tag i would like to highlight")); - assertHighlight( - response, - 0, - "tags", - 1, - 2, - equalTo("here is another one that is very long tag and has the tag token near the end") - ); - } + ) ); assertFailures( @@ -3627,15 +3585,16 @@ public void testWithNestedQuery() throws Exception { assertThat(field.fragments()[1].string(), equalTo("cow")); } ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1); + HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo.text"); + assertThat(field.fragments().length, equalTo(1)); + assertThat(field.fragments()[0].string(), equalTo("brown shoes")); + }, prepareSearch().setQuery(nestedQuery("foo", prefixQuery("foo.text", "bro"), ScoreMode.None)) .highlighter(new HighlightBuilder().field(new Field("foo.text").highlighterType(type))), - response -> { - assertHitCount(response, 1); - HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo.text"); - assertThat(field.fragments().length, equalTo(1)); - assertThat(field.fragments()[0].string(), equalTo("brown shoes")); - } + prepareSearch().setQuery(nestedQuery("foo", matchPhrasePrefixQuery("foo.text", "bro"), ScoreMode.None)) + .highlighter(new 
HighlightBuilder().field(new Field("foo.text").highlighterType(type))) ); assertResponse( prepareSearch().setQuery(nestedQuery("foo", matchPhraseQuery("foo.text", "brown shoes"), ScoreMode.None)) @@ -3647,16 +3606,6 @@ public void testWithNestedQuery() throws Exception { assertThat(field.fragments()[0].string(), equalTo("brown shoes")); } ); - assertResponse( - prepareSearch().setQuery(nestedQuery("foo", matchPhrasePrefixQuery("foo.text", "bro"), ScoreMode.None)) - .highlighter(new HighlightBuilder().field(new Field("foo.text").highlighterType(type))), - response -> { - assertHitCount(response, 1); - HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo.text"); - assertThat(field.fragments().length, equalTo(1)); - assertThat(field.fragments()[0].string(), equalTo("brown shoes")); - } - ); } // For unified and fvh highlighters we just check that the nested query is correctly extracted diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java index 9fed4ead8c248..a7efb2fe0e68b 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java @@ -69,6 +69,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailuresAndResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponses; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSecondHit; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThirdHit; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.hasId; @@ -149,33 +150,24 @@ public void testRescorePhrase() throws Exception { 5 ), response -> { - assertThat(response.getHits().getTotalHits().value(), equalTo(3L)); + assertHitCount(response, 3); assertThat(response.getHits().getMaxScore(), equalTo(response.getHits().getHits()[0].getScore())); - assertThat(response.getHits().getHits()[0].getId(), equalTo("1")); - assertThat(response.getHits().getHits()[1].getId(), equalTo("3")); - assertThat(response.getHits().getHits()[2].getId(), equalTo("2")); + assertFirstHit(response, hasId("1")); + assertSecondHit(response, hasId("3")); + assertThirdHit(response, hasId("2")); } ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 3); + assertThat(response.getHits().getMaxScore(), equalTo(response.getHits().getHits()[0].getScore())); + assertFirstHit(response, hasId("1")); + assertSecondHit(response, hasId("2")); + assertThirdHit(response, hasId("3")); + }, prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(Operator.OR)) .setRescorer(new QueryRescorerBuilder(matchPhraseQuery("field1", "the quick brown").slop(3)), 5), - response -> { - assertHitCount(response, 3); - assertFirstHit(response, hasId("1")); - assertSecondHit(response, hasId("2")); - assertThirdHit(response, hasId("3")); - } - ); - assertResponse( prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "the quick brown").operator(Operator.OR)) - .setRescorer(new QueryRescorerBuilder(matchPhraseQuery("field1", "the quick brown")), 5), - response -> { - 
assertHitCount(response, 3); - assertThat(response.getHits().getMaxScore(), equalTo(response.getHits().getHits()[0].getScore())); - assertFirstHit(response, hasId("1")); - assertSecondHit(response, hasId("2")); - assertThirdHit(response, hasId("3")); - } + .setRescorer(new QueryRescorerBuilder(matchPhraseQuery("field1", "the quick brown")), 5) ); } @@ -212,7 +204,15 @@ public void testMoreDocs() throws Exception { prepareIndex("test").setId("11").setSource("field1", "2st street boston massachusetts").get(); prepareIndex("test").setId("12").setSource("field1", "3st street boston massachusetts").get(); indicesAdmin().prepareRefresh("test").get(); - assertResponse( + + assertResponses(response -> { + assertThat(response.getHits().getHits().length, equalTo(5)); + assertHitCount(response, 9); + assertThat(response.getHits().getMaxScore(), equalTo(response.getHits().getHits()[0].getScore())); + assertFirstHit(response, hasId("2")); + assertSecondHit(response, hasId("6")); + assertThirdHit(response, hasId("3")); + }, prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(Operator.OR)) .setFrom(0) .setSize(5) @@ -221,16 +221,6 @@ public void testMoreDocs() throws Exception { .setRescoreQueryWeight(2.0f), 20 ), - response -> { - assertThat(response.getHits().getHits().length, equalTo(5)); - assertHitCount(response, 9); - assertFirstHit(response, hasId("2")); - assertSecondHit(response, hasId("6")); - assertThirdHit(response, hasId("3")); - } - ); - - assertResponse( prepareSearch().setQuery(QueryBuilders.matchQuery("field1", "lexington avenue massachusetts").operator(Operator.OR)) .setFrom(0) .setSize(5) @@ -239,15 +229,7 @@ public void testMoreDocs() throws Exception { new QueryRescorerBuilder(matchPhraseQuery("field1", "lexington avenue massachusetts").slop(3)).setQueryWeight(0.6f) .setRescoreQueryWeight(2.0f), 20 - ), - response -> { - assertThat(response.getHits().getHits().length, equalTo(5)); - assertHitCount(response, 9); - assertThat(response.getHits().getMaxScore(), equalTo(response.getHits().getHits()[0].getScore())); - assertFirstHit(response, hasId("2")); - assertSecondHit(response, hasId("6")); - assertThirdHit(response, hasId("3")); - } + ) ); // Make sure non-zero from works: assertResponse( @@ -465,7 +447,8 @@ public void testEquivalence() throws Exception { .setFrom(0) .setSize(resultSize), plain -> { - assertResponse( + assertResponses( + rescored -> assertEquivalent(query, plain, rescored), prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) .setPreference("test") // ensure we hit the same shards for tie-breaking .setQuery(QueryBuilders.matchQuery("field1", query).operator(Operator.OR)) @@ -478,10 +461,6 @@ public void testEquivalence() throws Exception { .setRescoreQueryWeight(0.0f), rescoreWindow ), - rescored -> assertEquivalent(query, plain, rescored) - ); // check equivalence - - assertResponse( prepareSearch().setSearchType(SearchType.QUERY_THEN_FETCH) .setPreference("test") // ensure we hit the same shards for tie-breaking .setQuery(QueryBuilders.matchQuery("field1", query).operator(Operator.OR)) @@ -492,8 +471,7 @@ public void testEquivalence() throws Exception { .setQueryWeight(1.0f) .setRescoreQueryWeight(1.0f), rescoreWindow - ), - rescored -> assertEquivalent(query, plain, rescored) + ) ); // check equivalence } ); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/MultiMatchQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/MultiMatchQueryIT.java index 
3f6f7af56eb08..69a9fd7fdd4c7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/MultiMatchQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/MultiMatchQueryIT.java @@ -302,27 +302,20 @@ public void testDefaults() throws ExecutionException, InterruptedException { ), response -> assertFirstHit(response, hasId("theother")) ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1L); + assertFirstHit(response, hasId("theone")); + }, prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america", "full_name", "first_name", "last_name", "category").operator(Operator.AND).type(type) ) ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } - ); - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america", "full_name", "first_name", "last_name", "category").operator(Operator.AND).type(type) ) - ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } + ) ); } @@ -630,7 +623,10 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException response -> assertFirstHit(response, hasId("theother")) ); - assertResponse( + assertResponses(response -> { + assertHitCount(response, 1L); + assertFirstHit(response, hasId("theone")); + }, prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america", "full_name", "first_name", "last_name", "category").type( @@ -638,12 +634,6 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException ).operator(Operator.AND) ) ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } - ); - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america 15", "full_name", "first_name", "last_name", "category", "skill").type( @@ -651,12 +641,6 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException ).analyzer("category").lenient(true).operator(Operator.AND) ) ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } - ); - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america 15", "full_name", "first_name", "last_name", "category", "skill", "int-field").type( @@ -664,25 +648,17 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException ).analyzer("category").lenient(true).operator(Operator.AND) ) ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } - ); - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america 15", "skill", "full_name", "first_name", "last_name", "category", "int-field").type( MultiMatchQueryBuilder.Type.CROSS_FIELDS ).analyzer("category").lenient(true).operator(Operator.AND) ) - ), - response -> { - assertHitCount(response, 1L); - assertFirstHit(response, hasId("theone")); - } + ) ); - assertResponse( + + assertResponses( + response -> assertFirstHit(response, hasId("theone")), prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america 15", "first_name", "last_name", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) @@ -690,71 +666,42 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException .analyzer("category") ) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( 
prepareSearch("test").setQuery( randomizeType(multiMatchQuery("15", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).analyzer("category")) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType(multiMatchQuery("25 15", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).analyzer("category")) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("25 15", "int-field", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).analyzer("category") ) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("25 15", "first_name", "int-field", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) .analyzer("category") ) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("25 15", "int-field", "skill", "first_name").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) .analyzer("category") ) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("25 15", "int-field", "first_name", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) .analyzer("category") ) ), - response -> assertFirstHit(response, hasId("theone")) - ); - - assertResponse( prepareSearch("test").setQuery( randomizeType( multiMatchQuery("captain america marvel hero", "first_name", "last_name", "category").type( MultiMatchQueryBuilder.Type.CROSS_FIELDS ).analyzer("category").operator(Operator.OR) ) - ), - response -> assertFirstHit(response, hasId("theone")) + ) ); // test group based on analyzer -- all fields are grouped into a cross field search @@ -771,6 +718,7 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException assertFirstHit(response, hasId("theone")); } ); + // counter example assertHitCount( 0L, @@ -840,33 +788,26 @@ public void testCrossFieldMode() throws ExecutionException, InterruptedException randomizeType(multiMatchQuery("15", "int-field", "first_name", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)) ) ); - assertResponse( + assertResponses(response -> { + /* + * Doesn't find the one because "alpha 15" isn't a number and we don't + * break on spaces. + */ + assertHitCount(response, 1L); + assertFirstHit(response, hasId("ultimate1")); + }, prepareSearch("test").setQuery( randomizeType( multiMatchQuery("alpha 15", "first_name", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).lenient(true) ) ), - response -> { - /* - * Doesn't find the one because "alpha 15" isn't a number and we don't - * break on spaces. 
- */
- assertHitCount(response, 1L);
- assertFirstHit(response, hasId("ultimate1"));
- }
- );
- // Lenient wasn't always properly lenient with two numeric fields
- assertResponse(
+ // Lenient wasn't always properly lenient with two numeric fields
 prepareSearch("test").setQuery(
 randomizeType(
 multiMatchQuery("alpha 15", "int-field", "first_name", "skill").type(MultiMatchQueryBuilder.Type.CROSS_FIELDS)
 .lenient(true)
 )
- ),
- response -> {
- assertHitCount(response, 1L);
- assertFirstHit(response, hasId("ultimate1"));
- }
+ )
 );
 // Check that cross fields works with date fields
 assertResponse(

From 2a30fbc1e8284b8a23f285983be1a91f362c48a7 Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Mon, 2 Dec 2024 11:58:16 +0100
Subject: [PATCH 099/139] Remove bucketOrd from InternalGeoGridBucket (#117615)

This commit removes the need to have a bucketOrd in InternalGeoGridBucket that
was only used to build the InternalAggregation from the aggregator.

---
 .../bucket/BucketsAggregator.java             | 39 +++++++----
 .../bucket/geogrid/BucketPriorityQueue.java   | 17 +++--
 .../bucket/geogrid/GeoGridAggregator.java     | 66 ++++++++++++-------
 .../bucket/geogrid/InternalGeoGrid.java       |  9 ++-
 .../bucket/geogrid/InternalGeoGridBucket.java |  2 -
 .../bucket/terms/BucketAndOrd.java            | 21 ++++++
 6 files changed, 112 insertions(+), 42 deletions(-)
 create mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketAndOrd.java

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
index 665dd49e3381d..e86c7127ec2f4 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
@@ -160,7 +160,8 @@ protected void prepareSubAggs(LongArray ordsToCollect) throws IOException {}
 * the provided ordinals.
 * <p>
* Most aggregations should probably use something like - * {@link #buildSubAggsForAllBuckets(ObjectArray, ToLongFunction, BiConsumer)} + * {@link #buildSubAggsForAllBuckets(ObjectArray, LongArray, BiConsumer)} + * or {@link #buildSubAggsForAllBuckets(ObjectArray, ToLongFunction, BiConsumer)} * or {@link #buildAggregationsForVariableBuckets(LongArray, LongKeyedBucketOrds, BucketBuilderForVariable, ResultBuilderForVariable)} * or {@link #buildAggregationsForFixedBucketCount(LongArray, int, BucketBuilderForFixedCount, Function)} * or {@link #buildAggregationsForSingleBucket(LongArray, SingleBucketResultBuilder)} @@ -193,10 +194,9 @@ public int size() { } /** - * Build the sub aggregation results for a list of buckets and set them on - * the buckets. This is usually used by aggregations that are selective - * in which bucket they build. They use some mechanism of selecting a list - * of buckets to build use this method to "finish" building the results. + * Similarly to {@link #buildSubAggsForAllBuckets(ObjectArray, LongArray, BiConsumer)} + * but it needs to build the bucket ordinals. This method usually requires for buckets + * to contain the bucket ordinal. * @param buckets the buckets to finish building * @param bucketToOrd how to convert a bucket into an ordinal * @param setAggs how to set the sub-aggregation results on a bucket @@ -218,12 +218,29 @@ protected final void buildSubAggsForAllBuckets( bucketOrdsToCollect.set(s++, bucketToOrd.applyAsLong(bucket)); } } - var results = buildSubAggsForBuckets(bucketOrdsToCollect); - s = 0; - for (long ord = 0; ord < buckets.size(); ord++) { - for (B value : buckets.get(ord)) { - setAggs.accept(value, results.apply(s++)); - } + buildSubAggsForAllBuckets(buckets, bucketOrdsToCollect, setAggs); + } + } + + /** + * Build the sub aggregation results for a list of buckets and set them on + * the buckets. This is usually used by aggregations that are selective + * in which bucket they build. They use some mechanism of selecting a list + * of buckets to build use this method to "finish" building the results. 
+ * @param buckets the buckets to finish building + * @param bucketOrdsToCollect bucket ordinals + * @param setAggs how to set the sub-aggregation results on a bucket + */ + protected final void buildSubAggsForAllBuckets( + ObjectArray buckets, + LongArray bucketOrdsToCollect, + BiConsumer setAggs + ) throws IOException { + var results = buildSubAggsForBuckets(bucketOrdsToCollect); + int s = 0; + for (long ord = 0; ord < buckets.size(); ord++) { + for (B value : buckets.get(ord)) { + setAggs.accept(value, results.apply(s++)); } } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/BucketPriorityQueue.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/BucketPriorityQueue.java index cc677605c4528..85c79df42a714 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/BucketPriorityQueue.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/BucketPriorityQueue.java @@ -11,17 +11,24 @@ import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.ObjectArrayPriorityQueue; -class BucketPriorityQueue extends ObjectArrayPriorityQueue { +import java.util.function.Function; - BucketPriorityQueue(int size, BigArrays bigArrays) { +class BucketPriorityQueue extends ObjectArrayPriorityQueue { + + private final Function bucketSupplier; + + BucketPriorityQueue(int size, BigArrays bigArrays, Function bucketSupplier) { super(size, bigArrays); + this.bucketSupplier = bucketSupplier; } @Override - protected boolean lessThan(InternalGeoGridBucket o1, InternalGeoGridBucket o2) { - int cmp = Long.compare(o2.getDocCount(), o1.getDocCount()); + protected boolean lessThan(A o1, A o2) { + final B b1 = bucketSupplier.apply(o1); + final B b2 = bucketSupplier.apply(o2); + int cmp = Long.compare(b2.getDocCount(), b1.getDocCount()); if (cmp == 0) { - cmp = o2.compareTo(o1); + cmp = b2.compareTo(b1); if (cmp == 0) { cmp = System.identityHashCode(o2) - System.identityHashCode(o1); } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java index 1d3614af08768..b84dff6e73e0b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/GeoGridAggregator.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.ScoreMode; +import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongArray; import org.elasticsearch.common.util.ObjectArray; import org.elasticsearch.core.Releasables; @@ -23,6 +24,7 @@ import org.elasticsearch.search.aggregations.LeafBucketCollector; import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; import org.elasticsearch.search.aggregations.bucket.BucketsAggregator; +import org.elasticsearch.search.aggregations.bucket.terms.BucketAndOrd; import org.elasticsearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.elasticsearch.search.aggregations.support.AggregationContext; import org.elasticsearch.search.aggregations.support.ValuesSource; @@ -135,34 +137,52 @@ public void collect(int doc, long owningBucketOrd) throws IOException { @Override public InternalAggregation[] buildAggregations(LongArray owningBucketOrds) throws 
IOException { + try (ObjectArray topBucketsPerOrd = bigArrays().newObjectArray(owningBucketOrds.size())) { - for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { - int size = (int) Math.min(bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)), shardSize); - - try (BucketPriorityQueue ordered = new BucketPriorityQueue<>(size, bigArrays())) { - InternalGeoGridBucket spare = null; - LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); - while (ordsEnum.next()) { - if (spare == null) { - checkRealMemoryCBForInternalBucket(); - spare = newEmptyBucket(); + try (IntArray bucketsSizePerOrd = bigArrays().newIntArray(owningBucketOrds.size())) { + long ordsToCollect = 0; + for (long ordIdx = 0; ordIdx < owningBucketOrds.size(); ordIdx++) { + int size = (int) Math.min(bucketOrds.bucketsInOrd(owningBucketOrds.get(ordIdx)), shardSize); + ordsToCollect += size; + bucketsSizePerOrd.set(ordIdx, size); + } + try (LongArray ordsArray = bigArrays().newLongArray(ordsToCollect)) { + long ordsCollected = 0; + for (long ordIdx = 0; ordIdx < topBucketsPerOrd.size(); ordIdx++) { + try ( + BucketPriorityQueue, InternalGeoGridBucket> ordered = + new BucketPriorityQueue<>(bucketsSizePerOrd.get(ordIdx), bigArrays(), b -> b.bucket) + ) { + BucketAndOrd spare = null; + LongKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrds.get(ordIdx)); + while (ordsEnum.next()) { + if (spare == null) { + checkRealMemoryCBForInternalBucket(); + spare = new BucketAndOrd<>(newEmptyBucket()); + } + + // need a special function to keep the source bucket + // up-to-date so it can get the appropriate key + spare.bucket.hashAsLong = ordsEnum.value(); + spare.bucket.docCount = bucketDocCount(ordsEnum.ord()); + spare.ord = ordsEnum.ord(); + spare = ordered.insertWithOverflow(spare); + } + final int orderedSize = (int) ordered.size(); + final InternalGeoGridBucket[] buckets = new InternalGeoGridBucket[orderedSize]; + for (int i = orderedSize - 1; i >= 0; --i) { + BucketAndOrd bucketBucketAndOrd = ordered.pop(); + buckets[i] = bucketBucketAndOrd.bucket; + ordsArray.set(ordsCollected + i, bucketBucketAndOrd.ord); + } + topBucketsPerOrd.set(ordIdx, buckets); + ordsCollected += orderedSize; } - - // need a special function to keep the source bucket - // up-to-date so it can get the appropriate key - spare.hashAsLong = ordsEnum.value(); - spare.docCount = bucketDocCount(ordsEnum.ord()); - spare.bucketOrd = ordsEnum.ord(); - spare = ordered.insertWithOverflow(spare); - } - - topBucketsPerOrd.set(ordIdx, new InternalGeoGridBucket[(int) ordered.size()]); - for (int i = (int) ordered.size() - 1; i >= 0; --i) { - topBucketsPerOrd.get(ordIdx)[i] = ordered.pop(); } + assert ordsCollected == ordsArray.size(); + buildSubAggsForAllBuckets(topBucketsPerOrd, ordsArray, (b, aggs) -> b.aggregations = aggs); } } - buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); return buildAggregations( Math.toIntExact(owningBucketOrds.size()), ordIdx -> buildAggregation(name, requiredSize, Arrays.asList(topBucketsPerOrd.get(ordIdx)), metadata()) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGrid.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGrid.java index 6a32b41034503..343c92b353884 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGrid.java +++ 
b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGrid.java
@@ -27,6 +27,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.function.Function;

 import static java.util.Collections.unmodifiableList;

@@ -106,7 +107,13 @@ public InternalAggregation get() {
 final int size = Math.toIntExact(
 context.isFinalReduce() == false ? bucketsReducer.size() : Math.min(requiredSize, bucketsReducer.size())
 );
- try (BucketPriorityQueue ordered = new BucketPriorityQueue<>(size, context.bigArrays())) {
+ try (
+ BucketPriorityQueue ordered = new BucketPriorityQueue<>(
+ size,
+ context.bigArrays(),
+ Function.identity()
+ )
+ ) {
 bucketsReducer.forEach(entry -> {
 InternalGeoGridBucket bucket = createBucket(entry.key, entry.value.getDocCount(), entry.value.getAggregations());
 ordered.insertWithOverflow(bucket);
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java
index 60de4c3974c92..8884a412bcf41 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/geogrid/InternalGeoGridBucket.java
@@ -28,8 +28,6 @@ public abstract class InternalGeoGridBucket extends InternalMultiBucketAggregati
 protected long docCount;
 protected InternalAggregations aggregations;

- long bucketOrd;
-
 public InternalGeoGridBucket(long hashAsLong, long docCount, InternalAggregations aggregations) {
 this.docCount = docCount;
 this.aggregations = aggregations;
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketAndOrd.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketAndOrd.java
new file mode 100644
index 0000000000000..7b853860b7959
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/BucketAndOrd.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.search.aggregations.bucket.terms;
+
+/** Represents a bucket and its bucket ordinal */
+public final class BucketAndOrd<B> {
+
+ public final B bucket; // the bucket
+ public long ord; // mutable ordinal of the bucket
+
+ public BucketAndOrd(B bucket) {
+ this.bucket = bucket;
+ }
+}

From 79ce6e38728a7710f01f18d9769cd6941c2312f6 Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Mon, 2 Dec 2024 11:59:34 +0100
Subject: [PATCH 100/139] Improve performance of H3.h3ToGeoBoundary (#117812)

There are two clear code paths depending on whether an h3 bin belongs to even
resolutions (class II) or odd resolutions (class III). Specializing
the code paths for each type leads to an improvement in performance.
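For context: even resolutions (class II) are aligned with the icosahedron face
grid, while odd resolutions (class III) sit on a rotated aperture-7 grid, so
only the class III walk has to look for icosahedron edge crossings between
vertices. A minimal sketch of the dispatch idea, assuming the standard H3
convention that odd resolutions are class III; the demo class and its method
names are illustrative, not the library's API:

final class ResolutionClassDemo {

    // Odd resolutions are class III under the usual H3 numbering (an
    // assumption of this sketch, stated above).
    static boolean isResolutionClassIII(int res) {
        return (res & 1) == 1;
    }

    // Choosing the specialized path once, up front, removes the per-vertex
    // "is this class III?" branching from the shared boundary loop.
    static String boundaryPath(int res) {
        return isResolutionClassIII(res) ? "faceIjkToCellBoundaryClassIII" : "faceIjkToCellBoundaryClassII";
    }

    public static void main(String[] args) {
        for (int res = 0; res <= 3; res++) {
            System.out.println("res " + res + " -> " + boundaryPath(res));
        }
    }
}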
--- .../java/org/elasticsearch/h3/FaceIJK.java | 241 ++++++++++-------- .../main/java/org/elasticsearch/h3/H3.java | 6 +- 2 files changed, 142 insertions(+), 105 deletions(-) diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java index 866fdfe8a7f8b..a5744ed5eb6bc 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/FaceIJK.java @@ -417,43 +417,64 @@ public LatLng faceIjkToGeo(int res) { * for this FaceIJK address at a specified resolution. * * @param res The H3 resolution of the cell. - * @param start The first topological vertex to return. - * @param length The number of topological vertexes to return. */ - public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) { + public CellBoundary faceIjkPentToCellBoundary(int res) { // adjust the center point to be in an aperture 33r substrate grid // these should be composed for speed this.coord.downAp3(); this.coord.downAp3r(); // if res is Class III we need to add a cw aperture 7 to get to // icosahedral Class II - int adjRes = res; - if (H3Index.isResolutionClassIII(res)) { - this.coord.downAp7r(); - adjRes += 1; - } + final int adjRes = adjustRes(this.coord, res); + // If we're returning the entire loop, we need one more iteration in case // of a distortion vertex on the last edge - final int additionalIteration = length == Constants.NUM_PENT_VERTS ? 1 : 0; - final boolean isResolutionClassIII = H3Index.isResolutionClassIII(res); - // convert each vertex to lat/lng - // adjust the face of each vertex as appropriate and introduce - // edge-crossing vertices as needed + if (H3Index.isResolutionClassIII(res)) { + return faceIjkPentToCellBoundaryClassIII(adjRes); + } else { + return faceIjkPentToCellBoundaryClassII(adjRes); + } + } + + private CellBoundary faceIjkPentToCellBoundaryClassII(int adjRes) { + final LatLng[] points = new LatLng[Constants.NUM_PENT_VERTS]; + final FaceIJK fijk = new FaceIJK(this.face, new CoordIJK(0, 0, 0)); + for (int vert = 0; vert < Constants.NUM_PENT_VERTS; vert++) { + // The center point is now in the same substrate grid as the origin + // cell vertices. Add the center point substate coordinates + // to each vertex to translate the vertices to that cell. + fijk.coord.reset( + VERTEX_CLASSII[vert][0] + this.coord.i, + VERTEX_CLASSII[vert][1] + this.coord.j, + VERTEX_CLASSII[vert][2] + this.coord.k + ); + fijk.coord.ijkNormalize(); + fijk.face = this.face; + + fijk.adjustPentVertOverage(adjRes); + + points[vert] = fijk.coord.ijkToGeo(fijk.face, adjRes, true); + } + return new CellBoundary(points, Constants.NUM_PENT_VERTS); + } + + private CellBoundary faceIjkPentToCellBoundaryClassIII(int adjRes) { final LatLng[] points = new LatLng[CellBoundary.MAX_CELL_BNDRY_VERTS]; int numPoints = 0; - final CoordIJK scratch = new CoordIJK(0, 0, 0); - final FaceIJK fijk = new FaceIJK(this.face, scratch); - final int[][] coord = isResolutionClassIII ? VERTEX_CLASSIII : VERTEX_CLASSII; + final FaceIJK fijk = new FaceIJK(this.face, new CoordIJK(0, 0, 0)); final CoordIJK lastCoord = new CoordIJK(0, 0, 0); int lastFace = this.face; - for (int vert = start; vert < start + length + additionalIteration; vert++) { + for (int vert = 0; vert < Constants.NUM_PENT_VERTS + 1; vert++) { final int v = vert % Constants.NUM_PENT_VERTS; // The center point is now in the same substrate grid as the origin // cell vertices. 
Add the center point substate coordinates // to each vertex to translate the vertices to that cell. - scratch.reset(coord[v][0], coord[v][1], coord[v][2]); - scratch.ijkAdd(this.coord.i, this.coord.j, this.coord.k); - scratch.ijkNormalize(); + fijk.coord.reset( + VERTEX_CLASSIII[v][0] + this.coord.i, + VERTEX_CLASSIII[v][1] + this.coord.j, + VERTEX_CLASSIII[v][2] + this.coord.k + ); + fijk.coord.ijkNormalize(); fijk.face = this.face; fijk.adjustPentVertOverage(adjRes); @@ -461,7 +482,7 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) { // all Class III pentagon edges cross icosa edges // note that Class II pentagons have vertices on the edge, // not edge intersections - if (isResolutionClassIII && vert > start) { + if (vert > 0) { // find hex2d of the two vertexes on the last face final Vec2d orig2d0 = lastCoord.ijkToHex2d(); @@ -480,35 +501,17 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) { final Vec2d orig2d1 = lastCoord.ijkToHex2d(); - // find the appropriate icosa face edge vertexes - final Vec2d edge0; - final Vec2d edge1; - switch (adjacentFaceDir[fijkOrient.face][fijk.face]) { - case IJ -> { - edge0 = maxDimByCIIVec2d[adjRes][0]; - edge1 = maxDimByCIIVec2d[adjRes][1]; - } - case JK -> { - edge0 = maxDimByCIIVec2d[adjRes][1]; - edge1 = maxDimByCIIVec2d[adjRes][2]; - } - // case KI: - default -> { - assert (adjacentFaceDir[fijkOrient.face][fijk.face] == KI); - edge0 = maxDimByCIIVec2d[adjRes][2]; - edge1 = maxDimByCIIVec2d[adjRes][0]; - } - } - // find the intersection and add the lat/lng point to the result - final Vec2d inter = Vec2d.v2dIntersect(orig2d0, orig2d1, edge0, edge1); - points[numPoints++] = inter.hex2dToGeo(fijkOrient.face, adjRes, true); + final Vec2d inter = findIntersectionPoint(orig2d0, orig2d1, adjRes, adjacentFaceDir[fijkOrient.face][fijk.face]); + if (inter != null) { + points[numPoints++] = inter.hex2dToGeo(fijkOrient.face, adjRes, true); + } } // convert vertex to lat/lng and add to the result // vert == start + NUM_PENT_VERTS is only used to test for possible // intersection on last edge - if (vert < start + Constants.NUM_PENT_VERTS) { + if (vert < Constants.NUM_PENT_VERTS) { points[numPoints++] = fijk.coord.ijkToGeo(fijk.face, adjRes, true); } lastFace = fijk.face; @@ -522,10 +525,8 @@ public CellBoundary faceIjkPentToCellBoundary(int res, int start, int length) { * FaceIJK address at a specified resolution. * * @param res The H3 resolution of the cell. - * @param start The first topological vertex to return. - * @param length The number of topological vertexes to return. */ - public CellBoundary faceIjkToCellBoundary(final int res, final int start, final int length) { + public CellBoundary faceIjkToCellBoundary(final int res) { // adjust the center point to be in an aperture 33r substrate grid // these should be composed for speed this.coord.downAp3(); @@ -533,32 +534,63 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final // if res is Class III we need to add a cw aperture 7 to get to // icosahedral Class II - int adjRes = res; - if (H3Index.isResolutionClassIII(res)) { - this.coord.downAp7r(); - adjRes += 1; - } + final int adjRes = adjustRes(this.coord, res); - // If we're returning the entire loop, we need one more iteration in case - // of a distortion vertex on the last edge - final int additionalIteration = length == Constants.NUM_HEX_VERTS ? 
1 : 0; - final boolean isResolutionClassIII = H3Index.isResolutionClassIII(res); // convert each vertex to lat/lng // adjust the face of each vertex as appropriate and introduce // edge-crossing vertices as needed + if (H3Index.isResolutionClassIII(res)) { + return faceIjkToCellBoundaryClassIII(adjRes); + } else { + return faceIjkToCellBoundaryClassII(adjRes); + } + } + + private static int adjustRes(CoordIJK coord, int res) { + if (H3Index.isResolutionClassIII(res)) { + coord.downAp7r(); + res += 1; + } + return res; + } + + private CellBoundary faceIjkToCellBoundaryClassII(int adjRes) { + final LatLng[] points = new LatLng[Constants.NUM_HEX_VERTS]; + final FaceIJK fijk = new FaceIJK(this.face, new CoordIJK(0, 0, 0)); + for (int vert = 0; vert < Constants.NUM_HEX_VERTS; vert++) { + fijk.coord.reset( + VERTEX_CLASSII[vert][0] + this.coord.i, + VERTEX_CLASSII[vert][1] + this.coord.j, + VERTEX_CLASSII[vert][2] + this.coord.k + ); + fijk.coord.ijkNormalize(); + fijk.face = this.face; + + fijk.adjustOverageClassII(adjRes, false, true); + + // convert vertex to lat/lng and add to the result + // vert == start + NUM_HEX_VERTS is only used to test for possible + // intersection on last edge + points[vert] = fijk.coord.ijkToGeo(fijk.face, adjRes, true); + } + return new CellBoundary(points, Constants.NUM_HEX_VERTS); + } + + private CellBoundary faceIjkToCellBoundaryClassIII(int adjRes) { final LatLng[] points = new LatLng[CellBoundary.MAX_CELL_BNDRY_VERTS]; int numPoints = 0; - final CoordIJK scratch1 = new CoordIJK(0, 0, 0); - final FaceIJK fijk = new FaceIJK(this.face, scratch1); - final CoordIJK scratch2 = isResolutionClassIII ? new CoordIJK(0, 0, 0) : null; - final int[][] verts = isResolutionClassIII ? VERTEX_CLASSIII : VERTEX_CLASSII; + final FaceIJK fijk = new FaceIJK(this.face, new CoordIJK(0, 0, 0)); + final CoordIJK scratch = new CoordIJK(0, 0, 0); int lastFace = -1; Overage lastOverage = Overage.NO_OVERAGE; - for (int vert = start; vert < start + length + additionalIteration; vert++) { - int v = vert % Constants.NUM_HEX_VERTS; - scratch1.reset(verts[v][0], verts[v][1], verts[v][2]); - scratch1.ijkAdd(this.coord.i, this.coord.j, this.coord.k); - scratch1.ijkNormalize(); + for (int vert = 0; vert < Constants.NUM_HEX_VERTS + 1; vert++) { + final int v = vert % Constants.NUM_HEX_VERTS; + fijk.coord.reset( + VERTEX_CLASSIII[v][0] + this.coord.i, + VERTEX_CLASSIII[v][1] + this.coord.j, + VERTEX_CLASSIII[v][2] + this.coord.k + ); + fijk.coord.ijkNormalize(); fijk.face = this.face; final Overage overage = fijk.adjustOverageClassII(adjRes, false, true); @@ -572,50 +604,20 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final projection. Note that Class II cell edges have vertices on the face edge, with no edge line intersections. */ - if (isResolutionClassIII && vert > start && fijk.face != lastFace && lastOverage != Overage.FACE_EDGE) { + if (vert > 0 && fijk.face != lastFace && lastOverage != Overage.FACE_EDGE) { // find hex2d of the two vertexes on original face final int lastV = (v + 5) % Constants.NUM_HEX_VERTS; // The center point is now in the same substrate grid as the origin // cell vertices. Add the center point substate coordinates // to each vertex to translate the vertices to that cell. 
- final int[] vertexLast = verts[lastV]; - final int[] vertexV = verts[v]; - scratch2.reset(vertexLast[0] + this.coord.i, vertexLast[1] + this.coord.j, vertexLast[2] + this.coord.k); - scratch2.ijkNormalize(); - final Vec2d orig2d0 = scratch2.ijkToHex2d(); - scratch2.reset(vertexV[0] + this.coord.i, vertexV[1] + this.coord.j, vertexV[2] + this.coord.k); - scratch2.ijkNormalize(); - final Vec2d orig2d1 = scratch2.ijkToHex2d(); + final Vec2d orig2d0 = orig(scratch, VERTEX_CLASSIII[lastV]); + final Vec2d orig2d1 = orig(scratch, VERTEX_CLASSIII[v]); // find the appropriate icosa face edge vertexes final int face2 = ((lastFace == this.face) ? fijk.face : lastFace); - final Vec2d edge0; - final Vec2d edge1; - switch (adjacentFaceDir[this.face][face2]) { - case IJ -> { - edge0 = maxDimByCIIVec2d[adjRes][0]; - edge1 = maxDimByCIIVec2d[adjRes][1]; - } - case JK -> { - edge0 = maxDimByCIIVec2d[adjRes][1]; - edge1 = maxDimByCIIVec2d[adjRes][2]; - } - // case KI: - default -> { - assert (adjacentFaceDir[this.face][face2] == KI); - edge0 = maxDimByCIIVec2d[adjRes][2]; - edge1 = maxDimByCIIVec2d[adjRes][0]; - } - } // find the intersection and add the lat/lng point to the result - final Vec2d inter = Vec2d.v2dIntersect(orig2d0, orig2d1, edge0, edge1); - /* - If a point of intersection occurs at a hexagon vertex, then each - adjacent hexagon edge will lie completely on a single icosahedron - face, and no additional vertex is required. - */ - final boolean isIntersectionAtVertex = orig2d0.numericallyIdentical(inter) || orig2d1.numericallyIdentical(inter); - if (isIntersectionAtVertex == false) { + final Vec2d inter = findIntersectionPoint(orig2d0, orig2d1, adjRes, adjacentFaceDir[this.face][face2]); + if (inter != null) { points[numPoints++] = inter.hex2dToGeo(this.face, adjRes, true); } } @@ -623,7 +625,7 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final // convert vertex to lat/lng and add to the result // vert == start + NUM_HEX_VERTS is only used to test for possible // intersection on last edge - if (vert < start + Constants.NUM_HEX_VERTS) { + if (vert < Constants.NUM_HEX_VERTS) { points[numPoints++] = fijk.coord.ijkToGeo(fijk.face, adjRes, true); } lastFace = fijk.face; @@ -632,6 +634,42 @@ public CellBoundary faceIjkToCellBoundary(final int res, final int start, final return new CellBoundary(points, numPoints); } + private Vec2d orig(CoordIJK scratch, int[] vertexLast) { + scratch.reset(vertexLast[0] + this.coord.i, vertexLast[1] + this.coord.j, vertexLast[2] + this.coord.k); + scratch.ijkNormalize(); + return scratch.ijkToHex2d(); + } + + private Vec2d findIntersectionPoint(Vec2d orig2d0, Vec2d orig2d1, int adjRes, int faceDir) { + // find the appropriate icosa face edge vertexes + final Vec2d edge0; + final Vec2d edge1; + switch (faceDir) { + case IJ -> { + edge0 = maxDimByCIIVec2d[adjRes][0]; + edge1 = maxDimByCIIVec2d[adjRes][1]; + } + case JK -> { + edge0 = maxDimByCIIVec2d[adjRes][1]; + edge1 = maxDimByCIIVec2d[adjRes][2]; + } + // case KI: + default -> { + assert (faceDir == KI); + edge0 = maxDimByCIIVec2d[adjRes][2]; + edge1 = maxDimByCIIVec2d[adjRes][0]; + } + } + // find the intersection and add the lat/lng point to the result + final Vec2d inter = Vec2d.v2dIntersect(orig2d0, orig2d1, edge0, edge1); + /* + If a point of intersection occurs at a hexagon vertex, then each + adjacent hexagon edge will lie completely on a single icosahedron + face, and no additional vertex is required. 
+ */ + return orig2d0.numericallyIdentical(inter) || orig2d1.numericallyIdentical(inter) ? null : inter; + } + /** * compute the corresponding H3Index. * @param res The cell resolution. @@ -651,7 +689,6 @@ static long faceIjkToH3(int res, int face, CoordIJK coord) { // out of range input throw new IllegalArgumentException(" out of range input"); } - return H3Index.H3_set_base_cell(h, BaseCells.getBaseCell(face, coord)); } diff --git a/libs/h3/src/main/java/org/elasticsearch/h3/H3.java b/libs/h3/src/main/java/org/elasticsearch/h3/H3.java index 8c0bba62cecdb..08031088728ba 100644 --- a/libs/h3/src/main/java/org/elasticsearch/h3/H3.java +++ b/libs/h3/src/main/java/org/elasticsearch/h3/H3.java @@ -174,11 +174,11 @@ public static LatLng h3ToLatLng(String h3Address) { * Find the cell {@link CellBoundary} coordinates for the cell */ public static CellBoundary h3ToGeoBoundary(long h3) { - FaceIJK fijk = H3Index.h3ToFaceIjk(h3); + final FaceIJK fijk = H3Index.h3ToFaceIjk(h3); if (H3Index.H3_is_pentagon(h3)) { - return fijk.faceIjkPentToCellBoundary(H3Index.H3_get_resolution(h3), 0, Constants.NUM_PENT_VERTS); + return fijk.faceIjkPentToCellBoundary(H3Index.H3_get_resolution(h3)); } else { - return fijk.faceIjkToCellBoundary(H3Index.H3_get_resolution(h3), 0, Constants.NUM_HEX_VERTS); + return fijk.faceIjkToCellBoundary(H3Index.H3_get_resolution(h3)); } } From c7c725b2b37bdca3789207ac21a7ac2f15ce0c36 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 3 Dec 2024 01:05:56 +1100 Subject: [PATCH 101/139] Mute org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT #111319 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 8d64e1557ca19..d01b956db9199 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -237,6 +237,8 @@ tests: - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search.highlight/50_synthetic_source/text multi unified from vectors} issue: https://github.com/elastic/elasticsearch/issues/117815 +- class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT + issue: https://github.com/elastic/elasticsearch/issues/111319 # Examples: # From 285a71b89b5bf9a70729261e254ddd432f9a46f2 Mon Sep 17 00:00:00 2001 From: Pat Whelan Date: Mon, 2 Dec 2024 09:16:28 -0500 Subject: [PATCH 102/139] [ML] Abstract upgrade mode into core logic (#117512) Transform is adding an identical upgrade mode for 9.x migration. The logic to set the metadata is roughly the same, but the follow-up actions once the upgrade mode is changed will be different. 
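The base class keeps the request throttling, the no-op short circuit and the
cluster state task queue in one place, and a feature plugs in four extension
points. A rough sketch of what a transform-side subclass could look like; the
four overridden methods are the base class contract from this patch, while
everything transform-specific (the action name, a TransformMetadata custom
carrying an upgradeMode flag, the follow-up work) is assumed here for
illustration:

import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.tasks.Task;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.core.action.AbstractTransportSetUpgradeModeAction;
import org.elasticsearch.xpack.core.action.SetUpgradeModeActionRequest;

public class TransportSetTransformUpgradeModeAction extends AbstractTransportSetUpgradeModeAction {

    public TransportSetTransformUpgradeModeAction(
        TransportService transportService,
        ClusterService clusterService,
        ThreadPool threadPool,
        ActionFilters actionFilters,
        IndexNameExpressionResolver indexNameExpressionResolver
    ) {
        // the action name is hypothetical
        super("cluster:admin/transform/upgrade_mode", "transform", transportService, clusterService, threadPool, actionFilters, indexNameExpressionResolver);
    }

    @Override
    protected String featureName() {
        return "transform"; // used in log messages and as the task queue name
    }

    @Override
    protected boolean upgradeMode(ClusterState state) {
        // a TransformMetadata custom with an upgradeMode flag is assumed, mirroring ML's MlMetadata
        return TransformMetadata.getTransformMetadata(state).upgradeMode();
    }

    @Override
    protected ClusterState createUpdatedState(SetUpgradeModeActionRequest request, ClusterState state) {
        // runs on the cluster state thread, so it only flips the metadata flag
        TransformMetadata updated = new TransformMetadata.Builder(TransformMetadata.getTransformMetadata(state))
            .upgradeMode(request.enabled())
            .build();
        return ClusterState.builder(state)
            .metadata(Metadata.builder(state.metadata()).putCustom(TransformMetadata.TYPE, updated))
            .build();
    }

    @Override
    protected void upgradeModeSuccessfullyChanged(
        Task task,
        SetUpgradeModeActionRequest request,
        ClusterState state,
        ActionListener<AcknowledgedResponse> listener
    ) {
        // feature-specific follow-up (e.g. unassigning persistent tasks) would go here
        listener.onResponse(AcknowledgedResponse.TRUE);
    }
}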
--- ...AbstractTransportSetUpgradeModeAction.java | 186 +++++++++++++++ .../action/SetUpgradeModeActionRequest.java | 79 +++++++ .../core/ml/action/SetUpgradeModeAction.java | 49 +--- ...actTransportSetUpgradeModeActionTests.java | 219 ++++++++++++++++++ .../action/TransportSetUpgradeModeAction.java | 179 ++++---------- 5 files changed, 535 insertions(+), 177 deletions(-) create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeAction.java create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/SetUpgradeModeActionRequest.java create mode 100644 x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeActionTests.java diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeAction.java new file mode 100644 index 0000000000000..bbd90448cf855 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeAction.java @@ -0,0 +1,186 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.action; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.ElasticsearchTimeoutException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.AcknowledgedTransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.concurrent.atomic.AtomicBoolean; + +public abstract class AbstractTransportSetUpgradeModeAction extends AcknowledgedTransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(AbstractTransportSetUpgradeModeAction.class); + private final AtomicBoolean isRunning = new AtomicBoolean(false); + private final MasterServiceTaskQueue taskQueue; + + public AbstractTransportSetUpgradeModeAction( + String actionName, + String taskQueuePrefix, + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + actionName, + transportService, + 
clusterService, + threadPool, + actionFilters, + SetUpgradeModeActionRequest::new, + indexNameExpressionResolver, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + + this.taskQueue = clusterService.createTaskQueue(taskQueuePrefix + " upgrade mode", Priority.NORMAL, new UpdateModeExecutor()); + } + + @Override + protected void masterOperation( + Task task, + SetUpgradeModeActionRequest request, + ClusterState state, + ActionListener listener + ) throws Exception { + // Don't want folks spamming this endpoint while it is in progress, only allow one request to be handled at a time + if (isRunning.compareAndSet(false, true) == false) { + String msg = Strings.format( + "Attempted to set [upgrade_mode] for feature name [%s] to [%s] from [%s] while previous request was processing.", + featureName(), + request.enabled(), + upgradeMode(state) + ); + logger.info(msg); + Exception detail = new IllegalStateException(msg); + listener.onFailure( + new ElasticsearchStatusException( + "Cannot change [upgrade_mode] for feature name [{}]. Previous request is still being processed.", + RestStatus.TOO_MANY_REQUESTS, + detail, + featureName() + ) + ); + return; + } + + // Noop, nothing for us to do, simply return fast to the caller + var upgradeMode = upgradeMode(state); + if (request.enabled() == upgradeMode) { + logger.info("Upgrade mode noop"); + isRunning.set(false); + listener.onResponse(AcknowledgedResponse.TRUE); + return; + } + + logger.info( + "Starting to set [upgrade_mode] for feature name [{}] to [{}] from [{}]", + featureName(), + request.enabled(), + upgradeMode + ); + + ActionListener wrappedListener = ActionListener.wrap(r -> { + logger.info("Finished setting [upgrade_mode] for feature name [{}]", featureName()); + isRunning.set(false); + listener.onResponse(r); + }, e -> { + logger.info("Failed to set [upgrade_mode] for feature name [{}]", featureName()); + isRunning.set(false); + listener.onFailure(e); + }); + + ActionListener setUpgradeModeListener = wrappedListener.delegateFailure((delegate, ack) -> { + if (ack.isAcknowledged()) { + upgradeModeSuccessfullyChanged(task, request, state, delegate); + } else { + logger.info("Cluster state update is NOT acknowledged"); + wrappedListener.onFailure(new ElasticsearchTimeoutException("Unknown error occurred while updating cluster state")); + } + }); + + taskQueue.submitTask(featureName(), new UpdateModeStateListener(request, setUpgradeModeListener), request.ackTimeout()); + } + + /** + * Define the feature name, used in log messages and naming the task on the task queue. + */ + protected abstract String featureName(); + + /** + * Parse the ClusterState for the implementation's {@link org.elasticsearch.cluster.metadata.Metadata.Custom} and find the upgradeMode + * boolean stored there. We will compare this boolean with the request's desired state to determine if we should change the metadata. + */ + protected abstract boolean upgradeMode(ClusterState state); + + /** + * This is called from the ClusterState updater and is expected to return quickly. + */ + protected abstract ClusterState createUpdatedState(SetUpgradeModeActionRequest request, ClusterState state); + + /** + * This method is only called when the cluster state was successfully changed. + * If we failed to update for any reason, this will not be called. + * The ClusterState param is the previous ClusterState before we called update. 
+ */ + protected abstract void upgradeModeSuccessfullyChanged( + Task task, + SetUpgradeModeActionRequest request, + ClusterState state, + ActionListener listener + ); + + @Override + protected ClusterBlockException checkBlock(SetUpgradeModeActionRequest request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } + + private record UpdateModeStateListener(SetUpgradeModeActionRequest request, ActionListener listener) + implements + ClusterStateTaskListener { + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + private class UpdateModeExecutor extends SimpleBatchedExecutor { + @Override + public Tuple executeTask(UpdateModeStateListener clusterStateListener, ClusterState clusterState) { + return Tuple.tuple(createUpdatedState(clusterStateListener.request(), clusterState), null); + } + + @Override + public void taskSucceeded(UpdateModeStateListener clusterStateListener, Void unused) { + clusterStateListener.listener().onResponse(AcknowledgedResponse.TRUE); + } + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/SetUpgradeModeActionRequest.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/SetUpgradeModeActionRequest.java new file mode 100644 index 0000000000000..98e30b284c21a --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/action/SetUpgradeModeActionRequest.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.action; + +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Objects; + +public class SetUpgradeModeActionRequest extends AcknowledgedRequest implements ToXContentObject { + + private final boolean enabled; + + private static final ParseField ENABLED = new ParseField("enabled"); + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "set_upgrade_mode_action_request", + a -> new SetUpgradeModeActionRequest((Boolean) a[0]) + ); + + static { + PARSER.declareBoolean(ConstructingObjectParser.constructorArg(), ENABLED); + } + + public SetUpgradeModeActionRequest(boolean enabled) { + super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT); + this.enabled = enabled; + } + + public SetUpgradeModeActionRequest(StreamInput in) throws IOException { + super(in); + this.enabled = in.readBoolean(); + } + + public boolean enabled() { + return enabled; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeBoolean(enabled); + } + + @Override + public int hashCode() { + return Objects.hash(enabled); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || obj.getClass() != getClass()) { + return false; + } + SetUpgradeModeActionRequest other = (SetUpgradeModeActionRequest) obj; + return enabled == 
other.enabled(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(ENABLED.getPreferredName(), enabled); + builder.endObject(); + return builder; + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/SetUpgradeModeAction.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/SetUpgradeModeAction.java index 821caf001f3e0..a67ae33e85801 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/SetUpgradeModeAction.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/action/SetUpgradeModeAction.java @@ -7,17 +7,13 @@ package org.elasticsearch.xpack.core.ml.action; import org.elasticsearch.action.ActionType; -import org.elasticsearch.action.support.master.AcknowledgedRequest; import org.elasticsearch.action.support.master.AcknowledgedResponse; import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xcontent.ConstructingObjectParser; import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.ToXContentObject; -import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xpack.core.action.SetUpgradeModeActionRequest; import java.io.IOException; -import java.util.Objects; public class SetUpgradeModeAction extends ActionType { @@ -28,9 +24,7 @@ private SetUpgradeModeAction() { super(NAME); } - public static class Request extends AcknowledgedRequest implements ToXContentObject { - - private final boolean enabled; + public static class Request extends SetUpgradeModeActionRequest { private static final ParseField ENABLED = new ParseField("enabled"); public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( @@ -43,48 +37,11 @@ public static class Request extends AcknowledgedRequest implements ToXC } public Request(boolean enabled) { - super(TRAPPY_IMPLICIT_DEFAULT_MASTER_NODE_TIMEOUT, DEFAULT_ACK_TIMEOUT); - this.enabled = enabled; + super(enabled); } public Request(StreamInput in) throws IOException { super(in); - this.enabled = in.readBoolean(); - } - - public boolean isEnabled() { - return enabled; - } - - @Override - public void writeTo(StreamOutput out) throws IOException { - super.writeTo(out); - out.writeBoolean(enabled); - } - - @Override - public int hashCode() { - return Objects.hash(enabled); - } - - @Override - public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if (obj == null || obj.getClass() != getClass()) { - return false; - } - Request other = (Request) obj; - return Objects.equals(enabled, other.enabled); - } - - @Override - public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); - builder.field(ENABLED.getPreferredName(), enabled); - builder.endObject(); - return builder; } } } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeActionTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeActionTests.java new file mode 100644 index 0000000000000..d780b7fbc32f4 --- /dev/null +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/action/AbstractTransportSetUpgradeModeActionTests.java @@ -0,0 +1,219 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.action; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.test.ESTestCase; + +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Consumer; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class AbstractTransportSetUpgradeModeActionTests extends ESTestCase { + /** + * Creates a TaskQueue that invokes the SimpleBatchedExecutor. + */ + public static ClusterService clusterService() { + AtomicReference> executor = new AtomicReference<>(); + MasterServiceTaskQueue taskQueue = mock(); + ClusterService clusterService = mock(); + doAnswer(ans -> { + executor.set(ans.getArgument(2)); + return taskQueue; + }).when(clusterService).createTaskQueue(any(), any(), any()); + doAnswer(ans -> { + if (executor.get() == null) { + fail("We should create the task queue before we submit tasks to it"); + } else { + executor.get().executeTask(ans.getArgument(1), ClusterState.EMPTY_STATE); + executor.get().taskSucceeded(ans.getArgument(1), null); + } + return null; + }).when(taskQueue).submitTask(any(), any(), any()); + return clusterService; + } + + /** + * Creates a TaskQueue that calls the listener with an error. + */ + public static ClusterService clusterServiceWithError(Exception e) { + MasterServiceTaskQueue taskQueue = mock(); + ClusterService clusterService = mock(); + when(clusterService.createTaskQueue(any(), any(), any())).thenReturn(taskQueue); + doAnswer(ans -> { + ClusterStateTaskListener listener = ans.getArgument(1); + listener.onFailure(e); + return null; + }).when(taskQueue).submitTask(any(), any(), any()); + return clusterService; + } + + /** + * TaskQueue that does nothing. 
+ */ + public static ClusterService clusterServiceThatDoesNothing() { + ClusterService clusterService = mock(); + when(clusterService.createTaskQueue(any(), any(), any())).thenReturn(mock()); + return clusterService; + } + + public void testIdempotent() throws Exception { + // create with update mode set to false + var action = new TestTransportSetUpgradeModeAction(clusterServiceThatDoesNothing(), false); + + // flip to true but do nothing (cluster service mock won't invoke the listener) + action.runWithoutWaiting(true); + // call again + var response = action.run(true); + + assertThat(response.v1(), nullValue()); + assertThat(response.v2(), notNullValue()); + assertThat(response.v2(), instanceOf(ElasticsearchStatusException.class)); + assertThat( + response.v2().getMessage(), + is("Cannot change [upgrade_mode] for feature name [" + action.featureName() + "]. Previous request is still being processed.") + ); + } + + public void testUpdateDoesNotRun() throws Exception { + var shouldNotChange = new AtomicBoolean(true); + var action = new TestTransportSetUpgradeModeAction(true, l -> shouldNotChange.set(false)); + + var response = action.run(true); + + assertThat(response.v1(), is(AcknowledgedResponse.TRUE)); + assertThat(response.v2(), nullValue()); + assertThat(shouldNotChange.get(), is(true)); + } + + public void testErrorReleasesLock() throws Exception { + var action = new TestTransportSetUpgradeModeAction(false, l -> l.onFailure(new IllegalStateException("hello there"))); + + action.run(true); + var response = action.run(true); + assertThat( + "Previous request should have finished processing.", + response.v2().getMessage(), + not(containsString("Previous request is still being processed")) + ); + } + + public void testErrorFromAction() throws Exception { + var expectedException = new IllegalStateException("hello there"); + var action = new TestTransportSetUpgradeModeAction(false, l -> l.onFailure(expectedException)); + + var response = action.run(true); + + assertThat(response.v1(), nullValue()); + assertThat(response.v2(), is(expectedException)); + } + + public void testErrorFromTaskQueue() throws Exception { + var expectedException = new IllegalStateException("hello there"); + var action = new TestTransportSetUpgradeModeAction(clusterServiceWithError(expectedException), false); + + var response = action.run(true); + + assertThat(response.v1(), nullValue()); + assertThat(response.v2(), is(expectedException)); + } + + public void testSuccess() throws Exception { + var action = new TestTransportSetUpgradeModeAction(false, l -> l.onResponse(AcknowledgedResponse.TRUE)); + + var response = action.run(true); + + assertThat(response.v1(), is(AcknowledgedResponse.TRUE)); + assertThat(response.v2(), nullValue()); + } + + private static class TestTransportSetUpgradeModeAction extends AbstractTransportSetUpgradeModeAction { + private final boolean upgradeMode; + private final ClusterState updatedClusterState; + private final Consumer> successFunc; + + TestTransportSetUpgradeModeAction(boolean upgradeMode, Consumer> successFunc) { + super("actionName", "taskQueuePrefix", mock(), clusterService(), mock(), mock(), mock()); + this.upgradeMode = upgradeMode; + this.updatedClusterState = ClusterState.EMPTY_STATE; + this.successFunc = successFunc; + } + + TestTransportSetUpgradeModeAction(ClusterService clusterService, boolean upgradeMode) { + super("actionName", "taskQueuePrefix", mock(), clusterService, mock(), mock(), mock()); + this.upgradeMode = upgradeMode; + this.updatedClusterState = 
ClusterState.EMPTY_STATE; + this.successFunc = listener -> {}; + } + + public void runWithoutWaiting(boolean upgrade) throws Exception { + masterOperation(mock(), new SetUpgradeModeActionRequest(upgrade), ClusterState.EMPTY_STATE, ActionListener.noop()); + } + + public Tuple run(boolean upgrade) throws Exception { + AtomicReference> response = new AtomicReference<>(); + CountDownLatch latch = new CountDownLatch(1); + masterOperation(mock(), new SetUpgradeModeActionRequest(upgrade), ClusterState.EMPTY_STATE, ActionListener.wrap(r -> { + response.set(Tuple.tuple(r, null)); + latch.countDown(); + }, e -> { + response.set(Tuple.tuple(null, e)); + latch.countDown(); + })); + assertTrue("Failed to run TestTransportSetUpgradeModeAction in 10s", latch.await(10, TimeUnit.SECONDS)); + return response.get(); + } + + @Override + protected String featureName() { + return "test-feature-name"; + } + + @Override + protected boolean upgradeMode(ClusterState state) { + return upgradeMode; + } + + @Override + protected ClusterState createUpdatedState(SetUpgradeModeActionRequest request, ClusterState state) { + return updatedClusterState; + } + + @Override + protected void upgradeModeSuccessfullyChanged( + Task task, + SetUpgradeModeActionRequest request, + ClusterState state, + ActionListener listener + ) { + successFunc.accept(listener); + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportSetUpgradeModeAction.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportSetUpgradeModeAction.java index 744d5dbd6974f..5912619e892ed 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportSetUpgradeModeAction.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/action/TransportSetUpgradeModeAction.java @@ -9,35 +9,27 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.ElasticsearchTimeoutException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.master.AcknowledgedResponse; -import org.elasticsearch.action.support.master.AcknowledgedTransportMasterNodeAction; import org.elasticsearch.client.internal.Client; import org.elasticsearch.client.internal.OriginSettingClient; -import org.elasticsearch.cluster.AckedClusterStateUpdateTask; import org.elasticsearch.cluster.ClusterState; -import org.elasticsearch.cluster.ClusterStateUpdateTask; -import org.elasticsearch.cluster.block.ClusterBlockException; -import org.elasticsearch.cluster.block.ClusterBlockLevel; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.service.ClusterService; -import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.Predicates; -import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.injection.guice.Inject; import org.elasticsearch.persistent.PersistentTasksClusterService; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; import org.elasticsearch.persistent.PersistentTasksService; -import org.elasticsearch.rest.RestStatus; import org.elasticsearch.tasks.Task; import 
org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; +import org.elasticsearch.xpack.core.action.AbstractTransportSetUpgradeModeAction; +import org.elasticsearch.xpack.core.action.SetUpgradeModeActionRequest; import org.elasticsearch.xpack.core.ml.MlMetadata; import org.elasticsearch.xpack.core.ml.MlTasks; import org.elasticsearch.xpack.core.ml.action.IsolateDatafeedAction; @@ -48,7 +40,6 @@ import java.util.Comparator; import java.util.List; import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import static org.elasticsearch.ExceptionsHelper.rethrowAndSuppress; @@ -58,12 +49,11 @@ import static org.elasticsearch.xpack.core.ml.MlTasks.DATA_FRAME_ANALYTICS_TASK_NAME; import static org.elasticsearch.xpack.core.ml.MlTasks.JOB_TASK_NAME; -public class TransportSetUpgradeModeAction extends AcknowledgedTransportMasterNodeAction { +public class TransportSetUpgradeModeAction extends AbstractTransportSetUpgradeModeAction { private static final Set ML_TASK_NAMES = Set.of(JOB_TASK_NAME, DATAFEED_TASK_NAME, DATA_FRAME_ANALYTICS_TASK_NAME); private static final Logger logger = LogManager.getLogger(TransportSetUpgradeModeAction.class); - private final AtomicBoolean isRunning = new AtomicBoolean(false); private final PersistentTasksClusterService persistentTasksClusterService; private final PersistentTasksService persistentTasksService; private final OriginSettingClient client; @@ -79,69 +69,38 @@ public TransportSetUpgradeModeAction( Client client, PersistentTasksService persistentTasksService ) { - super( - SetUpgradeModeAction.NAME, - transportService, - clusterService, - threadPool, - actionFilters, - SetUpgradeModeAction.Request::new, - indexNameExpressionResolver, - EsExecutors.DIRECT_EXECUTOR_SERVICE - ); + super(SetUpgradeModeAction.NAME, "ml", transportService, clusterService, threadPool, actionFilters, indexNameExpressionResolver); this.persistentTasksClusterService = persistentTasksClusterService; this.client = new OriginSettingClient(client, ML_ORIGIN); this.persistentTasksService = persistentTasksService; } @Override - protected void masterOperation( - Task task, - SetUpgradeModeAction.Request request, - ClusterState state, - ActionListener listener - ) throws Exception { - - // Don't want folks spamming this endpoint while it is in progress, only allow one request to be handled at a time - if (isRunning.compareAndSet(false, true) == false) { - String msg = "Attempted to set [upgrade_mode] to [" - + request.isEnabled() - + "] from [" - + MlMetadata.getMlMetadata(state).isUpgradeMode() - + "] while previous request was processing."; - logger.info(msg); - Exception detail = new IllegalStateException(msg); - listener.onFailure( - new ElasticsearchStatusException( - "Cannot change [upgrade_mode]. 
Previous request is still being processed.", - RestStatus.TOO_MANY_REQUESTS, - detail - ) - ); - return; - } + protected String featureName() { + return "ml-set-upgrade-mode"; + } - // Noop, nothing for us to do, simply return fast to the caller - if (request.isEnabled() == MlMetadata.getMlMetadata(state).isUpgradeMode()) { - logger.info("Upgrade mode noop"); - isRunning.set(false); - listener.onResponse(AcknowledgedResponse.TRUE); - return; - } + @Override + protected boolean upgradeMode(ClusterState state) { + return MlMetadata.getMlMetadata(state).isUpgradeMode(); + } - logger.info( - "Starting to set [upgrade_mode] to [" + request.isEnabled() + "] from [" + MlMetadata.getMlMetadata(state).isUpgradeMode() + "]" - ); + @Override + protected ClusterState createUpdatedState(SetUpgradeModeActionRequest request, ClusterState currentState) { + logger.trace("Executing cluster state update"); + MlMetadata.Builder builder = new MlMetadata.Builder(currentState.metadata().custom(MlMetadata.TYPE)); + builder.isUpgradeMode(request.enabled()); + ClusterState.Builder newState = ClusterState.builder(currentState); + newState.metadata(Metadata.builder(currentState.getMetadata()).putCustom(MlMetadata.TYPE, builder.build()).build()); + return newState.build(); + } - ActionListener wrappedListener = ActionListener.wrap(r -> { - logger.info("Completed upgrade mode request"); - isRunning.set(false); - listener.onResponse(r); - }, e -> { - logger.info("Completed upgrade mode request but with failure", e); - isRunning.set(false); - listener.onFailure(e); - }); + protected void upgradeModeSuccessfullyChanged( + Task task, + SetUpgradeModeActionRequest request, + ClusterState state, + ActionListener wrappedListener + ) { final PersistentTasksCustomMetadata tasksCustomMetadata = state.metadata().custom(PersistentTasksCustomMetadata.TYPE); // <4> We have unassigned the tasks, respond to the listener. @@ -201,71 +160,29 @@ protected void masterOperation( */ - ActionListener clusterStateUpdateListener = ActionListener.wrap(acknowledgedResponse -> { - // State change was not acknowledged, we either timed out or ran into some exception - // We should not continue and alert failure to the end user - if (acknowledgedResponse.isAcknowledged() == false) { - logger.info("Cluster state update is NOT acknowledged"); - wrappedListener.onFailure(new ElasticsearchTimeoutException("Unknown error occurred while updating cluster state")); - return; - } - - // There are no tasks to worry about starting/stopping - if (tasksCustomMetadata == null || tasksCustomMetadata.tasks().isEmpty()) { - logger.info("No tasks to worry about after state update"); - wrappedListener.onResponse(AcknowledgedResponse.TRUE); - return; - } - - // Did we change from disabled -> enabled? 
- if (request.isEnabled()) { - logger.info("Enabling upgrade mode, must isolate datafeeds"); - isolateDatafeeds(tasksCustomMetadata, isolateDatafeedListener); - } else { - logger.info("Disabling upgrade mode, must wait for tasks to not have AWAITING_UPGRADE assignment"); - persistentTasksService.waitForPersistentTasksCondition( - // Wait for jobs, datafeeds and analytics not to be "Awaiting upgrade" - persistentTasksCustomMetadata -> persistentTasksCustomMetadata.tasks() - .stream() - .noneMatch(t -> ML_TASK_NAMES.contains(t.getTaskName()) && t.getAssignment().equals(AWAITING_UPGRADE)), - request.ackTimeout(), - ActionListener.wrap(r -> { - logger.info("Done waiting for tasks to be out of AWAITING_UPGRADE"); - wrappedListener.onResponse(AcknowledgedResponse.TRUE); - }, wrappedListener::onFailure) - ); - } - }, wrappedListener::onFailure); - - // <1> Change MlMetadata to indicate that upgrade_mode is now enabled - submitUnbatchedTask("ml-set-upgrade-mode", new AckedClusterStateUpdateTask(request, clusterStateUpdateListener) { - - @Override - protected AcknowledgedResponse newResponse(boolean acknowledged) { - logger.trace("Cluster update response built: " + acknowledged); - return AcknowledgedResponse.of(acknowledged); - } - - @Override - public ClusterState execute(ClusterState currentState) throws Exception { - logger.trace("Executing cluster state update"); - MlMetadata.Builder builder = new MlMetadata.Builder(currentState.metadata().custom(MlMetadata.TYPE)); - builder.isUpgradeMode(request.isEnabled()); - ClusterState.Builder newState = ClusterState.builder(currentState); - newState.metadata(Metadata.builder(currentState.getMetadata()).putCustom(MlMetadata.TYPE, builder.build()).build()); - return newState.build(); - } - }); - } - - @SuppressForbidden(reason = "legacy usage of unbatched task") // TODO add support for batching here - private void submitUnbatchedTask(@SuppressWarnings("SameParameterValue") String source, ClusterStateUpdateTask task) { - clusterService.submitUnbatchedStateUpdateTask(source, task); - } + if (tasksCustomMetadata == null || tasksCustomMetadata.tasks().isEmpty()) { + logger.info("No tasks to worry about after state update"); + wrappedListener.onResponse(AcknowledgedResponse.TRUE); + return; + } - @Override - protected ClusterBlockException checkBlock(SetUpgradeModeAction.Request request, ClusterState state) { - return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + if (request.enabled()) { + logger.info("Enabling upgrade mode, must isolate datafeeds"); + isolateDatafeeds(tasksCustomMetadata, isolateDatafeedListener); + } else { + logger.info("Disabling upgrade mode, must wait for tasks to not have AWAITING_UPGRADE assignment"); + persistentTasksService.waitForPersistentTasksCondition( + // Wait for jobs, datafeeds and analytics not to be "Awaiting upgrade" + persistentTasksCustomMetadata -> persistentTasksCustomMetadata.tasks() + .stream() + .noneMatch(t -> ML_TASK_NAMES.contains(t.getTaskName()) && t.getAssignment().equals(AWAITING_UPGRADE)), + request.ackTimeout(), + ActionListener.wrap(r -> { + logger.info("Done waiting for tasks to be out of AWAITING_UPGRADE"); + wrappedListener.onResponse(AcknowledgedResponse.TRUE); + }, wrappedListener::onFailure) + ); + } } /** From 49b707b1d6958f6593419bf936f3764bcc4a4432 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 2 Dec 2024 14:34:58 +0000 Subject: [PATCH 103/139] Remove some old metadata serialization conditions (#117825) --- .../cluster/metadata/IndexMetadata.java | 4 +- 
.../cluster/metadata/Metadata.java | 89 +++++-------------- 2 files changed, 22 insertions(+), 71 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java index b7c1ee5fbad96..681ea84513088 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/IndexMetadata.java @@ -1785,7 +1785,7 @@ public static IndexMetadata readFrom(StreamInput in, @Nullable Function diff(Metadata previousState) { } public static Diff readDiffFrom(StreamInput in) throws IOException { - if (in.getTransportVersion().onOrAfter(MetadataDiff.NOOP_METADATA_DIFF_VERSION) && in.readBoolean()) { - return SimpleDiffable.empty(); - } - return new MetadataDiff(in); + return in.readBoolean() ? SimpleDiffable.empty() : new MetadataDiff(in); } public static Metadata fromXContent(XContentParser parser) throws IOException { @@ -1552,10 +1547,6 @@ public Map getMappingsByHash() { private static class MetadataDiff implements Diff { - private static final TransportVersion NOOP_METADATA_DIFF_VERSION = TransportVersions.V_8_5_0; - private static final TransportVersion NOOP_METADATA_DIFF_SAFE_VERSION = - PublicationTransportHandler.INCLUDES_LAST_COMMITTED_DATA_VERSION; - private final long version; private final String clusterUUID; private final boolean clusterUUIDCommitted; @@ -1620,36 +1611,19 @@ private MetadataDiff(StreamInput in) throws IOException { coordinationMetadata = new CoordinationMetadata(in); transientSettings = Settings.readSettingsFromStream(in); persistentSettings = Settings.readSettingsFromStream(in); - if (in.getTransportVersion().onOrAfter(TransportVersions.V_7_3_0)) { - hashesOfConsistentSettings = DiffableStringMap.readDiffFrom(in); - } else { - hashesOfConsistentSettings = DiffableStringMap.DiffableStringMapDiff.EMPTY; - } + hashesOfConsistentSettings = DiffableStringMap.readDiffFrom(in); indices = DiffableUtils.readImmutableOpenMapDiff(in, DiffableUtils.getStringKeySerializer(), INDEX_METADATA_DIFF_VALUE_READER); templates = DiffableUtils.readImmutableOpenMapDiff(in, DiffableUtils.getStringKeySerializer(), TEMPLATES_DIFF_VALUE_READER); customs = DiffableUtils.readImmutableOpenMapDiff(in, DiffableUtils.getStringKeySerializer(), CUSTOM_VALUE_SERIALIZER); - if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) { - reservedStateMetadata = DiffableUtils.readJdkMapDiff( - in, - DiffableUtils.getStringKeySerializer(), - RESERVED_DIFF_VALUE_READER - ); - } else { - reservedStateMetadata = DiffableUtils.emptyDiff(); - } + reservedStateMetadata = DiffableUtils.readJdkMapDiff(in, DiffableUtils.getStringKeySerializer(), RESERVED_DIFF_VALUE_READER); } @Override public void writeTo(StreamOutput out) throws IOException { - if (out.getTransportVersion().onOrAfter(NOOP_METADATA_DIFF_SAFE_VERSION)) { - out.writeBoolean(empty); - if (empty) { - // noop diff - return; - } - } else if (out.getTransportVersion().onOrAfter(NOOP_METADATA_DIFF_VERSION)) { - // noops are not safe with these versions, see #92259 - out.writeBoolean(false); + out.writeBoolean(empty); + if (empty) { + // noop diff + return; } out.writeString(clusterUUID); out.writeBoolean(clusterUUIDCommitted); @@ -1657,15 +1631,11 @@ public void writeTo(StreamOutput out) throws IOException { coordinationMetadata.writeTo(out); transientSettings.writeTo(out); persistentSettings.writeTo(out); - if 
(out.getTransportVersion().onOrAfter(TransportVersions.V_7_3_0)) { - hashesOfConsistentSettings.writeTo(out); - } + hashesOfConsistentSettings.writeTo(out); indices.writeTo(out); templates.writeTo(out); customs.writeTo(out); - if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) { - reservedStateMetadata.writeTo(out); - } + reservedStateMetadata.writeTo(out); } @Override @@ -1696,8 +1666,6 @@ public Metadata apply(Metadata part) { } } - public static final TransportVersion MAPPINGS_AS_HASH_VERSION = TransportVersions.V_8_1_0; - public static Metadata readFrom(StreamInput in) throws IOException { Builder builder = new Builder(); builder.version = in.readLong(); @@ -1706,17 +1674,11 @@ public static Metadata readFrom(StreamInput in) throws IOException { builder.coordinationMetadata(new CoordinationMetadata(in)); builder.transientSettings(readSettingsFromStream(in)); builder.persistentSettings(readSettingsFromStream(in)); - if (in.getTransportVersion().onOrAfter(TransportVersions.V_7_3_0)) { - builder.hashesOfConsistentSettings(DiffableStringMap.readFrom(in)); - } + builder.hashesOfConsistentSettings(DiffableStringMap.readFrom(in)); final Function mappingLookup; - if (in.getTransportVersion().onOrAfter(MAPPINGS_AS_HASH_VERSION)) { - final Map mappingMetadataMap = in.readMapValues(MappingMetadata::new, MappingMetadata::getSha256); - if (mappingMetadataMap.size() > 0) { - mappingLookup = mappingMetadataMap::get; - } else { - mappingLookup = null; - } + final Map mappingMetadataMap = in.readMapValues(MappingMetadata::new, MappingMetadata::getSha256); + if (mappingMetadataMap.isEmpty() == false) { + mappingLookup = mappingMetadataMap::get; } else { mappingLookup = null; } @@ -1733,11 +1695,9 @@ public static Metadata readFrom(StreamInput in) throws IOException { Custom customIndexMetadata = in.readNamedWriteable(Custom.class); builder.putCustom(customIndexMetadata.getWriteableName(), customIndexMetadata); } - if (in.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) { - int reservedStateSize = in.readVInt(); - for (int i = 0; i < reservedStateSize; i++) { - builder.put(ReservedStateMetadata.readFrom(in)); - } + int reservedStateSize = in.readVInt(); + for (int i = 0; i < reservedStateSize; i++) { + builder.put(ReservedStateMetadata.readFrom(in)); } return builder.build(); } @@ -1750,24 +1710,15 @@ public void writeTo(StreamOutput out) throws IOException { coordinationMetadata.writeTo(out); transientSettings.writeTo(out); persistentSettings.writeTo(out); - if (out.getTransportVersion().onOrAfter(TransportVersions.V_7_3_0)) { - hashesOfConsistentSettings.writeTo(out); - } - // Starting in #MAPPINGS_AS_HASH_VERSION we write the mapping metadata first and then write the indices without metadata so that - // we avoid writing duplicate mappings twice - if (out.getTransportVersion().onOrAfter(MAPPINGS_AS_HASH_VERSION)) { - out.writeMapValues(mappingsByHash); - } + hashesOfConsistentSettings.writeTo(out); + out.writeMapValues(mappingsByHash); out.writeVInt(indices.size()); - final boolean writeMappingsHash = out.getTransportVersion().onOrAfter(MAPPINGS_AS_HASH_VERSION); for (IndexMetadata indexMetadata : this) { - indexMetadata.writeTo(out, writeMappingsHash); + indexMetadata.writeTo(out, true); } out.writeCollection(templates.values()); VersionedNamedWriteable.writeVersionedWritables(out, customs); - if (out.getTransportVersion().onOrAfter(TransportVersions.V_8_4_0)) { - out.writeCollection(reservedStateMetadata.values()); - } + 
out.writeCollection(reservedStateMetadata.values());
     }
 
     public static Builder builder() {

From ddc8b959ee7b028ffa8b3d5c9d90b5271d72a3cd Mon Sep 17 00:00:00 2001
From: Jan Kuipers <148754765+jan-elastic@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:41:02 +0100
Subject: [PATCH 104/139] ES|QL categorize docs (#117827)

* Move ES|QL categorize out of snapshot functions

* Categorize docs

* Add experimental + fix docs
---
 .../functions/description/categorize.asciidoc  |  2 +-
 .../esql/functions/examples/categorize.asciidoc | 14 ++++++++++++++
 .../esql/functions/grouping-functions.asciidoc  |  2 ++
 .../functions/kibana/definition/categorize.json |  9 ++++++---
 .../esql/functions/kibana/docs/categorize.md    |  6 +++++-
 .../esql/functions/layout/categorize.asciidoc   |  3 +++
 .../src/main/resources/docs.csv-spec            | 17 +++++++++++++++++
 .../function/EsqlFunctionRegistry.java          |  5 +++--
 .../function/grouping/Categorize.java           | 14 +++++++++++++-
 9 files changed, 64 insertions(+), 8 deletions(-)
 create mode 100644 docs/reference/esql/functions/examples/categorize.asciidoc

diff --git a/docs/reference/esql/functions/description/categorize.asciidoc b/docs/reference/esql/functions/description/categorize.asciidoc
index b6574c1855505..a5e8e2d507574 100644
--- a/docs/reference/esql/functions/description/categorize.asciidoc
+++ b/docs/reference/esql/functions/description/categorize.asciidoc
@@ -2,4 +2,4 @@
 
 *Description*
 
-Categorizes text messages.
+Groups text messages into categories of similarly formatted text values.
diff --git a/docs/reference/esql/functions/examples/categorize.asciidoc b/docs/reference/esql/functions/examples/categorize.asciidoc
new file mode 100644
index 0000000000000..4167be6910c89
--- /dev/null
+++ b/docs/reference/esql/functions/examples/categorize.asciidoc
@@ -0,0 +1,14 @@
+// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.
+
+*Example*
+
+This example categorizes server logs messages into categories and aggregates their counts.
+[source.merge.styled,esql]
+----
+include::{esql-specs}/docs.csv-spec[tag=docsCategorize]
+----
+[%header.monospaced.styled,format=dsv,separator=|]
+|===
+include::{esql-specs}/docs.csv-spec[tag=docsCategorize-result]
+|===
+
diff --git a/docs/reference/esql/functions/grouping-functions.asciidoc b/docs/reference/esql/functions/grouping-functions.asciidoc
index ed0caf5ec2a4c..839320ce23392 100644
--- a/docs/reference/esql/functions/grouping-functions.asciidoc
+++ b/docs/reference/esql/functions/grouping-functions.asciidoc
@@ -9,6 +9,8 @@ The <> command supports these grouping functions:
 
 // tag::group_list[]
 * <>
+* experimental:[] <>
 // end::group_list[]
 
 include::layout/bucket.asciidoc[]
+include::layout/categorize.asciidoc[]
diff --git a/docs/reference/esql/functions/kibana/definition/categorize.json b/docs/reference/esql/functions/kibana/definition/categorize.json
index ca3971a6e05a3..ed5fa15232b85 100644
--- a/docs/reference/esql/functions/kibana/definition/categorize.json
+++ b/docs/reference/esql/functions/kibana/definition/categorize.json
@@ -2,7 +2,7 @@
   "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it.
See ../README.md for how to regenerate it.", "type" : "eval", "name" : "categorize", - "description" : "Categorizes text messages.", + "description" : "Groups text messages into categories of similarly formatted text values.", "signatures" : [ { "params" : [ @@ -29,6 +29,9 @@ "returnType" : "keyword" } ], - "preview" : false, - "snapshot_only" : true + "examples" : [ + "FROM sample_data\n| STATS count=COUNT() BY category=CATEGORIZE(message)" + ], + "preview" : true, + "snapshot_only" : false } diff --git a/docs/reference/esql/functions/kibana/docs/categorize.md b/docs/reference/esql/functions/kibana/docs/categorize.md index f59151b5bee65..80c04b79084e9 100644 --- a/docs/reference/esql/functions/kibana/docs/categorize.md +++ b/docs/reference/esql/functions/kibana/docs/categorize.md @@ -3,5 +3,9 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### CATEGORIZE -Categorizes text messages. +Groups text messages into categories of similarly formatted text values. +``` +FROM sample_data +| STATS count=COUNT() BY category=CATEGORIZE(message) +``` diff --git a/docs/reference/esql/functions/layout/categorize.asciidoc b/docs/reference/esql/functions/layout/categorize.asciidoc index c547362b71ab0..4075949ab4d12 100644 --- a/docs/reference/esql/functions/layout/categorize.asciidoc +++ b/docs/reference/esql/functions/layout/categorize.asciidoc @@ -4,6 +4,8 @@ [[esql-categorize]] === `CATEGORIZE` +preview::["Do not use on production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + *Syntax* [.text-center] @@ -12,3 +14,4 @@ image::esql/functions/signature/categorize.svg[Embedded,opts=inline] include::../parameters/categorize.asciidoc[] include::../description/categorize.asciidoc[] include::../types/categorize.asciidoc[] +include::../examples/categorize.asciidoc[] diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index a6e1a771374ca..24baf1263d06a 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -676,3 +676,20 @@ Ahmedabad | 9 | 72 Bangalore | 9 | 72 // end::bitLength-result[] ; + +docsCategorize +required_capability: categorize_v4 +// tag::docsCategorize[] +FROM sample_data +| STATS count=COUNT() BY category=CATEGORIZE(message) +// end::docsCategorize[] +| SORT category +; + +// tag::docsCategorize-result[] +count:long | category:keyword + 3 | .*?Connected.+?to.*? + 3 | .*?Connection.+?error.*? + 1 | .*?Disconnected.*? 
+// end::docsCategorize-result[] +; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 3d26bc170b723..37b159922906c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -265,7 +265,9 @@ public Collection listFunctions(String pattern) { private static FunctionDefinition[][] functions() { return new FunctionDefinition[][] { // grouping functions - new FunctionDefinition[] { def(Bucket.class, Bucket::new, "bucket", "bin"), }, + new FunctionDefinition[] { + def(Bucket.class, Bucket::new, "bucket", "bin"), + def(Categorize.class, Categorize::new, "categorize") }, // aggregate functions // since they declare two public constructors - one with filter (for nested where) and one without // use casting to disambiguate between the two @@ -411,7 +413,6 @@ private static FunctionDefinition[][] snapshotFunctions() { // The delay() function is for debug/snapshot environments only and should never be enabled in a non-snapshot build. // This is an experimental function and can be removed without notice. def(Delay.class, Delay::new, "delay"), - def(Categorize.class, Categorize::new, "categorize"), def(Kql.class, Kql::new, "kql"), def(Rate.class, Rate::withUnresolvedTimestamp, "rate") } }; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java index 63b5073c2217a..ca0447ce11ec4 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/grouping/Categorize.java @@ -16,6 +16,7 @@ import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; import org.elasticsearch.xpack.esql.expression.function.Param; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; @@ -44,10 +45,21 @@ public class Categorize extends GroupingFunction implements Validatable { private final Expression field; - @FunctionInfo(returnType = "keyword", description = "Categorizes text messages.") + @FunctionInfo( + returnType = "keyword", + description = "Groups text messages into categories of similarly formatted text values.", + examples = { + @Example( + file = "docs", + tag = "docsCategorize", + description = "This example categorizes server logs messages into categories and aggregates their counts. 
" + ) }, + preview = true + ) public Categorize( Source source, @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field + ) { super(source, List.of(field)); this.field = field; From 9d9a136f3d104382cc0c8de3fc42a97b7ddbfcdd Mon Sep 17 00:00:00 2001 From: Dan Rubinstein Date: Mon, 2 Dec 2024 11:32:12 -0500 Subject: [PATCH 105/139] Unmuting chunking test (#116837) Co-authored-by: Elastic Machine --- .../xpack/inference/qa/mixed/OpenAIServiceMixedIT.java | 1 - 1 file changed, 1 deletion(-) diff --git a/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java b/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java index d8d5eb49c3c00..b37bd1801b331 100644 --- a/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java +++ b/x-pack/plugin/inference/qa/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/inference/qa/mixed/OpenAIServiceMixedIT.java @@ -54,7 +54,6 @@ public static void shutdown() { openAiChatCompletionsServer.close(); } - @AwaitsFix(bugUrl = "Backport #112074 to 8.16") @SuppressWarnings("unchecked") public void testOpenAiEmbeddings() throws IOException { var openAiEmbeddingsSupported = bwcVersion.onOrAfter(Version.fromString(OPEN_AI_EMBEDDINGS_ADDED)); From 187935eb77e31493ce98512396dd1f38d9ce41e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Mon, 2 Dec 2024 17:37:03 +0100 Subject: [PATCH 106/139] Fix OldRepositoryAccessIT testOldRepoAccess (#117649) This test started failing with the changes made in #115314 when we only have one shard in the index. This change adjusts test expectations. Closes #115631 --- muted-tests.yml | 3 --- .../java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index d01b956db9199..73d9a29e275b3 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -91,9 +91,6 @@ tests: - class: org.elasticsearch.xpack.restart.MLModelDeploymentFullClusterRestartIT method: testDeploymentSurvivesRestart {cluster=UPGRADED} issue: https://github.com/elastic/elasticsearch/issues/115528 -- class: org.elasticsearch.oldrepos.OldRepositoryAccessIT - method: testOldRepoAccess - issue: https://github.com/elastic/elasticsearch/issues/115631 - class: org.elasticsearch.action.update.UpdateResponseTests method: testToAndFromXContent issue: https://github.com/elastic/elasticsearch/issues/115689 diff --git a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java index 30ec6630b9618..ef1c8284b9c19 100644 --- a/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java +++ b/x-pack/qa/repository-old-versions/src/test/java/org/elasticsearch/oldrepos/OldRepositoryAccessIT.java @@ -484,7 +484,8 @@ private void assertDocs( logger.info(searchResponse); assertEquals(0, searchResponse.getHits().getTotalHits().value()); assertEquals(numberOfShards, searchResponse.getSuccessfulShards()); - assertEquals(numberOfShards, searchResponse.getSkippedShards()); + int expectedSkips = numberOfShards == 1 ? 
0 : numberOfShards;
+        assertEquals(expectedSkips, searchResponse.getSkippedShards());
     } finally {
         searchResponse.decRef();
     }

From e10fc3c90dc18da0b6dd02a06113899e0be0c5de Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Mon, 2 Dec 2024 12:19:03 -0500
Subject: [PATCH 107/139] Speed up bit compared with floats or bytes script
 operations (#117199)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of using an "if" statement, which doesn't lend itself to vectorization, I switched to expanding the bits and multiplying by the resulting 1s and 0s.

This led to a marginal speed improvement on ARM. I expect that Panama vector could be used here to be even faster, but I didn't want to spend any more time on this for the time being.

```
Benchmark                                               (dims)   Mode  Cnt  Score   Error   Units
IpBitVectorScorerBenchmark.dotProductByteIfStatement       768  thrpt    5  2.952 ± 0.026  ops/us
IpBitVectorScorerBenchmark.dotProductByteUnwrap            768  thrpt    5  4.017 ± 0.068  ops/us
IpBitVectorScorerBenchmark.dotProductFloatIfStatement      768  thrpt    5  2.987 ± 0.124  ops/us
IpBitVectorScorerBenchmark.dotProductFloatUnwrap           768  thrpt    5  4.726 ± 0.136  ops/us
```

Benchmark I used: https://gist.github.com/benwtrent/b0edb3975d2f03356c1a5ea84c72abc9
---
 docs/changelog/117199.yaml                    |  5 ++
 .../elasticsearch/simdvec/ESVectorUtil.java   | 23 +------
 .../DefaultESVectorUtilSupport.java           | 65 +++++++++++++++++++
 .../vectorization/ESVectorUtilSupport.java    |  4 ++
 .../PanamaESVectorUtilSupport.java            | 10 +++
 5 files changed, 86 insertions(+), 21 deletions(-)
 create mode 100644 docs/changelog/117199.yaml

diff --git a/docs/changelog/117199.yaml b/docs/changelog/117199.yaml
new file mode 100644
index 0000000000000..b685e98b61f6b
--- /dev/null
+++ b/docs/changelog/117199.yaml
@@ -0,0 +1,5 @@
+pr: 117199
+summary: Speed up bit compared with floats or bytes script operations
+area: Vector Search
+type: enhancement
+issues: []
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java
index 2f4743a47a14a..7fe475e86a2f5 100644
--- a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java
+++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java
@@ -61,17 +61,7 @@ public static int ipByteBit(byte[] q, byte[] d) {
         if (q.length != d.length * Byte.SIZE) {
             throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + "!= " + Byte.SIZE + " x " + d.length);
         }
-        int result = 0;
-        // now combine the two vectors, summing the byte dimensions where the bit in d is `1`
-        for (int i = 0; i < d.length; i++) {
-            byte mask = d[i];
-            for (int j = Byte.SIZE - 1; j >= 0; j--) {
-                if ((mask & (1 << j)) != 0) {
-                    result += q[i * Byte.SIZE + Byte.SIZE - 1 - j];
-                }
-            }
-        }
-        return result;
+        return IMPL.ipByteBit(q, d);
     }
 
     /**
@@ -87,16 +77,7 @@ public static float ipFloatBit(float[] q, byte[] d) {
         if (q.length != d.length * Byte.SIZE) {
             throw new IllegalArgumentException("vector dimensions incompatible: " + q.length + "!= " + Byte.SIZE + " x " + d.length);
         }
-        float result = 0;
-        for (int i = 0; i < d.length; i++) {
-            byte mask = d[i];
-            for (int j = Byte.SIZE - 1; j >= 0; j--) {
-                if ((mask & (1 << j)) != 0) {
-                    result += q[i * Byte.SIZE + Byte.SIZE - 1 - j];
-                }
-            }
-        }
-        return result;
+        return IMPL.ipFloatBit(q, d);
     }
 
     /**
diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java
b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java index 4a08096119d6a..00381c8c3fb2f 100644 --- a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java +++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java @@ -10,9 +10,18 @@ package org.elasticsearch.simdvec.internal.vectorization; import org.apache.lucene.util.BitUtil; +import org.apache.lucene.util.Constants; final class DefaultESVectorUtilSupport implements ESVectorUtilSupport { + private static float fma(float a, float b, float c) { + if (Constants.HAS_FAST_SCALAR_FMA) { + return Math.fma(a, b, c); + } else { + return a * b + c; + } + } + DefaultESVectorUtilSupport() {} @Override @@ -20,6 +29,62 @@ public long ipByteBinByte(byte[] q, byte[] d) { return ipByteBinByteImpl(q, d); } + @Override + public int ipByteBit(byte[] q, byte[] d) { + return ipByteBitImpl(q, d); + } + + @Override + public float ipFloatBit(float[] q, byte[] d) { + return ipFloatBitImpl(q, d); + } + + public static int ipByteBitImpl(byte[] q, byte[] d) { + assert q.length == d.length * Byte.SIZE; + int acc0 = 0; + int acc1 = 0; + int acc2 = 0; + int acc3 = 0; + // now combine the two vectors, summing the byte dimensions where the bit in d is `1` + for (int i = 0; i < d.length; i++) { + byte mask = d[i]; + // Make sure its just 1 or 0 + + acc0 += q[i * Byte.SIZE + 0] * ((mask >> 7) & 1); + acc1 += q[i * Byte.SIZE + 1] * ((mask >> 6) & 1); + acc2 += q[i * Byte.SIZE + 2] * ((mask >> 5) & 1); + acc3 += q[i * Byte.SIZE + 3] * ((mask >> 4) & 1); + + acc0 += q[i * Byte.SIZE + 4] * ((mask >> 3) & 1); + acc1 += q[i * Byte.SIZE + 5] * ((mask >> 2) & 1); + acc2 += q[i * Byte.SIZE + 6] * ((mask >> 1) & 1); + acc3 += q[i * Byte.SIZE + 7] * ((mask >> 0) & 1); + } + return acc0 + acc1 + acc2 + acc3; + } + + public static float ipFloatBitImpl(float[] q, byte[] d) { + assert q.length == d.length * Byte.SIZE; + float acc0 = 0; + float acc1 = 0; + float acc2 = 0; + float acc3 = 0; + // now combine the two vectors, summing the byte dimensions where the bit in d is `1` + for (int i = 0; i < d.length; i++) { + byte mask = d[i]; + acc0 = fma(q[i * Byte.SIZE + 0], (mask >> 7) & 1, acc0); + acc1 = fma(q[i * Byte.SIZE + 1], (mask >> 6) & 1, acc1); + acc2 = fma(q[i * Byte.SIZE + 2], (mask >> 5) & 1, acc2); + acc3 = fma(q[i * Byte.SIZE + 3], (mask >> 4) & 1, acc3); + + acc0 = fma(q[i * Byte.SIZE + 4], (mask >> 3) & 1, acc0); + acc1 = fma(q[i * Byte.SIZE + 5], (mask >> 2) & 1, acc1); + acc2 = fma(q[i * Byte.SIZE + 6], (mask >> 1) & 1, acc2); + acc3 = fma(q[i * Byte.SIZE + 7], (mask >> 0) & 1, acc3); + } + return acc0 + acc1 + acc2 + acc3; + } + public static long ipByteBinByteImpl(byte[] q, byte[] d) { long ret = 0; int size = d.length; diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java index d7611173ca693..6938bffec5f37 100644 --- a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java +++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java @@ -14,4 +14,8 @@ public interface ESVectorUtilSupport { short B_QUERY = 4; long ipByteBinByte(byte[] q, byte[] d); + + int ipByteBit(byte[] q, byte[] d); + + float ipFloatBit(float[] q, byte[] d); } diff --git 
a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java index 0e5827d046736..4de33643258e4 100644 --- a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java +++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java @@ -48,6 +48,16 @@ public long ipByteBinByte(byte[] q, byte[] d) { return DefaultESVectorUtilSupport.ipByteBinByteImpl(q, d); } + @Override + public int ipByteBit(byte[] q, byte[] d) { + return DefaultESVectorUtilSupport.ipByteBitImpl(q, d); + } + + @Override + public float ipFloatBit(float[] q, byte[] d) { + return DefaultESVectorUtilSupport.ipFloatBitImpl(q, d); + } + private static final VectorSpecies BYTE_SPECIES_128 = ByteVector.SPECIES_128; private static final VectorSpecies BYTE_SPECIES_256 = ByteVector.SPECIES_256; From 913e0fbca87d5a77951194460859979f4c890b80 Mon Sep 17 00:00:00 2001 From: Mark Tozzi Date: Mon, 2 Dec 2024 14:08:07 -0500 Subject: [PATCH 108/139] ESQL Date Nanos Addition and Subtraction (#116839) Resolves #109995 This adds support and tests for addition and subtraction of date nanos with periods and durations. It does not include support for date_diff, which is a separate ticket (#109999). The bulk of the PR is testing, the actual date math is all handled by library functions. --------- Co-authored-by: Elastic Machine --- .../esql/functions/kibana/definition/add.json | 72 ++++ .../esql/functions/kibana/definition/sub.json | 72 ++++ .../esql/functions/types/add.asciidoc | 4 + .../esql/functions/types/sub.asciidoc | 4 + .../xpack/esql/core/type/DataType.java | 8 + .../src/main/resources/date_nanos.csv-spec | 401 ++++++++++++++++++ .../arithmetic/AddDateNanosEvaluator.java | 142 +++++++ .../arithmetic/SubDateNanosEvaluator.java | 142 +++++++ .../xpack/esql/action/EsqlCapabilities.java | 4 + .../predicate/operator/arithmetic/Add.java | 34 +- .../DateTimeArithmeticOperation.java | 42 +- .../predicate/operator/arithmetic/Sub.java | 28 +- .../esql/type/EsqlDataTypeConverter.java | 7 +- .../xpack/esql/analysis/AnalyzerTests.java | 6 +- .../xpack/esql/analysis/VerifierTests.java | 4 +- .../expression/function/TestCaseSupplier.java | 95 ++++- .../operator/arithmetic/AddTests.java | 55 ++- .../operator/arithmetic/SubTests.java | 72 +++- .../esql/type/EsqlDataTypeConverterTests.java | 21 +- 19 files changed, 1152 insertions(+), 61 deletions(-) create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddDateNanosEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubDateNanosEvaluator.java diff --git a/docs/reference/esql/functions/kibana/definition/add.json b/docs/reference/esql/functions/kibana/definition/add.json index bd9fbf4d4f9ec..cfb4755a93d59 100644 --- a/docs/reference/esql/functions/kibana/definition/add.json +++ b/docs/reference/esql/functions/kibana/definition/add.json @@ -40,6 +40,42 @@ "variadic" : false, "returnType" : "date" }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_period", + "optional" : false, + "description" : "A numeric value or a date time value." 
+ } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "time_duration", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { @@ -58,6 +94,24 @@ "variadic" : false, "returnType" : "date" }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_period", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { @@ -256,6 +310,24 @@ "variadic" : false, "returnType" : "date" }, + { + "params" : [ + { + "name" : "lhs", + "type" : "time_duration", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/kibana/definition/sub.json b/docs/reference/esql/functions/kibana/definition/sub.json index e10e5a662c8cb..608b5eb1009a7 100644 --- a/docs/reference/esql/functions/kibana/definition/sub.json +++ b/docs/reference/esql/functions/kibana/definition/sub.json @@ -40,6 +40,60 @@ "variadic" : false, "returnType" : "date" }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_period", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "time_duration", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, + { + "params" : [ + { + "name" : "lhs", + "type" : "date_period", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." + } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { @@ -220,6 +274,24 @@ "variadic" : false, "returnType" : "long" }, + { + "params" : [ + { + "name" : "lhs", + "type" : "time_duration", + "optional" : false, + "description" : "A numeric value or a date time value." + }, + { + "name" : "rhs", + "type" : "date_nanos", + "optional" : false, + "description" : "A numeric value or a date time value." 
+ } + ], + "variadic" : false, + "returnType" : "date_nanos" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/types/add.asciidoc b/docs/reference/esql/functions/types/add.asciidoc index 54d1aec463c1a..e47a0d81f27e7 100644 --- a/docs/reference/esql/functions/types/add.asciidoc +++ b/docs/reference/esql/functions/types/add.asciidoc @@ -7,7 +7,10 @@ lhs | rhs | result date | date_period | date date | time_duration | date +date_nanos | date_period | date_nanos +date_nanos | time_duration | date_nanos date_period | date | date +date_period | date_nanos | date_nanos date_period | date_period | date_period double | double | double double | integer | double @@ -19,6 +22,7 @@ long | double | double long | integer | long long | long | long time_duration | date | date +time_duration | date_nanos | date_nanos time_duration | time_duration | time_duration unsigned_long | unsigned_long | unsigned_long |=== diff --git a/docs/reference/esql/functions/types/sub.asciidoc b/docs/reference/esql/functions/types/sub.asciidoc index c3ded301ebe68..dca56026071ee 100644 --- a/docs/reference/esql/functions/types/sub.asciidoc +++ b/docs/reference/esql/functions/types/sub.asciidoc @@ -7,6 +7,9 @@ lhs | rhs | result date | date_period | date date | time_duration | date +date_nanos | date_period | date_nanos +date_nanos | time_duration | date_nanos +date_period | date_nanos | date_nanos date_period | date_period | date_period double | double | double double | integer | double @@ -17,6 +20,7 @@ integer | long | long long | double | double long | integer | long long | long | long +time_duration | date_nanos | date_nanos time_duration | time_duration | time_duration unsigned_long | unsigned_long | unsigned_long |=== diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java index 1c65dd386667f..a63571093ba58 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/type/DataType.java @@ -415,6 +415,14 @@ public static boolean isDateTimeOrTemporal(DataType t) { return isDateTime(t) || isTemporalAmount(t); } + public static boolean isDateTimeOrNanosOrTemporal(DataType t) { + return isDateTime(t) || isTemporalAmount(t) || t == DATE_NANOS; + } + + public static boolean isMillisOrNanos(DataType t) { + return t == DATETIME || t == DATE_NANOS; + } + public static boolean areCompatible(DataType left, DataType right) { if (left == right) { return true; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec index 2ee23382515da..daa45825b93fc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/date_nanos.csv-spec @@ -459,3 +459,404 @@ yr:date_nanos | mo:date_nanos | mn:date_nanos 2023-01-01T00:00:00.000000000Z | 2023-10-01T00:00:00.000000000Z | 2023-10-23T12:10:00.000000000Z | 2023-10-23T12:15:03.360000000Z 2023-01-01T00:00:00.000000000Z | 2023-10-01T00:00:00.000000000Z | 2023-10-23T12:10:00.000000000Z | 2023-10-23T12:15:03.360000000Z ; + +Add date nanos +required_capability: date_nanos_add_subtract + +FROM date_nanos +| WHERE millis > "2020-01-01" +| EVAL mo = nanos + 1 month, hr = nanos + 1 hour, dy = nanos - 4 days, mn = nanos - 2 minutes +| SORT millis DESC 
+| KEEP mo, hr, dy, mn; + +mo:date_nanos | hr:date_nanos | dy:date_nanos | mn:date_nanos +2023-11-23T13:55:01.543123456Z | 2023-10-23T14:55:01.543123456Z | 2023-10-19T13:55:01.543123456Z | 2023-10-23T13:53:01.543123456Z +2023-11-23T13:53:55.832987654Z | 2023-10-23T14:53:55.832987654Z | 2023-10-19T13:53:55.832987654Z | 2023-10-23T13:51:55.832987654Z +2023-11-23T13:52:55.015787878Z | 2023-10-23T14:52:55.015787878Z | 2023-10-19T13:52:55.015787878Z | 2023-10-23T13:50:55.015787878Z +2023-11-23T13:51:54.732102837Z | 2023-10-23T14:51:54.732102837Z | 2023-10-19T13:51:54.732102837Z | 2023-10-23T13:49:54.732102837Z +2023-11-23T13:33:34.937193000Z | 2023-10-23T14:33:34.937193000Z | 2023-10-19T13:33:34.937193000Z | 2023-10-23T13:31:34.937193000Z +2023-11-23T12:27:28.948000000Z | 2023-10-23T13:27:28.948000000Z | 2023-10-19T12:27:28.948000000Z | 2023-10-23T12:25:28.948000000Z +2023-11-23T12:15:03.360103847Z | 2023-10-23T13:15:03.360103847Z | 2023-10-19T12:15:03.360103847Z | 2023-10-23T12:13:03.360103847Z +2023-11-23T12:15:03.360103847Z | 2023-10-23T13:15:03.360103847Z | 2023-10-19T12:15:03.360103847Z | 2023-10-23T12:13:03.360103847Z +; + +datePlusPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.000123456Z") +| eval plus = dt + 4 years + 3 months + 2 weeks + 1 day; + +dt:date_nanos | plus:date_nanos +2100-01-01T01:01:01.000123456Z | 2104-04-16T01:01:01.000123456Z +; + +datePlusPeriodFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = 4 years + 3 months + 2 weeks + 1 day + n | keep then; + +then:date_nanos +2057-07-19T00:00:00.000123456Z +; + +datePlusMixedPeriodsFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-01T00:00:00.000123456Z") +| eval then = 4 years + 3 months + 1 year + 2 weeks + 1 month + 1 day + 1 week + 1 day + n +| keep then; + +then:date_nanos +2058-08-24T00:00:00.000123456Z +; + +datePlusSumOfPeriodsFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = (4 years + 3 months + 2 weeks + 1 day) + n | keep then; + +then:date_nanos +2057-07-19T00:00:00.000123456Z +; + +datePlusNegatedPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2104-04-16T01:01:01.000123456Z") +| eval plus = dt + (-(4 years + 3 months + 2 weeks + 1 day)); + +dt:date_nanos | plus:date_nanos +2104-04-16T01:01:01.000123456Z | 2100-01-01T01:01:01.000123456Z +; + +dateMinusPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2104-04-16T01:01:01.000123456Z") +| eval minus = dt - 4 years - 3 months - 2 weeks - 1 day; + +dt:date_nanos | minus:date_nanos +2104-04-16T01:01:01.000123456Z | 2100-01-01T01:01:01.000123456Z +; + +dateMinusPeriodFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2057-07-19T00:00:00.000123456Z") | eval then = -4 years - 3 months - 2 weeks - 1 day + n | keep then; + +then:date_nanos +2053-04-04T00:00:00.000123456Z +; + +dateMinusSumOfNegativePeriods +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = n - (-4 years - 3 months - 2 weeks - 1 day)| keep then; + +then:date_nanos +2057-07-19T00:00:00.000123456Z +; + +dateMinusPeriodsFromLeftMultipleEvals +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") +| eval x = -4 years + n +| eval y = -3 months + x, then = y + (-2 
weeks - 1 day) +| keep then; + +then:date_nanos +2048-12-20T00:00:00.000123456Z +; + +datePlusDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T00:00:00.000123456Z") +| eval plus = dt + 1 hour + 1 minute + 1 second + 1 milliseconds; + +dt:date_nanos | plus:date_nanos +2100-01-01T00:00:00.000123456Z | 2100-01-01T01:01:01.001123456Z +; + +datePlusDurationFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = 1 hour + 1 minute + 1 second + 1 milliseconds + n | keep then; + +then:date_nanos +2053-04-04T01:01:01.001123456Z +; + +datePlusMixedDurationsFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") +| eval then = 1 hour + 1 minute + 2 hour + 1 second + 2 minute + 1 milliseconds + 2 second + 2 millisecond + n +| keep then; + +then:date_nanos +2053-04-04T03:03:03.003123456Z +; + +datePlusSumOfDurationsFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = (1 hour + 1 minute + 1 second + 1 milliseconds) + n | keep then; + +then:date_nanos +2053-04-04T01:01:01.001123456Z +; + +datePlusNegatedDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval plus = dt + (-(1 hour + 1 minute + 1 second + 1 milliseconds)); + +dt:date_nanos | plus:date_nanos +2100-01-01T01:01:01.001123456Z | 2100-01-01T00:00:00.000123456Z +; + +datePlusNull +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval plus_post = dt + null, plus_pre = null + dt; + +dt:date_nanos | plus_post:date_nanos | plus_pre:date_nanos +2100-01-01T01:01:01.001123456Z | null | null +; + +datePlusNullAndDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval plus_post = dt + null + 1 hour, plus_pre = 1 second + null + dt; + +dt:date_nanos | plus_post:date_nanos | plus_pre:date_nanos +2100-01-01T01:01:01.001123456Z | null | null +; + +datePlusNullAndPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval plus_post = dt + null + 2 years, plus_pre = 3 weeks + null + dt; + +dt:date_nanos | plus_post:date_nanos | plus_pre:date_nanos +2100-01-01T01:01:01.001123456Z | null | null +; + +datePlusQuarter +required_capability: date_nanos_add_subtract + +required_capability: timespan_abbreviations +row dt = to_date_nanos("2100-01-01T01:01:01.000123456Z") +| eval plusQuarter = dt + 2 quarters +; + +dt:date_nanos | plusQuarter:date_nanos +2100-01-01T01:01:01.000123456Z | 2100-07-01T01:01:01.000123456Z +; + +datePlusAbbreviatedDurations +required_capability: timespan_abbreviations +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T00:00:00.000123456Z") +| eval plusDurations = dt + 1 h + 2 min + 2 sec + 1 s + 4 ms +; + +dt:date_nanos | plusDurations:date_nanos +2100-01-01T00:00:00.000123456Z | 2100-01-01T01:02:03.004123456Z +; + +datePlusAbbreviatedPeriods +required_capability: timespan_abbreviations +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T00:00:00.000123456Z") +| eval plusDurations = dt + 0 yr + 1y + 2 q + 3 mo + 4 w + 3 d +; + +dt:date_nanos | plusDurations:date_nanos +2100-01-01T00:00:00.000123456Z | 2101-11-01T00:00:00.000123456Z +; + + +dateMinusDuration +required_capability: 
date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval minus = dt - 1 hour - 1 minute - 1 second - 1 milliseconds; + +dt:date_nanos | minus:date_nanos +2100-01-01T01:01:01.001123456Z | 2100-01-01T00:00:00.000123456Z +; + +dateMinusDurationFromLeft +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T01:01:01.001123456Z") | eval then = -1 hour - 1 minute - 1 second - 1 milliseconds + n | keep then; + +then:date_nanos +2053-04-04T00:00:00.000123456Z +; + +dateMinusSumOfNegativeDurations +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T00:00:00.000123456Z") | eval then = n - (-1 hour - 1 minute - 1 second - 1 milliseconds) | keep then; + +then:date_nanos +2053-04-04T01:01:01.001123456Z +; + +dateMinusDurationsFromLeftMultipleEvals +required_capability: date_nanos_add_subtract + +row n = to_date_nanos("2053-04-04T04:03:02.001123456Z") +| eval x = -4 hour + n +| eval y = -3 minute + x, then = y + (-2 second - 1 millisecond) +| keep then +; + +then:date_nanos +2053-04-04T00:00:00.000123456Z +; + +dateMinusNull +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2053-04-04T04:03:02.001123456Z") +| eval minus = dt - null +; + +dt:date_nanos | minus:date_nanos +2053-04-04T04:03:02.001123456Z | null +; + +dateMinusNullAndPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2053-04-04T04:03:02.001123456Z") +| eval minus = dt - null - 4 minutes +; + +dt:date_nanos | minus:date_nanos +2053-04-04T04:03:02.001123456Z | null +; + +dateMinusNullAndDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2053-04-04T04:03:02.001123456Z") +| eval minus = dt - 6 days - null +; + +dt:date_nanos | minus:date_nanos +2053-04-04T04:03:02.001123456Z | null +; + +datePlusPeriodAndDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T00:00:00.000123456Z") +| eval plus = dt + 4 years + 3 months + 2 weeks + 1 day + 1 hour + 1 minute + 1 second + 1 milliseconds; + +dt:date_nanos | plus:date_nanos +2100-01-01T00:00:00.000123456Z | 2104-04-16T01:01:01.001123456Z +; + +dateMinusPeriodAndDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2104-04-16T01:01:01.001123456Z") +| eval minus = dt - 4 years - 3 months - 2 weeks - 1 day - 1 hour - 1 minute - 1 second - 1 milliseconds; + +dt:date_nanos |minus:date_nanos +2104-04-16T01:01:01.001123456Z |2100-01-01T00:00:00.000123456Z +; + +datePlusPeriodMinusDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2100-01-01T01:01:01.001123456Z") +| eval plus = dt + 4 years + 3 months + 2 weeks + 1 day - 1 hour - 1 minute - 1 second - 1 milliseconds; + +dt:date_nanos | plus:date_nanos +2100-01-01T01:01:01.001123456Z | 2104-04-16T00:00:00.000123456Z +; + +datePlusDurationMinusPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos("2104-04-16T00:00:00.000123456Z") +| eval plus = dt - 4 years - 3 months - 2 weeks - 1 day + 1 hour + 1 minute + 1 second + 1 milliseconds; + +dt:date_nanos | plus:date_nanos +2104-04-16T00:00:00.000123456Z | 2100-01-01T01:01:01.001123456Z +; + +dateMathArithmeticOverflow from addition +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos(9223372036854775807) +| eval plus = dt + 1 day +| keep plus; + +warning:Line 2:15: evaluation of [dt + 1 day] failed, treating result as null. Only first 20 failures recorded. 
+warning:Line 2:15: java.time.DateTimeException: Date nanos out of range. Must be between 1970-01-01T00:00:00Z and 2262-04-11T23:47:16.854775807 +plus:date_nanos +null +; + +date nanos subtraction before 1970 +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos(0::long) +| eval minus = dt - 1 day +| keep minus; + +warning:Line 2:16: evaluation of [dt - 1 day] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:16: java.time.DateTimeException: Date nanos out of range. Must be between 1970-01-01T00:00:00Z and 2262-04-11T23:47:16.854775807 +minus:date_nanos +null +; + +dateMathDateException +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos(0::long) +| eval plus = dt + 2147483647 years +| keep plus; + +warning:Line 2:15: evaluation of [dt + 2147483647 years] failed, treating result as null. Only first 20 failures recorded. +warning:Line 2:15: java.time.DateTimeException: Invalid value for Year (valid values -999999999 - 999999999): 2147485617 + +plus:date_nanos +null +; + +dateMathNegatedPeriod +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos(0::long) +| eval plus = -(-1 year) + dt +| keep plus; + +plus:date_nanos +1971-01-01T00:00:00.000Z +; + +dateMathNegatedDuration +required_capability: date_nanos_add_subtract + +row dt = to_date_nanos(0::long) +| eval plus = -(-1 second) + dt +| keep plus; + +plus:date_nanos +1970-01-01T00:00:01.000Z +; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddDateNanosEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddDateNanosEvaluator.java new file mode 100644 index 0000000000000..fe80536ea5d0d --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddDateNanosEvaluator.java @@ -0,0 +1,142 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; + +import java.lang.ArithmeticException; +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.time.DateTimeException; +import java.time.temporal.TemporalAmount; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Add}. + * This class is generated. Do not edit it. 
+ */ +public final class AddDateNanosEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final EvalOperator.ExpressionEvaluator dateNanos; + + private final TemporalAmount temporalAmount; + + private final DriverContext driverContext; + + private Warnings warnings; + + public AddDateNanosEvaluator(Source source, EvalOperator.ExpressionEvaluator dateNanos, + TemporalAmount temporalAmount, DriverContext driverContext) { + this.source = source; + this.dateNanos = dateNanos; + this.temporalAmount = temporalAmount; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (LongBlock dateNanosBlock = (LongBlock) dateNanos.eval(page)) { + LongVector dateNanosVector = dateNanosBlock.asVector(); + if (dateNanosVector == null) { + return eval(page.getPositionCount(), dateNanosBlock); + } + return eval(page.getPositionCount(), dateNanosVector); + } + } + + public LongBlock eval(int positionCount, LongBlock dateNanosBlock) { + try(LongBlock.Builder result = driverContext.blockFactory().newLongBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + if (dateNanosBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (dateNanosBlock.getValueCount(p) != 1) { + if (dateNanosBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + try { + result.appendLong(Add.processDateNanos(dateNanosBlock.getLong(dateNanosBlock.getFirstValueIndex(p)), this.temporalAmount)); + } catch (ArithmeticException | DateTimeException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + public LongBlock eval(int positionCount, LongVector dateNanosVector) { + try(LongBlock.Builder result = driverContext.blockFactory().newLongBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + try { + result.appendLong(Add.processDateNanos(dateNanosVector.getLong(p), this.temporalAmount)); + } catch (ArithmeticException | DateTimeException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + @Override + public String toString() { + return "AddDateNanosEvaluator[" + "dateNanos=" + dateNanos + ", temporalAmount=" + temporalAmount + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(dateNanos); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory dateNanos; + + private final TemporalAmount temporalAmount; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory dateNanos, + TemporalAmount temporalAmount) { + this.source = source; + this.dateNanos = dateNanos; + this.temporalAmount = temporalAmount; + } + + @Override + public AddDateNanosEvaluator get(DriverContext context) { + return new AddDateNanosEvaluator(source, dateNanos.get(context), temporalAmount, context); + } + + @Override + public String toString() { + return "AddDateNanosEvaluator[" + "dateNanos=" + dateNanos + ", temporalAmount=" + temporalAmount + "]"; + } + } +} 
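The generated evaluator above delegates the per-row date math to Add.processDateNanos, which converts the nanosecond timestamp to an Instant, applies the TemporalAmount on a UTC ZonedDateTime, and converts the result back to a long. The round trip through ZonedDateTime matters because java.time.Instant cannot resolve calendar-based units: adding a Period containing months or years directly to an Instant throws UnsupportedTemporalTypeException, while ZonedDateTime handles those units. A minimal standalone sketch of the difference (illustrative only, not part of the patch; it uses java.time.ZoneOffset.UTC where the production code uses the UTC constant from the ES|QL core DateUtils):

    import java.time.Instant;
    import java.time.Period;
    import java.time.ZoneOffset;
    import java.time.ZonedDateTime;
    import java.time.temporal.UnsupportedTemporalTypeException;

    public class InstantVsZonedDateTimePlus {
        public static void main(String[] args) {
            Instant instant = Instant.parse("2100-01-01T00:00:00Z");
            // Calendar-based arithmetic works once the instant carries a time zone:
            ZonedDateTime inUtc = ZonedDateTime.ofInstant(instant, ZoneOffset.UTC);
            System.out.println(inUtc.plus(Period.ofMonths(1)).toInstant()); // 2100-02-01T00:00:00Z
            // The same Period applied to the bare Instant fails, because an Instant
            // has no calendar against which to resolve a month:
            try {
                instant.plus(Period.ofMonths(1));
            } catch (UnsupportedTemporalTypeException e) {
                System.out.println(e.getMessage()); // Unsupported unit: Months
            }
        }
    }

SubDateNanosEvaluator below takes the same route for subtraction via Sub.processDateNanos.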
diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubDateNanosEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubDateNanosEvaluator.java new file mode 100644 index 0000000000000..3b6f4c1046d40 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubDateNanosEvaluator.java @@ -0,0 +1,142 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic; + +import java.lang.ArithmeticException; +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import java.time.DateTimeException; +import java.time.temporal.TemporalAmount; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.LongBlock; +import org.elasticsearch.compute.data.LongVector; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link Sub}. + * This class is generated. Do not edit it. + */ +public final class SubDateNanosEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final EvalOperator.ExpressionEvaluator dateNanos; + + private final TemporalAmount temporalAmount; + + private final DriverContext driverContext; + + private Warnings warnings; + + public SubDateNanosEvaluator(Source source, EvalOperator.ExpressionEvaluator dateNanos, + TemporalAmount temporalAmount, DriverContext driverContext) { + this.source = source; + this.dateNanos = dateNanos; + this.temporalAmount = temporalAmount; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (LongBlock dateNanosBlock = (LongBlock) dateNanos.eval(page)) { + LongVector dateNanosVector = dateNanosBlock.asVector(); + if (dateNanosVector == null) { + return eval(page.getPositionCount(), dateNanosBlock); + } + return eval(page.getPositionCount(), dateNanosVector); + } + } + + public LongBlock eval(int positionCount, LongBlock dateNanosBlock) { + try(LongBlock.Builder result = driverContext.blockFactory().newLongBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + if (dateNanosBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (dateNanosBlock.getValueCount(p) != 1) { + if (dateNanosBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + try { + result.appendLong(Sub.processDateNanos(dateNanosBlock.getLong(dateNanosBlock.getFirstValueIndex(p)), this.temporalAmount)); + } catch (ArithmeticException | DateTimeException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + public LongBlock eval(int positionCount, LongVector dateNanosVector) { + try(LongBlock.Builder result = 
driverContext.blockFactory().newLongBlockBuilder(positionCount)) { + position: for (int p = 0; p < positionCount; p++) { + try { + result.appendLong(Sub.processDateNanos(dateNanosVector.getLong(p), this.temporalAmount)); + } catch (ArithmeticException | DateTimeException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + @Override + public String toString() { + return "SubDateNanosEvaluator[" + "dateNanos=" + dateNanos + ", temporalAmount=" + temporalAmount + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(dateNanos); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory dateNanos; + + private final TemporalAmount temporalAmount; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory dateNanos, + TemporalAmount temporalAmount) { + this.source = source; + this.dateNanos = dateNanos; + this.temporalAmount = temporalAmount; + } + + @Override + public SubDateNanosEvaluator get(DriverContext context) { + return new SubDateNanosEvaluator(source, dateNanos.get(context), temporalAmount, context); + } + + @Override + public String toString() { + return "SubDateNanosEvaluator[" + "dateNanos=" + dateNanos + ", temporalAmount=" + temporalAmount + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index dc3329a906741..a93590d7a5bc2 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -345,6 +345,10 @@ public enum Cap { */ LEAST_GREATEST_FOR_DATENANOS(), + /** + * Support add and subtract on date nanos + */ + DATE_NANOS_ADD_SUBTRACT(), /** * Support for date_trunc function on date nanos type */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java index 8f8d885ee379b..9d34410e8a164 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Add.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -21,7 +22,9 @@ import java.io.IOException; import java.time.DateTimeException; import java.time.Duration; +import java.time.Instant; import java.time.Period; +import java.time.ZonedDateTime; import java.time.temporal.TemporalAmount; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -33,7 +36,7 @@ public class Add extends DateTimeArithmeticOperation 
implements BinaryComparison public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(Expression.class, "Add", Add::new); @FunctionInfo( - returnType = { "double", "integer", "long", "date_period", "datetime", "time_duration", "unsigned_long" }, + returnType = { "double", "integer", "long", "date_nanos", "date_period", "datetime", "time_duration", "unsigned_long" }, description = "Add two numbers together. " + "If either field is <> then the result is `null`." ) public Add( @@ -41,12 +44,12 @@ public Add( @Param( name = "lhs", description = "A numeric value or a date time value.", - type = { "double", "integer", "long", "date_period", "datetime", "time_duration", "unsigned_long" } + type = { "double", "integer", "long", "date_nanos", "date_period", "datetime", "time_duration", "unsigned_long" } ) Expression left, @Param( name = "rhs", description = "A numeric value or a date time value.", - type = { "double", "integer", "long", "date_period", "datetime", "time_duration", "unsigned_long" } + type = { "double", "integer", "long", "date_nanos", "date_period", "datetime", "time_duration", "unsigned_long" } ) Expression right ) { super( @@ -58,7 +61,8 @@ public Add( AddLongsEvaluator.Factory::new, AddUnsignedLongsEvaluator.Factory::new, AddDoublesEvaluator.Factory::new, - AddDatetimesEvaluator.Factory::new + AddDatetimesEvaluator.Factory::new, + AddDateNanosEvaluator.Factory::new ); } @@ -70,7 +74,8 @@ private Add(StreamInput in) throws IOException { AddLongsEvaluator.Factory::new, AddUnsignedLongsEvaluator.Factory::new, AddDoublesEvaluator.Factory::new, - AddDatetimesEvaluator.Factory::new + AddDatetimesEvaluator.Factory::new, + AddDateNanosEvaluator.Factory::new ); } @@ -130,6 +135,25 @@ static long processDatetimes(long datetime, @Fixed TemporalAmount temporalAmount return asMillis(asDateTime(datetime).plus(temporalAmount)); } + @Evaluator(extraName = "DateNanos", warnExceptions = { ArithmeticException.class, DateTimeException.class }) + static long processDateNanos(long dateNanos, @Fixed TemporalAmount temporalAmount) { + // Instant.plus behaves differently from ZonedDateTime.plus, but DateUtils generally works with instants. + try { + return DateUtils.toLong( + Instant.from( + ZonedDateTime.ofInstant(DateUtils.toInstant(dateNanos), org.elasticsearch.xpack.esql.core.util.DateUtils.UTC) + .plus(temporalAmount) + ) + ); + } catch (IllegalArgumentException e) { + /* + toLong will throw IllegalArgumentException for out of range dates, but that includes the actual value which we want + to avoid returning here. + */ + throw new DateTimeException("Date nanos out of range. 
Must be between 1970-01-01T00:00:00Z and 2262-04-11T23:47:16.854775807"); + } + } + @Override public Period fold(Period left, Period right) { return left.plus(right); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/DateTimeArithmeticOperation.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/DateTimeArithmeticOperation.java index d407dd8bf7de1..8bb166fac60bb 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/DateTimeArithmeticOperation.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/DateTimeArithmeticOperation.java @@ -22,10 +22,11 @@ import java.util.Collection; import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME; +import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_NANOS; import static org.elasticsearch.xpack.esql.core.type.DataType.DATE_PERIOD; import static org.elasticsearch.xpack.esql.core.type.DataType.TIME_DURATION; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrNanosOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isMillisOrNanos; import static org.elasticsearch.xpack.esql.core.type.DataType.isNull; import static org.elasticsearch.xpack.esql.core.type.DataType.isTemporalAmount; @@ -35,7 +36,8 @@ interface DatetimeArithmeticEvaluator { ExpressionEvaluator.Factory apply(Source source, ExpressionEvaluator.Factory expressionEvaluator, TemporalAmount temporalAmount); } - private final DatetimeArithmeticEvaluator datetimes; + private final DatetimeArithmeticEvaluator millisEvaluator; + private final DatetimeArithmeticEvaluator nanosEvaluator; DateTimeArithmeticOperation( Source source, @@ -46,10 +48,12 @@ interface DatetimeArithmeticEvaluator { BinaryEvaluator longs, BinaryEvaluator ulongs, BinaryEvaluator doubles, - DatetimeArithmeticEvaluator datetimes + DatetimeArithmeticEvaluator millisEvaluator, + DatetimeArithmeticEvaluator nanosEvaluator ) { super(source, left, right, op, ints, longs, ulongs, doubles); - this.datetimes = datetimes; + this.millisEvaluator = millisEvaluator; + this.nanosEvaluator = nanosEvaluator; } DateTimeArithmeticOperation( @@ -59,19 +63,22 @@ interface DatetimeArithmeticEvaluator { BinaryEvaluator longs, BinaryEvaluator ulongs, BinaryEvaluator doubles, - DatetimeArithmeticEvaluator datetimes + DatetimeArithmeticEvaluator millisEvaluator, + DatetimeArithmeticEvaluator nanosEvaluator ) throws IOException { super(in, op, ints, longs, ulongs, doubles); - this.datetimes = datetimes; + this.millisEvaluator = millisEvaluator; + this.nanosEvaluator = nanosEvaluator; } @Override protected TypeResolution resolveInputType(Expression e, TypeResolutions.ParamOrdinal paramOrdinal) { return TypeResolutions.isType( e, - t -> t.isNumeric() || DataType.isDateTimeOrTemporal(t) || DataType.isNull(t), + t -> t.isNumeric() || DataType.isDateTimeOrNanosOrTemporal(t) || DataType.isNull(t), sourceText(), paramOrdinal, + "date_nanos", "datetime", "numeric" ); @@ -86,11 +93,11 @@ protected TypeResolution checkCompatibility() { // - one argument is a DATETIME and the other a (foldable) TemporalValue, or // - both arguments are TemporalValues (so we can fold them), or // - one argument is 
NULL and the other one a DATETIME. - if (isDateTimeOrTemporal(leftType) || isDateTimeOrTemporal(rightType)) { + if (isDateTimeOrNanosOrTemporal(leftType) || isDateTimeOrNanosOrTemporal(rightType)) { if (isNull(leftType) || isNull(rightType)) { return TypeResolution.TYPE_RESOLVED; } - if ((isDateTime(leftType) && isTemporalAmount(rightType)) || (isTemporalAmount(leftType) && isDateTime(rightType))) { + if ((isMillisOrNanos(leftType) && isTemporalAmount(rightType)) || (isTemporalAmount(leftType) && isMillisOrNanos(rightType))) { return TypeResolution.TYPE_RESOLVED; } if (isTemporalAmount(leftType) && isTemporalAmount(rightType) && leftType == rightType) { @@ -171,7 +178,20 @@ public ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { temporalAmountArgument = left(); } - return datetimes.apply(source(), toEvaluator.apply(datetimeArgument), (TemporalAmount) temporalAmountArgument.fold()); + return millisEvaluator.apply(source(), toEvaluator.apply(datetimeArgument), (TemporalAmount) temporalAmountArgument.fold()); + } else if (dataType() == DATE_NANOS) { + // One of the arguments has to be a date_nanos and the other a temporal amount. + Expression dateNanosArgument; + Expression temporalAmountArgument; + if (left().dataType() == DATE_NANOS) { + dateNanosArgument = left(); + temporalAmountArgument = right(); + } else { + dateNanosArgument = right(); + temporalAmountArgument = left(); + } + + return nanosEvaluator.apply(source(), toEvaluator.apply(dateNanosArgument), (TemporalAmount) temporalAmountArgument.fold()); } else { return super.toEvaluator(toEvaluator); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java index 27f5579129cc9..e072619e67728 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/Sub.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.compute.ann.Evaluator; import org.elasticsearch.compute.ann.Fixed; import org.elasticsearch.xpack.esql.core.expression.Expression; @@ -22,7 +23,9 @@ import java.io.IOException; import java.time.DateTimeException; import java.time.Duration; +import java.time.Instant; import java.time.Period; +import java.time.ZonedDateTime; import java.time.temporal.TemporalAmount; import static org.elasticsearch.common.logging.LoggerMessageFormat.format; @@ -61,7 +64,8 @@ public Sub( SubLongsEvaluator.Factory::new, SubUnsignedLongsEvaluator.Factory::new, SubDoublesEvaluator.Factory::new, - SubDatetimesEvaluator.Factory::new + SubDatetimesEvaluator.Factory::new, + SubDateNanosEvaluator.Factory::new ); } @@ -73,7 +77,8 @@ private Sub(StreamInput in) throws IOException { SubLongsEvaluator.Factory::new, SubUnsignedLongsEvaluator.Factory::new, SubDoublesEvaluator.Factory::new, - SubDatetimesEvaluator.Factory::new + SubDatetimesEvaluator.Factory::new, + SubDateNanosEvaluator.Factory::new ); } @@ -143,6 +148,25 @@ static long processDatetimes(long datetime, @Fixed TemporalAmount temporalAmount return asMillis(asDateTime(datetime).minus(temporalAmount)); } + @Evaluator(extraName = "DateNanos", warnExceptions = { 
ArithmeticException.class, DateTimeException.class }) + static long processDateNanos(long dateNanos, @Fixed TemporalAmount temporalAmount) { + // Instant.plus behaves differently from ZonedDateTime.plus, but DateUtils generally works with instants. + try { + return DateUtils.toLong( + Instant.from( + ZonedDateTime.ofInstant(DateUtils.toInstant(dateNanos), org.elasticsearch.xpack.esql.core.util.DateUtils.UTC) + .minus(temporalAmount) + ) + ); + } catch (IllegalArgumentException e) { + /* + toLong will throw IllegalArgumentException for out of range dates, but that includes the actual value which we want + to avoid returning here. + */ + throw new DateTimeException("Date nanos out of range. Must be between 1970-01-01T00:00:00Z and 2262-04-11T23:47:16.854775807"); + } + } + @Override public Period fold(Period left, Period right) { return left.minus(right); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java index 4bfc9ac5d848f..6ba2d8451f956 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverter.java @@ -78,7 +78,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSIGNED_LONG; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrNanosOrTemporal; import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrDatePeriod; import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTemporalAmount; import static org.elasticsearch.xpack.esql.core.type.DataType.isNullOrTimeDuration; @@ -378,10 +378,13 @@ public static DataType commonType(DataType left, DataType right) { if (right == NULL) { return left; } - if (isDateTimeOrTemporal(left) || isDateTimeOrTemporal(right)) { + if (isDateTimeOrNanosOrTemporal(left) || isDateTimeOrNanosOrTemporal(right)) { if ((isDateTime(left) && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && isDateTime(right))) { return DATETIME; } + if ((left == DATE_NANOS && isNullOrTemporalAmount(right)) || (isNullOrTemporalAmount(left) && right == DATE_NANOS)) { + return DATE_NANOS; + } if (isNullOrTimeDuration(left) && isNullOrTimeDuration(right)) { return TIME_DURATION; } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index e0ebc92afa95d..5a1e109041a16 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -2009,14 +2009,14 @@ public void testImplicitCasting() { assertThat( e.getMessage(), - containsString("first argument of [concat(\"2024\", \"-04\", \"-01\") + 1 day] must be [datetime or numeric]") + containsString("first argument of [concat(\"2024\", \"-04\", \"-01\") + 1 day] must be [date_nanos, datetime or numeric]") ); e = expectThrows(VerificationException.class, () -> analyze(""" from test | eval x = to_string(null) - 1 day """)); - assertThat(e.getMessage(), containsString("first argument 
of [to_string(null) - 1 day] must be [datetime or numeric]")); + assertThat(e.getMessage(), containsString("first argument of [to_string(null) - 1 day] must be [date_nanos, datetime or numeric]")); e = expectThrows(VerificationException.class, () -> analyze(""" from test | eval x = concat("2024", "-04", "-01") + "1 day" @@ -2024,7 +2024,7 @@ public void testImplicitCasting() { assertThat( e.getMessage(), - containsString("first argument of [concat(\"2024\", \"-04\", \"-01\") + \"1 day\"] must be [datetime or numeric]") + containsString("first argument of [concat(\"2024\", \"-04\", \"-01\") + \"1 day\"] must be [date_nanos, datetime or numeric]") ); e = expectThrows(VerificationException.class, () -> analyze(""" diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d4fca2a0a2540..d02e78202e0c2 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -56,11 +56,11 @@ public class VerifierTests extends ESTestCase { public void testIncompatibleTypesInMathOperation() { assertEquals( - "1:40: second argument of [a + c] must be [datetime or numeric], found value [c] type [keyword]", + "1:40: second argument of [a + c] must be [date_nanos, datetime or numeric], found value [c] type [keyword]", error("row a = 1, b = 2, c = \"xxx\" | eval y = a + c") ); assertEquals( - "1:40: second argument of [a - c] must be [datetime or numeric], found value [c] type [keyword]", + "1:40: second argument of [a - c] must be [date_nanos, datetime or numeric], found value [c] type [keyword]", error("row a = 1, b = 2, c = \"xxx\" | eval y = a - c") ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java index d78dfd3141a04..816c9ef6f352c 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/TestCaseSupplier.java @@ -1113,31 +1113,83 @@ public static List dateCases(long min, long max) { * */ public static List dateNanosCases() { - return List.of( - new TypedDataSupplier("<1970-01-01T00:00:00.000000000Z>", () -> 0L, DataType.DATE_NANOS), - new TypedDataSupplier("", () -> ESTestCase.randomLongBetween(0, 10 * (long) 10e11), DataType.DATE_NANOS), - new TypedDataSupplier( - "", - () -> ESTestCase.randomLongBetween(10 * (long) 10e11, Long.MAX_VALUE), - DataType.DATE_NANOS - ), - new TypedDataSupplier( - "", - () -> ESTestCase.randomLongBetween(Long.MAX_VALUE / 100 * 99, Long.MAX_VALUE), - DataType.DATE_NANOS - ) - ); + return dateNanosCases(Instant.EPOCH, DateUtils.MAX_NANOSECOND_INSTANT); + } + + /** + * Generate cases for {@link DataType#DATE_NANOS}. 
+     *
+     */
+    public static List dateNanosCases(Instant minValue, Instant maxValue) {
+        // maximum nanosecond date in ES is 2262-04-11T23:47:16.854775807Z
+        Instant twentyOneHundred = Instant.parse("2100-01-01T00:00:00Z");
+        Instant twentyTwoHundred = Instant.parse("2200-01-01T00:00:00Z");
+        Instant twentyTwoFifty = Instant.parse("2250-01-01T00:00:00Z");
+
+        List cases = new ArrayList<>();
+        if (minValue.isAfter(Instant.EPOCH) == false) {
+            cases.add(
+                new TypedDataSupplier("<1970-01-01T00:00:00.000000000Z>", () -> DateUtils.toLong(Instant.EPOCH), DataType.DATE_NANOS)
+            );
+        }
+
+        Instant lower = Instant.EPOCH.isBefore(minValue) ? minValue : Instant.EPOCH;
+        Instant upper = twentyOneHundred.isAfter(maxValue) ? maxValue : twentyOneHundred;
+        if (upper.isAfter(lower)) {
+            cases.add(
+                new TypedDataSupplier(
+                    "<21st century date nanos>",
+                    () -> DateUtils.toLong(ESTestCase.randomInstantBetween(lower, upper)),
+                    DataType.DATE_NANOS
+                )
+            );
+        }
+
+        Instant lower2 = twentyOneHundred.isBefore(minValue) ? minValue : twentyOneHundred;
+        Instant upper2 = twentyTwoHundred.isAfter(maxValue) ? maxValue : twentyTwoHundred;
+        if (upper2.isAfter(lower2)) {
+            cases.add(
+                new TypedDataSupplier(
+                    "<22nd century date nanos>",
+                    () -> DateUtils.toLong(ESTestCase.randomInstantBetween(lower2, upper2)),
+                    DataType.DATE_NANOS
+                )
+            );
+        }
+
+        Instant lower3 = twentyTwoHundred.isBefore(minValue) ? minValue : twentyTwoHundred;
+        Instant upper3 = twentyTwoFifty.isAfter(maxValue) ? maxValue : twentyTwoFifty;
+        if (upper3.isAfter(lower3)) {
+            cases.add(
+                new TypedDataSupplier(
+                    "<23rd century date nanos>",
+                    () -> DateUtils.toLong(ESTestCase.randomInstantBetween(lower3, upper3)),
+                    DataType.DATE_NANOS
+                )
+            );
+        }
+        return cases;
     }
 
     public static List datePeriodCases() {
+        return datePeriodCases(-1000, -13, -32, 1000, 13, 32);
+    }
+
+    public static List datePeriodCases(int yearMin, int monthMin, int dayMin, int yearMax, int monthMax, int dayMax) {
+        final int yMin = Math.max(yearMin, -1000);
+        final int mMin = Math.max(monthMin, -13);
+        final int dMin = Math.max(dayMin, -32);
+        final int yMax = Math.min(yearMax, 1000);
+        final int mMax = Math.min(monthMax, 13);
+        final int dMax = Math.min(dayMax, 32);
         return List.of(
             new TypedDataSupplier("", () -> Period.ZERO, DataType.DATE_PERIOD, true),
             new TypedDataSupplier(
                 "",
                 () -> Period.of(
-                    ESTestCase.randomIntBetween(-1000, 1000),
-                    ESTestCase.randomIntBetween(-13, 13),
-                    ESTestCase.randomIntBetween(-32, 32)
+                    ESTestCase.randomIntBetween(yMin, yMax),
+                    ESTestCase.randomIntBetween(mMin, mMax),
+                    ESTestCase.randomIntBetween(dMin, dMax)
                 ),
                 DataType.DATE_PERIOD,
                 true
@@ -1146,11 +1198,18 @@ public static List datePeriodCases() {
     }
 
     public static List timeDurationCases() {
+        return timeDurationCases(-604800000, 604800000);
+    }
+
+    public static List timeDurationCases(long minValue, long maxValue) {
+        // plus/minus 7 days by default, clamped to the caller-supplied limits
+        final long min = Math.max(minValue, -604800000L);
+        final long max = Math.min(maxValue, 604800000L);
         return List.of(
             new TypedDataSupplier("", () -> Duration.ZERO, DataType.TIME_DURATION, true),
             new TypedDataSupplier(
                 "",
-                () -> Duration.ofMillis(ESTestCase.randomLongBetween(-604800000L, 604800000L)), // plus/minus 7 days
+                () -> Duration.ofMillis(ESTestCase.randomLongBetween(min, max)),
                 DataType.TIME_DURATION,
                 true
             )
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddTests.java
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddTests.java index 8c31b4a65dd14..abfb634d5f301 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/AddTests.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; @@ -18,7 +19,9 @@ import java.math.BigInteger; import java.time.Duration; +import java.time.Instant; import java.time.Period; +import java.time.ZonedDateTime; import java.time.temporal.TemporalAmount; import java.util.ArrayList; import java.util.List; @@ -26,6 +29,7 @@ import java.util.function.BiFunction; import java.util.function.BinaryOperator; import java.util.function.Supplier; +import java.util.function.ToLongBiFunction; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asMillis; @@ -148,14 +152,14 @@ public static Iterable parameters() { BinaryOperator result = (lhs, rhs) -> { try { - return addDatesAndTemporalAmount(lhs, rhs); + return addDatesAndTemporalAmount(lhs, rhs, AddTests::addMillis); } catch (ArithmeticException e) { return null; } }; BiFunction> warnings = (lhs, rhs) -> { try { - addDatesAndTemporalAmount(lhs.data(), rhs.data()); + addDatesAndTemporalAmount(lhs.data(), rhs.data(), AddTests::addMillis); return List.of(); } catch (ArithmeticException e) { return List.of( @@ -186,6 +190,37 @@ public static Iterable parameters() { true ) ); + + BinaryOperator nanosResult = (lhs, rhs) -> { + try { + return addDatesAndTemporalAmount(lhs, rhs, AddTests::addNanos); + } catch (ArithmeticException e) { + return null; + } + }; + suppliers.addAll( + TestCaseSupplier.forBinaryNotCasting( + nanosResult, + DataType.DATE_NANOS, + TestCaseSupplier.dateNanosCases(), + TestCaseSupplier.datePeriodCases(0, 0, 0, 10, 13, 32), + startsWith("AddDateNanosEvaluator[dateNanos=Attribute[channel=0], temporalAmount="), + warnings, + true + ) + ); + suppliers.addAll( + TestCaseSupplier.forBinaryNotCasting( + nanosResult, + DataType.DATE_NANOS, + TestCaseSupplier.dateNanosCases(), + TestCaseSupplier.timeDurationCases(0, 604800000L), + startsWith("AddDateNanosEvaluator[dateNanos=Attribute[channel=0], temporalAmount="), + warnings, + true + ) + ); + suppliers.addAll(TestCaseSupplier.dateCases().stream().mapMulti((tds, consumer) -> { consumer.accept( new TestCaseSupplier( @@ -284,7 +319,7 @@ public static Iterable parameters() { private static String addErrorMessageString(boolean includeOrdinal, List> validPerPosition, List types) { try { - return typeErrorMessage(includeOrdinal, validPerPosition, types, (a, b) -> "datetime or numeric"); + return typeErrorMessage(includeOrdinal, validPerPosition, types, (a, b) -> "date_nanos, datetime or numeric"); } catch (IllegalStateException e) { // This means all the positional args were okay, so the expected error is from the combination return "[+] has arguments with incompatible types [" + types.get(0).typeName() + "] and [" + types.get(1).typeName() + "]"; @@ -292,7 +327,7 @@ private static String 
addErrorMessageString(boolean includeOrdinal, List adder) { // this weird casting dance makes the expected value lambda symmetric Long date; TemporalAmount period; @@ -303,9 +338,21 @@ private static Object addDatesAndTemporalAmount(Object lhs, Object rhs) { date = (Long) rhs; period = (TemporalAmount) lhs; } + return adder.applyAsLong(date, period); + } + + private static long addMillis(Long date, TemporalAmount period) { return asMillis(asDateTime(date).plus(period)); } + private static long addNanos(Long date, TemporalAmount period) { + return DateUtils.toLong( + Instant.from( + ZonedDateTime.ofInstant(DateUtils.toInstant(date), org.elasticsearch.xpack.esql.core.util.DateUtils.UTC).plus(period) + ) + ); + } + @Override protected Expression build(Source source, List args) { return new Add(source, args.get(0), args.get(1)); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubTests.java index 39d55d1ba0b54..1338299b3a121 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/predicate/operator/arithmetic/SubTests.java @@ -10,16 +10,23 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.elasticsearch.common.time.DateUtils; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.hamcrest.Matchers; import java.time.Duration; +import java.time.Instant; import java.time.Period; +import java.time.ZonedDateTime; +import java.time.temporal.TemporalAmount; import java.util.List; +import java.util.function.BinaryOperator; import java.util.function.Supplier; +import java.util.function.ToLongBiFunction; import static org.elasticsearch.xpack.esql.EsqlTestUtils.randomLiteral; import static org.elasticsearch.xpack.esql.core.util.DateUtils.asDateTime; @@ -28,6 +35,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; +import static org.hamcrest.Matchers.startsWith; public class SubTests extends AbstractScalarFunctionTestCase { public SubTests(@Name("TestCase") Supplier testCaseSupplier) { @@ -117,13 +125,44 @@ public static Iterable parameters() { return new TestCaseSupplier.TestCase( List.of( new TestCaseSupplier.TypedData(lhs, DataType.DATETIME, "lhs"), - new TestCaseSupplier.TypedData(rhs, DataType.DATE_PERIOD, "rhs") + new TestCaseSupplier.TypedData(rhs, DataType.DATE_PERIOD, "rhs").forceLiteral() ), - "SubDatetimesEvaluator[lhs=Attribute[channel=0], rhs=Attribute[channel=1]]", + Matchers.startsWith("SubDatetimesEvaluator[datetime=Attribute[channel=0], temporalAmount="), DataType.DATETIME, equalTo(asMillis(asDateTime(lhs).minus(rhs))) ); })); + + BinaryOperator nanosResult = (lhs, rhs) -> { + try { + return subtractDatesAndTemporalAmount(lhs, rhs, SubTests::subtractNanos); + } catch (ArithmeticException e) { + return null; + } + }; + suppliers.addAll( + TestCaseSupplier.forBinaryNotCasting( + nanosResult, 
+ DataType.DATE_NANOS, + TestCaseSupplier.dateNanosCases(Instant.parse("1985-01-01T00:00:00Z"), DateUtils.MAX_NANOSECOND_INSTANT), + TestCaseSupplier.datePeriodCases(0, 0, 0, 10, 13, 32), + startsWith("SubDateNanosEvaluator[dateNanos=Attribute[channel=0], temporalAmount="), + (l, r) -> List.of(), + true + ) + ); + suppliers.addAll( + TestCaseSupplier.forBinaryNotCasting( + nanosResult, + DataType.DATE_NANOS, + TestCaseSupplier.dateNanosCases(Instant.parse("1985-01-01T00:00:00Z"), DateUtils.MAX_NANOSECOND_INSTANT), + TestCaseSupplier.timeDurationCases(0, 604800000L), + startsWith("SubDateNanosEvaluator[dateNanos=Attribute[channel=0], temporalAmount="), + (l, r) -> List.of(), + true + ) + ); + suppliers.add(new TestCaseSupplier("Period - Period", List.of(DataType.DATE_PERIOD, DataType.DATE_PERIOD), () -> { Period lhs = (Period) randomLiteral(DataType.DATE_PERIOD).value(); Period rhs = (Period) randomLiteral(DataType.DATE_PERIOD).value(); @@ -143,9 +182,9 @@ public static Iterable parameters() { TestCaseSupplier.TestCase testCase = new TestCaseSupplier.TestCase( List.of( new TestCaseSupplier.TypedData(lhs, DataType.DATETIME, "lhs"), - new TestCaseSupplier.TypedData(rhs, DataType.TIME_DURATION, "rhs") + new TestCaseSupplier.TypedData(rhs, DataType.TIME_DURATION, "rhs").forceLiteral() ), - "SubDatetimesEvaluator[lhs=Attribute[channel=0], rhs=Attribute[channel=1]]", + Matchers.startsWith("SubDatetimesEvaluator[datetime=Attribute[channel=0], temporalAmount="), DataType.DATETIME, equalTo(asMillis(asDateTime(lhs).minus(rhs))) ); @@ -164,6 +203,7 @@ public static Iterable parameters() { equalTo(lhs.minus(rhs)) ); })); + // exact math arithmetic exceptions suppliers.add( arithmeticExceptionOverflowCase( @@ -210,7 +250,7 @@ public static Iterable parameters() { return original.getData().get(nullPosition == 0 ? 1 : 0).type(); } return original.expectedType(); - }, (nullPosition, nullData, original) -> original); + }, (nullPosition, nullData, original) -> nullData.isForceLiteral() ? 
equalTo("LiteralsEvaluator[lit=null]") : original); suppliers.add(new TestCaseSupplier("MV", List.of(DataType.INTEGER, DataType.INTEGER), () -> { // Ensure we don't have an overflow @@ -236,4 +276,26 @@ public static Iterable parameters() { protected Expression build(Source source, List args) { return new Sub(source, args.get(0), args.get(1)); } + + private static Object subtractDatesAndTemporalAmount(Object lhs, Object rhs, ToLongBiFunction subtract) { + // this weird casting dance makes the expected value lambda symmetric + Long date; + TemporalAmount period; + if (lhs instanceof Long) { + date = (Long) lhs; + period = (TemporalAmount) rhs; + } else { + date = (Long) rhs; + period = (TemporalAmount) lhs; + } + return subtract.applyAsLong(date, period); + } + + private static long subtractNanos(Long date, TemporalAmount period) { + return DateUtils.toLong( + Instant.from( + ZonedDateTime.ofInstant(DateUtils.toInstant(date), org.elasticsearch.xpack.esql.core.util.DateUtils.UTC).minus(period) + ) + ); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java index b30f0870496e3..8a57dfa968ccd 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/type/EsqlDataTypeConverterTests.java @@ -43,7 +43,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED; import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION; import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime; -import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrTemporal; +import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrNanosOrTemporal; import static org.elasticsearch.xpack.esql.core.type.DataType.isString; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; @@ -80,14 +80,18 @@ public void testCommonTypeStrings() { } public void testCommonTypeDateTimeIntervals() { - List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrTemporal).toList(); + List DATE_TIME_INTERVALS = Arrays.stream(DataType.values()).filter(DataType::isDateTimeOrNanosOrTemporal).toList(); for (DataType dataType1 : DATE_TIME_INTERVALS) { for (DataType dataType2 : DataType.values()) { if (dataType2 == NULL) { assertEqualsCommonType(dataType1, NULL, dataType1); - } else if (isDateTimeOrTemporal(dataType2)) { - if (isDateTime(dataType1) || isDateTime(dataType2)) { + } else if (isDateTimeOrNanosOrTemporal(dataType2)) { + if ((dataType1 == DATE_NANOS && dataType2 == DATETIME) || (dataType1 == DATETIME && dataType2 == DATE_NANOS)) { + assertNullCommonType(dataType1, dataType2); + } else if (isDateTime(dataType1) || isDateTime(dataType2)) { assertEqualsCommonType(dataType1, dataType2, DATETIME); + } else if (dataType1 == DATE_NANOS || dataType2 == DATE_NANOS) { + assertEqualsCommonType(dataType1, dataType2, DATE_NANOS); } else if (dataType1 == dataType2) { assertEqualsCommonType(dataType1, dataType2, dataType1); } else { @@ -141,7 +145,6 @@ public void testCommonTypeMiscellaneous() { UNSUPPORTED, OBJECT, SOURCE, - DATE_NANOS, DOC_DATA_TYPE, TSID_DATA_TYPE, PARTIAL_AGG, @@ -165,12 +168,12 @@ public void testCommonTypeMiscellaneous() { } private static void assertEqualsCommonType(DataType dataType1, DataType dataType2, DataType 
commonType) { - assertEquals(commonType, commonType(dataType1, dataType2)); - assertEquals(commonType, commonType(dataType2, dataType1)); + assertEquals("Expected " + commonType + " for " + dataType1 + " and " + dataType2, commonType, commonType(dataType1, dataType2)); + assertEquals("Expected " + commonType + " for " + dataType1 + " and " + dataType2, commonType, commonType(dataType2, dataType1)); } private static void assertNullCommonType(DataType dataType1, DataType dataType2) { - assertNull(commonType(dataType1, dataType2)); - assertNull(commonType(dataType2, dataType1)); + assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType1, dataType2)); + assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType2, dataType1)); } } From ae3d0b9e609f757a365dd61bf02dc477c2bcba83 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 2 Dec 2024 14:19:41 -0500 Subject: [PATCH 109/139] ESQL: Limit size of `Literal#toString` (#117842) This `toString` is rendered in task output and progress. Let's make sure it's not massive. --- docs/changelog/117842.yaml | 5 ++ .../xpack/esql/heap_attack/HeapAttackIT.java | 85 ++++++++++++++++--- .../xpack/esql/core/expression/Literal.java | 6 +- .../esql/core/expression/LiteralTests.java | 20 +++++ 4 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 docs/changelog/117842.yaml diff --git a/docs/changelog/117842.yaml b/docs/changelog/117842.yaml new file mode 100644 index 0000000000000..9b528a158288c --- /dev/null +++ b/docs/changelog/117842.yaml @@ -0,0 +1,5 @@ +pr: 117842 +summary: Limit size of `Literal#toString` +area: ES|QL +type: bug +issues: [] diff --git a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java index 008a056e87901..8b9176a346e30 100644 --- a/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java +++ b/test/external-modules/esql-heap-attack/src/javaRestTest/java/org/elasticsearch/xpack/esql/heap_attack/HeapAttackIT.java @@ -295,15 +295,10 @@ private Response concat(int evals) throws IOException { * Returns many moderately long strings. */ public void testManyConcat() throws IOException { + int strings = 300; initManyLongs(); - Response resp = manyConcat(300); - Map map = responseAsMap(resp); - ListMatcher columns = matchesList(); - for (int s = 0; s < 300; s++) { - columns = columns.item(matchesMap().entry("name", "str" + s).entry("type", "keyword")); - } - MapMatcher mapMatcher = matchesMap(); - assertMap(map, mapMatcher.entry("columns", columns).entry("values", any(List.class)).entry("took", greaterThanOrEqualTo(0))); + Response resp = manyConcat("FROM manylongs", strings); + assertManyStrings(resp, strings); } /** @@ -311,15 +306,24 @@ public void testManyConcat() throws IOException { */ public void testHugeManyConcat() throws IOException { initManyLongs(); - assertCircuitBreaks(() -> manyConcat(2000)); + assertCircuitBreaks(() -> manyConcat("FROM manylongs", 2000)); + } + + /** + * Returns many moderately long strings. + */ + public void testManyConcatFromRow() throws IOException { + int strings = 2000; + Response resp = manyConcat("ROW a=9999, b=9999, c=9999, d=9999, e=9999", strings); + assertManyStrings(resp, strings); } /** * Tests that generate many moderately long strings. 
*/ - private Response manyConcat(int strings) throws IOException { + private Response manyConcat(String init, int strings) throws IOException { StringBuilder query = startQuery(); - query.append("FROM manylongs | EVAL str = CONCAT("); + query.append(init).append(" | EVAL str = CONCAT("); query.append( Arrays.stream(new String[] { "a", "b", "c", "d", "e" }) .map(f -> "TO_STRING(" + f + ")") @@ -344,7 +348,64 @@ private Response manyConcat(int strings) throws IOException { query.append("str").append(s); } query.append("\"}"); - return query(query.toString(), null); + return query(query.toString(), "columns"); + } + + /** + * Returns many moderately long strings. + */ + public void testManyRepeat() throws IOException { + int strings = 30; + initManyLongs(); + Response resp = manyRepeat("FROM manylongs", strings); + assertManyStrings(resp, 30); + } + + /** + * Hits a circuit breaker by building many moderately long strings. + */ + public void testHugeManyRepeat() throws IOException { + initManyLongs(); + assertCircuitBreaks(() -> manyRepeat("FROM manylongs", 75)); + } + + /** + * Returns many moderately long strings. + */ + public void testManyRepeatFromRow() throws IOException { + int strings = 10000; + Response resp = manyRepeat("ROW a = 99", strings); + assertManyStrings(resp, strings); + } + + /** + * Tests that generate many moderately long strings. + */ + private Response manyRepeat(String init, int strings) throws IOException { + StringBuilder query = startQuery(); + query.append(init).append(" | EVAL str = TO_STRING(a)"); + for (int s = 0; s < strings; s++) { + query.append(",\nstr").append(s).append("=REPEAT(str, 10000)"); + } + query.append("\n|KEEP "); + for (int s = 0; s < strings; s++) { + if (s != 0) { + query.append(", "); + } + query.append("str").append(s); + } + query.append("\"}"); + return query(query.toString(), "columns"); + } + + private void assertManyStrings(Response resp, int strings) throws IOException { + Map map = responseAsMap(resp); + ListMatcher columns = matchesList(); + for (int s = 0; s < strings; s++) { + columns = columns.item(matchesMap().entry("name", "str" + s).entry("type", "keyword")); + } + MapMatcher mapMatcher = matchesMap(); + assertMap(map, mapMatcher.entry("columns", columns)); } public void testManyEval() throws IOException { diff --git a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java index 20cdbaf6acdbf..53f559c5c82fe 100644 --- a/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java +++ b/x-pack/plugin/esql-core/src/main/java/org/elasticsearch/xpack/esql/core/expression/Literal.java @@ -122,7 +122,11 @@ public boolean equals(Object obj) { @Override public String toString() { - return String.valueOf(value); + String str = String.valueOf(value); + if (str.length() > 500) { + return str.substring(0, 500) + "..."; + } + return str; } @Override diff --git a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java index a4c67a8076479..a628916e67746 100644 --- a/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java +++ b/x-pack/plugin/esql-core/src/test/java/org/elasticsearch/xpack/esql/core/expression/LiteralTests.java @@ -6,9 +6,12 @@ */ package 
org.elasticsearch.xpack.esql.core.expression;
 
+import joptsimple.internal.Strings;
+
 import org.elasticsearch.test.ESTestCase;
 import org.elasticsearch.xpack.esql.core.InvalidArgumentException;
 import org.elasticsearch.xpack.esql.core.tree.AbstractNodeTestCase;
+import org.elasticsearch.xpack.esql.core.tree.Source;
 import org.elasticsearch.xpack.esql.core.tree.SourceTests;
 import org.elasticsearch.xpack.esql.core.type.Converter;
 import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -17,6 +20,7 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Objects;
 import java.util.function.Function;
 import java.util.function.Supplier;
@@ -29,9 +33,12 @@
 import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
 import static org.elasticsearch.xpack.esql.core.type.DataType.LONG;
 import static org.elasticsearch.xpack.esql.core.type.DataType.SHORT;
+import static org.hamcrest.Matchers.equalTo;
 
 public class LiteralTests extends AbstractNodeTestCase {
+
     static class ValueAndCompatibleTypes {
+
         final Supplier valueSupplier;
         final List validDataTypes;
@@ -120,6 +127,19 @@ public void testReplaceChildren() {
         assertEquals("this type of node doesn't have any children to replace", e.getMessage());
     }
 
+    public void testToString() {
+        assertThat(new Literal(Source.EMPTY, 1, LONG).toString(), equalTo("1"));
+        assertThat(new Literal(Source.EMPTY, "short", KEYWORD).toString(), equalTo("short"));
+        // toString should limit its length
+        String tooLong = Strings.repeat('a', 510);
+        assertThat(new Literal(Source.EMPTY, tooLong, KEYWORD).toString(), equalTo(Strings.repeat('a', 500) + "..."));
+
+        for (ValueAndCompatibleTypes g : GENERATORS) {
+            Literal lit = new Literal(Source.EMPTY, g.valueSupplier.get(), randomFrom(g.validDataTypes));
+            assertThat(lit.toString(), equalTo(Objects.toString(lit.value())));
+        }
+    }
+
     private static Object randomValueOfTypeOtherThan(Object original, DataType type) {
         for (ValueAndCompatibleTypes gen : GENERATORS) {
             if (gen.validDataTypes.get(0) == type) {

From 3b3be18af4d4e7457c6557589af190e5932f06dd Mon Sep 17 00:00:00 2001
From: Luca Cavanna 
Date: Mon, 2 Dec 2024 20:42:07 +0100
Subject: [PATCH 110/139] Add javadocs for Lucene 7 codec classes (#117819)

---
 .../lucene/bwc/codecs/lucene70/BWCLucene70Codec.java | 8 ++++++++
 .../xpack/lucene/bwc/codecs/lucene70/Lucene70Codec.java | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java
index 0100a8bd14635..5a49a7a415b9c 100644
--- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java
+++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java
@@ -25,6 +25,12 @@
 import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec;
 import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat;
 
+/**
+ * Implements the Lucene 7.0 index format. Loaded via SPI for indices created/written with Lucene 7.x (Elasticsearch 6.x) mounted
+ * as archive indices first in Elasticsearch 8.x. Lucene 9.12 retained Lucene70Codec in its classpath, which required overriding
+ * the codec name and version in the segment infos. This codec is still needed after upgrading to Elasticsearch 9.x because its
+ * codec name has been written to disk.
+ */
 public class BWCLucene70Codec extends BWCCodec {
 
     private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
@@ -46,6 +52,8 @@ public PostingsFormat getPostingsFormatForField(String field) {
         }
     };
 
+    // Needed for SPI loading
+    @SuppressWarnings("unused")
     public BWCLucene70Codec() {
         this("BWCLucene70Codec");
     }
diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/Lucene70Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/Lucene70Codec.java
index 77de24b53069d..f9ba02676c2d0 100644
--- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/Lucene70Codec.java
+++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/Lucene70Codec.java
@@ -7,6 +7,14 @@
 
 package org.elasticsearch.xpack.lucene.bwc.codecs.lucene70;
 
+/**
+ * Implements the Lucene 7.0 index format. Will be loaded via SPI for indices created/written with Lucene 7.x (Elasticsearch 6.x)
+ * mounted as archive indices in Elasticsearch 9.x. Note that for indices with the same version mounted first as archive indices in
+ * Elasticsearch 8.x, {@link BWCLucene70Codec} will be used instead, which provides the same functionality, only registered with a different name.
+ *
+ * @deprecated Only for 7.0 back compat
+ */
+@Deprecated
 public class Lucene70Codec extends BWCLucene70Codec {
 
     public Lucene70Codec() {

From 97a626b5ea9cb9a7aca5f83ac6395b3b0ad1dbf2 Mon Sep 17 00:00:00 2001
From: Marci W <333176+marciw@users.noreply.github.com>
Date: Mon, 2 Dec 2024 14:46:41 -0500
Subject: [PATCH 111/139] Remove ccs banner (#117844)

---
 docs/reference/esql/esql-across-clusters.asciidoc | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/docs/reference/esql/esql-across-clusters.asciidoc b/docs/reference/esql/esql-across-clusters.asciidoc
index db266fafde9d6..6decc351bc1c8 100644
--- a/docs/reference/esql/esql-across-clusters.asciidoc
+++ b/docs/reference/esql/esql-across-clusters.asciidoc
@@ -8,11 +8,6 @@
 preview::["{ccs-cap} for {esql} is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."]
 
-[NOTE]
-====
-For {ccs-cap} with {esql} on version 8.16 or later, remote clusters must also be on version 8.16 or later.
-====
-
 With {esql}, you can execute a single query across multiple clusters.
 
 [discrete]

From c54d4b687f3658fadcb158dbe43befa1edcb0e38 Mon Sep 17 00:00:00 2001
From: Luca Cavanna 
Date: Mon, 2 Dec 2024 21:17:19 +0100
Subject: [PATCH 112/139] Don't skip shards in coord rewrite if timestamp is an alias (#117271)

The coordinator rewrite has logic to skip indices if the provided date
range filter is not within the min and max range of all of its shards.
This mechanism is enabled for event.ingested and @timestamp fields,
against searchable snapshots.

We have basic checks that such fields need to be of date field type, yet
if they are defined as an alias of a date field, their range will be
empty, which indicates that the shards are empty, and the coord rewrite
logic resolves the alias and ends up skipping shards that may have
matching docs.

This commit adds an explicit check that declares the range UNKNOWN
instead of EMPTY in these circumstances.
The same check is also performed in the coord rewrite logic, so that shards are no longer skipped by mistake. --- docs/changelog/117271.yaml | 5 + .../index/query/RangeQueryBuilder.java | 1 + .../elasticsearch/index/shard/IndexShard.java | 4 +- .../indices/TimestampFieldMapperService.java | 6 +- ...pshotsCanMatchOnCoordinatorIntegTests.java | 124 +++++++++++++++++- 5 files changed, 130 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/117271.yaml diff --git a/docs/changelog/117271.yaml b/docs/changelog/117271.yaml new file mode 100644 index 0000000000000..1a328279b9635 --- /dev/null +++ b/docs/changelog/117271.yaml @@ -0,0 +1,5 @@ +pr: 117271 +summary: Don't skip shards in coord rewrite if timestamp is an alias +area: Search +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java index 9f6a2be8cdbc7..d6dad15abb8e6 100644 --- a/server/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/RangeQueryBuilder.java @@ -426,6 +426,7 @@ public String getWriteableName() { protected MappedFieldType.Relation getRelation(final CoordinatorRewriteContext coordinatorRewriteContext) { final MappedFieldType fieldType = coordinatorRewriteContext.getFieldType(fieldName); if (fieldType instanceof final DateFieldMapper.DateFieldType dateFieldType) { + assert fieldName.equals(fieldType.name()); IndexLongFieldRange fieldRange = coordinatorRewriteContext.getFieldRange(fieldName); if (fieldRange.isComplete() == false || fieldRange == IndexLongFieldRange.EMPTY) { // if not all shards for this (frozen) index have reported ranges to cluster state, OR if they diff --git a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java index ee24b8d9a9e91..993079a3106d7 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/elasticsearch/index/shard/IndexShard.java @@ -2274,8 +2274,8 @@ private ShardLongFieldRange determineShardLongFieldRange(String fieldName) { return ShardLongFieldRange.UNKNOWN; // no mapper service, no idea if the field even exists } final MappedFieldType mappedFieldType = mapperService().fieldType(fieldName); - if (mappedFieldType instanceof DateFieldMapper.DateFieldType == false) { - return ShardLongFieldRange.UNKNOWN; // field missing or not a date + if (mappedFieldType instanceof DateFieldMapper.DateFieldType == false || mappedFieldType.name().equals(fieldName) == false) { + return ShardLongFieldRange.UNKNOWN; // field is missing, an alias (as the field type has a different name) or not a date field } if (mappedFieldType.isIndexed() == false) { return ShardLongFieldRange.UNKNOWN; // range information missing diff --git a/server/src/main/java/org/elasticsearch/indices/TimestampFieldMapperService.java b/server/src/main/java/org/elasticsearch/indices/TimestampFieldMapperService.java index 026766671e5aa..158cc1f44b608 100644 --- a/server/src/main/java/org/elasticsearch/indices/TimestampFieldMapperService.java +++ b/server/src/main/java/org/elasticsearch/indices/TimestampFieldMapperService.java @@ -166,11 +166,13 @@ private static DateFieldRangeInfo fromMapperService(MapperService mapperService) DateFieldMapper.DateFieldType eventIngestedFieldType = null; MappedFieldType mappedFieldType = mapperService.fieldType(DataStream.TIMESTAMP_FIELD_NAME); - if 
(mappedFieldType instanceof DateFieldMapper.DateFieldType dateFieldType) { + if (mappedFieldType instanceof DateFieldMapper.DateFieldType dateFieldType + && dateFieldType.name().equals(DataStream.TIMESTAMP_FIELD_NAME)) { timestampFieldType = dateFieldType; } mappedFieldType = mapperService.fieldType(IndexMetadata.EVENT_INGESTED_FIELD_NAME); - if (mappedFieldType instanceof DateFieldMapper.DateFieldType dateFieldType) { + if (mappedFieldType instanceof DateFieldMapper.DateFieldType dateFieldType + && dateFieldType.name().equals(IndexMetadata.EVENT_INGESTED_FIELD_NAME)) { eventIngestedFieldType = dateFieldType; } if (timestampFieldType == null && eventIngestedFieldType == null) { diff --git a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java index 26764592d5f72..21b24db6ce8d5 100644 --- a/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/internalClusterTest/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsCanMatchOnCoordinatorIntegTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.search.SearchShardsGroup; import org.elasticsearch.action.search.SearchShardsRequest; import org.elasticsearch.action.search.SearchShardsResponse; +import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.search.TransportSearchShardsAction; import org.elasticsearch.blobcache.shared.SharedBlobCacheService; import org.elasticsearch.cluster.metadata.DataStream; @@ -1096,6 +1097,119 @@ public void testCanMatchSkipsPartiallyMountedIndicesWhenFrozenNodesUnavailable() } } + public void testTimestampAsAlias() throws Exception { + doTestCoordRewriteWithAliasField("@timestamp"); + } + + public void testEventIngestedAsAlias() throws Exception { + doTestCoordRewriteWithAliasField("event.ingested"); + } + + private void doTestCoordRewriteWithAliasField(String aliasFieldName) throws Exception { + internalCluster().startMasterOnlyNode(); + internalCluster().startCoordinatingOnlyNode(Settings.EMPTY); + final String dataNodeHoldingRegularIndex = internalCluster().startDataOnlyNode(); + final String dataNodeHoldingSearchableSnapshot = internalCluster().startDataOnlyNode(); + + String timestampFieldName = randomAlphaOfLengthBetween(3, 10); + String[] indices = new String[] { "index-0001", "index-0002" }; + for (String index : indices) { + Settings extraSettings = Settings.builder() + .put(INDEX_ROUTING_REQUIRE_GROUP_SETTING.getConcreteSettingForNamespace("_name").getKey(), dataNodeHoldingRegularIndex) + .build(); + + assertAcked( + indicesAdmin().prepareCreate(index) + .setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + + .startObject(timestampFieldName) + .field("type", "date") + .endObject() + + .startObject(aliasFieldName) + .field("type", "alias") + .field("path", timestampFieldName) + .endObject() + + .endObject() + .endObject() + ) + .setSettings(indexSettingsNoReplicas(1).put(INDEX_SOFT_DELETES_SETTING.getKey(), true).put(extraSettings)) + ); + } + ensureGreen(indices); + + for (String index : indices) { + final List indexRequestBuilders = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + 
indexRequestBuilders.add(prepareIndex(index).setSource(timestampFieldName, "2024-11-19T08:08:08Z")); + } + indexRandom(true, false, indexRequestBuilders); + + assertThat( + indicesAdmin().prepareForceMerge(index).setOnlyExpungeDeletes(true).setFlush(true).get().getFailedShards(), + equalTo(0) + ); + refresh(index); + forceMerge(); + } + + final String repositoryName = randomAlphaOfLength(10).toLowerCase(Locale.ROOT); + createRepository(repositoryName, "mock"); + + final SnapshotId snapshotId = createSnapshot(repositoryName, "snapshot-1", List.of(indices[0])).snapshotId(); + assertAcked(indicesAdmin().prepareDelete(indices[0])); + + // Block the repository for the node holding the searchable snapshot shards + // to delay its restore + blockDataNode(repositoryName, dataNodeHoldingSearchableSnapshot); + + // Force the searchable snapshot to be allocated in a particular node + Settings restoredIndexSettings = Settings.builder() + .put(INDEX_ROUTING_REQUIRE_GROUP_SETTING.getConcreteSettingForNamespace("_name").getKey(), dataNodeHoldingSearchableSnapshot) + .build(); + + String mountedIndex = indices[0] + "-mounted"; + final MountSearchableSnapshotRequest mountRequest = new MountSearchableSnapshotRequest( + TEST_REQUEST_TIMEOUT, + mountedIndex, + repositoryName, + snapshotId.getName(), + indices[0], + restoredIndexSettings, + Strings.EMPTY_ARRAY, + false, + randomFrom(MountSearchableSnapshotRequest.Storage.values()) + ); + client().execute(MountSearchableSnapshotAction.INSTANCE, mountRequest).actionGet(); + + // Allow the searchable snapshots to be finally mounted + unblockNode(repositoryName, dataNodeHoldingSearchableSnapshot); + waitUntilRecoveryIsDone(mountedIndex); + ensureGreen(mountedIndex); + + String[] fieldsToQuery = new String[] { timestampFieldName, aliasFieldName }; + for (String fieldName : fieldsToQuery) { + RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery(fieldName).from("2024-11-01T00:00:00.000000000Z", true); + SearchRequest request = new SearchRequest().searchType(SearchType.QUERY_THEN_FETCH) + .source(new SearchSourceBuilder().query(rangeQuery)); + if (randomBoolean()) { + // pre_filter_shard_size default to 1 because there are read-only indices in the mix. It does not hurt to force it though. 
+ request.setPreFilterShardSize(1); + } + assertResponse(client().search(request), searchResponse -> { + assertThat(searchResponse.getSuccessfulShards(), equalTo(2)); + assertThat(searchResponse.getFailedShards(), equalTo(0)); + assertThat(searchResponse.getSkippedShards(), equalTo(0)); + assertThat(searchResponse.getTotalShards(), equalTo(2)); + assertThat(searchResponse.getHits().getTotalHits().value(), equalTo(20L)); + }); + } + } + private void createIndexWithTimestampAndEventIngested(String indexName, int numShards, Settings extraSettings) throws IOException { assertAcked( indicesAdmin().prepareCreate(indexName) @@ -1144,8 +1258,7 @@ private void createIndexWithOnlyOneTimestampField(String timestampField, String ensureGreen(index); } - private void indexDocumentsWithOnlyOneTimestampField(String timestampField, String index, int docCount, String timestampTemplate) - throws Exception { + private void indexDocumentsWithOnlyOneTimestampField(String timestampField, String index, int docCount, String timestampTemplate) { final List indexRequestBuilders = new ArrayList<>(); for (int i = 0; i < docCount; i++) { indexRequestBuilders.add( @@ -1169,8 +1282,7 @@ private void indexDocumentsWithOnlyOneTimestampField(String timestampField, Stri forceMerge(); } - private void indexDocumentsWithTimestampAndEventIngestedDates(String indexName, int docCount, String timestampTemplate) - throws Exception { + private void indexDocumentsWithTimestampAndEventIngestedDates(String indexName, int docCount, String timestampTemplate) { final List indexRequestBuilders = new ArrayList<>(); for (int i = 0; i < docCount; i++) { @@ -1207,7 +1319,7 @@ private void indexDocumentsWithTimestampAndEventIngestedDates(String indexName, forceMerge(); } - private IndexMetadata getIndexMetadata(String indexName) { + private static IndexMetadata getIndexMetadata(String indexName) { return clusterAdmin().prepareState(TEST_REQUEST_TIMEOUT) .clear() .setMetadata(true) @@ -1218,7 +1330,7 @@ private IndexMetadata getIndexMetadata(String indexName) { .index(indexName); } - private void waitUntilRecoveryIsDone(String index) throws Exception { + private static void waitUntilRecoveryIsDone(String index) throws Exception { assertBusy(() -> { RecoveryResponse recoveryResponse = indicesAdmin().prepareRecoveries(index).get(); assertThat(recoveryResponse.hasRecoveries(), equalTo(true)); From 6c2f6071b20633fafc383212331f79146613011b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 2 Dec 2024 16:04:31 -0500 Subject: [PATCH 113/139] Refactor/bbq format (#117847) * Refactor bbq format to be contained in a package * fixing license headers * fixing module * fix style --- server/src/main/java/module-info.java | 4 ++-- .../{ => es816}/BinarizedByteVectorValues.java | 3 ++- .../codec/vectors/{ => es816}/BinaryQuantizer.java | 4 +++- .../{ => es816}/ES816BinaryFlatVectorsScorer.java | 14 ++++++++------ .../ES816BinaryQuantizedVectorsFormat.java | 2 +- .../ES816BinaryQuantizedVectorsReader.java | 7 ++++--- .../ES816BinaryQuantizedVectorsWriter.java | 10 ++++++---- .../ES816HnswBinaryQuantizedVectorsFormat.java | 2 +- .../{ => es816}/OffHeapBinarizedVectorValues.java | 9 +++++---- .../mapper/vectors/DenseVectorFieldMapper.java | 4 ++-- .../org.apache.lucene.codecs.KnnVectorsFormat | 4 ++-- .../{ => es816}/BinaryQuantizationTests.java | 4 +++- .../ES816BinaryFlatVectorsScorerTests.java | 4 +++- .../ES816BinaryQuantizedVectorsFormatTests.java | 3 ++- ...ES816HnswBinaryQuantizedVectorsFormatTests.java | 2 +- 15 files changed, 45 insertions(+), 31 
deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinarizedByteVectorValues.java (96%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizer.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorer.java (95%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormat.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsReader.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsWriter.java (98%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormat.java (99%) rename server/src/main/java/org/elasticsearch/index/codec/vectors/{ => es816}/OffHeapBinarizedVectorValues.java (97%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/BinaryQuantizationTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryFlatVectorsScorerTests.java (99%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816BinaryQuantizedVectorsFormatTests.java (98%) rename server/src/test/java/org/elasticsearch/index/codec/vectors/{ => es816}/ES816HnswBinaryQuantizedVectorsFormatTests.java (99%) diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index d572d3b90fec8..5acc202ebb294 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -457,8 +457,8 @@ org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat, org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat, org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat, - org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat, - org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; + org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat, + org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; provides org.apache.lucene.codecs.Codec with diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java similarity index 96% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java index cf69ab0862949..d5f968af3e738 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinarizedByteVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinarizedByteVectorValues.java @@ -17,11 +17,12 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java index aa72904fe1341..768c6d526e468 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BinaryQuantizer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizer.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java similarity index 95% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java index 72c5da4880e75..445bdadab2354 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.index.KnnVectorValues; @@ -26,6 +26,8 @@ import org.apache.lucene.util.VectorUtil; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.simdvec.ESVectorUtil; import java.io.IOException; @@ -35,10 +37,10 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; /** Vector scorer over binarized vector values */ -public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { +class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { + ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; } @@ -144,10 +146,10 @@ public RandomVectorScorerSupplier copy() throws IOException { } /** A binarized query representing its quantized form along with factors */ - public record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} + record BinaryQueryVector(byte[] vector, BinaryQuantizer.QueryFactors factors) {} /** Vector scorer over binarized vector values */ - public static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { + static class BinarizedRandomVectorScorer extends RandomVectorScorer.AbstractRandomVectorScorer { private final BinaryQueryVector queryVector; private final BinarizedByteVectorValues targetVectors; private final VectorSimilarityFunction similarityFunction; @@ -155,7 +157,7 @@ public static class BinarizedRandomVectorScorer extends RandomVectorScorer.Abstr private final float sqrtDimensions; private final float maxX1; - public BinarizedRandomVectorScorer( + BinarizedRandomVectorScorer( BinaryQueryVector queryVectors, BinarizedByteVectorValues targetVectors, VectorSimilarityFunction similarityFunction diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java index e32aea0fb04ae..d864ec5dee8c5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 21c4a5c449387..fc20809ea7eed 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; @@ -43,6 +43,7 @@ import org.apache.lucene.util.SuppressForbidden; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.HashMap; @@ -55,7 +56,7 @@ * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { +class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class); @@ -64,7 +65,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private final FlatVectorsReader rawVectorsReader; private final ES816BinaryFlatVectorsScorer vectorScorer; - public ES816BinaryQuantizedVectorsReader( + ES816BinaryQuantizedVectorsReader( SegmentReadState state, FlatVectorsReader rawVectorsReader, ES816BinaryFlatVectorsScorer vectorsScorer diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java index a7774b850b64c..31ae977e81118 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsWriter.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; @@ -48,6 +48,8 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.Closeable; import java.io.IOException; @@ -61,14 +63,14 @@ import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; -import static org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.BINARIZED_VECTOR_COMPONENT; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { +class ES816BinaryQuantizedVectorsWriter extends FlatVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = shallowSizeOfInstance(ES816BinaryQuantizedVectorsWriter.class); private final SegmentWriteState segmentWriteState; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java similarity index 99% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index 097cdffff6ae4..52f9f14b7bf97 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java similarity index 97% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java index e7d818bb752d6..12bf962d314bd 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/OffHeapBinarizedVectorValues.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/OffHeapBinarizedVectorValues.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene90.IndexedDISI; @@ -29,6 +29,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.nio.ByteBuffer; @@ -37,7 +38,7 @@ import static org.elasticsearch.index.codec.vectors.BQVectorUtils.constSqrt; /** Binarized vector values loaded from off-heap */ -public abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { +abstract class OffHeapBinarizedVectorValues extends BinarizedByteVectorValues { protected final int dimension; protected final int size; @@ -251,8 +252,8 @@ public static OffHeapBinarizedVectorValues load( } /** Dense off-heap binarized vector values */ - public static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { - public DenseOffHeapVectorValues( + static class DenseOffHeapVectorValues extends OffHeapBinarizedVectorValues { + DenseOffHeapVectorValues( int dimension, int size, float[] centroid, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index dea9368a9377e..0a6a24f727572 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -46,8 +46,8 @@ import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat; -import org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat; +import org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.ArraySourceValueFetcher; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index c2201f5b1c319..389555e60b43b 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -3,5 +3,5 @@ org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat org.elasticsearch.index.codec.vectors.ES815HnswBitVectorsFormat org.elasticsearch.index.codec.vectors.ES815BitFlatVectorFormat -org.elasticsearch.index.codec.vectors.ES816BinaryQuantizedVectorsFormat -org.elasticsearch.index.codec.vectors.ES816HnswBinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat +org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat diff --git 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java index 32d717bd76f91..205cbb4119dd6 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/BinaryQuantizationTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/BinaryQuantizationTests.java @@ -17,11 +17,13 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.util.Random; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java index cef5e5358f3d5..a75b9bc6064d1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryFlatVectorsScorerTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorerTests.java @@ -17,13 +17,15 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQSpaceUtils; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index 42f2fbb383ac9..681f615653d40 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. 
*/ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; @@ -41,6 +41,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import java.io.IOException; import java.util.Locale; diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java similarity index 99% rename from server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java rename to server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index ca96e093b7b28..a25fa2836ee34 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -17,7 +17,7 @@ * * Modifications copyright (C) 2024 Elasticsearch B.V. */ -package org.elasticsearch.index.codec.vectors; +package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FilterCodec; From 12be8203d3efd1ed62a838aaa1b379c592a7aaec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Tue, 3 Dec 2024 09:31:51 +1100 Subject: [PATCH 114/139] Mute org.elasticsearch.xpack.test.rest.XPackRestIT test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} #117862 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 73d9a29e275b3..57db22feba059 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -236,6 +236,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 +- class: org.elasticsearch.xpack.test.rest.XPackRestIT + method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} + issue: https://github.com/elastic/elasticsearch/issues/117862 # Examples: # From af7d3f911fbacaa1f4b1be68398cc59cbfdc89e2 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 17:57:02 -0800 Subject: [PATCH 115/139] Add cluster level reduction (#117731) This change introduces cluster-level reduction. Unlike data-node-level reduction, it does not require pragmas because the network latency and throughput across clusters differ significantly from those within a cluster. As a result, the benefits of this reduction should outweigh the risks. 
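
For example (the index and field names here are hypothetical, not taken
from this change), consider a cross-cluster aggregation such as:

    FROM *:logs-* | STATS total = COUNT(*) BY host.name

Previously the remote coordinator simply forwarded raw pages from its
data nodes to the querying cluster. With cluster-level reduction, it
first runs the aggregation in INITIAL mode on the remote coordinator and
ships only intermediate aggregation states across the wire, trading some
remote CPU for less cross-cluster traffic.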
--- docs/changelog/117731.yaml | 5 ++ .../action/CrossClustersCancellationIT.java | 37 ++++++++++++ .../xpack/esql/planner/PlannerUtils.java | 47 ++++++---------- .../xpack/esql/plugin/ComputeService.java | 56 +++++++------------ 4 files changed, 78 insertions(+), 67 deletions(-) create mode 100644 docs/changelog/117731.yaml diff --git a/docs/changelog/117731.yaml b/docs/changelog/117731.yaml new file mode 100644 index 0000000000000..f69cd5bf31100 --- /dev/null +++ b/docs/changelog/117731.yaml @@ -0,0 +1,5 @@ +pr: 117731 +summary: Add cluster level reduction +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java index 5ffc92636b272..f29f79976dc0d 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClustersCancellationIT.java @@ -238,4 +238,41 @@ public void testSameRemoteClusters() throws Exception { } } } + + public void testTasks() throws Exception { + createRemoteIndex(between(10, 100)); + EsqlQueryRequest request = EsqlQueryRequest.syncEsqlQueryRequest(); + request.query("FROM *:test | STATS total=sum(const) | LIMIT 1"); + request.pragmas(randomPragmas()); + ActionFuture requestFuture = client().execute(EsqlQueryAction.INSTANCE, request); + assertTrue(PauseFieldPlugin.startEmitting.await(30, TimeUnit.SECONDS)); + try { + assertBusy(() -> { + List clusterTasks = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setActions(ComputeService.CLUSTER_ACTION_NAME) + .get() + .getTasks(); + assertThat(clusterTasks.size(), equalTo(1)); + List drivers = client(REMOTE_CLUSTER).admin() + .cluster() + .prepareListTasks() + .setTargetParentTaskId(clusterTasks.getFirst().taskId()) + .setActions(DriverTaskRunner.ACTION_NAME) + .setDetailed(true) + .get() + .getTasks(); + assertThat(drivers.size(), equalTo(1)); + TaskInfo driver = drivers.getFirst(); + assertThat(driver.description(), equalTo(""" + \\_ExchangeSourceOperator[] + \\_AggregationOperator[mode = INTERMEDIATE, aggs = sum of longs] + \\_ExchangeSinkOperator""")); + }); + } finally { + PauseFieldPlugin.allowEmitting.countDown(); + } + requestFuture.actionGet(30, TimeUnit.SECONDS).close(); + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index c998af2215169..f4ada1442efe5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -29,14 +29,8 @@ import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalPlanOptimizer; -import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.EsRelation; import org.elasticsearch.xpack.esql.plan.logical.Filter; -import org.elasticsearch.xpack.esql.plan.logical.Limit; -import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.OrderBy; -import 
org.elasticsearch.xpack.esql.plan.logical.TopN; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.plan.physical.AggregateExec; import org.elasticsearch.xpack.esql.plan.physical.EsSourceExec; import org.elasticsearch.xpack.esql.plan.physical.EstimatesRowSize; @@ -44,10 +38,7 @@ import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; -import org.elasticsearch.xpack.esql.plan.physical.LimitExec; -import org.elasticsearch.xpack.esql.plan.physical.OrderExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; -import org.elasticsearch.xpack.esql.plan.physical.TopNExec; import org.elasticsearch.xpack.esql.planner.mapper.LocalMapper; import org.elasticsearch.xpack.esql.planner.mapper.Mapper; import org.elasticsearch.xpack.esql.session.Configuration; @@ -83,29 +74,25 @@ public static Tuple breakPlanBetweenCoordinatorAndDa return new Tuple<>(coordinatorPlan, dataNodePlan.get()); } - public static PhysicalPlan dataNodeReductionPlan(LogicalPlan plan, PhysicalPlan unused) { - var pipelineBreakers = plan.collectFirstChildren(Mapper::isPipelineBreaker); + public static PhysicalPlan reductionPlan(PhysicalPlan plan) { + // find the logical fragment + var fragments = plan.collectFirstChildren(p -> p instanceof FragmentExec); + if (fragments.isEmpty()) { + return null; + } + final FragmentExec fragment = (FragmentExec) fragments.getFirst(); - if (pipelineBreakers.isEmpty() == false) { - UnaryPlan pipelineBreaker = (UnaryPlan) pipelineBreakers.get(0); - if (pipelineBreaker instanceof TopN) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - return physicalPlan.collectFirstChildren(TopNExec.class::isInstance).get(0); - } else if (pipelineBreaker instanceof Limit limit) { - return new LimitExec(limit.source(), unused, limit.limit()); - } else if (pipelineBreaker instanceof OrderBy order) { - return new OrderExec(order.source(), unused, order.order()); - } else if (pipelineBreaker instanceof Aggregate) { - LocalMapper mapper = new LocalMapper(); - var physicalPlan = EstimatesRowSize.estimateRowSize(0, mapper.map(plan)); - var aggregate = (AggregateExec) physicalPlan.collectFirstChildren(AggregateExec.class::isInstance).get(0); - return aggregate.withMode(AggregatorMode.INITIAL); - } else { - throw new EsqlIllegalArgumentException("unsupported unary physical plan node [" + pipelineBreaker.nodeName() + "]"); - } + final var pipelineBreakers = fragment.fragment().collectFirstChildren(Mapper::isPipelineBreaker); + if (pipelineBreakers.isEmpty()) { + return null; + } + final var pipelineBreaker = pipelineBreakers.getFirst(); + final LocalMapper mapper = new LocalMapper(); + PhysicalPlan reducePlan = mapper.map(pipelineBreaker); + if (reducePlan instanceof AggregateExec agg) { + reducePlan = agg.withMode(AggregatorMode.INITIAL); // force to emit intermediate outputs } - return null; + return EstimatesRowSize.estimateRowSize(fragment.estimatedRowSize(), reducePlan); } /** diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index b06dd3cdb64d3..9aea1577a4137 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -60,12 +60,10 @@ import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.action.EsqlSearchShardsAction; import org.elasticsearch.xpack.esql.core.expression.Attribute; -import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; -import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -780,35 +778,24 @@ private void runComputeOnDataNode( } } + private static PhysicalPlan reductionPlan(ExchangeSinkExec plan, boolean enable) { + PhysicalPlan reducePlan = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); + if (enable) { + PhysicalPlan p = PlannerUtils.reductionPlan(plan); + if (p != null) { + reducePlan = p.replaceChildren(List.of(reducePlan)); + } + } + return new ExchangeSinkExec(plan.source(), plan.output(), plan.isIntermediateAgg(), reducePlan); + } + private class DataNodeRequestHandler implements TransportRequestHandler { @Override public void messageReceived(DataNodeRequest request, TransportChannel channel, Task task) { final ActionListener listener = new ChannelActionListener<>(channel); - final ExchangeSinkExec reducePlan; + final PhysicalPlan reductionPlan; if (request.plan() instanceof ExchangeSinkExec plan) { - var fragments = plan.collectFirstChildren(FragmentExec.class::isInstance); - if (fragments.isEmpty()) { - listener.onFailure(new IllegalStateException("expected a fragment plan for a remote compute; got " + request.plan())); - return; - } - var localExchangeSource = new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()); - Holder reducePlanHolder = new Holder<>(); - if (request.pragmas().nodeLevelReduction()) { - PhysicalPlan dataNodePlan = request.plan(); - request.plan() - .forEachUp( - FragmentExec.class, - f -> { reducePlanHolder.set(PlannerUtils.dataNodeReductionPlan(f.fragment(), dataNodePlan)); } - ); - } - reducePlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - reducePlanHolder.get() != null - ? reducePlanHolder.get().replaceChildren(List.of(localExchangeSource)) - : localExchangeSource - ); + reductionPlan = reductionPlan(plan, request.pragmas().nodeLevelReduction()); } else { listener.onFailure(new IllegalStateException("expected exchange sink for a remote compute; got " + request.plan())); return; @@ -825,7 +812,7 @@ public void messageReceived(DataNodeRequest request, TransportChannel channel, T request.indicesOptions() ); try (var computeListener = ComputeListener.create(transportService, (CancellableTask) task, listener)) { - runComputeOnDataNode((CancellableTask) task, sessionId, reducePlan, request, computeListener); + runComputeOnDataNode((CancellableTask) task, sessionId, reductionPlan, request, computeListener); } } } @@ -871,10 +858,10 @@ public void messageReceived(ClusterComputeRequest request, TransportChannel chan * Performs a compute on a remote cluster. The output pages are placed in an exchange sink specified by * {@code globalSessionId}. 
The coordinator on the main cluster will poll pages from there.
 * <p>

- * Currently, the coordinator on the remote cluster simply collects pages from data nodes in the remote cluster - * and places them in the exchange sink. We can achieve this by using a single exchange buffer to minimize overhead. - * However, here we use two exchange buffers so that we can run an actual plan on this coordinator to perform partial - * reduce operations, such as limit, topN, and partial-to-partial aggregation in the future. + * Currently, the coordinator on the remote cluster polls pages from data nodes within the remote cluster + * and performs cluster-level reduction before sending pages to the querying cluster. This reduction aims + * to minimize data transfers across clusters but may require additional CPU resources for operations like + * aggregations. */ void runComputeOnRemoteCluster( String clusterAlias, @@ -892,6 +879,7 @@ void runComputeOnRemoteCluster( () -> exchangeService.finishSinkHandler(globalSessionId, new TaskCancelledException(parentTask.getReasonCancelled())) ); final String localSessionId = clusterAlias + ":" + globalSessionId; + final PhysicalPlan coordinatorPlan = reductionPlan(plan, true); var exchangeSource = new ExchangeSourceHandler( configuration.pragmas().exchangeBufferSize(), transportService.getThreadPool().executor(ThreadPool.Names.SEARCH), @@ -899,12 +887,6 @@ void runComputeOnRemoteCluster( ); try (Releasable ignored = exchangeSource.addEmptySink()) { exchangeSink.addCompletionListener(computeListener.acquireAvoid()); - PhysicalPlan coordinatorPlan = new ExchangeSinkExec( - plan.source(), - plan.output(), - plan.isIntermediateAgg(), - new ExchangeSourceExec(plan.source(), plan.output(), plan.isIntermediateAgg()) - ); runCompute( parentTask, new ComputeContext(localSessionId, clusterAlias, List.of(), configuration, exchangeSource, exchangeSink), From 4a9f632fab7571e198f5030dd30acc80c436c58b Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Mon, 2 Dec 2024 19:53:08 -0800 Subject: [PATCH 116/139] By pass cancellation when closing sinks (#117797) > **java.lang.AssertionError: Leftover exchanges ExchangeService{sinks=[veZSyrPATq2Sg83dtgK3Jg:700/3]} on node node_s4** I looked into the test failure described in https://github.com/elastic/elasticsearch/issues/117253. The reason we don't clean up the exchange sink quickly is that, once a failure occurs, we cancel the request along with all its child requests. These exchange sinks will be cleaned up only after they become inactive, which by default takes 5 minutes. We could override the `esql.exchange.sink_inactive_interval` setting in the test to remove these exchange sinks faster. However, I think we should allow exchange requests that close exchange sinks to bypass cancellation, enabling quicker resource cleanup than the default inactive interval. 
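
For reference, the test-only alternative would have been a settings
override along these lines (a rough sketch using Settings and TimeValue,
assuming the setting is registered as a time setting; it is not part of
this change):

    Settings settings = Settings.builder()
        .put("esql.exchange.sink_inactive_interval", TimeValue.timeValueSeconds(1))
        .build();

Bypassing cancellation for the request that closes the sink instead
frees the resources as soon as the failure is observed, independent of
any interval tuning.
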
Closes #117253 --- .../operator/exchange/ExchangeRequest.java | 17 ++++++-- .../operator/exchange/ExchangeService.java | 43 ++++++++++--------- .../exchange/ExchangeSourceHandler.java | 8 ++-- .../compute/operator/exchange/RemoteSink.java | 10 +++++ .../exchange/ExchangeRequestTests.java | 27 ++++++++++++ .../exchange/ExchangeServiceTests.java | 6 ++- 6 files changed, 82 insertions(+), 29 deletions(-) create mode 100644 x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java index 6ed2cc7e587be..1e8700bcd4030 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeRequest.java @@ -40,6 +40,17 @@ public void writeTo(StreamOutput out) throws IOException { out.writeBoolean(sourcesFinished); } + @Override + public TaskId getParentTask() { + // Exchange requests with `sourcesFinished=true` complete the remote sink and return without blocking. + // Masking the parent task allows these requests to bypass task cancellation, ensuring cleanup of the remote sink. + // TODO: Maybe add a separate action/request for closing exchange sinks? + if (sourcesFinished) { + return TaskId.EMPTY_TASK_ID; + } + return super.getParentTask(); + } + /** * True if the {@link ExchangeSourceHandler} has enough input. * The corresponding {@link ExchangeSinkHandler} can drain pages and finish itself. @@ -70,9 +81,9 @@ public int hashCode() { @Override public Task createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { - if (parentTaskId.isSet() == false) { - assert false : "ExchangeRequest must have a parent task"; - throw new IllegalStateException("ExchangeRequest must have a parent task"); + if (sourcesFinished == false && parentTaskId.isSet() == false) { + assert false : "ExchangeRequest with sourcesFinished=false must have a parent task"; + throw new IllegalStateException("ExchangeRequest with sourcesFinished=false must have a parent task"); } return new CancellableTask(id, type, action, "", parentTaskId, headers) { @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java index a943a90d02e87..00c68c4f48e86 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeService.java @@ -314,28 +314,20 @@ static final class TransportRemoteSink implements RemoteSink { @Override public void fetchPageAsync(boolean allSourcesFinished, ActionListener listener) { if (allSourcesFinished) { - if (finished.compareAndSet(false, true)) { - doFetchPageAsync(true, listener); - } else { - // already finished or promised - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - } - } else { - // already finished - if (finished.get()) { - listener.onResponse(new ExchangeResponse(blockFactory, null, true)); - return; - } - doFetchPageAsync(false, ActionListener.wrap(r -> { - if (r.finished()) { - finished.set(true); - } - 
listener.onResponse(r); - }, e -> { - finished.set(true); - listener.onFailure(e); - })); + close(listener.map(unused -> new ExchangeResponse(blockFactory, null, true))); + return; + } + // already finished + if (finished.get()) { + listener.onResponse(new ExchangeResponse(blockFactory, null, true)); + return; } + doFetchPageAsync(false, ActionListener.wrap(r -> { + if (r.finished()) { + finished.set(true); + } + listener.onResponse(r); + }, e -> close(ActionListener.running(() -> listener.onFailure(e))))); } private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { @@ -361,6 +353,15 @@ private void doFetchPageAsync(boolean allSourcesFinished, ActionListener listener) { + if (finished.compareAndSet(false, true)) { + doFetchPageAsync(true, listener.delegateFailure((l, unused) -> l.onResponse(null))); + } else { + listener.onResponse(null); + } + } } // For testing diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java index 61b3386ce0274..375016a5d51d5 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/ExchangeSourceHandler.java @@ -224,8 +224,10 @@ void onSinkFailed(Exception e) { buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading if (finished == false) { finished = true; - outstandingSinks.finishInstance(); - completionListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> { + outstandingSinks.finishInstance(); + completionListener.onFailure(e); + })); } } @@ -262,7 +264,7 @@ public void onFailure(Exception e) { failure.unwrapAndCollect(e); } buffer.waitForReading().listener().onResponse(null); // resume the Driver if it is being blocked on reading - sinkListener.onFailure(e); + remoteSink.close(ActionListener.running(() -> sinkListener.onFailure(e))); } @Override diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java index 7d81cd3f66600..aaa937ef17c0e 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/operator/exchange/RemoteSink.java @@ -12,4 +12,14 @@ public interface RemoteSink { void fetchPageAsync(boolean allSourcesFinished, ActionListener listener); + + default void close(ActionListener listener) { + fetchPageAsync(true, listener.delegateFailure((l, r) -> { + try { + r.close(); + } finally { + l.onResponse(null); + } + })); + } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java new file mode 100644 index 0000000000000..8a0891651a497 --- /dev/null +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeRequestTests.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+package org.elasticsearch.compute.operator.exchange;
+
+import org.elasticsearch.tasks.TaskId;
+import org.elasticsearch.test.ESTestCase;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class ExchangeRequestTests extends ESTestCase {
+
+    public void testParentTask() {
+        ExchangeRequest r1 = new ExchangeRequest("1", true);
+        r1.setParentTask(new TaskId("node-1", 1));
+        assertSame(TaskId.EMPTY_TASK_ID, r1.getParentTask());
+
+        ExchangeRequest r2 = new ExchangeRequest("1", false);
+        r2.setParentTask(new TaskId("node-2", 2));
+        assertTrue(r2.getParentTask().isSet());
+        assertThat(r2.getParentTask(), equalTo((new TaskId("node-2", 2))));
+    }
+}
diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java
index 4178f02898d79..fc6c850ba187b 100644
--- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java
+++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/operator/exchange/ExchangeServiceTests.java
@@ -491,7 +491,7 @@ public void testConcurrentWithTransportActions() {
         }
     }

-    public void testFailToRespondPage() {
+    public void testFailToRespondPage() throws Exception {
         Settings settings = Settings.builder().build();
         MockTransportService node0 = newTransportService();
         ExchangeService exchange0 = new ExchangeService(settings, threadPool, ESQL_TEST_EXECUTOR, blockFactory());
@@ -558,7 +558,9 @@ public void sendResponse(TransportResponse transportResponse) {
             Throwable cause = ExceptionsHelper.unwrap(err, IOException.class);
             assertNotNull(cause);
             assertThat(cause.getMessage(), equalTo("page is too large"));
-            sinkHandler.onFailure(new RuntimeException(cause));
+            PlainActionFuture sinkCompletionFuture = new PlainActionFuture<>();
+            sinkHandler.addCompletionListener(sinkCompletionFuture);
+            assertBusy(() -> assertTrue(sinkCompletionFuture.isDone()));
             expectThrows(Exception.class, () -> sourceCompletionFuture.actionGet(10, TimeUnit.SECONDS));
         }
     }

From af9a57ec66770530cf45aefd842e86a810b13947 Mon Sep 17 00:00:00 2001
From: Ignacio Vera
Date: Tue, 3 Dec 2024 07:18:44 +0100
Subject: [PATCH 117/139] Remove supersetSize and subsetSize from
 InternalSignificantTerms.Bucket (#117574)

Those fields are only used to update the score and are not serialized in
the bucket, so they can be removed.
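Condensed, the scoring path changes like this (both shapes are taken from the `InternalSignificantTerms` hunk below):

```java
// Before: every bucket carries subsetSize/supersetSize only so updateScore can read them.
void updateScore(SignificanceHeuristic significanceHeuristic) {
    score = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize);
}

// After: the sizes live once on the aggregation and are passed in at scoring time.
void updateScore(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize) {
    score = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize);
}
```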
--- .../SignificantTermsSignificanceScoreIT.java | 2 +- .../GlobalOrdinalsStringTermsAggregator.java | 9 +-- .../terms/InternalMappedSignificantTerms.java | 6 +- .../terms/InternalSignificantTerms.java | 50 ++------------ .../terms/MapStringTermsAggregator.java | 64 +++++++++-------- .../bucket/terms/NumericTermsAggregator.java | 69 ++++++++++--------- .../bucket/terms/SignificantLongTerms.java | 39 ++--------- .../bucket/terms/SignificantStringTerms.java | 30 ++------ .../bucket/terms/SignificantTerms.java | 24 +++---- .../terms/UnmappedSignificantTerms.java | 25 ++----- .../InternalSignificantTermsTestCase.java | 2 - .../terms/SignificantLongTermsTests.java | 15 +--- .../terms/SignificantStringTermsTests.java | 15 +--- ...AbstractSignificanceHeuristicTestCase.java | 39 +++-------- 14 files changed, 127 insertions(+), 262 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java index bf11c1d69bcc6..671f60e2b9d5e 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/SignificantTermsSignificanceScoreIT.java @@ -495,7 +495,7 @@ public void testScriptScore() throws ExecutionException, InterruptedException, I for (SignificantTerms.Bucket bucket : sigTerms.getBuckets()) { assertThat( bucket.getSignificanceScore(), - is((double) bucket.getSubsetDf() + bucket.getSubsetSize() + bucket.getSupersetDf() + bucket.getSupersetSize()) + is((double) bucket.getSubsetDf() + sigTerms.getSubsetSize() + bucket.getSupersetDf() + sigTerms.getSupersetSize()) ); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index 5a79155d1d4f5..4cf710232c7a0 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -989,7 +989,7 @@ SignificantStringTerms.Bucket[] buildBuckets(int size) { @Override SignificantStringTerms.Bucket buildEmptyTemporaryBucket() { - return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0); + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } private long subsetSize(long owningBucketOrd) { @@ -998,22 +998,19 @@ private long subsetSize(long owningBucketOrd) { } @Override - BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) - throws IOException { + BucketUpdater bucketUpdater(long owningBucketOrd, GlobalOrdLookupFunction lookupGlobalOrd) { long subsetSize = subsetSize(owningBucketOrd); return (spare, globalOrd, bucketOrd, docCount) -> { spare.bucketOrd = bucketOrd; oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes); spare.subsetDf = docCount; - spare.subsetSize = subsetSize; spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; /* * During shard-local down-selection we use subset/superset stats * that are for this shard only. 
Back at the central reducer these * properties will be updated with global stats. */ - spare.updateScore(significanceHeuristic); + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); }; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java index 3f75a27306ab4..8c6d21cc74119 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedSignificantTerms.java @@ -59,7 +59,7 @@ protected InternalMappedSignificantTerms(StreamInput in, Bucket.Reader bucket subsetSize = in.readVLong(); supersetSize = in.readVLong(); significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class); - buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, subsetSize, supersetSize, format)); + buckets = in.readCollectionAsList(stream -> bucketReader.read(stream, format)); } @Override @@ -91,12 +91,12 @@ public B getBucketByKey(String term) { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return subsetSize; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return supersetSize; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java index 6c0eb465d1f80..78ae2481f5d99 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java @@ -53,13 +53,11 @@ public abstract static class Bucket> extends InternalMultiBu */ @FunctionalInterface public interface Reader> { - B read(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException; + B read(StreamInput in, DocValueFormat format) throws IOException; } long subsetDf; - long subsetSize; long supersetDf; - long supersetSize; /** * Ordinal of the bucket while it is being built. Not used after it is * returned from {@link Aggregator#buildAggregations(org.elasticsearch.common.util.LongArray)} and not @@ -70,16 +68,7 @@ public interface Reader> { protected InternalAggregations aggregations; final transient DocValueFormat format; - protected Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { this.subsetDf = subsetDf; this.supersetDf = supersetDf; this.aggregations = aggregations; @@ -89,9 +78,7 @@ protected Bucket( /** * Read from a stream. 
*/ - protected Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) { - this.subsetSize = subsetSize; - this.supersetSize = supersetSize; + protected Bucket(StreamInput in, DocValueFormat format) { this.format = format; } @@ -105,20 +92,10 @@ public long getSupersetDf() { return supersetDf; } - @Override - public long getSupersetSize() { - return supersetSize; - } - - @Override - public long getSubsetSize() { - return subsetSize; - } - // TODO we should refactor to remove this, since buckets should be immutable after they are generated. // This can lead to confusing bugs if the bucket is re-created (via createBucket() or similar) without // the score - void updateScore(SignificanceHeuristic significanceHeuristic) { + void updateScore(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize) { score = significanceHeuristic.getScore(subsetDf, subsetSize, supersetDf, supersetSize); } @@ -262,13 +239,11 @@ public InternalAggregation get() { buckets.forEach(entry -> { final B b = createBucket( entry.value.subsetDf[0], - globalSubsetSize, entry.value.supersetDf[0], - globalSupersetSize, entry.value.reducer.getAggregations(), entry.value.reducer.getProto() ); - b.updateScore(heuristic); + b.updateScore(heuristic, globalSubsetSize, globalSupersetSize); if (((b.score > 0) && (b.subsetDf >= minDocCount)) || reduceContext.isFinalReduce() == false) { final B removed = ordered.insertWithOverflow(b); if (removed == null) { @@ -317,9 +292,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { .map( b -> createBucket( samplingContext.scaleUp(b.subsetDf), - subsetSize, samplingContext.scaleUp(b.supersetDf), - supersetSize, InternalAggregations.finalizeSampling(b.aggregations, samplingContext), b ) @@ -328,14 +301,7 @@ public InternalAggregation finalizeSampling(SamplingContext samplingContext) { ); } - abstract B createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - B prototype - ); + abstract B createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, B prototype); protected abstract A create(long subsetSize, long supersetSize, List buckets); @@ -344,10 +310,6 @@ abstract B createBucket( */ protected abstract B[] createBucketsArray(int size); - protected abstract long getSubsetSize(); - - protected abstract long getSupersetSize(); - protected abstract SignificanceHeuristic getSignificanceHeuristic(); @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java index 6ae47d5975479..b96c495d37489 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java @@ -47,7 +47,6 @@ import java.util.function.BiConsumer; import java.util.function.Function; import java.util.function.LongConsumer; -import java.util.function.Supplier; import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder; @@ -296,7 +295,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) { B spare = null; BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningOrd); - Supplier emptyBucketBuilder = 
emptyBucketBuilder(owningOrd); + BucketUpdater bucketUpdater = bucketUpdater(owningOrd); while (ordsEnum.next()) { long docCount = bucketDocCount(ordsEnum.ord()); otherDocCounts.increment(ordIdx, docCount); @@ -305,9 +304,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro } if (spare == null) { checkRealMemoryCBForInternalBucket(); - spare = emptyBucketBuilder.get(); + spare = buildEmptyBucket(); } - updateBucket(spare, ordsEnum, docCount); + bucketUpdater.updateBucket(spare, ordsEnum, docCount); spare = ordered.insertWithOverflow(spare); } @@ -348,9 +347,9 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException; /** - * Build an empty temporary bucket. + * Build an empty bucket. */ - abstract Supplier emptyBucketBuilder(long owningBucketOrd); + abstract B buildEmptyBucket(); /** * Build a {@link PriorityQueue} to sort the buckets. After we've @@ -362,7 +361,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro * Update fields in {@code spare} to reflect information collected for * this bucket ordinal. */ - abstract void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + abstract BucketUpdater bucketUpdater(long owningBucketOrd); /** * Build an array to hold the "top" buckets for each ordinal. @@ -399,6 +398,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + /** * Builds results for the standard {@code terms} aggregation. 
*/ @@ -490,8 +493,8 @@ private void collectZeroDocEntries(BinaryDocValues values, Bits liveDocs, int ma } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return () -> new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); + StringTerms.Bucket buildEmptyBucket() { + return new StringTerms.Bucket(new BytesRef(), 0, null, showTermDocCountError, 0, format); } @Override @@ -500,10 +503,12 @@ ObjectArrayPriorityQueue buildPriorityQueue(int size) { } @Override - void updateBucket(StringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) throws IOException { - ordsEnum.readValue(spare.termBytes); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -615,9 +620,8 @@ public void collect(int doc, long owningBucketOrd) throws IOException { void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException {} @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantStringTerms.Bucket(new BytesRef(), 0, subsetSize, 0, 0, null, format, 0); + SignificantStringTerms.Bucket buildEmptyBucket() { + return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, null, format, 0); } @Override @@ -626,20 +630,20 @@ ObjectArrayPriorityQueue buildPriorityQueue(int s } @Override - void updateBucket(SignificantStringTerms.Bucket spare, BytesKeyedBucketOrds.BucketOrdsEnum ordsEnum, long docCount) - throws IOException { - - ordsEnum.readValue(spare.termBytes); - spare.bucketOrd = ordsEnum.ord(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); - spare.supersetSize = supersetSize; - /* - * During shard-local down-selection we use subset/superset stats - * that are for this shard only. Back at the central reducer these - * properties will be updated with global stats. - */ - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + ordsEnum.readValue(spare.termBytes); + spare.bucketOrd = ordsEnum.ord(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.termBytes); + /* + * During shard-local down-selection we use subset/superset stats + * that are for this shard only. Back at the central reducer these + * properties will be updated with global stats. 
+             */
+            spare.updateScore(significanceHeuristic, subsetSize, supersetSize);
+        };
     }

     @Override
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java
index ce89b95b76a05..5d4c15d8a3b80 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/NumericTermsAggregator.java
@@ -43,7 +43,6 @@
 import java.util.Map;
 import java.util.function.BiConsumer;
 import java.util.function.Function;
-import java.util.function.Supplier;

 import static java.util.Collections.emptyList;
 import static org.elasticsearch.search.aggregations.InternalOrder.isKeyOrder;
@@ -177,7 +176,7 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro
                 try (ObjectArrayPriorityQueue ordered = buildPriorityQueue(size)) {
                     B spare = null;
                     BucketOrdsEnum ordsEnum = bucketOrds.ordsEnum(owningBucketOrd);
-                    Supplier emptyBucketBuilder = emptyBucketBuilder(owningBucketOrd);
+                    BucketUpdater bucketUpdater = bucketUpdater(owningBucketOrd);
                     while (ordsEnum.next()) {
                         long docCount = bucketDocCount(ordsEnum.ord());
                         otherDocCounts.increment(ordIdx, docCount);
@@ -186,9 +185,9 @@
                         }
                         if (spare == null) {
                             checkRealMemoryCBForInternalBucket();
-                            spare = emptyBucketBuilder.get();
+                            spare = buildEmptyBucket();
                         }
-                        updateBucket(spare, ordsEnum, docCount);
+                        bucketUpdater.updateBucket(spare, ordsEnum, docCount);
                         spare = ordered.insertWithOverflow(spare);
                     }
@@ -240,17 +239,16 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro
         abstract B[] buildBuckets(int size);

         /**
-         * Build a {@linkplain Supplier} that can be used to build "empty"
-         * buckets. Those buckets will then be {@link #updateBucket updated}
+         * Build an empty bucket. Those buckets will then be {@link #bucketUpdater(long) updated}
          * for each collected bucket.
          */
-        abstract Supplier emptyBucketBuilder(long owningBucketOrd);
+        abstract B buildEmptyBucket();

         /**
          * Update fields in {@code spare} to reflect information collected for
          * this bucket ordinal.
          */
-        abstract void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException;
+        abstract BucketUpdater bucketUpdater(long owningBucketOrd);

         /**
          * Build a {@link ObjectArrayPriorityQueue} to sort the buckets.
After we've @@ -282,6 +280,10 @@ private InternalAggregation[] buildAggregations(LongArray owningBucketOrds) thro abstract R buildEmptyResult(); } + interface BucketUpdater { + void updateBucket(B spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException; + } + abstract class StandardTermsResultStrategy, B extends InternalTerms.Bucket> extends ResultStrategy { protected final boolean showTermDocCountError; @@ -305,13 +307,6 @@ final void buildSubAggs(ObjectArray topBucketsPerOrd) throws IOException { buildSubAggsForAllBuckets(topBucketsPerOrd, b -> b.bucketOrd, (b, aggs) -> b.aggregations = aggs); } - @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - return this::buildEmptyBucket; - } - - abstract B buildEmptyBucket(); - @Override final void collectZeroDocEntriesIfNeeded(long owningBucketOrd, boolean excludeDeletedDocs) throws IOException { if (bucketCountThresholds.getMinDocCount() != 0) { @@ -375,10 +370,12 @@ LongTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = ordsEnum.value(); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (LongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = ordsEnum.value(); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -457,10 +454,12 @@ DoubleTerms.Bucket buildEmptyBucket() { } @Override - void updateBucket(DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) { - spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); - spare.docCount = docCount; - spare.bucketOrd = ordsEnum.ord(); + BucketUpdater bucketUpdater(long owningBucketOrd) { + return (DoubleTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) -> { + spare.term = NumericUtils.sortableLongToDouble(ordsEnum.value()); + spare.docCount = docCount; + spare.bucketOrd = ordsEnum.ord(); + }; } @Override @@ -565,20 +564,22 @@ SignificantLongTerms.Bucket[] buildBuckets(int size) { } @Override - Supplier emptyBucketBuilder(long owningBucketOrd) { - long subsetSize = subsetSizes.get(owningBucketOrd); - return () -> new SignificantLongTerms.Bucket(0, subsetSize, 0, supersetSize, 0, null, format, 0); + SignificantLongTerms.Bucket buildEmptyBucket() { + return new SignificantLongTerms.Bucket(0, 0, 0, null, format, 0); } @Override - void updateBucket(SignificantLongTerms.Bucket spare, BucketOrdsEnum ordsEnum, long docCount) throws IOException { - spare.term = ordsEnum.value(); - spare.subsetDf = docCount; - spare.supersetDf = backgroundFrequencies.freq(spare.term); - spare.bucketOrd = ordsEnum.ord(); - // During shard-local down-selection we use subset/superset stats that are for this shard only - // Back at the central reducer these properties will be updated with global stats - spare.updateScore(significanceHeuristic); + BucketUpdater bucketUpdater(long owningBucketOrd) { + long subsetSize = subsetSizes.get(owningBucketOrd); + return (spare, ordsEnum, docCount) -> { + spare.term = ordsEnum.value(); + spare.subsetDf = docCount; + spare.supersetDf = backgroundFrequencies.freq(spare.term); + spare.bucketOrd = ordsEnum.ord(); + // During shard-local down-selection we use subset/superset stats that are for this shard only + // Back at the central reducer these properties will be updated with global stats + spare.updateScore(significanceHeuristic, subsetSize, supersetSize); + }; } @Override diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java index 2aace2a714a26..17ea290b7aaaf 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTerms.java @@ -30,23 +30,14 @@ public static class Bucket extends InternalSignificantTerms.Bucket { long term; - public Bucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - long term, - InternalAggregations aggregations, - DocValueFormat format, - double score - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + public Bucket(long subsetDf, long supersetDf, long term, InternalAggregations aggregations, DocValueFormat format, double score) { + super(subsetDf, supersetDf, aggregations, format); this.term = term; this.score = score; } - Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException { - super(in, subsetSize, supersetSize, format); + Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); subsetDf = in.readVLong(); supersetDf = in.readVLong(); term = in.readLong(); @@ -136,16 +127,7 @@ public SignificantLongTerms create(List buckets) { @Override public Bucket createBucket(InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { - return new Bucket( - prototype.subsetDf, - prototype.subsetSize, - prototype.supersetDf, - prototype.supersetSize, - prototype.term, - aggregations, - prototype.format, - prototype.score - ); + return new Bucket(prototype.subsetDf, prototype.supersetDf, prototype.term, aggregations, prototype.format, prototype.score); } @Override @@ -169,14 +151,7 @@ protected Bucket[] createBucketsArray(int size) { } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - SignificantLongTerms.Bucket prototype - ) { - return new Bucket(subsetDf, subsetSize, supersetDf, supersetSize, prototype.term, aggregations, format, prototype.score); + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantLongTerms.Bucket prototype) { + return new Bucket(subsetDf, supersetDf, prototype.term, aggregations, format, prototype.score); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java index 791c09d3cbd99..b255f17d2843b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTerms.java @@ -34,14 +34,12 @@ public static class Bucket extends InternalSignificantTerms.Bucket { public Bucket( BytesRef term, long subsetDf, - long subsetSize, long supersetDf, - long supersetSize, InternalAggregations aggregations, DocValueFormat format, double score ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + super(subsetDf, supersetDf, aggregations, format); this.termBytes = term; this.score = score; } @@ -49,8 +47,8 @@ public Bucket( /** * Read from a stream. 
 */
-    public Bucket(StreamInput in, long subsetSize, long supersetSize, DocValueFormat format) throws IOException {
-        super(in, subsetSize, supersetSize, format);
+    public Bucket(StreamInput in, DocValueFormat format) throws IOException {
+        super(in, format);
         termBytes = in.readBytesRef();
         subsetDf = in.readVLong();
         supersetDf = in.readVLong();
@@ -140,16 +138,7 @@ public SignificantStringTerms create(List buckets

     @Override
     public Bucket createBucket(InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) {
-        return new Bucket(
-            prototype.termBytes,
-            prototype.subsetDf,
-            prototype.subsetSize,
-            prototype.supersetDf,
-            prototype.supersetSize,
-            aggregations,
-            prototype.format,
-            prototype.score
-        );
+        return new Bucket(prototype.termBytes, prototype.subsetDf, prototype.supersetDf, aggregations, prototype.format, prototype.score);
     }

     @Override
@@ -173,14 +162,7 @@ protected Bucket[] createBucketsArray(int size) {
     }

     @Override
-    Bucket createBucket(
-        long subsetDf,
-        long subsetSize,
-        long supersetDf,
-        long supersetSize,
-        InternalAggregations aggregations,
-        SignificantStringTerms.Bucket prototype
-    ) {
-        return new Bucket(prototype.termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format, prototype.score);
+    Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) {
+        return new Bucket(prototype.termBytes, subsetDf, supersetDf, aggregations, format, prototype.score);
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java
index f02b5338eea74..e8f160193bc71 100644
--- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java
+++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantTerms.java
@@ -17,6 +17,18 @@
  */
 public interface SignificantTerms extends MultiBucketsAggregation, Iterable {

+    /**
+     * @return The number of docs in the subset (also known as "foreground set").
+     *         This number is equal to the document count of the containing aggregation.
+     */
+    long getSubsetSize();
+
+    /**
+     * @return The number of docs in the superset (ordinarily the background count
+     *         of the containing aggregation).
+     */
+    long getSupersetSize();
+
     interface Bucket extends MultiBucketsAggregation.Bucket {

         /**
@@ -30,24 +42,12 @@ interface Bucket extends MultiBucketsAggregation.Bucket {
          */
         long getSubsetDf();

-        /**
-         * @return The numbers of docs in the subset (also known as "foreground set").
-         *         This number is equal to the document count of the containing aggregation.
-         */
-        long getSubsetSize();
-
         /**
          * @return The number of docs in the superset containing a particular term (also
          * known as the "background count" of the bucket)
          */
         long getSupersetDf();

-        /**
-         * @return The numbers of docs in the superset (ordinarily the background count
-         *         of the containing aggregation).
- */ - long getSupersetSize(); - } @Override diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java index 8bd14a46bff96..6d1370f147f36 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedSignificantTerms.java @@ -40,16 +40,8 @@ public class UnmappedSignificantTerms extends InternalSignificantTerms { - private Bucket( - BytesRef term, - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - DocValueFormat format - ) { - super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations, format); + private Bucket(BytesRef term, long subsetDf, long supersetDf, InternalAggregations aggregations, DocValueFormat format) { + super(subsetDf, supersetDf, aggregations, format); } } @@ -95,14 +87,7 @@ protected UnmappedSignificantTerms create(long subsetSize, long supersetSize, Li } @Override - Bucket createBucket( - long subsetDf, - long subsetSize, - long supersetDf, - long supersetSize, - InternalAggregations aggregations, - Bucket prototype - ) { + Bucket createBucket(long subsetDf, long supersetDf, InternalAggregations aggregations, Bucket prototype) { throw new UnsupportedOperationException("not supported for UnmappedSignificantTerms"); } @@ -153,12 +138,12 @@ protected SignificanceHeuristic getSignificanceHeuristic() { } @Override - protected long getSubsetSize() { + public long getSubsetSize() { return 0; } @Override - protected long getSupersetSize() { + public long getSupersetSize() { return 0; } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java index 6d49d6855caca..7e5d19977fe9f 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTermsTestCase.java @@ -59,8 +59,6 @@ protected void assertSampled( InternalSignificantTerms.Bucket sampledBucket = sampledIt.next(); assertEquals(sampledBucket.subsetDf, samplingContext.scaleUp(reducedBucket.subsetDf)); assertEquals(sampledBucket.supersetDf, samplingContext.scaleUp(reducedBucket.supersetDf)); - assertEquals(sampledBucket.subsetSize, samplingContext.scaleUp(reducedBucket.subsetSize)); - assertEquals(sampledBucket.supersetSize, samplingContext.scaleUp(reducedBucket.supersetSize)); assertThat(sampledBucket.score, closeTo(reducedBucket.score, 1e-14)); } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java index a303199338783..92bfa2f6f89f4 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantLongTermsTests.java @@ -49,17 +49,8 @@ public void setUp() throws Exception { Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong); - 
SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - term, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket(subsetDfs[i], supersetDfs[i], term, aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantLongTerms(name, requiredSize, 1L, metadata, format, subsetSize, supersetSize, significanceHeuristic, buckets); @@ -90,8 +81,6 @@ public void setUp() throws Exception { randomLong(), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java index a91566c615eaf..7499831f371aa 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/SignificantStringTermsTests.java @@ -42,17 +42,8 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas Set terms = new HashSet<>(); for (int i = 0; i < numBuckets; ++i) { BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAlphaOfLength(10))); - SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( - term, - subsetDfs[i], - subsetSize, - supersetDfs[i], - supersetSize, - aggs, - format, - 0 - ); - bucket.updateScore(significanceHeuristic); + SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket(term, subsetDfs[i], supersetDfs[i], aggs, format, 0); + bucket.updateScore(significanceHeuristic, subsetSize, supersetSize); buckets.add(bucket); } return new SignificantStringTerms( @@ -93,8 +84,6 @@ public class SignificantStringTermsTests extends InternalSignificantTermsTestCas new BytesRef(randomAlphaOfLengthBetween(1, 10)), randomNonNegativeLong(), randomNonNegativeLong(), - randomNonNegativeLong(), - randomNonNegativeLong(), InternalAggregations.EMPTY, format, 0 diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java index ae5083c245538..a3c03526c9b93 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/bucket/AbstractSignificanceHeuristicTestCase.java @@ -95,22 +95,20 @@ public void testStreamResponse() throws Exception { InternalMappedSignificantTerms read = (InternalMappedSignificantTerms) in.readNamedWriteable(InternalAggregation.class); assertEquals(sigTerms.getSignificanceHeuristic(), read.getSignificanceHeuristic()); + assertThat(read.getSubsetSize(), equalTo(10L)); + assertThat(read.getSupersetSize(), equalTo(20L)); SignificantTerms.Bucket originalBucket = sigTerms.getBuckets().get(0); SignificantTerms.Bucket streamedBucket = read.getBuckets().get(0); assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString())); assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf())); 
assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf())); - assertThat(streamedBucket.getSubsetSize(), equalTo(10L)); - assertThat(streamedBucket.getSupersetSize(), equalTo(20L)); } InternalMappedSignificantTerms getRandomSignificantTerms(SignificanceHeuristic heuristic) { if (randomBoolean()) { SignificantLongTerms.Bucket bucket = new SignificantLongTerms.Bucket( 1, - 2, 3, - 4, 123, InternalAggregations.EMPTY, DocValueFormat.RAW, @@ -121,9 +119,7 @@ public void testStreamResponse() throws Exception { SignificantStringTerms.Bucket bucket = new SignificantStringTerms.Bucket( new BytesRef("someterm"), 1, - 2, 3, - 4, InternalAggregations.EMPTY, DocValueFormat.RAW, randomDoubleBetween(0, 100, true) @@ -136,15 +132,13 @@ public void testReduce() { List aggs = createInternalAggregations(); AggregationReduceContext context = InternalAggregationTestCase.emptyReduceContextBuilder().forFinalReduction(); SignificantTerms reducedAgg = (SignificantTerms) InternalAggregationTestCase.reduce(aggs, context); + assertThat(reducedAgg.getSubsetSize(), equalTo(16L)); + assertThat(reducedAgg.getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().size(), equalTo(2)); assertThat(reducedAgg.getBuckets().get(0).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(0).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(0).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(0).getSupersetSize(), equalTo(30L)); assertThat(reducedAgg.getBuckets().get(1).getSubsetDf(), equalTo(8L)); - assertThat(reducedAgg.getBuckets().get(1).getSubsetSize(), equalTo(16L)); assertThat(reducedAgg.getBuckets().get(1).getSupersetDf(), equalTo(10L)); - assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30L)); } public void testBasicScoreProperties() { @@ -234,9 +228,9 @@ private List createInternalAggregations() { : new AbstractSignificanceHeuristicTestCase.LongTestAggFactory(); List aggs = new ArrayList<>(); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 0))); - aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 4, 5, 10, 1))); - aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 4, 5, 10, i))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 0))); + aggs.add(factory.createAggregation(significanceHeuristic, 4, 10, 1, (f, i) -> f.createBucket(4, 5, 1))); + aggs.add(factory.createAggregation(significanceHeuristic, 8, 10, 2, (f, i) -> f.createBucket(4, 5, i))); return aggs; } @@ -254,7 +248,7 @@ final A createAggregation( abstract A createAggregation(SignificanceHeuristic significanceHeuristic, long subsetSize, long supersetSize, List buckets); - abstract B createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label); + abstract B createBucket(long subsetDF, long supersetDF, long label); } private class StringTestAggFactory extends TestAggFactory { @@ -279,13 +273,11 @@ SignificantStringTerms createAggregation( } @Override - SignificantStringTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { + SignificantStringTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { return new SignificantStringTerms.Bucket( new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, - subsetSize, supersetDF, - supersetSize, 
InternalAggregations.EMPTY, DocValueFormat.RAW, 0 @@ -315,17 +307,8 @@ SignificantLongTerms createAggregation( } @Override - SignificantLongTerms.Bucket createBucket(long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { - return new SignificantLongTerms.Bucket( - subsetDF, - subsetSize, - supersetDF, - supersetSize, - label, - InternalAggregations.EMPTY, - DocValueFormat.RAW, - 0 - ); + SignificantLongTerms.Bucket createBucket(long subsetDF, long supersetDF, long label) { + return new SignificantLongTerms.Bucket(subsetDF, supersetDF, label, InternalAggregations.EMPTY, DocValueFormat.RAW, 0); } } From fc266e5ea9e9c83c16f006f8d53ad481530273aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Tue, 3 Dec 2024 07:50:18 +0100 Subject: [PATCH 118/139] [Profiling] Switch to 19Hz sampling frequency (#117757) * [Profiling] Switch to 19Hz sampling frequency * Fix internalClusterTest --- .../xpack/profiling/action/GetStackTracesActionIT.java | 8 ++++---- .../xpack/profiling/action/CO2Calculator.java | 2 +- .../xpack/profiling/action/CostCalculator.java | 2 +- .../xpack/profiling/action/CO2CalculatorTests.java | 4 ++-- .../xpack/profiling/action/CostCalculatorTests.java | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java index 6463cda554e5b..4b3a4fb0108f7 100644 --- a/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java +++ b/x-pack/plugin/profiling/src/internalClusterTest/java/org/elasticsearch/xpack/profiling/action/GetStackTracesActionIT.java @@ -46,8 +46,8 @@ public void testGetStackTracesUnfiltered() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); // not determined by default assertNull(stackTrace.subGroups); @@ -91,8 +91,8 @@ public void testGetStackTracesGroupedByServiceName() throws Exception { assertEquals(18, stackTrace.fileIds.length); assertEquals(18, stackTrace.frameIds.length); assertEquals(18, stackTrace.typeIds.length); - assertEquals(0.0000048475146d, stackTrace.annualCO2Tons, 0.0000000001d); - assertEquals(0.18834d, stackTrace.annualCostsUSD, 0.00001d); + assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d); + assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d); assertEquals(Long.valueOf(2L), stackTrace.subGroups.getCount("basket")); assertNotNull(response.getStackFrames()); diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java index fbd5f7a9b5328..0a05fc5930942 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CO2Calculator.java @@ -12,7 +12,7 @@ import java.util.Map; final class CO2Calculator { - private static 
final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double DEFAULT_CO2_TONS_PER_KWH = 0.000379069d; // unit: metric tons / kWh private static final double DEFAULT_KILOWATTS_PER_CORE_X86 = 7.0d / 1000.0d; // unit: watt / core private static final double DEFAULT_KILOWATTS_PER_CORE_ARM64 = 2.8d / 1000.0d; // unit: watt / core diff --git a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java index b8ee54f5f29e8..05b51adb6a52f 100644 --- a/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java +++ b/x-pack/plugin/profiling/src/main/java/org/elasticsearch/xpack/profiling/action/CostCalculator.java @@ -10,7 +10,7 @@ import java.util.Map; final class CostCalculator { - private static final double DEFAULT_SAMPLING_FREQUENCY = 20.0d; + private static final double DEFAULT_SAMPLING_FREQUENCY = 19.0d; private static final double SECONDS_PER_HOUR = 60 * 60; private static final double SECONDS_PER_YEAR = SECONDS_PER_HOUR * 24 * 365.0d; // unit: seconds public static final double DEFAULT_COST_USD_PER_CORE_HOUR = 0.0425d; // unit: USD / (core * hour) diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java index ff698465a56c5..9be98fbe4f46b 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CO2CalculatorTests.java @@ -73,7 +73,7 @@ public void testCreateFromRegularSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); @@ -110,7 +110,7 @@ public void testCreateFromMalformedSource() { double samplingDurationInSeconds = 1_800.0d; // 30 minutes long samples = 100_000L; // 100k samples - double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d); + double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d); CO2Calculator co2Calculator = new CO2Calculator(hostsTable, samplingDurationInSeconds, null, null, null, null); checkCO2Calculation(co2Calculator.getAnnualCO2Tons(HOST_ID_A, samples), annualCoreHours, 1.135d, 0.0002786d, 7.0d); diff --git a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java index eaf6cf618eddb..1c719c97164dc 100644 --- a/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java +++ b/x-pack/plugin/profiling/src/test/java/org/elasticsearch/xpack/profiling/action/CostCalculatorTests.java @@ -63,7 +63,7 @@ public void testCreateFromRegularSource() { double 
samplingDurationInSeconds = 1_800.0d; // 30 minutes
         long samples = 100_000L; // 100k samples

-        double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 20.0d);
+        double annualCoreHours = CostCalculator.annualCoreHours(samplingDurationInSeconds, samples, 19.0d);
         CostCalculator costCalculator = new CostCalculator(hostsTable, samplingDurationInSeconds, null, null, null);

         // Checks whether the cost calculation is based on the lookup data.

From 564e13e2ba49ac78c8c142f9b29481e56c498c83 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?=
Date: Tue, 3 Dec 2024 08:56:20 +0100
Subject: [PATCH 119/139] [Profiling] Add field
 profiling.agent.config.sampling_frequency to profiling-hosts (#117752)

---
 .../profiling/component-template/profiling-hosts.json | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json
index e58a3cbd39f97..50f3ab6bf9a08 100644
--- a/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json
+++ b/x-pack/plugin/core/template-resources/src/main/resources/profiling/component-template/profiling-hosts.json
@@ -135,6 +135,9 @@
           },
           "config.present_cpu_cores": {
             "type": "integer"
+          },
+          "config.sampling_frequency": {
+            "type": "integer"
           }
         }
       },

From cbb08babdbd7d8f42426df7984caa2d587b26ff7 Mon Sep 17 00:00:00 2001
From: Dimitris Rempapis
Date: Tue, 3 Dec 2024 10:52:14 +0200
Subject: [PATCH 120/139] Remove RestApiVersion#V_7 references for 9.0.0
 (#117572)

Address and remove references to org.elasticsearch.core.RestApiVersion#V_7
from the search-related code.
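In practice the removed references are v7-only branches like this one (taken from the MultiSearchRequest hunks below); with the v7 REST API gone, such conditions can never be true on 9.0:

```java
// support first line with \n
if (parserConfig.restApiVersion() == RestApiVersion.V_7 && nextMarker == 0) {
    deprecationLogger.compatibleCritical("msearch_first_line_empty", FIRST_LINE_EMPTY_DEPRECATION_MESSAGE);
    from = nextMarker + 1;
    continue;
}
```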
---
 docs/changelog/117572.yaml                    |   5 +
 .../action/search/MultiSearchRequest.java     |  27 +----
 .../termvectors/TermVectorsRequest.java       |   6 -
 .../index/query/CommonTermsQueryBuilder.java  |  72 ------------
 .../index/query/TypeQueryV7Builder.java       | 108 ------------------
 .../document/RestTermVectorsAction.java       |   1 -
 .../action/search/RestMultiSearchAction.java  |   3 -
 .../elasticsearch/search/SearchModule.java    |  26 -----
 .../MovAvgPipelineAggregationBuilder.java     |  85 --------------
 .../search/sort/FieldSortBuilder.java         |   8 --
 .../search/sort/ScriptSortBuilder.java        |   9 --
 .../search/sort/SortBuilder.java              |   8 --
 .../search/MultiSearchRequestTests.java       |  30 ++---
 13 files changed, 20 insertions(+), 368 deletions(-)
 create mode 100644 docs/changelog/117572.yaml
 delete mode 100644 server/src/main/java/org/elasticsearch/index/query/CommonTermsQueryBuilder.java
 delete mode 100644 server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java
 delete mode 100644 server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java

diff --git a/docs/changelog/117572.yaml b/docs/changelog/117572.yaml
new file mode 100644
index 0000000000000..a4a2ef6c06f5d
--- /dev/null
+++ b/docs/changelog/117572.yaml
@@ -0,0 +1,5 @@
+pr: 117572
+summary: Address and remove any references to RestApiVersion version 7
+area: Search
+type: enhancement
+issues: []
diff --git a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java
index 8467ee6fd86f3..2022180475529 100644
--- a/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java
+++ b/server/src/main/java/org/elasticsearch/action/search/MultiSearchRequest.java
@@ -18,11 +18,7 @@
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.xcontent.XContentHelper;
-import org.elasticsearch.core.RestApiVersion;
-import org.elasticsearch.rest.action.search.RestMultiSearchAction;
-import org.elasticsearch.rest.action.search.RestSearchAction;
 import org.elasticsearch.tasks.CancellableTask;
 import org.elasticsearch.tasks.Task;
 import org.elasticsearch.tasks.TaskId;
@@ -51,10 +47,6 @@
 /**
  * A multi search API request.
 */
 public class MultiSearchRequest extends ActionRequest implements CompositeIndicesRequest {
-    private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(RestSearchAction.class);
-    public static final String FIRST_LINE_EMPTY_DEPRECATION_MESSAGE =
-        "support for empty first line before any action metadata in msearch API is deprecated "
-            + "and will be removed in the next major version";
     public static final int MAX_CONCURRENT_SEARCH_REQUESTS_DEFAULT = 0;

     private int maxConcurrentSearchRequests = 0;
@@ -213,12 +205,6 @@ public static void readMultiLineFormat(
             if (nextMarker == -1) {
                 break;
             }
-            // support first line with \n
-            if (parserConfig.restApiVersion() == RestApiVersion.V_7 && nextMarker == 0) {
-                deprecationLogger.compatibleCritical("msearch_first_line_empty", FIRST_LINE_EMPTY_DEPRECATION_MESSAGE);
-                from = nextMarker + 1;
-                continue;
-            }

             SearchRequest searchRequest = new SearchRequest();
             if (indices != null) {
@@ -281,14 +267,11 @@ public static void readMultiLineFormat(
                         allowNoIndices = value;
                     } else if ("ignore_throttled".equals(entry.getKey()) || "ignoreThrottled".equals(entry.getKey())) {
                         ignoreThrottled = value;
-                    } else if (parserConfig.restApiVersion() == RestApiVersion.V_7
-                        && ("type".equals(entry.getKey()) || "types".equals(entry.getKey()))) {
-                            deprecationLogger.compatibleCritical("msearch_with_types", RestMultiSearchAction.TYPES_DEPRECATION_MESSAGE);
-                        } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) {
-                            // Skip, the parser handled the key/value
-                        } else {
-                            throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section");
-                        }
+                    } else if (extraParamParser.apply(entry.getKey(), value, searchRequest)) {
+                        // Skip, the parser handled the key/value
+                    } else {
+                        throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section");
+                    }
                 }
                 defaultOptions = IndicesOptions.fromParameters(
                     expandWildcards,
diff --git a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java
index a36158d11b5b3..7a7b2afab75d1 100644
--- a/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java
+++ b/server/src/main/java/org/elasticsearch/action/termvectors/TermVectorsRequest.java
@@ -20,13 +20,11 @@
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
-import org.elasticsearch.common.logging.DeprecationLogger;
 import org.elasticsearch.common.lucene.uid.Versions;
 import org.elasticsearch.common.util.set.Sets;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.RestApiVersion;
 import org.elasticsearch.index.VersionType;
-import org.elasticsearch.rest.action.document.RestTermVectorsAction;
 import org.elasticsearch.xcontent.ParseField;
 import org.elasticsearch.xcontent.XContentBuilder;
 import org.elasticsearch.xcontent.XContentParser;
@@ -52,7 +50,6 @@
 // It's not possible to suppress the warning at #realtime(boolean) at a method-level.
@SuppressWarnings("unchecked") public final class TermVectorsRequest extends SingleShardRequest implements RealtimeRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TermVectorsRequest.class); private static final ParseField INDEX = new ParseField("_index"); private static final ParseField ID = new ParseField("_id"); @@ -66,7 +63,6 @@ public final class TermVectorsRequest extends SingleShardRequest { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String COMMON_TERMS_QUERY_DEPRECATION_MSG = "Common Terms Query usage is not supported. " - + "Use [match] query which can efficiently skip blocks of documents if the total number of hits is not tracked."; - - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static ParseField NAME_V7 = new ParseField("common").withAllDeprecated(COMMON_TERMS_QUERY_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("common_term_query is not meant to be serialized."); - } - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException {} - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return null; - } - - @Override - protected boolean doEquals(CommonTermsQueryBuilder other) { - return false; - } - - @Override - protected int doHashCode() { - return 0; - } - - @Override - public String getWriteableName() { - return null; - } - - public static CommonTermsQueryBuilder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("common_term_query", COMMON_TERMS_QUERY_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), COMMON_TERMS_QUERY_DEPRECATION_MSG); - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java b/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java deleted file mode 100644 index c9aae0195acf7..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/query/TypeQueryV7Builder.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.query; - -import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.Query; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.mapper.MapperService; -import org.elasticsearch.xcontent.ObjectParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; -import org.elasticsearch.xcontent.XContentParser; - -import java.io.IOException; - -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_RELEVANCE) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 -public class TypeQueryV7Builder extends AbstractQueryBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(TypeQueryV7Builder.class); - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Type queries are deprecated, " - + "prefer to filter on a field instead."; - - private static final String NAME = "type"; - public static final ParseField NAME_V7 = new ParseField(NAME).forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final ParseField VALUE_FIELD = new ParseField("value"); - private static final ObjectParser PARSER = new ObjectParser<>(NAME, TypeQueryV7Builder::new); - - static { - PARSER.declareString( - QueryBuilder::queryName, - AbstractQueryBuilder.NAME_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareFloat( - QueryBuilder::boost, - AbstractQueryBuilder.BOOST_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)) - ); - PARSER.declareString(TypeQueryV7Builder::setValue, VALUE_FIELD.forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7))); - } - - private String value; - - public TypeQueryV7Builder() {} - - /** - * Read from a stream. 
- */ - public TypeQueryV7Builder(StreamInput in) throws IOException { - super(in); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException {} - - @Override - protected void doXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(NAME); - builder.field(VALUE_FIELD.getPreferredName(), MapperService.SINGLE_MAPPING_NAME); - printBoostAndQueryName(builder); - builder.endObject(); - } - - @Override - protected Query doToQuery(SearchExecutionContext context) throws IOException { - return new MatchNoDocsQuery(); - } - - @Override - protected boolean doEquals(TypeQueryV7Builder other) { - return true; - } - - @Override - protected int doHashCode() { - return 0; - } - - public static TypeQueryV7Builder fromXContent(XContentParser parser) throws IOException { - deprecationLogger.compatibleCritical("type_query", TYPES_DEPRECATION_MESSAGE); - throw new ParsingException(parser.getTokenLocation(), TYPES_DEPRECATION_MESSAGE); - } - - @Override - public String getWriteableName() { - return NAME; - } - - public void setValue(String value) { - this.value = value; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java index 8e41e1cd09674..d2b09af8e1f3d 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/document/RestTermVectorsAction.java @@ -35,7 +35,6 @@ */ @ServerlessScope(Scope.PUBLIC) public class RestTermVectorsAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal] Specifying types in term vector requests is deprecated."; @Override public List routes() { diff --git a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java index 89775b4ca8e15..24fab92ced392 100644 --- a/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java +++ b/server/src/main/java/org/elasticsearch/rest/action/search/RestMultiSearchAction.java @@ -43,9 +43,6 @@ @ServerlessScope(Scope.PUBLIC) public class RestMultiSearchAction extends BaseRestHandler { - public static final String TYPES_DEPRECATION_MESSAGE = "[types removal]" - + " Specifying types in multi search template requests is deprecated."; - private static final Set RESPONSE_PARAMS = Set.of(RestSearchAction.TYPED_KEYS_PARAM, RestSearchAction.TOTAL_HITS_AS_INT_PARAM); private final boolean allowExplicitIndex; diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 09e25350ad4fd..d282ba425b126 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -20,12 +20,10 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.BoostingQueryBuilder; import org.elasticsearch.index.query.CombinedFieldsQueryBuilder; -import 
org.elasticsearch.index.query.CommonTermsQueryBuilder; import org.elasticsearch.index.query.ConstantScoreQueryBuilder; import org.elasticsearch.index.query.DisMaxQueryBuilder; import org.elasticsearch.index.query.DistanceFeatureQueryBuilder; @@ -68,7 +66,6 @@ import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; import org.elasticsearch.index.query.TermsSetQueryBuilder; -import org.elasticsearch.index.query.TypeQueryV7Builder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.functionscore.ExponentialDecayFunctionBuilder; @@ -204,7 +201,6 @@ import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; @@ -686,15 +682,6 @@ private ValuesSourceRegistry registerAggregations(List plugins) { .setAggregatorRegistrar(CompositeAggregationBuilder::registerAggregators), builder ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery( - new QuerySpec<>( - CommonTermsQueryBuilder.NAME_V7, - (streamInput) -> new CommonTermsQueryBuilder(), - CommonTermsQueryBuilder::fromXContent - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getAggregations, (agg) -> this.registerAggregation(agg, builder)); @@ -815,15 +802,6 @@ private void registerPipelineAggregations(List plugins) { SerialDiffPipelineAggregationBuilder::parse ) ); - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerPipelineAggregation( - new PipelineAggregationSpec( - MovAvgPipelineAggregationBuilder.NAME_V7, - MovAvgPipelineAggregationBuilder::new, - MovAvgPipelineAggregationBuilder.PARSER - ) - ); - } registerFromPlugin(plugins, SearchPlugin::getPipelineAggregations, this::registerPipelineAggregation); } @@ -1203,10 +1181,6 @@ private void registerQueryParsers(List plugins) { })); registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); - - if (RestApiVersion.minimumSupported() == RestApiVersion.V_7) { - registerQuery(new QuerySpec<>(TypeQueryV7Builder.NAME_V7, TypeQueryV7Builder::new, TypeQueryV7Builder::fromXContent)); - } } private void registerIntervalsSourceProviders() { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java deleted file mode 100644 index 068487317dfe5..0000000000000 --- a/server/src/main/java/org/elasticsearch/search/aggregations/pipeline/MovAvgPipelineAggregationBuilder.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. 
Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.search.aggregations.pipeline; - -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; -import org.elasticsearch.common.io.stream.StreamInput; -import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.logging.DeprecationLogger; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; -import org.elasticsearch.index.query.CommonTermsQueryBuilder; -import org.elasticsearch.xcontent.ContextParser; -import org.elasticsearch.xcontent.ParseField; -import org.elasticsearch.xcontent.XContentBuilder; - -import java.io.IOException; -import java.util.Map; - -/** - * The actual moving_avg aggregation was removed as a breaking change in 8.0. This class exists to provide a friendlier error message - * if somebody attempts to use the moving_avg aggregation via the compatible-with=7 mechanism. - * - * We can remove this class entirely when v7 rest api compatibility is dropped. - * - * @deprecated Only for 7.x rest compat - */ -@UpdateForV9(owner = UpdateForV9.Owner.SEARCH_ANALYTICS) // remove this since it's only for 7.x compat and 7.x compat will be removed in 9.0 -@Deprecated -public class MovAvgPipelineAggregationBuilder extends AbstractPipelineAggregationBuilder { - private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(CommonTermsQueryBuilder.class); - public static final String MOVING_AVG_AGG_DEPRECATION_MSG = "Moving Average aggregation usage is not supported. 
" - + "Use the [moving_fn] aggregation instead."; - - public static final ParseField NAME_V7 = new ParseField("moving_avg").withAllDeprecated(MOVING_AVG_AGG_DEPRECATION_MSG) - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - - public static final ContextParser PARSER = (parser, name) -> { - deprecationLogger.compatibleCritical("moving_avg_aggregation", MOVING_AVG_AGG_DEPRECATION_MSG); - throw new ParsingException(parser.getTokenLocation(), MOVING_AVG_AGG_DEPRECATION_MSG); - }; - - public MovAvgPipelineAggregationBuilder(StreamInput in) throws IOException { - super(in, NAME_V7.getPreferredName()); - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void doWriteTo(StreamOutput out) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected PipelineAggregator createInternal(Map metadata) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - protected void validate(ValidationContext context) { - throw new UnsupportedOperationException("moving_avg is not meant to be used."); - } - - @Override - public final String getWriteableName() { - return null; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.ZERO; - } -} diff --git a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java index cd597f3328c0f..5691435c83ecb 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/FieldSortBuilder.java @@ -18,7 +18,6 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.time.DateMathParser; @@ -729,13 +728,6 @@ public static FieldSortBuilder fromXContent(XContentParser parser, String fieldN PARSER.declareObject(FieldSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); PARSER.declareString(FieldSortBuilder::setNumericType, NUMERIC_TYPE); PARSER.declareString(FieldSortBuilder::setFormat, FORMAT); - PARSER.declareField((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD, ValueType.STRING); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java index 48773eec8371b..445c55dc546bc 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/ScriptSortBuilder.java @@ -17,7 +17,6 @@ import org.apache.lucene.util.BytesRefBuilder; import 
org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; -import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -220,14 +219,6 @@ public XContentBuilder toXContent(XContentBuilder builder, Params builderParams) PARSER.declareString((b, v) -> b.order(SortOrder.fromString(v)), ORDER_FIELD); PARSER.declareString((b, v) -> b.sortMode(SortMode.fromString(v)), SORTMODE_FIELD); PARSER.declareObject(ScriptSortBuilder::setNestedSort, (p, c) -> NestedSortBuilder.fromXContent(p), NESTED_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_path] has been removed in favour of the [nested] parameter", c); - }, NESTED_PATH_FIELD); - - PARSER.declareObject((b, v) -> {}, (p, c) -> { - throw new ParsingException(p.getTokenLocation(), "[nested_filter] has been removed in favour of the [nested] parameter", c); - }, NESTED_FILTER_FIELD); } /** diff --git a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java index 5832b93b9462f..4a8cdbcdffa55 100644 --- a/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java +++ b/server/src/main/java/org/elasticsearch/search/sort/SortBuilder.java @@ -18,8 +18,6 @@ import org.elasticsearch.common.io.stream.VersionedNamedWriteable; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.core.RestApiVersion; -import org.elasticsearch.core.UpdateForV9; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.mapper.NestedObjectMapper; import org.elasticsearch.index.query.QueryBuilder; @@ -52,12 +50,6 @@ public abstract class SortBuilder> // parse fields common to more than one SortBuilder public static final ParseField ORDER_FIELD = new ParseField("order"); - @UpdateForV9(owner = UpdateForV9.Owner.SEARCH_FOUNDATIONS) // v7 REST API no longer exists: eliminate ref to RestApiVersion.V_7 - public static final ParseField NESTED_FILTER_FIELD = new ParseField("nested_filter").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - public static final ParseField NESTED_PATH_FIELD = new ParseField("nested_path").withAllDeprecated() - .forRestApiVersion(RestApiVersion.equalTo(RestApiVersion.V_7)); - private static final Map> PARSERS = Map.of( ScriptSortBuilder.NAME, ScriptSortBuilder::fromXContent, diff --git a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java index f2bc561792991..9f81b999c9d98 100644 --- a/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java +++ b/server/src/test/java/org/elasticsearch/action/search/MultiSearchRequestTests.java @@ -16,7 +16,6 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.CheckedRunnable; -import org.elasticsearch.core.RestApiVersion; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.rest.RestRequest; @@ -268,12 +267,12 @@ public void testMsearchTerminatedByNewline() throws Exception { assertEquals(3, 
msearchRequest.requests().size()); } - private MultiSearchRequest parseMultiSearchRequestFromString(String request, RestApiVersion restApiVersion) throws IOException { - return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8), restApiVersion)); + private MultiSearchRequest parseMultiSearchRequestFromString(String request) throws IOException { + return parseMultiSearchRequest(createRestRequest(request.getBytes(StandardCharsets.UTF_8))); } private MultiSearchRequest parseMultiSearchRequest(String sample) throws IOException { - return parseMultiSearchRequest(createRestRequest(sample, null)); + return parseMultiSearchRequest(createRestRequest(sample)); } private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) throws IOException { @@ -288,22 +287,13 @@ private MultiSearchRequest parseMultiSearchRequest(RestRequest restRequest) thro return request; } - private RestRequest createRestRequest(String sample, RestApiVersion restApiVersion) throws IOException { + private RestRequest createRestRequest(String sample) throws IOException { byte[] data = StreamsUtils.copyToBytesFromClasspath(sample); - return createRestRequest(data, restApiVersion); + return createRestRequest(data); } - private FakeRestRequest createRestRequest(byte[] data, RestApiVersion restApiVersion) { - if (restApiVersion != null) { - final List contentTypeHeader = Collections.singletonList( - compatibleMediaType(XContentType.VND_JSON, RestApiVersion.V_7) - ); - return new FakeRestRequest.Builder(xContentRegistry()).withHeaders( - Map.of("Content-Type", contentTypeHeader, "Accept", contentTypeHeader) - ).withContent(new BytesArray(data), null).build(); - } else { - return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build(); - } + private FakeRestRequest createRestRequest(byte[] data) { + return new FakeRestRequest.Builder(xContentRegistry()).withContent(new BytesArray(data), XContentType.JSON).build(); } @Override @@ -517,7 +507,7 @@ public void testFailOnExtraCharacters() throws IOException { parseMultiSearchRequestFromString(""" {"index": "test"}{{{{{extra chars that shouldn't be here { "query": {"match_all": {}}} - """, null); + """); fail("should have caught first line; extra open brackets"); } catch (XContentParseException e) { assertEquals("[1:18] Unexpected token after end of object", e.getMessage()); @@ -526,7 +516,7 @@ public void testFailOnExtraCharacters() throws IOException { parseMultiSearchRequestFromString(""" {"index": "test"} { "query": {"match_all": {}}}{{{{even more chars - """, null); + """); fail("should have caught second line"); } catch (XContentParseException e) { assertEquals("[1:30] Unexpected token after end of object", e.getMessage()); @@ -535,7 +525,7 @@ public void testFailOnExtraCharacters() throws IOException { parseMultiSearchRequestFromString(""" {} { "query": {"match_all": {}}}}}}different error message - """, null); + """); fail("should have caught second line; extra closing brackets"); } catch (XContentParseException e) { assertThat( From a514aad3c2da305b0b63d8545cab75bb2c2d3032 Mon Sep 17 00:00:00 2001 From: Dimitris Rempapis Date: Tue, 3 Dec 2024 10:58:20 +0200 Subject: [PATCH 121/139] Fix/meta fields bad request (#117229) A 400 rather than a 5xx error is returned when _source / _seq_no / _feature / _nested_path / _field_names is requested via fields --- docs/changelog/117229.yaml | 6 ++ .../extras/RankFeatureMetaFieldMapper.java | 2 +- rest-api-spec/build.gradle | 1 +
.../test/search/520_fetch_fields.yml | 80 +++++++++++++++++-- .../index/mapper/FieldNamesFieldMapper.java | 2 +- .../index/mapper/MapperFeatures.java | 5 +- .../index/mapper/NestedPathFieldMapper.java | 2 +- .../index/mapper/SeqNoFieldMapper.java | 2 +- .../index/mapper/SourceFieldMapper.java | 2 +- .../fetch/subphase/FieldFetcherTests.java | 2 +- 10 files changed, 92 insertions(+), 12 deletions(-) create mode 100644 docs/changelog/117229.yaml diff --git a/docs/changelog/117229.yaml b/docs/changelog/117229.yaml new file mode 100644 index 0000000000000..f1b859c03e4fa --- /dev/null +++ b/docs/changelog/117229.yaml @@ -0,0 +1,6 @@ +pr: 117229 +summary: "In this PR, a 400 error is returned when _source / _seq_no / _feature /\ + \ _nested_path / _field_names is requested, rather than a 5xx" +area: Search +type: bug +issues: [] diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java index 15398b1f178ee..ed1cc57b84863 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/RankFeatureMetaFieldMapper.java @@ -48,7 +48,7 @@ public String typeName() { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + typeName() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + typeName() + "]."); } @Override diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 650d17e41de7f..e2af894eb0939 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -66,4 +66,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.skipTest("logsdb/20_source_mapping/stored _source mode is supported", "no longer serialize source_mode") task.skipTest("logsdb/20_source_mapping/include/exclude is supported with stored _source", "no longer serialize source_mode") task.skipTest("logsdb/20_source_mapping/synthetic _source is default", "no longer serialize source_mode") + task.skipTest("search/520_fetch_fields/fetch _seq_no via fields", "error code is changed from 5xx to 400 in 9.0") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml index 2b309f502f0c2..9a43199755d75 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/520_fetch_fields.yml @@ -128,18 +128,88 @@ fetch _seq_no via stored_fields: --- fetch _seq_no via fields: + - requires: + cluster_features: ["meta_fetch_fields_error_code_changed"] + reason: The fields_api returns a 400 instead of a 5xx when _seq_no is requested via fields - do: - catch: "request" + catch: bad_request search: index: test body: fields: [ _seq_no ] - # This should be `unauthorized` (401) or `forbidden` (403) or at least `bad request` (400) - # while instead it is mapped to an `internal_server_error (500)` - - match: { status: 500 } - - match: { error.root_cause.0.type: unsupported_operation_exception } + - match: { status: 400 } + - match: { error.root_cause.0.type: illegal_argument_exception } + - match: {
error.root_cause.0.reason: "error fetching [_seq_no]: Cannot fetch values for internal field [_seq_no]." } +--- +fetch _source via fields: + - requires: + cluster_features: ["meta_fetch_fields_error_code_changed"] + reason: The fields_api returns a 400 instead of a 5xx when _source is requested via fields + + - do: + catch: bad_request + search: + index: test + body: + fields: [ _source ] + + - match: { status: 400 } + - match: { error.root_cause.0.type: illegal_argument_exception } + - match: { error.root_cause.0.reason: "error fetching [_source]: Cannot fetch values for internal field [_source]." } + +--- +fetch _feature via fields: + - requires: + cluster_features: ["meta_fetch_fields_error_code_changed"] + reason: The fields_api returns a 400 instead of a 5xx when _feature is requested via fields + + - do: + catch: bad_request + search: + index: test + body: + fields: [ _feature ] + + - match: { status: 400 } + - match: { error.root_cause.0.type: illegal_argument_exception } + - match: { error.root_cause.0.reason: "error fetching [_feature]: Cannot fetch values for internal field [_feature]." } + +--- +fetch _nested_path via fields: + - requires: + cluster_features: ["meta_fetch_fields_error_code_changed"] + reason: The fields_api returns a 400 instead of a 5xx when _nested_path is requested via fields + + - do: + catch: bad_request + search: + index: test + body: + fields: [ _nested_path ] + + - match: { status: 400 } + - match: { error.root_cause.0.type: illegal_argument_exception } + - match: { error.root_cause.0.reason: "error fetching [_nested_path]: Cannot fetch values for internal field [_nested_path]." } + +--- +fetch _field_names via fields: + - requires: + cluster_features: ["meta_fetch_fields_error_code_changed"] + reason: The fields_api returns a 400 instead of a 5xx when _field_names is requested via fields + + - do: + catch: bad_request + search: + index: test + body: + fields: [ _field_names ] + + - match: { status: 400 } + - match: { error.root_cause.0.type: illegal_argument_exception } + - match: { error.root_cause.0.reason: "error fetching [_field_names]: Cannot fetch values for internal field [_field_names]."
} --- fetch fields with none stored_fields: diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java index 565b1ff28a39f..425e3c664c262 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java @@ -135,7 +135,7 @@ public boolean isEnabled() { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 333c37381c587..bf6c729f95653 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -61,6 +61,8 @@ public Set getFeatures() { "mapper.constant_keyword.synthetic_source_write_fix" ); + public static final NodeFeature META_FETCH_FIELDS_ERROR_CODE_CHANGED = new NodeFeature("meta_fetch_fields_error_code_changed"); + @Override public Set getTestFeatures() { return Set.of( @@ -71,7 +73,8 @@ public Set getTestFeatures() { IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD, IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS, MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT, - CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX + CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX, + META_FETCH_FIELDS_ERROR_CODE_CHANGED ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java index b22c3a12fcda3..1cd752dc34403 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedPathFieldMapper.java @@ -67,7 +67,7 @@ public Query existsQuery(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java index e126102b0f3c2..66ee42dfc56f9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java @@ -168,7 +168,7 @@ public boolean mayExistInIndex(SearchExecutionContext context) { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index b97e04fcddb5d..1cea8154aad43 100644 --- 
a/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -325,7 +325,7 @@ public String typeName() { @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { - throw new UnsupportedOperationException("Cannot fetch values for internal field [" + name() + "]."); + throw new IllegalArgumentException("Cannot fetch values for internal field [" + name() + "]."); } @Override diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java index f01f760ed71c3..c5f1efe561c22 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FieldFetcherTests.java @@ -271,7 +271,7 @@ public void testMetadataFields() throws IOException { FieldNamesFieldMapper.NAME, NestedPathFieldMapper.name(IndexVersion.current()) )) { - expectThrows(UnsupportedOperationException.class, () -> fetchFields(mapperService, source, fieldname)); + expectThrows(IllegalArgumentException.class, () -> fetchFields(mapperService, source, fieldname)); } } From b1412f65b90893c3d29756c921c32d39f3172a65 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Tue, 3 Dec 2024 10:57:05 +0100 Subject: [PATCH 122/139] Clean up search timeout handling code (#116678) TimeExceededException was made public so that it could be caught outside of the search.internal package. That is rather dangerous, because we really need it to be created only from `ContextIndexSearcher#throwTimeExceededException`. This commit makes its constructor private to prevent it from being created outside of ContextIndexSearcher, and adds javadocs explaining that. I took the chance to also share the timeout handling code that was copy-pasted in different places.
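As context for the diff below, the consolidated control flow boils down to the following self-contained sketch. The types here are simplified stand-ins rather than the real Elasticsearch classes, and the actual helper also takes a SearchShardTarget argument, omitted here for brevity: a timeout either propagates as an exception (partial results disallowed) or flags the result as timed out and lets execution continue.

// Minimal sketch of the consolidated timeout handling (stand-in types;
// the real handleTimeout also takes a SearchShardTarget).
public class TimeoutHandlingSketch {

    // Stand-in for org.elasticsearch.search.query.QuerySearchResult.
    static final class QuerySearchResult {
        private boolean timedOut;

        void searchTimedOut(boolean timedOut) {
            this.timedOut = timedOut;
        }

        boolean searchTimedOut() {
            return timedOut;
        }
    }

    static final class SearchTimeoutException extends RuntimeException {
        SearchTimeoutException(String message) {
            super(message);
        }

        // Mirrors the shared helper in the diff: throw when partial results
        // are disallowed, otherwise mark the result as timed out and go on.
        static void handleTimeout(boolean allowPartialSearchResults, QuerySearchResult result) {
            if (allowPartialSearchResults == false) {
                throw new SearchTimeoutException("Time exceeded");
            }
            result.searchTimedOut(true);
        }
    }

    public static void main(String[] args) {
        QuerySearchResult result = new QuerySearchResult();
        SearchTimeoutException.handleTimeout(true, result);
        System.out.println("timed out flag: " + result.searchTimedOut()); // true
        try {
            SearchTimeoutException.handleTimeout(false, result);
        } catch (SearchTimeoutException e) {
            System.out.println("propagated: " + e.getMessage()); // Time exceeded
        }
    }
}

Centralizing this branch in a single helper keeps the allowPartialSearchResults semantics identical across the query, fetch and rescore phases, which previously each carried their own copy of it.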
--- .../search/fetch/FetchPhase.java | 7 +--- .../search/fetch/FetchPhaseDocsIterator.java | 41 +++++++++---------- .../search/internal/ContextIndexSearcher.java | 18 +++++--- .../search/query/QueryPhase.java | 9 ++-- .../search/query/SearchTimeoutException.java | 13 ++++++ .../search/rescore/RescorePhase.java | 9 ++-- .../fetch/FetchPhaseDocsIteratorTests.java | 8 +++- 7 files changed, 63 insertions(+), 42 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 546586a9ff3c3..2fbe3c1fc1532 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -195,13 +195,10 @@ protected SearchHit nextDoc(int doc) throws IOException { context.shardTarget(), context.searcher().getIndexReader(), docIdsToLoad, - context.request().allowPartialSearchResults() + context.request().allowPartialSearchResults(), + context.queryResult() ); - if (docsIterator.isTimedOut()) { - context.queryResult().searchTimedOut(true); - } - if (context.isCancelled()) { for (SearchHit hit : hits) { // release all hits that would otherwise become owned and eventually released by SearchHits below diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java index df4e7649ffd3b..4a242f70e8d02 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhaseDocsIterator.java @@ -16,6 +16,7 @@ import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.internal.ContextIndexSearcher; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.search.query.SearchTimeoutException; import java.io.IOException; @@ -30,12 +31,6 @@ */ abstract class FetchPhaseDocsIterator { - private boolean timedOut = false; - - public boolean isTimedOut() { - return timedOut; - } - /** * Called when a new leaf reader is reached * @param ctx the leaf reader for this set of doc ids @@ -53,7 +48,13 @@ public boolean isTimedOut() { /** * Iterate over a set of docsIds within a particular shard and index reader */ - public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader indexReader, int[] docIds, boolean allowPartialResults) { + public final SearchHit[] iterate( + SearchShardTarget shardTarget, + IndexReader indexReader, + int[] docIds, + boolean allowPartialResults, + QuerySearchResult querySearchResult + ) { SearchHit[] searchHits = new SearchHit[docIds.length]; DocIdToIndex[] docs = new DocIdToIndex[docIds.length]; for (int index = 0; index < docIds.length; index++) { @@ -69,12 +70,10 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde int[] docsInLeaf = docIdsInLeaf(0, endReaderIdx, docs, ctx.docBase); try { setNextReader(ctx, docsInLeaf); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - return SearchHits.EMPTY; - } - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + } catch (ContextIndexSearcher.TimeExceededException e) { + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + return SearchHits.EMPTY; } for (int i = 0; i < docs.length; i++) 
{ try { @@ -88,15 +87,15 @@ public final SearchHit[] iterate(SearchShardTarget shardTarget, IndexReader inde currentDoc = docs[i].docId; assert searchHits[docs[i].index] == null; searchHits[docs[i].index] = nextDoc(docs[i].docId); - } catch (ContextIndexSearcher.TimeExceededException timeExceededException) { - if (allowPartialResults) { - timedOut = true; - SearchHit[] partialSearchHits = new SearchHit[i]; - System.arraycopy(searchHits, 0, partialSearchHits, 0, i); - return partialSearchHits; + } catch (ContextIndexSearcher.TimeExceededException e) { + if (allowPartialResults == false) { + purgeSearchHits(searchHits); } - purgeSearchHits(searchHits); - throw new SearchTimeoutException(shardTarget, "Time exceeded"); + SearchTimeoutException.handleTimeout(allowPartialResults, shardTarget, querySearchResult); + assert allowPartialResults; + SearchHit[] partialSearchHits = new SearchHit[i]; + System.arraycopy(searchHits, 0, partialSearchHits, 0, i); + return partialSearchHits; } } } catch (SearchTimeoutException e) { diff --git a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 78d90377cdc3f..9f990fbd97cdf 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -169,8 +169,8 @@ public void setProfiler(QueryProfiler profiler) { * Add a {@link Runnable} that will be run on a regular basis while accessing documents in the * DirectoryReader but also while collecting them and check for query cancellation or timeout. */ - public Runnable addQueryCancellation(Runnable action) { - return this.cancellable.add(action); + public void addQueryCancellation(Runnable action) { + this.cancellable.add(action); } /** @@ -425,8 +425,16 @@ public void throwTimeExceededException() { } } - public static class TimeExceededException extends RuntimeException { + /** + * Exception thrown whenever a search timeout occurs. May be thrown by {@link ContextIndexSearcher} or {@link ExitableDirectoryReader}. 
+ */ + public static final class TimeExceededException extends RuntimeException { // This exception should never be re-thrown, but we fill in the stacktrace to be able to trace where it does not get properly caught + + /** + * Created via {@link #throwTimeExceededException()} + */ + private TimeExceededException() {} } @Override @@ -570,14 +578,12 @@ public DirectoryReader getDirectoryReader() { } private static class MutableQueryTimeout implements ExitableDirectoryReader.QueryCancellation { - private final List runnables = new ArrayList<>(); - private Runnable add(Runnable action) { + private void add(Runnable action) { Objects.requireNonNull(action, "cancellation runnable should not be null"); assert runnables.contains(action) == false : "Cancellation runnable already added"; runnables.add(action); - return action; } private void remove(Runnable action) { diff --git a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java index af65c30b49dcf..3036a295d459a 100644 --- a/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/server/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -217,10 +217,11 @@ static void addCollectorsAndSearch(SearchContext searchContext) throws QueryPhas queryResult.topDocs(queryPhaseResult.topDocsAndMaxScore(), queryPhaseResult.sortValueFormats()); if (searcher.timeExceeded()) { assert timeoutRunnable != null : "TimeExceededException thrown even though timeout wasn't set"; - if (searchContext.request().allowPartialSearchResults() == false) { - throw new SearchTimeoutException(searchContext.shardTarget(), "Time exceeded"); - } - queryResult.searchTimedOut(true); + SearchTimeoutException.handleTimeout( + searchContext.request().allowPartialSearchResults(), + searchContext.shardTarget(), + searchContext.queryResult() + ); } if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) { queryResult.terminatedEarly(queryPhaseResult.terminatedAfter()); diff --git a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java index 0ed64811fee28..e006f176ff91a 100644 --- a/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java +++ b/server/src/main/java/org/elasticsearch/search/query/SearchTimeoutException.java @@ -33,4 +33,17 @@ public SearchTimeoutException(StreamInput in) throws IOException { public RestStatus status() { return RestStatus.GATEWAY_TIMEOUT; } + + /** + * Propagate a timeout according to whether partial search results are allowed or not. + * In case partial results are allowed, a flag will be set to the provided {@link QuerySearchResult} to indicate that there was a + * timeout, but the execution will continue and partial results will be returned to the user. + * When partial results are disallowed, a {@link SearchTimeoutException} will be thrown and returned to the user. 
+ */ + public static void handleTimeout(boolean allowPartialSearchResults, SearchShardTarget target, QuerySearchResult querySearchResult) { + if (allowPartialSearchResults == false) { + throw new SearchTimeoutException(target, "Time exceeded"); + } + querySearchResult.searchTimedOut(true); + } } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java index 1227db5d8e1db..7e3646e7689cc 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescorePhase.java @@ -73,10 +73,11 @@ public static void execute(SearchContext context) { } catch (IOException e) { throw new ElasticsearchException("Rescore Phase Failed", e); } catch (ContextIndexSearcher.TimeExceededException e) { - if (context.request().allowPartialSearchResults() == false) { - throw new SearchTimeoutException(context.shardTarget(), "Time exceeded"); - } - context.queryResult().searchTimedOut(true); + SearchTimeoutException.handleTimeout( + context.request().allowPartialSearchResults(), + context.shardTarget(), + context.queryResult() + ); } } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java index d5e930321db95..c8d1b6721c64b 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/FetchPhaseDocsIteratorTests.java @@ -17,6 +17,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.query.QuerySearchResult; import org.elasticsearch.test.ESTestCase; import java.io.IOException; @@ -77,7 +78,7 @@ protected SearchHit nextDoc(int doc) { } }; - SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean()); + SearchHit[] hits = it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()); assertThat(hits.length, equalTo(docs.length)); for (int i = 0; i < hits.length; i++) { @@ -125,7 +126,10 @@ protected SearchHit nextDoc(int doc) { } }; - Exception e = expectThrows(FetchPhaseExecutionException.class, () -> it.iterate(null, reader, docs, randomBoolean())); + Exception e = expectThrows( + FetchPhaseExecutionException.class, + () -> it.iterate(null, reader, docs, randomBoolean(), new QuerySearchResult()) + ); assertThat(e.getMessage(), containsString("Error running fetch phase for doc [" + badDoc + "]")); assertThat(e.getCause(), instanceOf(IllegalArgumentException.class)); From 76a382a78d728d90cc84fa3fbcfe61ba1c1e8db2 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 12:24:55 +0100 Subject: [PATCH 123/139] ESQL: Enable CATEGORIZE tests on non-snapshot builds (#117881) --- .../org/elasticsearch/xpack/esql/action/EsqlCapabilities.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index a93590d7a5bc2..646c4f8240c3e 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the 
text categorization function "CATEGORIZE". */ - CATEGORIZE_V4(Build.current().isSnapshot()), + CATEGORIZE_V4, /** * QSTR function From cf9687f56de49bf5f07152b70b388d3f971aa9a5 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 13:08:02 +0100 Subject: [PATCH 124/139] ESQL: Fix layout when aggregating with aliases (#117837) Forward-port of #117832. Only really relevant for bwc with 8.11/8.12; ported for consistency with 8.x --- .../planner/AbstractPhysicalOperationProviders.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java index 69e2d1c45aa3c..35aba7665ec87 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/AbstractPhysicalOperationProviders.java @@ -120,10 +120,14 @@ public final PhysicalOperation groupingPhysicalOperation( * - before stats (keep x = a | stats by x) which requires the partial input to use a's channel * - after stats (stats by a | keep x = a) which causes the output layout to refer to the follow-up alias */ + // TODO: This is likely required only for pre-8.14 node compatibility; confirm and remove if possible. + // Since https://github.com/elastic/elasticsearch/pull/104958, it shouldn't be possible to have aliases in the aggregates + // which the groupings refer to. Except for `BY CATEGORIZE(field)`, which remains as an alias in the grouping, all aliases + // should've become EVALs before or after the STATS. for (NamedExpression agg : aggregates) { if (agg instanceof Alias a) { if (a.child() instanceof Attribute attr) { - if (groupAttribute.id().equals(attr.id())) { + if (sourceGroupAttribute.id().equals(attr.id())) { groupAttributeLayout.nameIds().add(a.id()); // TODO: investigate whether a break could be used since it shouldn't be possible to have multiple // attributes pointing to the same attribute @@ -133,8 +137,8 @@ public final PhysicalOperation groupingPhysicalOperation( // is in the output form // if the group points to an alias declared in the aggregate, use the alias child as source else if (aggregatorMode.isOutputPartial()) { - if (groupAttribute.semanticEquals(a.toAttribute())) { - groupAttribute = attr; + if (sourceGroupAttribute.semanticEquals(a.toAttribute())) { + sourceGroupAttribute = attr; break; } } From 2a9a3a44dc8bcf71659df5893ef23df535967eea Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Wed, 4 Dec 2024 00:13:04 +1100 Subject: [PATCH 125/139] Add a not-master state for desired balance (#116904) The new state prevents a long-running desired balance computation from setting its result after the node stands down as master.
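Before the diff, here is a minimal sketch of the state machine this patch introduces, using hypothetical stand-in types (the real code lives in DesiredBalanceShardsAllocator and stores full DesiredBalance instances, with NOT_MASTER and BECOME_MASTER_INITIAL as the sentinels): standing down installs a NOT_MASTER sentinel, and a computation result that races with that transition is discarded in a compare-and-set loop instead of overwriting it.

// Hypothetical stand-in for the sentinel-based state handling below;
// not the real allocator code.
import java.util.concurrent.atomic.AtomicReference;

public class NotMasterSentinelSketch {

    record Balance(long lastConvergedIndex) {
        static final Balance NOT_MASTER = new Balance(-2);
        static final Balance BECOME_MASTER_INITIAL = new Balance(-1);
    }

    private final AtomicReference<Balance> current = new AtomicReference<>(Balance.NOT_MASTER);

    // The first allocation round on a newly elected master swaps the sentinel
    // for the initial value; later rounds leave the current balance alone.
    void onFirstAllocateAsMaster() {
        current.compareAndSet(Balance.NOT_MASTER, Balance.BECOME_MASTER_INITIAL);
    }

    void onNoLongerMaster() {
        current.set(Balance.NOT_MASTER);
    }

    // A computation that finishes after the node stood down must not clobber
    // the sentinel, hence the compare-and-set loop.
    boolean publish(Balance computed) {
        while (true) {
            Balance old = current.get();
            if (old == Balance.NOT_MASTER) {
                return false; // node stood down while computing: discard
            }
            if (current.compareAndSet(old, computed)) {
                return true;
            }
        }
    }

    public static void main(String[] args) {
        NotMasterSentinelSketch allocator = new NotMasterSentinelSketch();
        allocator.onFirstAllocateAsMaster();
        System.out.println(allocator.publish(new Balance(7))); // true
        allocator.onNoLongerMaster();
        System.out.println(allocator.publish(new Balance(8))); // false, discarded
    }
}

The loop matters because a plain set could resurrect a stale balance on a node that has already stood down; with the sentinel check, late results are simply dropped.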
--- docs/changelog/116904.yaml | 5 ++ .../allocation/allocator/DesiredBalance.java | 9 ++- .../DesiredBalanceShardsAllocator.java | 71 ++++++++++++++----- ...nsportDeleteDesiredBalanceActionTests.java | 2 +- .../DesiredBalanceComputerTests.java | 51 +++++++++---- .../DesiredBalanceShardsAllocatorTests.java | 13 ++-- 6 files changed, 112 insertions(+), 39 deletions(-) create mode 100644 docs/changelog/116904.yaml diff --git a/docs/changelog/116904.yaml b/docs/changelog/116904.yaml new file mode 100644 index 0000000000000..46fa445f36154 --- /dev/null +++ b/docs/changelog/116904.yaml @@ -0,0 +1,5 @@ +pr: 116904 +summary: Add a not-master state for desired balance +area: Allocation +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java index 6ad44fdf3a9c0..406ca72868a40 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalance.java @@ -40,7 +40,14 @@ public DesiredBalance(long lastConvergedIndex, Map ass this(lastConvergedIndex, assignments, Map.of(), ComputationFinishReason.CONVERGED); } - public static final DesiredBalance INITIAL = new DesiredBalance(-1, Map.of()); + /** + * The placeholder value for {@link DesiredBalance} when the node stands down as master. + */ + public static final DesiredBalance NOT_MASTER = new DesiredBalance(-2, Map.of()); + /** + * The starting value for {@link DesiredBalance} when the node becomes the master. + */ + public static final DesiredBalance BECOME_MASTER_INITIAL = new DesiredBalance(-1, Map.of()); public ShardAssignment getAssignment(ShardId shardId) { return assignments.get(shardId); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 72261df658ca1..8408386b8da58 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.service.MasterService; import org.elasticsearch.cluster.service.MasterServiceTaskQueue; import org.elasticsearch.common.Priority; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.metrics.CounterMetric; import org.elasticsearch.common.metrics.MeanMetric; import org.elasticsearch.common.settings.ClusterSettings; @@ -43,6 +44,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; /** * A {@link ShardsAllocator} which asynchronously refreshes the desired balance held by the {@link DesiredBalanceComputer} and then takes @@ -62,7 +64,7 @@ public class DesiredBalanceShardsAllocator implements ShardsAllocator { private final AtomicLong indexGenerator = new AtomicLong(-1); private final ConcurrentLinkedQueue> pendingDesiredBalanceMoves = new ConcurrentLinkedQueue<>(); private final MasterServiceTaskQueue masterServiceTaskQueue; - private volatile DesiredBalance currentDesiredBalance = DesiredBalance.INITIAL; + private final AtomicReference 
currentDesiredBalanceRef = new AtomicReference<>(DesiredBalance.NOT_MASTER); private volatile boolean resetCurrentDesiredBalance = false; private final Set processedNodeShutdowns = new HashSet<>(); private final DesiredBalanceMetrics desiredBalanceMetrics; @@ -129,6 +131,12 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { long index = desiredBalanceInput.index(); logger.debug("Starting desired balance computation for [{}]", index); + final DesiredBalance initialDesiredBalance = getInitialDesiredBalance(); + if (initialDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("Abort desired balance computation because node is no longer master"); + return; + } + recordTime( cumulativeComputationTime, // We set currentDesiredBalance back to INITIAL when the node stands down as master in onNoLongerMaster. @@ -137,7 +145,7 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { // lead to unexpected behaviours for tests. See also https://github.com/elastic/elasticsearch/pull/116904 () -> setCurrentDesiredBalance( desiredBalanceComputer.compute( - getInitialDesiredBalance(), + initialDesiredBalance, desiredBalanceInput, pendingDesiredBalanceMoves, this::isFresh @@ -146,7 +154,17 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { ); computationsExecuted.inc(); - if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); + if (currentDesiredBalance == DesiredBalance.NOT_MASTER || currentDesiredBalance == DesiredBalance.BECOME_MASTER_INITIAL) { + logger.debug( + () -> Strings.format( + "Desired balance computation for [%s] is discarded since master has concurrently changed. " + + "Current desiredBalance=[%s]", + index, + currentDesiredBalance + ) + ); + } else if (currentDesiredBalance.finishReason() == DesiredBalance.ComputationFinishReason.STOP_EARLY) { logger.debug( "Desired balance computation for [{}] terminated early with partial result, scheduling reconciliation", index @@ -164,10 +182,13 @@ protected void processInput(DesiredBalanceInput desiredBalanceInput) { } private DesiredBalance getInitialDesiredBalance() { + final DesiredBalance currentDesiredBalance = currentDesiredBalanceRef.get(); if (resetCurrentDesiredBalance) { logger.info("Resetting current desired balance"); resetCurrentDesiredBalance = false; - return new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); + return currentDesiredBalance == DesiredBalance.NOT_MASTER + ? DesiredBalance.NOT_MASTER + : new DesiredBalance(currentDesiredBalance.lastConvergedIndex(), Map.of()); } else { return currentDesiredBalance; } @@ -215,6 +236,10 @@ public void allocate(RoutingAllocation allocation, ActionListener listener var index = indexGenerator.incrementAndGet(); logger.debug("Executing allocate for [{}]", index); queue.add(index, listener); + // This can only run on master, so unset not-master if exists + if (currentDesiredBalanceRef.compareAndSet(DesiredBalance.NOT_MASTER, DesiredBalance.BECOME_MASTER_INITIAL)) { + logger.debug("initialized desired balance for becoming master"); + } desiredBalanceComputation.onNewInput(DesiredBalanceInput.create(index, allocation)); if (allocation.routingTable().indicesRouting().isEmpty()) { @@ -224,7 +249,7 @@ public void allocate(RoutingAllocation allocation, ActionListener listener // Starts reconciliation towards desired balance that might have not been updated with a recent calculation yet. 
// This is fine as balance should have incremental rather than radical changes. // This should speed up achieving the desired balance in cases current state is still different from it (due to THROTTLING). - reconcile(currentDesiredBalance, allocation); + reconcile(currentDesiredBalanceRef.get(), allocation); } private void processNodeShutdowns(ClusterState clusterState) { @@ -267,16 +292,26 @@ private static List getMoveCommands(AllocationCommands co } private void setCurrentDesiredBalance(DesiredBalance newDesiredBalance) { - if (logger.isTraceEnabled()) { - var diff = DesiredBalance.hasChanges(currentDesiredBalance, newDesiredBalance) - ? "Diff: " + DesiredBalance.humanReadableDiff(currentDesiredBalance, newDesiredBalance) - : "No changes"; - logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); - } else { - logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + while (true) { + final var oldDesiredBalance = currentDesiredBalanceRef.get(); + if (oldDesiredBalance == DesiredBalance.NOT_MASTER) { + logger.debug("discard desired balance for [{}] since node is no longer master", newDesiredBalance.lastConvergedIndex()); + return; + } + + if (currentDesiredBalanceRef.compareAndSet(oldDesiredBalance, newDesiredBalance)) { + if (logger.isTraceEnabled()) { + var diff = DesiredBalance.hasChanges(oldDesiredBalance, newDesiredBalance) + ? "Diff: " + DesiredBalance.humanReadableDiff(oldDesiredBalance, newDesiredBalance) + : "No changes"; + logger.trace("Desired balance updated: {}. {}", newDesiredBalance, diff); + } else { + logger.debug("Desired balance updated for [{}]", newDesiredBalance.lastConvergedIndex()); + } + computedShardMovements.inc(DesiredBalance.shardMovements(oldDesiredBalance, newDesiredBalance)); + break; + } } - computedShardMovements.inc(DesiredBalance.shardMovements(currentDesiredBalance, newDesiredBalance)); - currentDesiredBalance = newDesiredBalance; } protected void submitReconcileTask(DesiredBalance desiredBalance) { @@ -316,7 +351,7 @@ public void execute(RoutingAllocation allocation) { } public DesiredBalance getDesiredBalance() { - return currentDesiredBalance; + return currentDesiredBalanceRef.get(); } public void resetDesiredBalance() { @@ -325,7 +360,7 @@ public void resetDesiredBalance() { public DesiredBalanceStats getStats() { return new DesiredBalanceStats( - Math.max(currentDesiredBalance.lastConvergedIndex(), 0L), + Math.max(currentDesiredBalanceRef.get().lastConvergedIndex(), 0L), desiredBalanceComputation.isActive(), computationsSubmitted.count(), computationsExecuted.count(), @@ -342,7 +377,7 @@ public DesiredBalanceStats getStats() { private void onNoLongerMaster() { if (indexGenerator.getAndSet(-1) != -1) { - currentDesiredBalance = DesiredBalance.INITIAL; + currentDesiredBalanceRef.set(DesiredBalance.NOT_MASTER); queue.completeAllAsNotMaster(); pendingDesiredBalanceMoves.clear(); desiredBalanceReconciler.clear(); @@ -412,7 +447,7 @@ private static void discardSupersededTasks( // only for tests - in production, this happens after reconciliation protected final void completeToLastConvergedIndex() { - queue.complete(currentDesiredBalance.lastConvergedIndex()); + queue.complete(currentDesiredBalanceRef.get().lastConvergedIndex()); } private void recordTime(CounterMetric metric, Runnable action) { diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java 
b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java index 3dafc8f000f3f..385ac600666db 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/cluster/allocation/TransportDeleteDesiredBalanceActionTests.java @@ -136,7 +136,7 @@ public DesiredBalance compute( safeAwait((ActionListener listener) -> allocationService.reroute(clusterState, "inital-allocate", listener)); var balanceBeforeReset = allocator.getDesiredBalance(); - assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.INITIAL.lastConvergedIndex())); + assertThat(balanceBeforeReset.lastConvergedIndex(), greaterThan(DesiredBalance.BECOME_MASTER_INITIAL.lastConvergedIndex())); assertThat(balanceBeforeReset.assignments(), not(anEmptyMap())); var listener = new PlainActionFuture(); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java index 7b77947792bd4..679d04224aefe 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceComputerTests.java @@ -96,7 +96,12 @@ public void testComputeBalance() { var clusterState = createInitialClusterState(3); var index = clusterState.metadata().index(TEST_INDEX).getIndex(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -115,7 +120,7 @@ public void testStopsComputingWhenStale() { var index = clusterState.metadata().index(TEST_INDEX).getIndex(); // if the isFresh flag is false then we only do one iteration, allocating the primaries but not the replicas - var desiredBalance0 = DesiredBalance.INITIAL; + var desiredBalance0 = DesiredBalance.BECOME_MASTER_INITIAL; var desiredBalance1 = desiredBalanceComputer.compute(desiredBalance0, createInput(clusterState), queue(), input -> false); assertDesiredAssignments( desiredBalance1, @@ -147,7 +152,7 @@ public void testIgnoresOutOfScopePrimaries() { var primaryShard = mutateAllocationStatus(clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard()); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, primaryShard), queue(), input -> true @@ -184,7 +189,7 @@ public void testIgnoresOutOfScopeReplicas() { var replicaShard = mutateAllocationStatus(originalReplicaShard); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, replicaShard), queue(), input -> true @@ -241,7 +246,7 @@ public void testAssignShardsToTheirPreviousLocationIfAvailable() { : new ShardRouting[] { clusterState.routingTable().index(TEST_INDEX).shard(0).primaryShard() }; var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState, ignored), queue(), input -> true @@ -284,7 +289,12 @@ public 
void testRespectsAssignmentOfUnknownPrimaries() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -331,7 +341,12 @@ public void testRespectsAssignmentOfUnknownReplicas() { } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance, @@ -367,7 +382,7 @@ public void testRespectsAssignmentByGatewayAllocators() { } var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, DesiredBalanceInput.create(randomNonNegativeLong(), routingAllocation), queue(), input -> true @@ -427,7 +442,12 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing } clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(desiredRoutingNodes)).build(); - var desiredBalance1 = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance1 = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments( desiredBalance1, Map.of( @@ -513,7 +533,12 @@ public void testNoDataNodes() { var desiredBalanceComputer = createDesiredBalanceComputer(); var clusterState = createInitialClusterState(0); - var desiredBalance = desiredBalanceComputer.compute(DesiredBalance.INITIAL, createInput(clusterState), queue(), input -> true); + var desiredBalance = desiredBalanceComputer.compute( + DesiredBalance.BECOME_MASTER_INITIAL, + createInput(clusterState), + queue(), + input -> true + ); assertDesiredAssignments(desiredBalance, Map.of()); } @@ -532,7 +557,7 @@ public void testAppliesMoveCommands() { clusterState = ClusterState.builder(clusterState).routingTable(RoutingTable.of(routingNodes)).build(); var desiredBalance = desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(clusterState), queue( new MoveAllocationCommand(index.getName(), 0, "node-1", "node-2"), @@ -662,7 +687,7 @@ public void testDesiredBalanceShouldConvergeInABigCluster() { var input = new DesiredBalanceInput(randomInt(), routingAllocationWithDecidersOf(clusterState, clusterInfo, settings), List.of()); var desiredBalance = createDesiredBalanceComputer(new BalancedShardsAllocator(settings)).compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, input, queue(), ignored -> iteration.incrementAndGet() < 1000 @@ -1243,7 +1268,7 @@ public ShardAllocationDecision decideShardAllocation(ShardRouting shard, Routing assertThatLogger(() -> { var iteration = new AtomicInteger(0); desiredBalanceComputer.compute( - DesiredBalance.INITIAL, + DesiredBalance.BECOME_MASTER_INITIAL, createInput(createInitialClusterState(3)), queue(), input -> iteration.incrementAndGet() < iterations diff --git 
a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java index 9d33b697e31ca..9caf89d4d7613 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocatorTests.java @@ -698,6 +698,7 @@ public void onFailure(Exception e) { try { assertTrue(listenersCalled.await(10, TimeUnit.SECONDS)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); } finally { clusterService.close(); terminate(threadPool); @@ -753,7 +754,7 @@ public DesiredBalance compute( try { // initial computation is based on DesiredBalance.INITIAL rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.INITIAL)); + assertThat(desiredBalanceComputer.lastComputationInput.get(), equalTo(DesiredBalance.BECOME_MASTER_INITIAL)); // any next computation is based on current desired balance var current = desiredBalanceShardsAllocator.getDesiredBalance(); @@ -806,7 +807,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); clusterState = ClusterState.builder(clusterState) .nodes(DiscoveryNodes.builder(clusterState.getNodes()).localNodeId(node1.getId()).masterNodeId(node2.getId())) @@ -816,7 +817,7 @@ public void testResetDesiredBalanceOnNoLongerMaster() { assertThat( "desired balance should be resetted on no longer master", desiredBalanceShardsAllocator.getDesiredBalance(), - equalTo(DesiredBalance.INITIAL) + equalTo(DesiredBalance.NOT_MASTER) ); } finally { clusterService.close(); @@ -862,7 +863,7 @@ public void resetDesiredBalance() { try { rerouteAndWait(service, clusterState, "initial-allocation"); - assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.INITIAL))); + assertThat(desiredBalanceAllocator.getDesiredBalance(), not(equalTo(DesiredBalance.BECOME_MASTER_INITIAL))); final var shutdownType = randomFrom(Type.SIGTERM, Type.REMOVE, Type.REPLACE); final var singleShutdownMetadataBuilder = SingleNodeShutdownMetadata.builder() @@ -938,7 +939,7 @@ public DesiredBalance compute( Queue> pendingDesiredBalanceMoves, Predicate isFresh ) { - assertThat(previousDesiredBalance, sameInstance(DesiredBalance.INITIAL)); + assertThat(previousDesiredBalance, sameInstance(DesiredBalance.BECOME_MASTER_INITIAL)); return new DesiredBalance(desiredBalanceInput.index(), Map.of()); } }, @@ -967,7 +968,7 @@ protected void submitReconcileTask(DesiredBalance desiredBalance) { lastListener.onResponse(null); } }; - assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.INITIAL)); + assertThat(desiredBalanceShardsAllocator.getDesiredBalance(), sameInstance(DesiredBalance.NOT_MASTER)); try { final PlainActionFuture future = new PlainActionFuture<>(); desiredBalanceShardsAllocator.allocate( From cab6dc5d56a7fcdbbd2fe355bc6d1277094f1400 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine 
<58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 00:26:23 +1100 Subject: [PATCH 126/139] Mute org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT #117893 --- muted-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 57db22feba059..cf39eae210f88 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -239,6 +239,8 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} issue: https://github.com/elastic/elasticsearch/issues/117862 +- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT + issue: https://github.com/elastic/elasticsearch/issues/117893 # Examples: # From cca7051e73ff089b26f3d1825e4b4e15b81e04aa Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Tue, 3 Dec 2024 14:28:07 +0100 Subject: [PATCH 127/139] ESQL: Simplify CombineProjections (#117882) Make combineUpperGroupingsAndLowerProjections a bit simpler. Also slightly improve a test and add comments to provide more context. --- .../rules/logical/CombineProjections.java | 40 ++++++++++--------- .../optimizer/LogicalPlanOptimizerTests.java | 2 +- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java index be7096538fb9a..957db4a7273e5 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/CombineProjections.java @@ -22,6 +22,7 @@ import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import java.util.ArrayList; +import java.util.LinkedHashSet; import java.util.List; public final class CombineProjections extends OptimizerRules.OptimizerRule { @@ -144,30 +145,31 @@ private static List combineUpperGroupingsAndLowerProjections( List upperGroupings, List lowerProjections ) { + assert upperGroupings.size() <= 1 + || upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false + : "CombineProjections only tested with a single CATEGORIZE with no additional groups"; // Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..) - AttributeMap aliases = new AttributeMap<>(); + AttributeMap aliases = new AttributeMap<>(); for (NamedExpression ne : lowerProjections) { - // record the alias - aliases.put(ne.toAttribute(), Alias.unwrap(ne)); + // Record the aliases. + // Projections are just aliases for attributes, so casting is safe. + aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne)); } - // Replace any matching attribute directly with the aliased attribute from the projection. - AttributeSet seen = new AttributeSet(); - List replaced = new ArrayList<>(); + + // Propagate any renames from the lower projection into the upper groupings. + // This can lead to duplicates: e.g. + // | EVAL x = y | STATS ... BY x, y + // All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like + // `c = CATEGORIZE(attribute)`. + // Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet). 
+ // TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for + // `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead. + LinkedHashSet resolvedGroupings = new LinkedHashSet<>(); for (NamedExpression ne : upperGroupings) { - // Duplicated attributes are ignored. - if (ne instanceof Attribute attribute) { - var newExpression = aliases.resolve(attribute, attribute); - if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) { - // Already seen, skip - continue; - } - replaced.add(newExpression); - } else { - // For grouping functions, this will replace nested properties too - replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a))); - } + NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)); + resolvedGroupings.add(transformed); } - return replaced; + return new ArrayList<>(resolvedGroupings); } /** diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 57d0c7432f97b..a74efca3b3d99 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1217,7 +1217,7 @@ public void testCombineProjectionWithCategorizeGrouping() { var plan = plan(""" from test | eval k = first_name, k1 = k - | stats s = sum(salary) by cat = CATEGORIZE(k) + | stats s = sum(salary) by cat = CATEGORIZE(k1) | keep s, cat """); From 03a71d2deee7bb2788fc40b8d21d90cc75b787e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Cea=20Fontenla?= Date: Tue, 3 Dec 2024 14:47:40 +0100 Subject: [PATCH 128/139] ESQL: Make Categorize usable in aggs when identical to a grouping (#117835) Cases like `STATS MV_APPEND(cat, CATEGORIZE(x)) BY cat=CATEGORIZE(x)` should work, as they're moved to an EVAL by a rule. 
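To make the newly supported shapes concrete, here are a few examples mirroring the verifier tests added later in this patch (the `test` index and `first_name` field come from those tests):

```
FROM test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)
FROM test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)
FROM test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY cat = CATEGORIZE(first_name)
```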
Also, these cases were discarded, as they fail because of other verifications (which also fail for BUCKET):

```
STATS x = category BY category=CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY CATEGORIZE(message)
STATS x = CATEGORIZE(message) BY category=CATEGORIZE(message)
```

---
 .../src/main/resources/bucket.csv-spec | 21 +++
 .../src/main/resources/categorize.csv-spec | 121 ++++++++++++------
 .../src/main/resources/docs.csv-spec | 2 +-
 .../xpack/esql/action/EsqlCapabilities.java | 2 +-
 .../xpack/esql/analysis/Verifier.java | 39 +++---
 ...ReplaceAggregateAggExpressionWithEval.java | 16 +++
 ...laceAggregateNestedExpressionWithEval.java | 6 +-
 .../xpack/esql/analysis/VerifierTests.java | 34 +++--
 .../optimizer/LogicalPlanOptimizerTests.java | 4 +-
 9 files changed, 167 insertions(+), 78 deletions(-)

diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
index 7bbf011176693..b29c489910f65 100644
--- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
+++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/bucket.csv-spec
@@ -503,6 +503,27 @@ FROM employees
 //end::reuseGroupingFunctionWithExpression-result[]
 ;

+reuseGroupingFunctionImplicitAliasWithExpression#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
+FROM employees
+| STATS s1 = `BUCKET(salary / 100 + 99, 50.)` + 1, s2 = BUCKET(salary / 1000 + 999, 50.) + 2 BY BUCKET(salary / 100 + 99, 50.), b2 = BUCKET(salary / 1000 + 999, 50.)
+| SORT `BUCKET(salary / 100 + 99, 50.)`, b2
+| KEEP s1, `BUCKET(salary / 100 + 99, 50.)`, s2, b2
+;
+
+ s1:double | BUCKET(salary / 100 + 99, 50.):double | s2:double | b2:double
+351.0 |350.0 |1002.0 |1000.0
+401.0 |400.0 |1002.0 |1000.0
+451.0 |450.0 |1002.0 |1000.0
+501.0 |500.0 |1002.0 |1000.0
+551.0 |550.0 |1002.0 |1000.0
+601.0 |600.0 |1002.0 |1000.0
+601.0 |600.0 |1052.0 |1050.0
+651.0 |650.0 |1052.0 |1050.0
+701.0 |700.0 |1052.0 |1050.0
+751.0 |750.0 |1052.0 |1050.0
+801.0 |800.0 |1052.0 |1050.0
+;
+
 reuseGroupingFunctionWithinAggs#[skip:-8.13.99, reason:BUCKET renamed in 8.14]
 FROM employees
 | STATS sum = 1 + MAX(1 + BUCKET(salary, 1000.)) BY BUCKET(salary, 1000.)
+ 1 diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec index e45b10d1aa122..804c1c56a1eb5 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/categorize.csv-spec @@ -1,5 +1,5 @@ standard aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT(), @@ -17,7 +17,7 @@ count:long | sum:long | avg:double | count_distinct:long | category:keyw ; values aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS values=MV_SORT(VALUES(message)), @@ -33,7 +33,7 @@ values:keyword | top ; mv -required_capability: categorize_v4 +required_capability: categorize_v5 FROM mv_sample_data | STATS COUNT(), SUM(event_duration) BY category=CATEGORIZE(message) @@ -48,7 +48,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; row mv -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"], str = ["a", "b", "c"] | STATS COUNT(), VALUES(str) BY category=CATEGORIZE(message) @@ -61,7 +61,7 @@ COUNT():long | VALUES(str):keyword | category:keyword ; skips stopwords -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["Mon Tue connected to a", "Jul Aug connected to b September ", "UTC connected GMT to c UTC"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -73,7 +73,7 @@ COUNT():long | category:keyword ; with multiple indices -required_capability: categorize_v4 +required_capability: categorize_v5 required_capability: union_types FROM sample_data* @@ -88,7 +88,7 @@ COUNT():long | category:keyword ; mv with many values -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS COUNT() BY category=CATEGORIZE(job_positions) @@ -105,7 +105,7 @@ COUNT():long | category:keyword ; mv with many values and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -120,7 +120,7 @@ SUM(languages):long | category:keyword ; mv with many values and nulls and SUM -required_capability: categorize_v4 +required_capability: categorize_v5 FROM employees | STATS SUM(languages) BY category=CATEGORIZE(job_positions) @@ -134,7 +134,7 @@ SUM(languages):long | category:keyword ; mv via eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = MV_APPEND(message, "Banana") @@ -150,7 +150,7 @@ COUNT():long | category:keyword ; mv via eval const -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -164,7 +164,7 @@ COUNT():long | category:keyword ; mv via eval const without aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL message = ["Banana", "Bread"] @@ -178,7 +178,7 @@ COUNT():long | CATEGORIZE(message):keyword ; mv const in parameter -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -191,7 +191,7 @@ COUNT():long | c:keyword ; agg alias shadowing -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY c = CATEGORIZE(["Banana", "Bread"]) @@ -206,7 +206,7 @@ 
c:keyword ; chained aggregations using categorize -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -221,7 +221,7 @@ COUNT():long | category:keyword ; stats without aggs -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS BY category=CATEGORIZE(message) @@ -235,7 +235,7 @@ category:keyword ; text field -required_capability: categorize_v4 +required_capability: categorize_v5 FROM hosts | STATS COUNT() BY category=CATEGORIZE(host_group) @@ -253,7 +253,7 @@ COUNT():long | category:keyword ; on TO_UPPER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(TO_UPPER(message)) @@ -267,7 +267,7 @@ COUNT():long | category:keyword ; on CONCAT -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " banana")) @@ -281,7 +281,7 @@ COUNT():long | category:keyword ; on CONCAT with unicode -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(CONCAT(message, " 👍🏽😊")) @@ -295,7 +295,7 @@ COUNT():long | category:keyword ; on REVERSE(CONCAT()) -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(REVERSE(CONCAT(message, " 👍🏽😊"))) @@ -309,7 +309,7 @@ COUNT():long | category:keyword ; and then TO_LOWER -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -324,7 +324,7 @@ COUNT():long | category:keyword ; on const empty string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE("") @@ -336,7 +336,7 @@ COUNT():long | category:keyword ; on const empty string from eval -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = "" @@ -349,7 +349,7 @@ COUNT():long | category:keyword ; on null -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null @@ -362,7 +362,7 @@ COUNT():long | SUM(event_duration):long | category:keyword ; on null string -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = null::string @@ -375,7 +375,7 @@ COUNT():long | category:keyword ; filtering out all data -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | WHERE @timestamp < "2023-10-23T00:00:00Z" @@ -387,7 +387,7 @@ COUNT():long | category:keyword ; filtering out all data with constant -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS COUNT() BY category=CATEGORIZE(message) @@ -398,7 +398,7 @@ COUNT():long | category:keyword ; drop output columns -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) @@ -413,7 +413,7 @@ x:integer ; category value processing -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = ["connected to a", "connected to b", "disconnected"] | STATS COUNT() BY category=CATEGORIZE(message) @@ -427,7 +427,7 @@ COUNT():long | category:keyword ; row aliases -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -441,7 
+441,7 @@ COUNT():long | category:keyword | y:keyword ; from aliases -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -457,7 +457,7 @@ COUNT():long | category:keyword | y:keyword ; row aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | EVAL x = message @@ -473,7 +473,7 @@ COUNT():long | y:keyword ; from aliases with keep -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | EVAL x = message @@ -491,7 +491,7 @@ COUNT():long | y:keyword ; row rename -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to xyz" | RENAME message as x @@ -505,7 +505,7 @@ COUNT():long | y:keyword ; from rename -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | RENAME message as x @@ -521,7 +521,7 @@ COUNT():long | y:keyword ; row drop -required_capability: categorize_v4 +required_capability: categorize_v5 ROW message = "connected to a" | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -534,7 +534,7 @@ c:long ; from drop -required_capability: categorize_v4 +required_capability: categorize_v5 FROM sample_data | STATS c = COUNT() BY category=CATEGORIZE(message) @@ -547,3 +547,48 @@ c:long 3 3 ; + +categorize in aggs inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(category, category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), `CATEGORIZE(message)`) BY CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] +; + +categorize in aggs same as grouping inside function with explicit alias +required_capability: categorize_v5 + +FROM sample_data + | STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), category) BY category=CATEGORIZE(message) + | SORT x + | KEEP `COUNT()`, x +; + +COUNT():long | x:keyword + 3 | [.*?Connected.+?to.*?,.*?Connected.+?to.*?] + 3 | [.*?Connection.+?error.*?,.*?Connection.+?error.*?] + 1 | [.*?Disconnected.*?,.*?Disconnected.*?] 
+; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index 24baf1263d06a..aa89c775da4cf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -678,7 +678,7 @@ Bangalore | 9 | 72 ; docsCategorize -required_capability: categorize_v4 +required_capability: categorize_v5 // tag::docsCategorize[] FROM sample_data | STATS count=COUNT() BY category=CATEGORIZE(message) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 646c4f8240c3e..b5d6dd8584e8c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -407,7 +407,7 @@ public enum Cap { /** * Supported the text categorization function "CATEGORIZE". */ - CATEGORIZE_V4, + CATEGORIZE_V5, /** * QSTR function diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 5f8c011cff53a..49d8a5ee8caad 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -20,7 +20,6 @@ import org.elasticsearch.xpack.esql.core.expression.Expressions; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.MetadataAttribute; -import org.elasticsearch.xpack.esql.core.expression.NameId; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; import org.elasticsearch.xpack.esql.core.expression.function.Function; @@ -63,12 +62,10 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collection; -import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Consumer; @@ -364,35 +361,35 @@ private static void checkCategorizeGrouping(Aggregate agg, Set failures ); }); - // Forbid CATEGORIZE being used in the aggregations - agg.aggregates().forEach(a -> { - a.forEachDown( - Categorize.class, - categorize -> failures.add( - fail(categorize, "cannot use CATEGORIZE grouping function [{}] within the aggregations", categorize.sourceText()) + // Forbid CATEGORIZE being used in the aggregations, unless it appears as a grouping + agg.aggregates() + .forEach( + a -> a.forEachDown( + AggregateFunction.class, + aggregateFunction -> aggregateFunction.forEachDown( + Categorize.class, + categorize -> failures.add( + fail(categorize, "cannot use CATEGORIZE grouping function [{}] within an aggregation", categorize.sourceText()) + ) + ) ) ); - }); - // Forbid CATEGORIZE being referenced in the aggregation functions - Map categorizeByAliasId = new HashMap<>(); + // Forbid CATEGORIZE being referenced as a child of an aggregation function + AttributeMap categorizeByAttribute = new AttributeMap<>(); agg.groupings().forEach(g -> { g.forEachDown(Alias.class, alias -> { if (alias.child() instanceof Categorize 
categorize) { - categorizeByAliasId.put(alias.id(), categorize); + categorizeByAttribute.put(alias.toAttribute(), categorize); } }); }); agg.aggregates() .forEach(a -> a.forEachDown(AggregateFunction.class, aggregate -> aggregate.forEachDown(Attribute.class, attribute -> { - var categorize = categorizeByAliasId.get(attribute.id()); + var categorize = categorizeByAttribute.get(attribute); if (categorize != null) { failures.add( - fail( - attribute, - "cannot reference CATEGORIZE grouping function [{}] within the aggregations", - attribute.sourceText() - ) + fail(attribute, "cannot reference CATEGORIZE grouping function [{}] within an aggregation", attribute.sourceText()) ); } }))); @@ -449,7 +446,7 @@ private static void checkInvalidNamedExpressionUsage( // check the bucketing function against the group else if (c instanceof GroupingFunction gf) { if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } } }); @@ -466,7 +463,7 @@ else if (c instanceof GroupingFunction gf) { // optimizer will later unroll expressions with aggs and non-aggs with a grouping function into an EVAL, but that will no longer // be verified (by check above in checkAggregate()), so do it explicitly here if (Expressions.anyMatch(groups, ex -> ex instanceof Alias a && a.child().semanticEquals(gf)) == false) { - failures.add(fail(gf, "can only use grouping function [{}] part of the BY clause", gf.sourceText())); + failures.add(fail(gf, "can only use grouping function [{}] as part of the BY clause", gf.sourceText())); } else if (level == 0) { addFailureOnGroupingUsedNakedInAggs(failures, gf, "function"); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java index 2361b46b2be6f..c36d4caf7f599 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateAggExpressionWithEval.java @@ -9,18 +9,21 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeMap; import org.elasticsearch.xpack.esql.core.expression.Expression; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction; +import org.elasticsearch.xpack.esql.expression.function.grouping.Categorize; import org.elasticsearch.xpack.esql.plan.logical.Aggregate; import org.elasticsearch.xpack.esql.plan.logical.Eval; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.Project; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -51,6 +54,16 @@ protected LogicalPlan rule(Aggregate aggregate) { AttributeMap aliases = new AttributeMap<>(); 
aggregate.forEachExpressionUp(Alias.class, a -> aliases.put(a.toAttribute(), a.child())); + // Build Categorize grouping functions map. + // Functions like BUCKET() shouldn't reach this point, + // as they are moved to an early EVAL by ReplaceAggregateNestedExpressionWithEval + Map groupingAttributes = new HashMap<>(); + aggregate.forEachExpressionUp(Alias.class, a -> { + if (a.child() instanceof Categorize groupingFunction) { + groupingAttributes.put(groupingFunction, a.toAttribute()); + } + }); + // break down each aggregate into AggregateFunction and/or grouping key // preserve the projection at the end List aggs = aggregate.aggregates(); @@ -109,6 +122,9 @@ protected LogicalPlan rule(Aggregate aggregate) { return alias.toAttribute(); }); + // replace grouping functions with their references + aggExpression = aggExpression.transformUp(Categorize.class, groupingAttributes::get); + Alias alias = as.replaceChild(aggExpression); newEvals.add(alias); newProjections.add(alias.toAttribute()); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java index 985e68252a1f9..4dbc43454a023 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceAggregateNestedExpressionWithEval.java @@ -51,6 +51,7 @@ protected LogicalPlan rule(Aggregate aggregate) { // Exception: Categorize is internal to the aggregation and remains in the groupings. We move its child expression into an eval. if (g instanceof Alias as) { if (as.child() instanceof Categorize cat) { + // For Categorize grouping function, we only move the child expression into an eval if (cat.field() instanceof Attribute == false) { groupingChanged = true; var fieldAs = new Alias(as.source(), as.name(), cat.field(), null, true); @@ -59,7 +60,6 @@ protected LogicalPlan rule(Aggregate aggregate) { evalNames.put(fieldAs.name(), fieldAttr); Categorize replacement = cat.replaceChildren(List.of(fieldAttr)); newGroupings.set(i, as.replaceChild(replacement)); - groupingAttributes.put(cat, fieldAttr); } } else { groupingChanged = true; @@ -135,6 +135,10 @@ protected LogicalPlan rule(Aggregate aggregate) { }); // replace any grouping functions with their references pointing to the added synthetic eval replaced = replaced.transformDown(GroupingFunction.class, gf -> { + // Categorize in aggs depends on the grouping result, not on an early eval + if (gf instanceof Categorize) { + return gf; + } aggsChanged.set(true); // should never return null, as it's verified. // but even if broken, the transform will fail safely; otoh, returning `gf` will fail later due to incorrect plan. 
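A hedged sketch of how the two rules above combine for the CATEGORIZE-in-aggs case; the rewritten form is a conceptual rendering of the resulting plan, not literal optimizer output:

```
// original query, as exercised by the categorize.csv-spec cases above
FROM sample_data
| STATS COUNT(), x = MV_APPEND(CATEGORIZE(message), category) BY category = CATEGORIZE(message)

// conceptual rewrite: CATEGORIZE stays in the grouping, while the occurrence inside the
// aggregate-position expression is replaced by the grouping attribute and moved to an EVAL
FROM sample_data
| STATS COUNT() BY category = CATEGORIZE(message)
| EVAL x = MV_APPEND(category, category)
```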
diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index d02e78202e0c2..74e2de1141728 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -407,12 +407,12 @@ public void testAggFilterOnBucketingOrAggFunctions() { // but fails if it's different assertEquals( - "1:32: can only use grouping function [bucket(a, 3)] part of the BY clause", + "1:32: can only use grouping function [bucket(a, 3)] as part of the BY clause", error("row a = 1 | stats sum(a) where bucket(a, 3) > -1 by bucket(a,2)") ); assertEquals( - "1:40: can only use grouping function [bucket(salary, 10)] part of the BY clause", + "1:40: can only use grouping function [bucket(salary, 10)] as part of the BY clause", error("from test | stats max(languages) WHERE bucket(salary, 10) > 1 by emp_no") ); @@ -444,19 +444,19 @@ public void testAggWithNonBooleanFilter() { public void testGroupingInsideAggsAsAgg() { assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by emp_no") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.)") ); assertEquals( - "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:18: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats bucket(emp_no, 5.) by bucket(emp_no, 6.)") ); assertEquals( - "1:22: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", + "1:22: can only use grouping function [bucket(emp_no, 5.)] as part of the BY clause", error("from test| stats 3 + bucket(emp_no, 5.) 
by bucket(emp_no, 6.)") ); } @@ -1846,7 +1846,7 @@ public void testIntervalAsString() { } public void testCategorizeSingleGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(first_name)"); query("from test | STATS COUNT(*) BY cat = CATEGORIZE(first_name)"); @@ -1875,7 +1875,7 @@ public void testCategorizeSingleGrouping() { } public void testCategorizeNestedGrouping() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS COUNT(*) BY CATEGORIZE(LENGTH(first_name)::string)"); @@ -1890,27 +1890,33 @@ public void testCategorizeNestedGrouping() { } public void testCategorizeWithinAggregations() { - assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled()); + assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled()); query("from test | STATS MV_COUNT(cat), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY cat = CATEGORIZE(first_name)"); + query("from test | STATS MV_COUNT(CATEGORIZE(first_name)), COUNT(*) BY CATEGORIZE(first_name)"); assertEquals( - "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within the aggregations", + "1:25: cannot use CATEGORIZE grouping function [CATEGORIZE(first_name)] within an aggregation", error("FROM test | STATS COUNT(CATEGORIZE(first_name)) BY CATEGORIZE(first_name)") ); - assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS COUNT(cat) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:30: cannot reference CATEGORIZE grouping function [cat] within the aggregations", + "1:30: cannot reference CATEGORIZE grouping function [cat] within an aggregation", error("FROM test | STATS SUM(LENGTH(cat::keyword) + LENGTH(last_name)) BY cat = CATEGORIZE(first_name)") ); assertEquals( - "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within the aggregations", + "1:25: cannot reference CATEGORIZE grouping function [`CATEGORIZE(first_name)`] within an aggregation", error("FROM test | STATS COUNT(`CATEGORIZE(first_name)`) BY CATEGORIZE(first_name)") ); + + assertEquals( + "1:28: can only use grouping function [CATEGORIZE(last_name)] as part of the BY clause", + error("FROM test | STATS MV_COUNT(CATEGORIZE(last_name)) BY CATEGORIZE(first_name)") + ); } public void testSortByAggregate() { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index a74efca3b3d99..b76781f76f4af 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -1212,7 +1212,7 @@ public void testCombineProjectionWithAggregationFirstAndAliasedGroupingUsedInAgg * \_EsRelation[test][_meta_field{f}#23, emp_no{f}#17, first_name{f}#18, ..] 
 */
 public void testCombineProjectionWithCategorizeGrouping() {
- assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled());
+ assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled());
 var plan = plan("""
 from test
@@ -3949,7 +3949,7 @@ public void testNestedExpressionsInGroups() {
 * \_EsRelation[test][_meta_field{f}#14, emp_no{f}#8, first_name{f}#9, ge..]
 */
 public void testNestedExpressionsInGroupsWithCategorize() {
- assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V4.isEnabled());
+ assumeTrue("requires Categorize capability", EsqlCapabilities.Cap.CATEGORIZE_V5.isEnabled());
 var plan = optimizedPlan("""
 from test

From ed1e3664ad6c50d2af24b09db51448072764f663 Mon Sep 17 00:00:00 2001
From: Jim Ferenczi
Date: Tue, 3 Dec 2024 13:51:07 +0000
Subject: [PATCH 129/139] Move SparseVectorQueryBuilder and TextExpansionQueryBuilder to x-pack core (#117857)

This commit moves the SparseVectorQueryBuilder and TextExpansionQueryBuilder classes to the x-pack core module, enabling other modules to utilize these query builders. Additionally, it introduces a SparseVectorQueryWrapper to extract sparse vector queries from standard Lucene queries. This is needed for supporting semantic highlighting with sparse vector fields as a follow-up.

---
 .../xpack/core/XPackClientPlugin.java | 10 +++
 .../ml/search}/SparseVectorQueryBuilder.java | 7 +-
 .../ml/search/SparseVectorQueryWrapper.java | 77 +++++++++++++++++++
 .../ml/search}/TextExpansionQueryBuilder.java | 4 +-
 .../ml/search/WeightedTokensQueryBuilder.java | 2 +-
 .../core/ml/search/WeightedTokensUtils.java | 11 ++-
 .../SparseVectorQueryBuilderTests.java | 21 ++---
 .../TextExpansionQueryBuilderTests.java | 14 ++--
 .../WeightedTokensQueryBuilderTests.java | 13 +++-
 .../xpack/ml/MachineLearning.java | 19 -----
 10 files changed, 125 insertions(+), 53 deletions(-)
 rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilder.java (97%)
 create mode 100644 x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java
 rename x-pack/plugin/{ml/src/main/java/org/elasticsearch/xpack/ml/queries => core/src/main/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilder.java (98%)
 rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/SparseVectorQueryBuilderTests.java (94%)
 rename x-pack/plugin/{ml/src/test/java/org/elasticsearch/xpack/ml/queries => core/src/test/java/org/elasticsearch/xpack/core/ml/search}/TextExpansionQueryBuilderTests.java (96%)

diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
index e2435c3396fa8..f5923a4942634 100644
--- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
+++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/XPackClientPlugin.java
@@ -71,6 +71,8 @@ import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;
 import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskParams;
 import org.elasticsearch.xpack.core.ml.job.snapshot.upgrade.SnapshotUpgradeTaskState;
+import org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder;
+import org.elasticsearch.xpack.core.ml.search.TextExpansionQueryBuilder;
import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import org.elasticsearch.xpack.core.monitoring.MonitoringFeatureSetUsage; import org.elasticsearch.xpack.core.rollup.RollupFeatureSetUsage; @@ -398,6 +400,14 @@ public List getNamedXContent() { @Override public List> getQueries() { return List.of( + new QuerySpec<>(SparseVectorQueryBuilder.NAME, SparseVectorQueryBuilder::new, SparseVectorQueryBuilder::fromXContent), + new QuerySpec( + TextExpansionQueryBuilder.NAME, + TextExpansionQueryBuilder::new, + TextExpansionQueryBuilder::fromXContent + ), + // TODO: The WeightedTokensBuilder is slated for removal after the SparseVectorQueryBuilder is available. + // The logic to create a Boolean query based on weighted tokens will remain and/or be moved to server. new SearchPlugin.QuerySpec( WeightedTokensQueryBuilder.NAME, WeightedTokensQueryBuilder::new, diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java similarity index 97% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java index 5a63ad8e85e9b..e9e4e90421adc 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; @@ -33,9 +33,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensUtils; import java.io.IOException; import java.util.ArrayList; @@ -210,7 +207,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { return (shouldPruneTokens) ? WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, queryVectors, ft, context) - : WeightedTokensUtils.queryBuilderWithAllTokens(queryVectors, ft, context); + : WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, queryVectors, ft, context); } @Override diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java new file mode 100644 index 0000000000000..234560f620d95 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryWrapper.java @@ -0,0 +1,77 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.core.ml.search; + +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.elasticsearch.index.query.SearchExecutionContext; + +import java.io.IOException; +import java.util.Objects; + +/** + * A wrapper class for the Lucene query generated by {@link SparseVectorQueryBuilder#toQuery(SearchExecutionContext)}. + * This wrapper facilitates the extraction of the complete sparse vector query using a {@link QueryVisitor}. + */ +public class SparseVectorQueryWrapper extends Query { + private final String fieldName; + private final Query termsQuery; + + public SparseVectorQueryWrapper(String fieldName, Query termsQuery) { + this.fieldName = fieldName; + this.termsQuery = termsQuery; + } + + public Query getTermsQuery() { + return termsQuery; + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + var rewrite = termsQuery.rewrite(indexSearcher); + if (rewrite != termsQuery) { + return new SparseVectorQueryWrapper(fieldName, rewrite); + } + return this; + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return termsQuery.createWeight(searcher, scoreMode, boost); + } + + @Override + public String toString(String field) { + return termsQuery.toString(field); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(fieldName)) { + termsQuery.visit(visitor.getSubVisitor(BooleanClause.Occur.MUST, this)); + } + } + + @Override + public boolean equals(Object obj) { + if (sameClassAs(obj) == false) { + return false; + } + SparseVectorQueryWrapper that = (SparseVectorQueryWrapper) obj; + return fieldName.equals(that.fieldName) && termsQuery.equals(that.termsQuery); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), fieldName, termsQuery); + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java similarity index 98% rename from x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java rename to x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java index 6d972bcf5863a..81758ec5f9342 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilder.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.search.Query; import org.apache.lucene.util.SetOnce; @@ -32,8 +32,6 @@ import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; import java.io.IOException; import java.util.List; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java index 256c90c3eaa62..f41fcd77ce627 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilder.java @@ -125,7 +125,7 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } return (this.tokenPruningConfig == null) - ? WeightedTokensUtils.queryBuilderWithAllTokens(tokens, ft, context) + ? WeightedTokensUtils.queryBuilderWithAllTokens(fieldName, tokens, ft, context) : WeightedTokensUtils.queryBuilderWithPrunedTokens(fieldName, tokenPruningConfig, tokens, ft, context); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java index 133920416d227..1c2ac23151e6e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensUtils.java @@ -24,13 +24,18 @@ public final class WeightedTokensUtils { private WeightedTokensUtils() {} - public static Query queryBuilderWithAllTokens(List tokens, MappedFieldType ft, SearchExecutionContext context) { + public static Query queryBuilderWithAllTokens( + String fieldName, + List tokens, + MappedFieldType ft, + SearchExecutionContext context + ) { var qb = new BooleanQuery.Builder(); for (var token : tokens) { qb.add(new BoostQuery(ft.termQuery(token.token(), context), token.weight()), BooleanClause.Occur.SHOULD); } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } public static Query queryBuilderWithPrunedTokens( @@ -64,7 +69,7 @@ public static Query queryBuilderWithPrunedTokens( } } - return qb.setMinimumNumberShouldMatch(1).build(); + return new SparseVectorQueryWrapper(fieldName, qb.setMinimumNumberShouldMatch(1).build()); } /** diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java similarity index 94% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java index 13cf6d87728a8..9872d95de024a 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/SparseVectorQueryBuilderTests.java +++ 
b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -40,9 +40,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -50,7 +47,7 @@ import java.util.Collection; import java.util.List; -import static org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; +import static org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilder.QUERY_VECTOR_FIELD; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.either; import static org.hamcrest.Matchers.hasSize; @@ -102,7 +99,7 @@ private SparseVectorQueryBuilder createTestQueryBuilder(TokenPruningConfig token @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -156,8 +153,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(SparseVectorQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); @@ -233,11 +232,13 @@ public void testToQuery() throws IOException { private void testDoToQuery(SparseVectorQueryBuilder queryBuilder, SearchExecutionContext context) throws IOException { Query query = queryBuilder.doToQuery(context); + assertTrue(query instanceof SparseVectorQueryWrapper); + var sparseQuery = (SparseVectorQueryWrapper) query; if (queryBuilder.shouldPruneTokens()) { // It's possible that all documents were pruned for aggressive pruning configurations - assertTrue(query instanceof BooleanQuery || query instanceof MatchNoDocsQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery || sparseQuery.getTermsQuery() instanceof MatchNoDocsQuery); } else { - assertTrue(query instanceof BooleanQuery); + assertTrue(sparseQuery.getTermsQuery() instanceof BooleanQuery); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java similarity index 96% rename from x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java rename to x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java index 
00d50e0d0d7bb..a0263003b72db 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/queries/TextExpansionQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/TextExpansionQueryBuilderTests.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.ml.queries; +package org.elasticsearch.xpack.core.ml.search; import org.apache.lucene.document.Document; import org.apache.lucene.document.FeatureField; @@ -35,10 +35,6 @@ import org.elasticsearch.xpack.core.ml.action.InferModelAction; import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings; import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults; -import org.elasticsearch.xpack.core.ml.search.TokenPruningConfig; -import org.elasticsearch.xpack.core.ml.search.WeightedToken; -import org.elasticsearch.xpack.core.ml.search.WeightedTokensQueryBuilder; -import org.elasticsearch.xpack.ml.MachineLearning; import java.io.IOException; import java.lang.reflect.Method; @@ -77,7 +73,7 @@ protected TextExpansionQueryBuilder doCreateTestQueryBuilder() { @Override protected Collection> getPlugins() { - return List.of(MachineLearning.class, MapperExtrasPlugin.class, XPackClientPlugin.class); + return List.of(MapperExtrasPlugin.class, XPackClientPlugin.class); } @Override @@ -129,8 +125,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws @Override protected void doAssertLuceneQuery(TextExpansionQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); - BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java index 114ad90354c61..cded9b8dce5e2 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/WeightedTokensQueryBuilderTests.java @@ -271,8 +271,11 @@ public void testPruningIsAppliedCorrectly() throws IOException { } private void assertCorrectLuceneQuery(String name, Query query, List expectedFeatureFields) { - assertTrue(query instanceof BooleanQuery); - List booleanClauses = ((BooleanQuery) query).clauses(); + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); + List booleanClauses = booleanQuery.clauses(); assertEquals( name + " had " + booleanClauses.size() + " clauses, expected " + expectedFeatureFields.size(), expectedFeatureFields.size(), @@ -343,8 +346,10 @@ public void testMustRewrite() throws IOException { @Override protected void doAssertLuceneQuery(WeightedTokensQueryBuilder queryBuilder, Query query, SearchExecutionContext context) { - assertThat(query, instanceOf(BooleanQuery.class)); 
- BooleanQuery booleanQuery = (BooleanQuery) query; + assertThat(query, instanceOf(SparseVectorQueryWrapper.class)); + var sparseQuery = (SparseVectorQueryWrapper) query; + assertThat(sparseQuery.getTermsQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) sparseQuery.getTermsQuery(); assertEquals(booleanQuery.getMinimumNumberShouldMatch(), 1); assertThat(booleanQuery.clauses(), hasSize(NUM_TOKENS)); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 8363e0f5c19a1..c76e43790a259 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -48,7 +48,6 @@ import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; -import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.indices.AssociatedIndexDescriptor; import org.elasticsearch.indices.SystemIndexDescriptor; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; @@ -376,8 +375,6 @@ import org.elasticsearch.xpack.ml.process.MlMemoryTracker; import org.elasticsearch.xpack.ml.process.NativeController; import org.elasticsearch.xpack.ml.process.NativeStorageProvider; -import org.elasticsearch.xpack.ml.queries.SparseVectorQueryBuilder; -import org.elasticsearch.xpack.ml.queries.TextExpansionQueryBuilder; import org.elasticsearch.xpack.ml.rest.RestDeleteExpiredDataAction; import org.elasticsearch.xpack.ml.rest.RestMlInfoAction; import org.elasticsearch.xpack.ml.rest.RestMlMemoryAction; @@ -1764,22 +1761,6 @@ public List> getQueryVectorBuilders() { ); } - @Override - public List> getQueries() { - return List.of( - new QuerySpec( - TextExpansionQueryBuilder.NAME, - TextExpansionQueryBuilder::new, - TextExpansionQueryBuilder::fromXContent - ), - new QuerySpec( - SparseVectorQueryBuilder.NAME, - SparseVectorQueryBuilder::new, - SparseVectorQueryBuilder::fromXContent - ) - ); - } - private ContextParser checkAggLicense(ContextParser realParser, LicensedFeature.Momentary feature) { return (parser, name) -> { if (feature.check(getLicenseState()) == false) { From 5c1b3c7197603414614d72487c7327662d622420 Mon Sep 17 00:00:00 2001 From: mmahacek Date: Tue, 3 Dec 2024 06:10:02 -0800 Subject: [PATCH 130/139] Update email.asciidoc (#117867) Fix error in documentation. --- docs/reference/watcher/actions/email.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/watcher/actions/email.asciidoc b/docs/reference/watcher/actions/email.asciidoc index 16b9cc4be0628..efad500e0226b 100644 --- a/docs/reference/watcher/actions/email.asciidoc +++ b/docs/reference/watcher/actions/email.asciidoc @@ -129,7 +129,7 @@ killed by firewalls or load balancers in-between. | Name | Description | `format` | Attaches the watch data, equivalent to specifying `attach_data` in the watch configuration. Possible values are `json` or `yaml`. - Defaults to `json` if not specified. + Defaults to `yaml` if not specified. 
|====== From d3f0ae04e2b5e107686b9a19ffbe5312bacec753 Mon Sep 17 00:00:00 2001 From: Craig Taverner Date: Tue, 3 Dec 2024 15:10:57 +0100 Subject: [PATCH 131/139] Enhance LOOKUP JOIN csv-spec tests to cover more cases and fix several bugs found (#117843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds several more tests to lookup-join.csv-spec, and fixes the following bugs: * FieldCaps on right hand side should ignore fieldNames method and just use "*" because currently the fieldNames search cannot handle lookup fields with aliases (should be fixed in a followup PR). * Stop using the lookup index in the ComputeService (so we don’t get both indices data coming in from the left, and other weird behaviour). * Ignore failing SearchStats checks on fields from the right hand side in the logical planner (so it does not plan EVAL field = null for all right hand fields). This should be fixed properly with the correct updates to TransportSearchShardsAction (or rather to making multiple use of that for each branch of the execution model). --- .../xpack/esql/ccq/MultiClusterSpecIT.java | 4 +- .../xpack/esql/CsvTestsDataLoader.java | 8 + .../resources/clientips_lookup-settings.json | 5 + .../src/main/resources/languages.csv | 2 +- .../src/main/resources/lookup-join.csv-spec | 224 +++++++++++++++++- .../src/main/resources/mapping-clientips.json | 16 +- .../src/main/resources/mapping-languages.json | 2 +- .../main/resources/mapping-message_types.json | 10 + .../src/main/resources/message_types.csv | 6 + .../message_types_lookup-settings.json | 5 + .../xpack/esql/action/EsqlCapabilities.java | 2 +- .../esql/enrich/LookupFromIndexService.java | 11 + .../local/ReplaceMissingFieldWithNull.java | 13 +- .../physical/local/InsertFieldExtraction.java | 15 +- .../esql/plan/physical/LookupJoinExec.java | 2 +- .../esql/planner/LocalExecutionPlanner.java | 1 + .../xpack/esql/planner/PlannerUtils.java | 11 +- .../xpack/esql/plugin/ComputeService.java | 54 ++++- .../xpack/esql/session/EsqlSession.java | 4 +- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 20 files changed, 355 insertions(+), 42 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index af5eadc7358a2..19b29764559d1 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -47,7 +47,7 @@ import static org.elasticsearch.xpack.esql.EsqlTestUtils.classpathResources; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.INLINESTATS_V2; -import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V3; +import static 
org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_LOOKUP_V4; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.JOIN_PLANNING_V1; import static org.elasticsearch.xpack.esql.action.EsqlCapabilities.Cap.METADATA_FIELDS_REMOTE_TEST; import static org.elasticsearch.xpack.esql.qa.rest.EsqlSpecTestCase.Mode.SYNC; @@ -125,7 +125,7 @@ protected void shouldSkipTest(String testName) throws IOException { assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); - assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V3.capabilityName())); + assumeFalse("LOOKUP JOIN not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_LOOKUP_V4.capabilityName())); } private TestFeatureService remoteFeaturesService() throws IOException { diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 9c987a02aca2d..f9d8cf00695c1 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -72,6 +72,11 @@ public class CsvTestsDataLoader { .withTypeMapping(Map.of("@timestamp", "date_nanos")); private static final TestsDataset MISSING_IP_SAMPLE_DATA = new TestsDataset("missing_ip_sample_data"); private static final TestsDataset CLIENT_IPS = new TestsDataset("clientips"); + private static final TestsDataset CLIENT_IPS_LOOKUP = CLIENT_IPS.withIndex("clientips_lookup") + .withSetting("clientips_lookup-settings.json"); + private static final TestsDataset MESSAGE_TYPES = new TestsDataset("message_types"); + private static final TestsDataset MESSAGE_TYPES_LOOKUP = MESSAGE_TYPES.withIndex("message_types_lookup") + .withSetting("message_types_lookup-settings.json"); private static final TestsDataset CLIENT_CIDR = new TestsDataset("client_cidr"); private static final TestsDataset AGES = new TestsDataset("ages"); private static final TestsDataset HEIGHTS = new TestsDataset("heights"); @@ -112,6 +117,9 @@ public class CsvTestsDataLoader { Map.entry(SAMPLE_DATA_TS_NANOS.indexName, SAMPLE_DATA_TS_NANOS), Map.entry(MISSING_IP_SAMPLE_DATA.indexName, MISSING_IP_SAMPLE_DATA), Map.entry(CLIENT_IPS.indexName, CLIENT_IPS), + Map.entry(CLIENT_IPS_LOOKUP.indexName, CLIENT_IPS_LOOKUP), + Map.entry(MESSAGE_TYPES.indexName, MESSAGE_TYPES), + Map.entry(MESSAGE_TYPES_LOOKUP.indexName, MESSAGE_TYPES_LOOKUP), Map.entry(CLIENT_CIDR.indexName, CLIENT_CIDR), Map.entry(AGES.indexName, AGES), Map.entry(HEIGHTS.indexName, HEIGHTS), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/clientips_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv index 3ee60b79970ba..1c1a9776df6cc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages.csv @@ -1,4 +1,4 @@ -language_code:keyword,language_name:keyword +language_code:integer,language_name:keyword 1,English 2,French 3,Spanish diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 5de353978b307..f2800456ceb33 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -4,8 +4,8 @@ // //TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -basicOnTheDataNode-Ignore -required_capability: join_lookup_v3 +basicOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -21,19 +21,19 @@ emp_no:integer | language_code:integer | language_name:keyword 10093 | 3 | Spanish ; -basicRow-Ignore -required_capability: join_lookup_v3 +basicRow +required_capability: join_lookup_v4 ROW language_code = 1 | LOOKUP JOIN languages_lookup ON language_code ; -language_code:keyword | language_name:keyword +language_code:integer | language_name:keyword 1 | English ; basicOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -49,9 +49,8 @@ emp_no:integer | language_code:integer | language_name:keyword 10003 | 4 | German ; -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) -subsequentEvalOnTheDataNode-Ignore -required_capability: join_lookup_v3 +subsequentEvalOnTheDataNode +required_capability: join_lookup_v4 FROM employees | EVAL language_code = languages @@ -69,7 +68,7 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x ; subsequentEvalOnTheCoordinator -required_capability: join_lookup_v3 +required_capability: join_lookup_v4 FROM employees | SORT emp_no @@ -85,3 +84,208 @@ emp_no:integer | language_code:integer | language_name:keyword | language_code_x 10002 | 5 | null | 10 10003 | 4 | german | 8 ; + +lookupIPFromRow +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| LOOKUP JOIN clientips_lookup ON client_ip +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right" +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP left, client_ip, right, env +; + +left:keyword | client_ip:keyword | right:keyword | env:keyword +left | 172.21.0.5 | right | Development +; + +lookupIPFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +; + +@timestamp:date | event_duration:long | message:keyword | 
client_ip:keyword | env:keyword +2023-10-23T13:55:01.543Z | 1756467 | Connected to 10.1.0.1 | 172.21.3.15 | Production +2023-10-23T13:53:55.832Z | 5033755 | Connection error | 172.21.3.15 | Production +2023-10-23T13:52:55.015Z | 8268153 | Connection error | 172.21.3.15 | Production +2023-10-23T13:51:54.732Z | 725448 | Connection error | 172.21.3.15 | Production +2023-10-23T13:33:34.937Z | 1232382 | Disconnected | 172.21.0.5 | Development +2023-10-23T12:27:28.948Z | 2764889 | Connected to 10.1.0.2 | 172.21.2.113 | QA +2023-10-23T12:15:03.360Z | 3450233 | Connected to 10.1.0.3 | 172.21.2.162 | QA +; + +lookupIPFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP @timestamp, client_ip, event_duration, message, env +; + +@timestamp:date | client_ip:keyword | event_duration:long | message:keyword | env:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Production +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Production +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Production +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Production +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Development +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | QA +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | QA +; + +lookupIPFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupIPFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| EVAL client_ip = client_ip::keyword +| LOOKUP JOIN clientips_lookup ON client_ip +| KEEP client_ip, env +| STATS count = count(client_ip) BY env +| SORT count DESC, env ASC +; + +count:long | env:keyword +4 | Production +2 | QA +1 | Development +; + +lookupMessageFromRow +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowing +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromRowWithShadowingKeep +required_capability: join_lookup_v4 + +ROW left = "left", message = "Connected to 10.1.0.1", type = "unknown", right = "right" +| LOOKUP JOIN message_types_lookup ON message +| KEEP left, message, right, type +; + +left:keyword | message:keyword | right:keyword | type:keyword +left | Connected to 10.1.0.1 | right | Success +; + +lookupMessageFromIndex +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error 
+2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP @timestamp, client_ip, event_duration, message, type +; + +@timestamp:date | client_ip:ip | event_duration:long | message:keyword | type:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 | Success +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error | Error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error | Error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error | Error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 | Success +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 | Success +; + +lookupMessageFromIndexStats +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; + +lookupMessageFromIndexStatsKeep +required_capability: join_lookup_v4 + +FROM sample_data +| LOOKUP JOIN message_types_lookup ON message +| KEEP message, type +| STATS count = count(message) BY type +| SORT count DESC, type ASC +; + +count:long | type:keyword +3 | Error +3 | Success +1 | Disconnected +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json index 39bd37ce26c7f..d491810f9134e 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-clientips.json @@ -1,10 +1,10 @@ { - "properties": { - "client_ip": { - "type": "keyword" - }, - "env": { - "type": "keyword" - } + "properties": { + "client_ip": { + "type": "keyword" + }, + "env": { + "type": "keyword" } - } \ No newline at end of file + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json index 0cec0caf17304..327b692369242 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-languages.json @@ -1,7 +1,7 @@ { "properties" : { "language_code" : { - "type" : "keyword" + "type" : "integer" }, "language_name" : { "type" : "keyword" diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json new file mode 100644 index 0000000000000..af545b48da3d2 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/mapping-message_types.json @@ -0,0 +1,10 @@ +{ + "properties": { + "message": { + "type": "keyword" + }, + "type": { + "type": "keyword" + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv new file mode 100644 index 0000000000000..8e00485771445 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types.csv @@ -0,0 +1,6 @@ +message:keyword,type:keyword +Connection error,Error +Disconnected,Disconnected +Connected to 10.1.0.1,Success +Connected to 10.1.0.2,Success +Connected to 10.1.0.3,Success diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json new file mode 100644 index 0000000000000..b73d1f9accf92 --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/message_types_lookup-settings.json @@ -0,0 +1,5 @@ +{ + "index": { + "mode": "lookup" + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index b5d6dd8584e8c..4845c7061949b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -525,7 +525,7 @@ public enum Cap { /** * LOOKUP JOIN */ - JOIN_LOOKUP_V3(Build.current().isSnapshot()), + JOIN_LOOKUP_V4(Build.current().isSnapshot()), /** * Fix for https://github.com/elastic/elasticsearch/issues/117054 diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java index 849e8e890e248..4f429c46b9123 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/LookupFromIndexService.java @@ -24,6 +24,7 @@ import org.elasticsearch.tasks.TaskId; import org.elasticsearch.transport.TransportService; import org.elasticsearch.xpack.core.security.authz.privilege.ClusterPrivilegeResolver; +import org.elasticsearch.xpack.esql.EsqlIllegalArgumentException; import org.elasticsearch.xpack.esql.action.EsqlQueryAction; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; import org.elasticsearch.xpack.esql.core.tree.Source; @@ -78,9 +79,19 @@ protected TransportRequest transportRequest(LookupFromIndexService.Request reque @Override protected QueryList queryList(TransportRequest request, SearchExecutionContext context, Block inputBlock, DataType inputDataType) { MappedFieldType fieldType = context.getFieldType(request.matchField); + validateTypes(request.inputDataType, fieldType); return termQueryList(fieldType, context, inputBlock, inputDataType); } + private static void validateTypes(DataType inputDataType, MappedFieldType fieldType) { + // TODO: consider supporting implicit type conversion as done in ENRICH for some types + if (fieldType.typeName().equals(inputDataType.typeName()) == false) { + throw new EsqlIllegalArgumentException( + "LOOKUP JOIN match and input types are incompatible: match[" + fieldType.typeName() + "], input[" + inputDataType + "]" + ); + } + } + public static class Request extends AbstractLookupService.Request { private final String matchField; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java index 0fa6d61a0ca9b..096f72f7694e1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/local/ReplaceMissingFieldWithNull.java @@ -9,6 +9,7 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.xpack.esql.core.expression.Alias; +import org.elasticsearch.xpack.esql.core.expression.AttributeSet; import org.elasticsearch.xpack.esql.core.expression.FieldAttribute; import org.elasticsearch.xpack.esql.core.expression.Literal; import org.elasticsearch.xpack.esql.core.expression.NamedExpression; @@ -23,6 +24,7 @@ import org.elasticsearch.xpack.esql.plan.logical.Project; import org.elasticsearch.xpack.esql.plan.logical.RegexExtract; import org.elasticsearch.xpack.esql.plan.logical.TopN; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.logical.local.LocalRelation; import org.elasticsearch.xpack.esql.rule.ParameterizedRule; import org.elasticsearch.xpack.esql.stats.SearchStats; @@ -56,10 +58,13 @@ else if (plan instanceof Project project) { var projections = project.projections(); List newProjections = new ArrayList<>(projections.size()); Map nullLiteral = Maps.newLinkedHashMapWithExpectedSize(DataType.types().size()); + AttributeSet joinAttributes = joinAttributes(project); for (NamedExpression projection : projections) { // Do not use the attribute name, this can deviate from the field name for union types. - if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false) { + if (projection instanceof FieldAttribute f && stats.exists(f.fieldName()) == false && joinAttributes.contains(f) == false) { + // TODO: Should do a searchStats lookup for join attributes instead of just ignoring them here + // See TransportSearchShardsAction DataType dt = f.dataType(); Alias nullAlias = nullLiteral.get(f.dataType()); // save the first field as null (per datatype) @@ -96,4 +101,10 @@ else if (plan instanceof Project project) { return plan; } + + private AttributeSet joinAttributes(Project project) { + var attributes = new AttributeSet(); + project.forEachDown(Join.class, j -> j.right().forEachDown(EsRelation.class, p -> attributes.addAll(p.output()))); + return attributes; + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java index cafe3726f92ac..dc32a4ad3c282 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/physical/local/InsertFieldExtraction.java @@ -23,14 +23,12 @@ import org.elasticsearch.xpack.esql.rule.Rule; import java.util.ArrayList; -import java.util.Collections; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; /** - * * Materialize the concrete fields that need to be extracted from the storage until the last possible moment. * Expects the local plan to already have a projection containing the fields needed upstream. *

@@ -102,15 +100,18 @@ public PhysicalPlan apply(PhysicalPlan plan) { private static Set missingAttributes(PhysicalPlan p) { var missing = new LinkedHashSet(); - var inputSet = p.inputSet(); + var input = p.inputSet(); - // TODO: We need to extract whatever fields are missing from the left hand side. - // skip the lookup join since the right side is always materialized and a projection + // For LOOKUP JOIN we only need field-extraction on left fields used to match, since the right side is always materialized if (p instanceof LookupJoinExec join) { - return Collections.emptySet(); + join.leftFields().forEach(f -> { + if (input.contains(f) == false) { + missing.add(f); + } + }); + return missing; } - var input = inputSet; // collect field attributes used inside expressions // TODO: Rather than going over all expressions manually, this should just call .references() p.forEachExpression(TypedAttribute.class, f -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java index 2d3caa27da4cd..8b1cc047309e7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/physical/LookupJoinExec.java @@ -102,7 +102,7 @@ public List output() { @Override public PhysicalPlan estimateRowSize(State state) { - state.add(false, output()); + state.add(false, addedFields); return this; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java index a8afaa4d8119b..8c0488afdd42a 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/LocalExecutionPlanner.java @@ -565,6 +565,7 @@ private PhysicalOperation planHashJoin(HashJoinExec join, LocalExecutionPlannerC private PhysicalOperation planLookupJoin(LookupJoinExec join, LocalExecutionPlannerContext context) { PhysicalOperation source = plan(join.left(), context); + // TODO: The source builder includes incoming fields including the ones we're going to drop Layout.Builder layoutBuilder = source.layout.builder(); for (Attribute f : join.addedFields()) { layoutBuilder.append(f); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java index f4ada1442efe5..37f89891860d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/planner/PlannerUtils.java @@ -14,6 +14,7 @@ import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.ElementType; import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.SearchExecutionContext; @@ -25,6 +26,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.core.util.Holder; import org.elasticsearch.xpack.esql.core.util.Queries; +import org.elasticsearch.xpack.esql.index.EsIndex; import 
org.elasticsearch.xpack.esql.optimizer.LocalLogicalOptimizerContext; import org.elasticsearch.xpack.esql.optimizer.LocalLogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LocalPhysicalOptimizerContext; @@ -117,12 +119,17 @@ public static String[] planOriginalIndices(PhysicalPlan plan) { var indices = new LinkedHashSet(); plan.forEachUp( FragmentExec.class, - f -> f.fragment() - .forEachUp(EsRelation.class, r -> indices.addAll(asList(Strings.commaDelimitedListToStringArray(r.index().name())))) + f -> f.fragment().forEachUp(EsRelation.class, r -> addOriginalIndexIfNotLookup(indices, r.index())) ); return indices.toArray(String[]::new); } + private static void addOriginalIndexIfNotLookup(Set indices, EsIndex index) { + if (index.indexNameWithModes().get(index.name()) != IndexMode.LOOKUP) { + indices.addAll(asList(Strings.commaDelimitedListToStringArray(index.name()))); + } + } + public static PhysicalPlan localPlan(List searchContexts, Configuration configuration, PhysicalPlan plan) { return localPlan(configuration, plan, SearchContextStats.from(searchContexts)); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java index 9aea1577a4137..c9c8635a60f57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ComputeService.java @@ -62,8 +62,12 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.enrich.EnrichLookupService; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexService; +import org.elasticsearch.xpack.esql.plan.logical.EsRelation; +import org.elasticsearch.xpack.esql.plan.logical.join.Join; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSinkExec; import org.elasticsearch.xpack.esql.plan.physical.ExchangeSourceExec; +import org.elasticsearch.xpack.esql.plan.physical.FragmentExec; +import org.elasticsearch.xpack.esql.plan.physical.LookupJoinExec; import org.elasticsearch.xpack.esql.plan.physical.OutputExec; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import org.elasticsearch.xpack.esql.planner.EsPhysicalOperationProviders; @@ -76,6 +80,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -160,9 +165,11 @@ public void execute( Map clusterToConcreteIndices = transportService.getRemoteClusterService() .groupIndices(SearchRequest.DEFAULT_INDICES_OPTIONS, PlannerUtils.planConcreteIndices(physicalPlan).toArray(String[]::new)); QueryPragmas queryPragmas = configuration.pragmas(); + Set lookupIndexNames = findLookupIndexNames(physicalPlan); + Set concreteIndexNames = selectConcreteIndices(clusterToConcreteIndices, lookupIndexNames); if (dataNodePlan == null) { - if (clusterToConcreteIndices.values().stream().allMatch(v -> v.indices().length == 0) == false) { - String error = "expected no concrete indices without data node plan; got " + clusterToConcreteIndices; + if (concreteIndexNames.isEmpty() == false) { + String error = "expected no concrete indices without data node plan; got " + concreteIndexNames; assert false : error; listener.onFailure(new IllegalStateException(error)); return; @@ -185,7 +192,7 @@ public void execute( return; } } else { - if (clusterToConcreteIndices.values().stream().allMatch(v -> 
v.indices().length == 0)) { + if (concreteIndexNames.isEmpty()) { var error = "expected concrete indices with data node plan but got empty; data node plan " + dataNodePlan; assert false : error; listener.onFailure(new IllegalStateException(error)); @@ -259,6 +266,42 @@ public void execute( } } + private Set selectConcreteIndices(Map clusterToConcreteIndices, Set indexesToIgnore) { + Set concreteIndexNames = new HashSet<>(); + clusterToConcreteIndices.forEach((clusterAlias, concreteIndices) -> { + for (String index : concreteIndices.indices()) { + if (indexesToIgnore.contains(index) == false) { + concreteIndexNames.add(index); + } + } + }); + return concreteIndexNames; + } + + private Set findLookupIndexNames(PhysicalPlan physicalPlan) { + Set lookupIndexNames = new HashSet<>(); + // When planning JOIN on the coordinator node: "LookupJoinExec.lookup()->FragmentExec.fragment()->EsRelation.index()" + physicalPlan.forEachDown( + LookupJoinExec.class, + lookupJoinExec -> lookupJoinExec.lookup() + .forEachDown( + FragmentExec.class, + frag -> frag.fragment().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + // When planning JOIN on the data node: "FragmentExec.fragment()->Join.right()->EsRelation.index()" + // TODO this only works for LEFT join, so we still need to support RIGHT join + physicalPlan.forEachDown( + FragmentExec.class, + fragmentExec -> fragmentExec.fragment() + .forEachDown( + Join.class, + join -> join.right().forEachDown(EsRelation.class, esRelation -> lookupIndexNames.add(esRelation.index().name())) + ) + ); + return lookupIndexNames; + } + // For queries like: FROM logs* | LIMIT 0 (including cross-cluster LIMIT 0 queries) private static void updateShardCountForCoordinatorOnlyQuery(EsqlExecutionInfo execInfo) { if (execInfo.isCrossClusterSearch()) { @@ -562,8 +605,9 @@ record DataNode(Transport.Connection connection, List shardIds, Map dataNodes, int totalShards, int skippedShards) {} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3b0f9ab578df9..3d1ed8f70eae0 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -178,7 +178,7 @@ public void executeOptimizedPlan( executeSubPlans(physicalPlan, planRunner, executionInfo, request, listener); } - private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {}; + private record PlanTuple(PhysicalPlan physical, LogicalPlan logical) {} private void executeSubPlans( PhysicalPlan physicalPlan, @@ -313,7 +313,7 @@ private void preAnalyze( // First resolve the lookup indices, then the main indices preAnalyzeLookupIndices( preAnalysis.lookupIndices, - fieldNames, + Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection l.delegateFailureAndWrap( (lx, lookupIndexResolution) -> preAnalyzeIndices( indices, diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index df974a88a4c57..2e8b856cf82a6 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -263,7 +263,7 @@ public final void test() throws Throwable { ); assumeFalse( "lookup join disabled for 
csv tests", - testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V3.capabilityName()) + testCase.requiredCapabilities.contains(EsqlCapabilities.Cap.JOIN_LOOKUP_V4.capabilityName()) ); if (Build.current().isSnapshot()) { assertThat( From ae1b1320996f8fb636f8f377bc9fa7b7743230a6 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 06:34:13 -0800 Subject: [PATCH 132/139] Only check non-negative stats for active, current and queue (#117834) In SimpleThreadPoolIT, stats are gathered for each threadpool being checked, then measurements are collected. Some stats may go up or down depending on other background tasks outside the test. This commit adjusts the check for those stats to only check collecting non-negative values. closes #108320 --- muted-tests.yml | 3 --- .../org/elasticsearch/threadpool/SimpleThreadPoolIT.java | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index cf39eae210f88..3652173327e84 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -141,9 +141,6 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testAllocationPreventedForRemoval issue: https://github.com/elastic/elasticsearch/issues/116363 -- class: org.elasticsearch.threadpool.SimpleThreadPoolIT - method: testThreadPoolMetrics - issue: https://github.com/elastic/elasticsearch/issues/108320 - class: org.elasticsearch.xpack.downsample.ILMDownsampleDisruptionIT method: testILMDownsampleRollingRestart issue: https://github.com/elastic/elasticsearch/issues/114233 diff --git a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java index be875421e036f..d2e021a8d7436 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/threadpool/SimpleThreadPoolIT.java @@ -167,10 +167,10 @@ public void testThreadPoolMetrics() throws Exception { tps[0].forEach(stats -> { Map threadPoolStats = List.of( Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_COMPLETED, stats.completed()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, (long) stats.active()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, (long) stats.threads()), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_ACTIVE, 0L), + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_CURRENT, 0L), Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_LARGEST, (long) stats.largest()), - Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, (long) stats.queue()) + Map.entry(ThreadPool.THREAD_POOL_METRIC_NAME_QUEUE, 0L) ).stream().collect(toUnmodifiableSortedMap(e -> stats.name() + e.getKey(), Entry::getValue)); Function> measurementExtractor = name -> { From 5ed106a79b278052842865d2e63c4817230af7ab Mon Sep 17 00:00:00 2001 From: Artem Prigoda Date: Tue, 3 Dec 2024 16:16:03 +0100 Subject: [PATCH 133/139] [test] Remove synchronization from InternalTestCluster#getInstance (#117780) The map of nodes is volatile and immutable and can be ready without synchronization. Getting a class's instance from the node's injector is also thread safe. Doing so prevents deadlocks if we restart the node and have a disruption scheme that internally calls `getInstance` from another thread. 
```
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490 BLOCKED on org.elasticsearch.test.InternalTestCluster@18a6d098 owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1653)
2> - blocked on org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.getInstance(InternalTestCluster.java:1620)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.transport(NetworkDisruption.java:172)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNodes(NetworkDisruption.java:157)
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.startDisrupting(NetworkDisruption.java:133)
2> "elasticsearch[StatelessClusterIntegrityStressIT][server][T#2]" ID=3492 BLOCKED on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e owned by "elasticsearch[StatelessClusterIntegrityStressIT][server][T#1]" ID=3490
2> at app//org.elasticsearch.test.disruption.NetworkDisruption.applyToNode(NetworkDisruption.java:116)
2> - blocked on org.elasticsearch.test.disruption.NetworkDisruption@60fd3a1e
2> at app//org.elasticsearch.test.InternalTestCluster.applyDisruptionSchemeToNode(InternalTestCluster.java:2307)
2> at app//org.elasticsearch.test.InternalTestCluster.publishNode(InternalTestCluster.java:2258)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1901)
2> at app//org.elasticsearch.test.InternalTestCluster.restartNode(InternalTestCluster.java:1863)
2> - locked org.elasticsearch.test.InternalTestCluster@18a6d098
```
---
 .../main/java/org/elasticsearch/test/InternalTestCluster.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
index 7a04384298933..6d46605e201f9 100644
--- a/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
+++ b/test/framework/src/main/java/org/elasticsearch/test/InternalTestCluster.java
@@ -1649,7 +1649,7 @@ public <T> T getAnyMasterNodeInstance(Class<T> clazz) {
 return getInstance(clazz, MASTER_NODE_PREDICATE);
 }
- private synchronized <T> T getInstance(Class<T> clazz, Predicate<NodeAndClient> predicate) {
+ private <T> T getInstance(Class<T> clazz, Predicate<NodeAndClient> predicate) {
 NodeAndClient randomNodeAndClient = getRandomNodeAndClient(predicate);
 if (randomNodeAndClient == null) {
 throw new AssertionError("no node matches [" + predicate + "]");

From 267dc1a41d49b11c6470ae1f83091debfc49e95f Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Tue, 3 Dec 2024 07:27:44 -0800
Subject: [PATCH 134/139] Fix BWC for ES|QL cluster request (#117865)

We identified a BWC bug in the cluster compute request. Specifically, the indices options were not properly selected for requests from an older querying cluster. This caused the search_shards API on the remote cluster to use restricted indices options, leading to failures when resolving wildcard index patterns. Our tests didn't catch this issue because the current BWC tests for cross-cluster queries only cover one direction: the querying cluster on the current version and the remote cluster on a compatible version.
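The indices options travel inside the serialized remote cluster plan (the diffstat below touches RemoteClusterPlan.java), so a fix of this kind lives on the deserialization path: gate the read on the transport version and fall back to lenient defaults for older senders. The following is a hedged sketch only — the transport-version constant and the exact stream types are assumptions, not necessarily what this change uses:

```java
import org.elasticsearch.TransportVersions;
import org.elasticsearch.action.OriginalIndices;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.common.io.stream.StreamInput;

import java.io.IOException;

class RemoteClusterPlanReadSketch {
    // Version-gated read of the original indices; ESQL_ORIGINAL_INDICES is an
    // assumed constant name, used here only to illustrate the gating pattern.
    static OriginalIndices readOriginalIndices(StreamInput in) throws IOException {
        if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) {
            // Newer querying clusters serialize the indices options explicitly.
            return OriginalIndices.readOriginalIndices(in);
        }
        // Older querying clusters never sent indices options; fall back to the
        // lenient search defaults instead of restricted single-index options so
        // that search_shards on the remote can still resolve wildcard patterns.
        return new OriginalIndices(in.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS);
    }
}
```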
This PR fixes the issue and expands BWC tests to support both directions: the querying cluster on the current version with the remote cluster on a compatible version, and vice versa. --- docs/changelog/117865.yaml | 5 + .../qa/server/multi-clusters/build.gradle | 17 +- .../xpack/esql/ccq/Clusters.java | 19 +- .../xpack/esql/ccq/EsqlRestValidationIT.java | 7 + .../xpack/esql/ccq/MultiClusterSpecIT.java | 7 +- .../xpack/esql/ccq/MultiClustersIT.java | 104 ++++++--- .../xpack/esql/qa/single_node/RestEsqlIT.java | 1 - .../xpack/esql/plugin/RemoteClusterPlan.java | 21 +- .../esql/plugin/ClusterRequestTests.java | 206 ++++++++++++++++++ 9 files changed, 345 insertions(+), 42 deletions(-) create mode 100644 docs/changelog/117865.yaml create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java diff --git a/docs/changelog/117865.yaml b/docs/changelog/117865.yaml new file mode 100644 index 0000000000000..33dc497725f92 --- /dev/null +++ b/docs/changelog/117865.yaml @@ -0,0 +1,5 @@ +pr: 117865 +summary: Fix BWC for ES|QL cluster request +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle index 7f3859e2229ef..d80cb764ca433 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle +++ b/x-pack/plugin/esql/qa/server/multi-clusters/build.gradle @@ -23,9 +23,22 @@ def supportedVersion = bwcVersion -> { } buildParams.bwcVersions.withWireCompatible(supportedVersion) { bwcVersion, baseName -> - tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + tasks.register("${baseName}#newToOld", StandaloneRestIntegTestTask) { + usesBwcDistribution(bwcVersion) + systemProperty("tests.version.remote_cluster", bwcVersion) + maxParallelForks = 1 + } + + tasks.register("${baseName}#oldToNew", StandaloneRestIntegTestTask) { usesBwcDistribution(bwcVersion) - systemProperty("tests.old_cluster_version", bwcVersion) + systemProperty("tests.version.local_cluster", bwcVersion) + maxParallelForks = 1 + } + + // TODO: avoid running tests twice with the current version + tasks.register(bwcTaskName(bwcVersion), StandaloneRestIntegTestTask) { + dependsOn tasks.named("${baseName}#oldToNew") + dependsOn tasks.named("${baseName}#newToOld") maxParallelForks = 1 } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java index fa8cb49c59aed..5f3f135810322 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/Clusters.java @@ -20,7 +20,7 @@ public static ElasticsearchCluster remoteCluster() { return ElasticsearchCluster.local() .name(REMOTE_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.fromString(System.getProperty("tests.old_cluster_version"))) + .version(distributionVersion("tests.version.remote_cluster")) .nodes(2) .setting("node.roles", "[data,ingest,master]") .setting("xpack.security.enabled", "false") @@ -34,7 +34,7 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust return ElasticsearchCluster.local() .name(LOCAL_CLUSTER_NAME) .distribution(DistributionType.DEFAULT) - .version(Version.CURRENT) + 
.version(distributionVersion("tests.version.local_cluster")) .nodes(2) .setting("xpack.security.enabled", "false") .setting("xpack.license.self_generated.type", "trial") @@ -46,7 +46,18 @@ public static ElasticsearchCluster localCluster(ElasticsearchCluster remoteClust .build(); } - public static org.elasticsearch.Version oldVersion() { - return org.elasticsearch.Version.fromString(System.getProperty("tests.old_cluster_version")); + public static org.elasticsearch.Version localClusterVersion() { + String prop = System.getProperty("tests.version.local_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + public static org.elasticsearch.Version remoteClusterVersion() { + String prop = System.getProperty("tests.version.remote_cluster"); + return prop != null ? org.elasticsearch.Version.fromString(prop) : org.elasticsearch.Version.CURRENT; + } + + private static Version distributionVersion(String key) { + final String val = System.getProperty(key); + return val != null ? Version.fromString(val) : Version.CURRENT; } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java index 21307c5362417..55500aa1c9537 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/EsqlRestValidationIT.java @@ -10,12 +10,14 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.RestClient; import org.elasticsearch.core.IOUtils; import org.elasticsearch.test.TestClustersThreadFilter; import org.elasticsearch.test.cluster.ElasticsearchCluster; import org.elasticsearch.xpack.esql.qa.rest.EsqlRestValidationTestCase; import org.junit.AfterClass; +import org.junit.Before; import org.junit.ClassRule; import org.junit.rules.RuleChain; import org.junit.rules.TestRule; @@ -78,4 +80,9 @@ private RestClient remoteClusterClient() throws IOException { } return remoteClient; } + + @Before + public void skipTestOnOldVersions() { + assumeTrue("skip on old versions", Clusters.localClusterVersion().equals(Version.V_8_16_0)); + } } diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java index 19b29764559d1..e658d169cbce8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClusterSpecIT.java @@ -12,6 +12,7 @@ import org.apache.http.HttpEntity; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.RestClient; @@ -118,10 +119,8 @@ protected void shouldSkipTest(String testName) throws IOException { // Do not run tests including "METADATA _index" unless marked with metadata_fields_remote_test, // because they may produce inconsistent results with 
multiple clusters. assumeFalse("can't test with _index metadata", (remoteMetadata == false) && hasIndexMetadata(testCase.query)); - assumeTrue( - "Test " + testName + " is skipped on " + Clusters.oldVersion(), - isEnabled(testName, instructions, Clusters.oldVersion()) - ); + Version oldVersion = Version.min(Clusters.localClusterVersion(), Clusters.remoteClusterVersion()); + assumeTrue("Test " + testName + " is skipped on " + oldVersion, isEnabled(testName, instructions, oldVersion)); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(INLINESTATS_V2.capabilityName())); assumeFalse("INLINESTATS not yet supported in CCS", testCase.requiredCapabilities.contains(JOIN_PLANNING_V1.capabilityName())); diff --git a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java index dbeaed1596eff..452f40baa34a8 100644 --- a/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java +++ b/x-pack/plugin/esql/qa/server/multi-clusters/src/javaRestTest/java/org/elasticsearch/xpack/esql/ccq/MultiClustersIT.java @@ -10,6 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import org.apache.http.HttpHost; +import org.elasticsearch.Version; import org.elasticsearch.client.Request; import org.elasticsearch.client.RestClient; import org.elasticsearch.common.Strings; @@ -29,7 +30,6 @@ import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; import java.util.stream.IntStream; @@ -127,10 +127,12 @@ void indexDocs(RestClient client, String index, List docs) throws IOExcepti } private Map run(String query, boolean includeCCSMetadata) throws IOException { - Map resp = runEsql( - new RestEsqlTestCase.RequestObjectBuilder().query(query).includeCCSMetadata(includeCCSMetadata).build() - ); - logger.info("--> query {} response {}", query, resp); + var queryBuilder = new RestEsqlTestCase.RequestObjectBuilder().query(query); + if (includeCCSMetadata) { + queryBuilder.includeCCSMetadata(true); + } + Map resp = runEsql(queryBuilder.build()); + logger.info("--> query {} response {}", queryBuilder, resp); return resp; } @@ -156,7 +158,7 @@ private Map runEsql(RestEsqlTestCase.RequestObjectBuilder reques public void testCount() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(localDocs.size() + remoteDocs.size())); @@ -165,13 +167,16 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - 
boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS c = COUNT(*)", includeCCSMetadata); var columns = List.of(Map.of("name", "c", "type", "long")); var values = List.of(List.of(remoteDocs.size())); @@ -180,7 +185,10 @@ public void testCount() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } @@ -189,7 +197,7 @@ public void testCount() throws Exception { public void testUngroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM test-local-index,*:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = Stream.concat(localDocs.stream(), remoteDocs.stream()).mapToLong(d -> d.data).sum(); @@ -200,13 +208,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run("FROM *:test-remote-index | STATS total = SUM(data)", includeCCSMetadata); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -216,12 +227,16 @@ public void testUngroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } { + assumeTrue("requires ccs metadata", ccsMetadataAvailable()); Map result = runWithColumnarAndIncludeCCSMetadata("FROM *:test-remote-index | STATS total = SUM(data)"); var columns = List.of(Map.of("name", "total", "type", "long")); long sum = remoteDocs.stream().mapToLong(d -> d.data).sum(); @@ -293,7 +308,7 @@ private void assertClusterDetailsMap(Map result, boolean remoteO public void testGroupedAggs() throws Exception { { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM test-local-index,*:test-remote-index | STATS total = SUM(data) BY color | SORT color", includeCCSMetadata @@ -311,13 +326,16 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, 
mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, false); } } { - boolean includeCCSMetadata = randomBoolean(); + boolean includeCCSMetadata = includeCCSMetadata(); Map result = run( "FROM *:test-remote-index | STATS total = SUM(data) by color | SORT color", includeCCSMetadata @@ -336,29 +354,57 @@ public void testGroupedAggs() throws Exception { if (includeCCSMetadata) { mapMatcher = mapMatcher.entry("_clusters", any(Map.class)); } - assertMap(result, mapMatcher.entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); if (includeCCSMetadata) { assertClusterDetailsMap(result, true); } } } + public void testIndexPattern() throws Exception { + { + String indexPattern = randomFrom( + "test-local-index,*:test-remote-index", + "test-local-index,*:test-remote-*", + "test-local-index,*:test-*", + "test-*,*:test-remote-index" + ); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(localDocs.size() + remoteDocs.size())); + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + { + String indexPattern = randomFrom("*:test-remote-index", "*:test-remote-*", "*:test-*"); + Map result = run("FROM " + indexPattern + " | STATS c = COUNT(*)", false); + var columns = List.of(Map.of("name", "c", "type", "long")); + var values = List.of(List.of(remoteDocs.size())); + + MapMatcher mapMatcher = matchesMap(); + if (ccsMetadataAvailable()) { + mapMatcher = mapMatcher.entry("took", greaterThanOrEqualTo(0)); + } + assertMap(result, mapMatcher.entry("columns", columns).entry("values", values)); + } + } + private RestClient remoteClusterClient() throws IOException { var clusterHosts = parseClusterHosts(remoteCluster.getHttpAddresses()); return buildClient(restClientSettings(), clusterHosts.toArray(new HttpHost[0])); } - private TestFeatureService remoteFeaturesService() throws IOException { - if (remoteFeaturesService == null) { - try (RestClient remoteClient = remoteClusterClient()) { - var remoteNodeVersions = readVersionsFromNodesInfo(remoteClient); - var semanticNodeVersions = remoteNodeVersions.stream() - .map(ESRestTestCase::parseLegacyVersion) - .flatMap(Optional::stream) - .collect(Collectors.toSet()); - remoteFeaturesService = createTestFeatureService(getClusterStateFeatures(remoteClient), semanticNodeVersions); - } - } - return remoteFeaturesService; + private static boolean ccsMetadataAvailable() { + return Clusters.localClusterVersion().onOrAfter(Version.V_8_16_0); + } + + private static boolean includeCCSMetadata() { + return ccsMetadataAvailable() && randomBoolean(); } } diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java 
b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java index 9a184b9a620fd..050259bbb5b5c 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RestEsqlIT.java @@ -76,7 +76,6 @@ public void testBasicEsql() throws IOException { indexTimestampData(1); RequestObjectBuilder builder = requestObjectBuilder().query(fromIndex() + " | stats avg(value)"); - requestObjectBuilder().includeCCSMetadata(randomBoolean()); if (Build.current().isSnapshot()) { builder.pragmas(Settings.builder().put("data_partitioning", "shard").build()); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java index 8564e4b3afde1..031bfd7139a84 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/RemoteClusterPlan.java @@ -9,12 +9,14 @@ import org.elasticsearch.TransportVersions; import org.elasticsearch.action.OriginalIndices; -import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.io.stream.PlanStreamOutput; import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; import java.io.IOException; +import java.util.Arrays; +import java.util.Objects; record RemoteClusterPlan(PhysicalPlan plan, String[] targetIndices, OriginalIndices originalIndices) { static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { @@ -24,7 +26,8 @@ static RemoteClusterPlan from(PlanStreamInput planIn) throws IOException { if (planIn.getTransportVersion().onOrAfter(TransportVersions.ESQL_ORIGINAL_INDICES)) { originalIndices = OriginalIndices.readOriginalIndices(planIn); } else { - originalIndices = new OriginalIndices(planIn.readStringArray(), IndicesOptions.strictSingleIndexNoExpandForbidClosed()); + // fallback to the previous behavior + originalIndices = new OriginalIndices(planIn.readStringArray(), SearchRequest.DEFAULT_INDICES_OPTIONS); } return new RemoteClusterPlan(plan, targetIndices, originalIndices); } @@ -38,4 +41,18 @@ public void writeTo(PlanStreamOutput out) throws IOException { out.writeStringArray(originalIndices.indices()); } } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + RemoteClusterPlan that = (RemoteClusterPlan) o; + return Objects.equals(plan, that.plan) + && Objects.deepEquals(targetIndices, that.targetIndices) + && Objects.equals(originalIndices, that.originalIndices); + } + + @Override + public int hashCode() { + return Objects.hash(plan, Arrays.hashCode(targetIndices), originalIndices); + } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java new file mode 100644 index 0000000000000..07ca112e8c527 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plugin; + +import org.elasticsearch.TransportVersions; +import org.elasticsearch.action.OriginalIndices; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.search.SearchModule; +import org.elasticsearch.test.AbstractWireSerializingTestCase; +import org.elasticsearch.test.TransportVersionUtils; +import org.elasticsearch.xpack.esql.ConfigurationTestUtils; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerContext; +import org.elasticsearch.xpack.esql.core.type.EsField; +import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; +import org.elasticsearch.xpack.esql.index.EsIndex; +import org.elasticsearch.xpack.esql.index.IndexResolution; +import org.elasticsearch.xpack.esql.optimizer.LogicalOptimizerContext; +import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; +import org.elasticsearch.xpack.esql.parser.EsqlParser; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.physical.PhysicalPlan; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomConfiguration; +import static org.elasticsearch.xpack.esql.ConfigurationTestUtils.randomTables; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.emptyPolicyResolution; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.loadMapping; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; +import static org.hamcrest.Matchers.equalTo; + +public class ClusterRequestTests extends AbstractWireSerializingTestCase { + + @Override + protected Writeable.Reader instanceReader() { + return ClusterComputeRequest::new; + } + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + List writeables = new ArrayList<>(); + writeables.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); + writeables.addAll(new EsqlPlugin().getNamedWriteables()); + return new NamedWriteableRegistry(writeables); + } + + @Override + protected ClusterComputeRequest createTestInstance() { + var sessionId = randomAlphaOfLength(10); + String query = randomQuery(); + PhysicalPlan physicalPlan = DataNodeRequestTests.mapAndMaybeOptimize(parse(query)); + OriginalIndices originalIndices = new OriginalIndices( + generateRandomStringArray(10, 10, false, false), + IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ); + String[] targetIndices = generateRandomStringArray(10, 10, false, false); + ClusterComputeRequest request = new ClusterComputeRequest( + randomAlphaOfLength(10), + sessionId, + randomConfiguration(query, randomTables()), + new RemoteClusterPlan(physicalPlan, targetIndices, 
originalIndices) + ); + request.setParentTask(randomAlphaOfLength(10), randomNonNegativeLong()); + return request; + } + + @Override + protected ClusterComputeRequest mutateInstance(ClusterComputeRequest in) throws IOException { + return switch (between(0, 4)) { + case 0 -> { + var request = new ClusterComputeRequest( + randomValueOtherThan(in.clusterAlias(), () -> randomAlphaOfLength(10)), + in.sessionId(), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 1 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + randomValueOtherThan(in.sessionId(), () -> randomAlphaOfLength(10)), + in.configuration(), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 2 -> { + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + randomValueOtherThan(in.configuration(), ConfigurationTestUtils::randomConfiguration), + in.remoteClusterPlan() + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 3 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + randomValueOtherThan(plan.targetIndices(), () -> generateRandomStringArray(10, 10, false, false)), + plan.originalIndices() + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + case 4 -> { + RemoteClusterPlan plan = in.remoteClusterPlan(); + var request = new ClusterComputeRequest( + in.clusterAlias(), + in.sessionId(), + in.configuration(), + new RemoteClusterPlan( + plan.plan(), + plan.targetIndices(), + new OriginalIndices( + plan.originalIndices().indices(), + randomValueOtherThan( + plan.originalIndices().indicesOptions(), + () -> IndicesOptions.fromOptions(randomBoolean(), randomBoolean(), randomBoolean(), randomBoolean()) + ) + ) + ) + ); + request.setParentTask(in.getParentTask()); + yield request; + } + default -> throw new AssertionError("invalid value"); + }; + } + + public void testFallbackIndicesOptions() throws Exception { + ClusterComputeRequest request = createTestInstance(); + var version = TransportVersionUtils.randomVersionBetween( + random(), + TransportVersions.V_8_14_0, + TransportVersions.ESQL_ORIGINAL_INDICES + ); + ClusterComputeRequest cloned = copyInstance(request, version); + assertThat(cloned.clusterAlias(), equalTo(request.clusterAlias())); + assertThat(cloned.sessionId(), equalTo(request.sessionId())); + assertThat(cloned.configuration(), equalTo(request.configuration())); + RemoteClusterPlan plan = cloned.remoteClusterPlan(); + assertThat(plan.plan(), equalTo(request.remoteClusterPlan().plan())); + assertThat(plan.targetIndices(), equalTo(request.remoteClusterPlan().targetIndices())); + OriginalIndices originalIndices = plan.originalIndices(); + assertThat(originalIndices.indices(), equalTo(request.remoteClusterPlan().originalIndices().indices())); + assertThat(originalIndices.indicesOptions(), equalTo(SearchRequest.DEFAULT_INDICES_OPTIONS)); + } + + private static String randomQuery() { + return randomFrom(""" + from test + | where round(emp_no) > 10 + | limit 10 + """, """ + from test + | sort last_name + | limit 10 + | where round(emp_no) > 10 + | eval c = first_name + """); + } + + static LogicalPlan parse(String query) { + Map mapping = loadMapping("mapping-basic.json"); + EsIndex test = new EsIndex("test", mapping, Map.of("test", IndexMode.STANDARD)); + 
IndexResolution getIndexResult = IndexResolution.valid(test); + var logicalOptimizer = new LogicalPlanOptimizer(new LogicalOptimizerContext(TEST_CFG)); + var analyzer = new Analyzer( + new AnalyzerContext(EsqlTestUtils.TEST_CFG, new EsqlFunctionRegistry(), getIndexResult, emptyPolicyResolution()), + TEST_VERIFIER + ); + return logicalOptimizer.optimize(analyzer.analyze(new EsqlParser().createStatement(query))); + } + + @Override + protected List filteredWarnings() { + return withDefaultLimitWarning(super.filteredWarnings()); + } +} From 00a1222f10a6bc605f67aee67d4053c5ba0557e8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Wed, 4 Dec 2024 02:32:41 +1100 Subject: [PATCH 135/139] Mute org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests testToQuery #117904 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 3652173327e84..857266a5a47cd 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -238,6 +238,9 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 +- class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests + method: testToQuery + issue: https://github.com/elastic/elasticsearch/issues/117904 # Examples: # From c1a9d44ed4ac980130deb730991f10cce127c583 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Tue, 3 Dec 2024 08:42:49 -0800 Subject: [PATCH 136/139] Guard against missing file in CI upload (#117889) Somehow files can be lost before the build ends up uploading them, presumably from temporary file deletion after tests complete. This commit guards against this case so that the build will not completely fail, but instead log a warning.
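The guard itself is a small warn-and-continue check. A minimal standalone sketch of the pattern, with simplified names and plain java.nio (the actual change in ElasticsearchBuildCompletePlugin, shown below, logs through SLF4J instead):

```
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

// Sketch: skip files that vanished between test completion and archiving,
// rather than failing the whole build; non-regular files still fail fast.
class MissingFileGuardSketch {
    static void addToArchive(List<Path> files) throws IOException {
        for (Path path : files) {
            if (Files.exists(path) == false) {
                System.err.println("File disappeared before it could be added to CI archive: " + path);
                continue;
            }
            if (Files.isRegularFile(path) == false) {
                throw new IOException("Support only file!: " + path);
            }
            // ... append `path` to the tar archive stream here ...
        }
    }
}
```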
--- .../internal/ElasticsearchBuildCompletePlugin.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java index 14baa55794c95..b1207a2f5161d 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java @@ -29,6 +29,8 @@ import org.gradle.api.provider.Property; import org.gradle.api.tasks.Input; import org.jetbrains.annotations.NotNull; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; @@ -47,6 +49,8 @@ public abstract class ElasticsearchBuildCompletePlugin implements Plugin { + private static final Logger log = LoggerFactory.getLogger(ElasticsearchBuildCompletePlugin.class); + @Inject protected abstract FlowScope getFlowScope(); @@ -241,8 +245,11 @@ private static void createBuildArchiveTar(List files, File projectDir, Fil tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU); tOut.setBigNumberMode(TarArchiveOutputStream.BIGNUMBER_STAR); for (Path path : files.stream().map(File::toPath).toList()) { - if (!Files.isRegularFile(path)) { - throw new IOException("Support only file!"); + if (Files.exists(path) == false) { + log.warn("File disappeared before it could be added to CI archive: " + path); + continue; + } else if (!Files.isRegularFile(path)) { + throw new IOException("Support only file!: " + path); } long entrySize = Files.size(path); From 0a208279ea869fafe7ee9b4c4ac60d4b9816bd25 Mon Sep 17 00:00:00 2001 From: Luigi Dell'Aquila Date: Tue, 3 Dec 2024 17:53:10 +0100 Subject: [PATCH 137/139] ES|QL fix telemetry tests (usage stats) after promoting CATEGORIZE (#117878) --- muted-tests.yml | 3 --- .../resources/rest-api-spec/test/esql/60_usage.yml | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 857266a5a47cd..7bd06a6605028 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -233,9 +233,6 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/117815 - class: org.elasticsearch.xpack.ml.integration.DatafeedJobsRestIT issue: https://github.com/elastic/elasticsearch/issues/111319 -- class: org.elasticsearch.xpack.test.rest.XPackRestIT - method: test {p0=esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version} - issue: https://github.com/elastic/elasticsearch/issues/117862 - class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/117893 - class: org.elasticsearch.xpack.core.ml.search.SparseVectorQueryBuilderTests diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index f7dd979540afa..c23b44c00bd14 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -163,4 +163,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 118} # check the "sister" test above for a likely update to the 
same esql.functions length check + - length: {esql.functions: 119} # check the "sister" test above for a likely update to the same esql.functions length check From 22f4a799377ea8710076ff10b74fbb48724a0c09 Mon Sep 17 00:00:00 2001 From: Andrei Stefan Date: Tue, 3 Dec 2024 20:08:05 +0200 Subject: [PATCH 138/139] Smarter field caps with subscribable listener (#116755) --- docs/changelog/116755.yaml | 5 + .../multi_node/RequestIndexFilteringIT.java | 27 ++ .../single_node/RequestIndexFilteringIT.java | 27 ++ .../rest/RequestIndexFilteringTestCase.java | 284 ++++++++++++++++ .../esql/qa/rest/RestEnrichTestCase.java | 176 +++++++++- .../esql/enrich/EnrichPolicyResolver.java | 2 +- .../xpack/esql/session/EsqlSession.java | 315 ++++++++++++------ .../xpack/esql/session/IndexResolver.java | 13 +- 8 files changed, 741 insertions(+), 108 deletions(-) create mode 100644 docs/changelog/116755.yaml create mode 100644 x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java create mode 100644 x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java diff --git a/docs/changelog/116755.yaml b/docs/changelog/116755.yaml new file mode 100644 index 0000000000000..3aa5ec8580b59 --- /dev/null +++ b/docs/changelog/116755.yaml @@ -0,0 +1,5 @@ +pr: 116755 +summary: Smarter field caps with subscribable listener +area: ES|QL +type: enhancement +issues: [] diff --git a/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..c2ba502b92554 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/multi-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/multi_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.multi_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(ignored -> {}); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java new file mode 100644 index 0000000000000..f13bcd618f0a8 --- /dev/null +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/RequestIndexFilteringIT.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.qa.single_node; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; + +import org.elasticsearch.test.TestClustersThreadFilter; +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.xpack.esql.qa.rest.RequestIndexFilteringTestCase; +import org.junit.ClassRule; + +@ThreadLeakFilters(filters = TestClustersThreadFilter.class) +public class RequestIndexFilteringIT extends RequestIndexFilteringTestCase { + + @ClassRule + public static ElasticsearchCluster cluster = Clusters.testCluster(); + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java new file mode 100644 index 0000000000000..3314430d63eaa --- /dev/null +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RequestIndexFilteringTestCase.java @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.qa.rest; + +import org.apache.http.util.EntityUtils; +import org.elasticsearch.client.Request; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.AssertWarnings; +import org.junit.After; +import org.junit.Assert; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import static org.elasticsearch.test.ListMatcher.matchesList; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.entityToMap; +import static org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase.requestObjectBuilder; +import static org.hamcrest.Matchers.allOf; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.nullValue; + +public abstract class RequestIndexFilteringTestCase extends ESRestTestCase { + + @After + public void wipeTestData() throws IOException { + try { + var response = client().performRequest(new Request("DELETE", "/test*")); + assertEquals(200, response.getStatusLine().getStatusCode()); + } catch (ResponseException re) { + assertEquals(404, re.getResponse().getStatusLine().getStatusCode()); + } + } + + public void testTimestampFilterFromQuery() throws IOException { + int docsTest1 = 50; + int docsTest2 = 30; + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes both indices in the result (all columns, all rows) + RestEsqlTestCase.RequestObjectBuilder builder = timestampFilter("gte", "2023-01-01").query("FROM test*"); + Map result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1 + docsTest2))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = timestampFilter("gte", "2024-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter excludes both indices (no rows); the first analysis step fails because there are no columns, a second attempt succeeds + // after eliminating the index filter. All columns are returned. 
+ builder = timestampFilter("gte", "2025-01-01").query("FROM test*"); + assertMap( + runEsql(builder), + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(0))).entry("took", greaterThanOrEqualTo(0)) + ); + } + + public void testFieldExistsFilter_KeepWildcard() throws IOException { + int docsTest1 = randomIntBetween(0, 10); + int docsTest2 = randomIntBetween(0, 10); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // filter includes only test1. Columns and rows of test2 are filtered out + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query("FROM test*"); + Map result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "@timestamp").entry("type", "date")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "value").entry("type", "long")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + + // filter includes only test1. Columns from test2 are filtered out, as well (not only rows)! + builder = existsFilter("id1").query("FROM test* METADATA _index | KEEP _index, id*"); + result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List>) result.get("values"); + for (List row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + } + } + + public void testFieldExistsFilter_With_ExplicitUseOfDiscardedIndexFields() throws IOException { + int docsTest1 = randomIntBetween(1, 5); + int docsTest2 = randomIntBetween(0, 5); + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // test2 is explicitly used in a query with "SORT id2" even if the index filter should discard test2 + RestEsqlTestCase.RequestObjectBuilder builder = existsFilter("id1").query( + "FROM test* METADATA _index | SORT id2 | KEEP _index, id*" + ); + Map result = runEsql(builder); + assertMap( + result, + matchesMap().entry( + "columns", + matchesList().item(matchesMap().entry("name", "_index").entry("type", "keyword")) + .item(matchesMap().entry("name", "id1").entry("type", "integer")) + .item(matchesMap().entry("name", "id2").entry("type", "integer")) + ).entry("values", allOf(instanceOf(List.class), hasSize(docsTest1))).entry("took", greaterThanOrEqualTo(0)) + ); + @SuppressWarnings("unchecked") + var values = (List>) result.get("values"); + for (List row : values) { + assertThat(row.get(0), equalTo("test1")); + assertThat(row.get(1), instanceOf(Integer.class)); + assertThat(row.get(2), nullValue()); + } + } + + public void testFieldNameTypo() throws IOException { + int docsTest1 = randomIntBetween(0, 5); + int docsTest2 = randomIntBetween(0, 5); 
indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + indexTimestampData(docsTest2, "test2", "2023-11-26", "id2"); + + // idx field name is explicitly used, though it doesn't exist in any of the indices. First test - without filter + ResponseException e = expectThrows( + ResponseException.class, + () -> runEsql(requestObjectBuilder().query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows(ResponseException.class, () -> runEsql(requestObjectBuilder().query("FROM test1 | WHERE idx == 123"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test* | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + + e = expectThrows( + ResponseException.class, + () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM test2 | WHERE idx == 123")) + ); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 1:20: Unknown column [idx]")); + } + + public void testIndicesDontExist() throws IOException { + int docsTest1 = 0; // we are interested only in the created index, not necessarily that it has data + indexTimestampData(docsTest1, "test1", "2024-11-26", "id1"); + + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo*"))); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("verification_exception")); + assertThat(e.getMessage(), containsString("Unknown index [foo*]")); + + e = expectThrows(ResponseException.class, () -> runEsql(timestampFilter("gte", "2020-01-01").query("FROM foo,test1"))); + assertEquals(404, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("index_not_found_exception")); + assertThat(e.getMessage(), containsString("no such index [foo]")); + } + + private static RestEsqlTestCase.RequestObjectBuilder timestampFilter(String op, String date) throws IOException { + return requestObjectBuilder().filter(b -> { + b.startObject("range"); + { + b.startObject("@timestamp").field(op, date).endObject(); + } + b.endObject(); + }); + } + + private static RestEsqlTestCase.RequestObjectBuilder existsFilter(String field) throws IOException { + return requestObjectBuilder().filter(b -> b.startObject("exists").field("field", 
field).endObject()); + } + + public Map runEsql(RestEsqlTestCase.RequestObjectBuilder requestObject) throws IOException { + return RestEsqlTestCase.runEsql(requestObject, new AssertWarnings.NoWarnings(), RestEsqlTestCase.Mode.SYNC); + } + + protected void indexTimestampData(int docs, String indexName, String date, String differentiatorFieldName) throws IOException { + Request createIndex = new Request("PUT", indexName); + createIndex.setJsonEntity(""" + { + "settings": { + "index": { + "number_of_shards": 3 + } + }, + "mappings": { + "properties": { + "@timestamp": { + "type": "date" + }, + "%differentiator_field_name%": { + "type": "integer" + } + } + } + }""".replace("%differentiator_field_name%", differentiatorFieldName)); + Response response = client().performRequest(createIndex); + assertThat( + entityToMap(response.getEntity(), XContentType.JSON), + matchesMap().entry("shards_acknowledged", true).entry("index", indexName).entry("acknowledged", true) + ); + + if (docs > 0) { + StringBuilder b = new StringBuilder(); + for (int i = 0; i < docs; i++) { + b.append(String.format(Locale.ROOT, """ + {"create":{"_index":"%s"}} + {"@timestamp":"%s","value":%d,"%s":%d} + """, indexName, date, i, differentiatorFieldName, i)); + } + Request bulk = new Request("POST", "/_bulk"); + bulk.addParameter("refresh", "true"); + bulk.addParameter("filter_path", "errors"); + bulk.setJsonEntity(b.toString()); + response = client().performRequest(bulk); + Assert.assertEquals("{\"errors\":false}", EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8)); + } + } +} diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java index def6491fb920f..bf4a4400e13cf 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEnrichTestCase.java @@ -12,7 +12,9 @@ import org.apache.http.util.EntityUtils; import org.elasticsearch.client.Request; import org.elasticsearch.client.ResponseException; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.test.rest.ESRestTestCase; +import org.elasticsearch.xcontent.XContentBuilder; import org.junit.After; import org.junit.Before; @@ -29,7 +31,6 @@ public abstract class RestEnrichTestCase extends ESRestTestCase { private static final String sourceIndexName = "countries"; - private static final String testIndexName = "test"; private static final String policyName = "countries"; public enum Mode { @@ -56,7 +57,7 @@ public void assertRequestBreakerEmpty() throws Exception { @Before public void loadTestData() throws IOException { - Request request = new Request("PUT", "/" + testIndexName); + Request request = new Request("PUT", "/test1"); request.setJsonEntity(""" { "mappings": { @@ -72,7 +73,7 @@ public void loadTestData() throws IOException { }"""); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); - request = new Request("POST", "/" + testIndexName + "/_bulk"); + request = new Request("POST", "/test1/_bulk"); request.addParameter("refresh", "true"); request.setJsonEntity(""" { "index": {"_id": 1} } @@ -84,6 +85,34 @@ public void loadTestData() throws IOException { """); assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/test2"); + 
request.setJsonEntity(""" + { + "mappings": { + "properties": { + "geo.dest": { + "type": "keyword" + }, + "country_number": { + "type": "long" + } + } + } + }"""); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + + request = new Request("POST", "/test2/_bulk"); + request.addParameter("refresh", "true"); + request.setJsonEntity(""" + { "index": {"_id": 1} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 2} } + { "geo.dest": "IN", "country_number": 2 } + { "index": {"_id": 3} } + { "geo.dest": "US", "country_number": 3 } + """); + assertEquals(200, client().performRequest(request).getStatusLine().getStatusCode()); + request = new Request("PUT", "/" + sourceIndexName); request.setJsonEntity(""" { @@ -131,7 +160,7 @@ public void loadTestData() throws IOException { @After public void wipeTestData() throws IOException { try { - var response = client().performRequest(new Request("DELETE", "/" + testIndexName)); + var response = client().performRequest(new Request("DELETE", "/test1,test2")); assertEquals(200, response.getStatusLine().getStatusCode()); response = client().performRequest(new Request("DELETE", "/" + sourceIndexName)); assertEquals(200, response.getStatusLine().getStatusCode()); @@ -143,7 +172,7 @@ public void wipeTestData() throws IOException { } public void testNonExistentEnrichPolicy() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris", Mode.SYNC)); + ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test1 | enrich countris", null, Mode.SYNC)); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -151,7 +180,10 @@ public void testNonExistentEnrichPolicy() throws IOException { } public void testNonExistentEnrichPolicy_KeepField() throws IOException { - ResponseException re = expectThrows(ResponseException.class, () -> runEsql("from test | enrich countris | keep number", Mode.SYNC)); + ResponseException re = expectThrows( + ResponseException.class, + () -> runEsql("from test1 | enrich countris | keep number", null, Mode.SYNC) + ); assertThat( EntityUtils.toString(re.getResponse().getEntity()), containsString("cannot find enrich policy [countris], did you mean [countries]?") @@ -159,25 +191,147 @@ public void testNonExistentEnrichPolicy_KeepField() throws IOException { } public void testMatchField_ImplicitFieldsList() throws IOException { - Map result = runEsql("from test | enrich countries | keep number | sort number"); + Map result = runEsql("from test1 | enrich countries | keep number | sort number"); var columns = List.of(Map.of("name", "number", "type", "long")); var values = List.of(List.of(1000), List.of(1000), List.of(5000)); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); } public void testMatchField_ImplicitFieldsList_WithStats() throws IOException { - Map result = runEsql("from test | enrich countries | stats s = sum(number) by country_name"); + Map result = runEsql("from test1 | enrich countries | stats s = sum(number) by country_name"); var columns = List.of(Map.of("name", "s", "type", "long"), Map.of("name", "country_name", "type", "keyword")); var values = List.of(List.of(2000, "United States of America"), List.of(5000, "China")); assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", 
greaterThanOrEqualTo(0))); } + public void testSimpleIndexFilteringWithEnrich() throws IOException { + // no filter + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """); + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(2, null, "IN", "test2"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(3, null, "US", "test2") + ); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter something that won't affect the columns + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "foobar").endObject()); + assertMap(result, matchesMap().entry("columns", columns).entry("values", List.of()).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_RemoveOneIndex() throws IOException { + // filter out test2 but specifically use one of its fields in the query (country_number) + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of( + Arrays.asList(null, 5000, "CN", "test1"), + Arrays.asList(null, 1000, "US", "test1"), + Arrays.asList(null, 1000, "US", "test1") + ); + + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + + // filter out test2 and use a wildcarded field name in the "keep" command + result = runEsql(""" + from test* metadata _index + | enrich countries + | keep *number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "number").endObject()); + + columns = List.of( + Map.of("name", "number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + values = List.of(Arrays.asList(5000, "CN", "test1"), Arrays.asList(1000, "US", "test1"), Arrays.asList(1000, "US", "test1")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + + public void testIndexFilteringWithEnrich_ExpectException() throws IOException { + // no filter, just a simple query with "enrich" that should throw a valid VerificationException + ResponseException e = expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """)); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + + // same query, but with a filter this time + e = 
expectThrows(ResponseException.class, () -> runEsql(""" + from test* metadata _index + | enrich countries + | where foobar == 123 + """, b -> b.startObject("exists").field("field", "number").endObject())); + assertEquals(400, e.getResponse().getStatusLine().getStatusCode()); + assertThat(e.getMessage(), containsString("Found 1 problem")); + assertThat(e.getMessage(), containsString("line 3:13: Unknown column [foobar]")); + } + + public void testIndexFilteringWithEnrich_FilterUnusedIndexFields() throws IOException { + // filter out "test1". The field that is specific to "test1" ("number") is not actually used in the query + Map result = runEsql(""" + from test* metadata _index + | enrich countries + | keep country_number, geo.dest, _index + | sort geo.dest, _index + """, b -> b.startObject("exists").field("field", "country_number").endObject()); + + var columns = List.of( + Map.of("name", "country_number", "type", "long"), + Map.of("name", "geo.dest", "type", "keyword"), + Map.of("name", "_index", "type", "keyword") + ); + var values = List.of(Arrays.asList(2, "IN", "test2"), Arrays.asList(2, "IN", "test2"), Arrays.asList(3, "US", "test2")); + assertMap(result, matchesMap().entry("columns", columns).entry("values", values).entry("took", greaterThanOrEqualTo(0))); + } + private Map runEsql(String query) throws IOException { - return runEsql(query, mode); + return runEsql(query, null, mode); } - private Map runEsql(String query, Mode mode) throws IOException { - var requestObject = new RestEsqlTestCase.RequestObjectBuilder().query(query); + private Map runEsql(String query, CheckedConsumer filter) throws IOException { + return runEsql(query, filter, mode); + } + + private Map runEsql(String query, CheckedConsumer filter, Mode mode) throws IOException { + var requestObject = new RestEsqlTestCase.RequestObjectBuilder(); + if (filter != null) { + requestObject.filter(filter); + } + requestObject.query(query); if (mode == Mode.ASYNC) { return RestEsqlTestCase.runEsqlAsync(requestObject); } else { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index c8a7a6bcc4e98..c8e993b7dbf0b 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -411,7 +411,7 @@ public void messageReceived(LookupRequest request, TransportChannel channel, Tas } try (ThreadContext.StoredContext ignored = threadContext.stashWithOrigin(ClientHelper.ENRICH_ORIGIN)) { String indexName = EnrichPolicy.getBaseName(policyName); - indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, refs.acquire(indexResult -> { + indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, null, refs.acquire(indexResult -> { if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) { EsIndex esIndex = indexResult.get(); var concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0)); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index 3d1ed8f70eae0..71fba5683644d 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ 
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java
index c8a7a6bcc4e98..c8e993b7dbf0b 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java
@@ -411,7 +411,7 @@ public void messageReceived(LookupRequest request, TransportChannel channel, Tas
         }
         try (ThreadContext.StoredContext ignored = threadContext.stashWithOrigin(ClientHelper.ENRICH_ORIGIN)) {
             String indexName = EnrichPolicy.getBaseName(policyName);
-            indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, refs.acquire(indexResult -> {
+            indexResolver.resolveAsMergedMapping(indexName, IndexResolver.ALL_FIELDS, null, refs.acquire(indexResult -> {
                 if (indexResult.isValid() && indexResult.get().concreteIndices().size() == 1) {
                     EsIndex esIndex = indexResult.get();
                     var concreteIndices = Map.of(request.clusterAlias, Iterables.get(esIndex.concreteIndices(), 0));
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
index 3d1ed8f70eae0..71fba5683644d 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java
@@ -11,6 +11,7 @@
 import org.elasticsearch.action.OriginalIndices;
 import org.elasticsearch.action.search.ShardSearchFailure;
 import org.elasticsearch.action.support.IndicesOptions;
+import org.elasticsearch.action.support.SubscribableListener;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.TriFunction;
 import org.elasticsearch.common.collect.Iterators;
@@ -25,6 +26,7 @@
 import org.elasticsearch.indices.IndicesExpressionGrouper;
 import org.elasticsearch.logging.LogManager;
 import org.elasticsearch.logging.Logger;
+import org.elasticsearch.xpack.esql.VerificationException;
 import org.elasticsearch.xpack.esql.action.EsqlExecutionInfo;
 import org.elasticsearch.xpack.esql.action.EsqlQueryRequest;
 import org.elasticsearch.xpack.esql.analysis.Analyzer;
@@ -151,6 +153,7 @@ public void execute(EsqlQueryRequest request, EsqlExecutionInfo executionInfo, P
         analyzedPlan(
             parse(request.query(), request.params()),
             executionInfo,
+            request.filter(),
             new EsqlSessionCCSUtils.CssPartialErrorsActionListener(executionInfo, listener) {
                 @Override
                 public void onResponse(LogicalPlan analyzedPlan) {
@@ -268,31 +271,28 @@ private LogicalPlan parse(String query, QueryParams params) {
         return parsed;
     }
 
-    public void analyzedPlan(LogicalPlan parsed, EsqlExecutionInfo executionInfo, ActionListener<LogicalPlan> listener) {
+    public void analyzedPlan(
+        LogicalPlan parsed,
+        EsqlExecutionInfo executionInfo,
+        QueryBuilder requestFilter,
+        ActionListener<LogicalPlan> logicalPlanListener
+    ) {
         if (parsed.analyzed()) {
-            listener.onResponse(parsed);
+            logicalPlanListener.onResponse(parsed);
             return;
         }
 
-        preAnalyze(parsed, executionInfo, (indices, lookupIndices, policies) -> {
+        TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction = (indices, lookupIndices, policies) -> {
             planningMetrics.gatherPreAnalysisMetrics(parsed);
             Analyzer analyzer = new Analyzer(
                 new AnalyzerContext(configuration, functionRegistry, indices, lookupIndices, policies),
                 verifier
             );
-            var plan = analyzer.analyze(parsed);
+            LogicalPlan plan = analyzer.analyze(parsed);
             plan.setAnalyzed();
-            LOGGER.debug("Analyzed plan:\n{}", plan);
             return plan;
-        }, listener);
-    }
+        };
 
-    private void preAnalyze(
-        LogicalPlan parsed,
-        EsqlExecutionInfo executionInfo,
-        TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> action,
-        ActionListener<LogicalPlan> listener
-    ) {
         PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed);
         var unresolvedPolicies = preAnalysis.enriches.stream()
             .map(e -> new EnrichPolicyResolver.UnresolvedPolicy((String) e.policyName().fold(), e.mode()))
@@ -302,81 +302,113 @@ private void preAnalyze(
         final Set<String> targetClusters = enrichPolicyResolver.groupIndicesPerCluster(
             indices.stream().flatMap(t -> Arrays.stream(Strings.commaDelimitedListToStringArray(t.id().index()))).toArray(String[]::new)
         ).keySet();
-        enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, listener.delegateFailureAndWrap((l, enrichResolution) -> {
-            // first we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API
-            var enrichMatchFields = enrichResolution.resolvedEnrichPolicies()
-                .stream()
-                .map(ResolvedEnrichPolicy::matchField)
-                .collect(Collectors.toSet());
-            // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy
-            var fieldNames = fieldNames(parsed, enrichMatchFields);
-            // First resolve the lookup indices, then the main indices
-            preAnalyzeLookupIndices(
-                preAnalysis.lookupIndices,
+
+        SubscribableListener.<EnrichResolution>newForked(l -> enrichPolicyResolver.resolvePolicies(targetClusters, unresolvedPolicies, l))
+            .<ListenerResult>andThen((l, enrichResolution) -> {
+                // we need the match_fields names from enrich policies and THEN, with an updated list of fields, we call field_caps API
+                var enrichMatchFields = enrichResolution.resolvedEnrichPolicies()
+                    .stream()
+                    .map(ResolvedEnrichPolicy::matchField)
+                    .collect(Collectors.toSet());
+                // get the field names from the parsed plan combined with the ENRICH match fields from the ENRICH policy
+                var fieldNames = fieldNames(parsed, enrichMatchFields);
+                ListenerResult listenerResult = new ListenerResult(null, null, enrichResolution, fieldNames);
+
+                // first resolve the lookup indices, then the main indices
+                preAnalyzeLookupIndices(preAnalysis.lookupIndices, listenerResult, l);
+            })
+            .<ListenerResult>andThen((l, listenerResult) -> {
+                // resolve the main indices
+                preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, requestFilter, l);
+            })
+            .<ListenerResult>andThen((l, listenerResult) -> {
+                // TODO in follow-PR (for skip_unavailable handling of missing concrete indexes) add some tests for
+                // invalid index resolution to updateExecutionInfo
+                if (listenerResult.indices.isValid()) {
+                    // CCS indices and skip_unavailable cluster values can stop the analysis right here
+                    if (analyzeCCSIndices(executionInfo, targetClusters, unresolvedPolicies, listenerResult, logicalPlanListener, l)) return;
+                }
+                // whatever tuple we have here (from CCS-special handling or from the original pre-analysis), pass it on to the next step
+                l.onResponse(listenerResult);
+            })
+            .<ListenerResult>andThen((l, listenerResult) -> {
+                // first attempt (maybe the only one) at analyzing the plan
+                analyzeAndMaybeRetry(analyzeAction, requestFilter, listenerResult, logicalPlanListener, l);
+            })
+            .<ListenerResult>andThen((l, listenerResult) -> {
+                assert requestFilter != null : "The second pre-analysis shouldn't take place when there is no index filter in the request";
+
+                // "reset" execution information for all ccs or non-ccs (local) clusters, since we are performing the indices
+                // resolving one more time (the first attempt failed and the query had a filter)
+                for (String clusterAlias : executionInfo.clusterAliases()) {
+                    executionInfo.swapCluster(clusterAlias, (k, v) -> null);
+                }
+
+                // here the requestFilter is set to null, performing the pre-analysis after the first step failed
+                preAnalyzeIndices(preAnalysis.indices, executionInfo, listenerResult, null, l);
+            })
+            .<LogicalPlan>andThen((l, listenerResult) -> {
+                assert requestFilter != null : "The second analysis shouldn't take place when there is no index filter in the request";
+                LOGGER.debug("Analyzing the plan (second attempt, without filter)");
+                LogicalPlan plan;
+                try {
+                    plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution);
+                } catch (Exception e) {
+                    l.onFailure(e);
+                    return;
+                }
+                LOGGER.debug("Analyzed plan (second attempt, without filter):\n{}", plan);
+                l.onResponse(plan);
+            })
+            .addListener(logicalPlanListener);
+    }
+
+    private void preAnalyzeLookupIndices(List<TableInfo> indices, ListenerResult listenerResult, ActionListener<ListenerResult> listener) {
+        if (indices.size() > 1) {
+            // Note: JOINs on more than one index are not yet supported
+            listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported"));
+        } else if (indices.size() == 1) {
+            TableInfo tableInfo = indices.get(0);
+            TableIdentifier table = tableInfo.id();
+            // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types
+            indexResolver.resolveAsMergedMapping(
+                table.index(),
                 Set.of("*"), // Current LOOKUP JOIN syntax does not allow for field selection
-                l.delegateFailureAndWrap(
-                    (lx, lookupIndexResolution) -> preAnalyzeIndices(
-                        indices,
-                        executionInfo,
-                        enrichResolution.getUnavailableClusters(),
-                        fieldNames,
-                        lx.delegateFailureAndWrap((ll, indexResolution) -> {
-                            // TODO in follow-PR (for skip_unavailble handling of missing concrete indexes) add some tests for invalid
-                            // index resolution to updateExecutionInfo
-                            if (indexResolution.isValid()) {
-                                EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution);
-                                EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters(
-                                    executionInfo,
-                                    indexResolution.unavailableClusters()
-                                );
-                                if (executionInfo.isCrossClusterSearch()
-                                    && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) {
-                                    // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel
-                                    // Exception to let the LogicalPlanActionListener decide how to proceed
-                                    ll.onFailure(new NoClustersToSearchException());
-                                    return;
-                                }
-
-                                Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster(
-                                    indexResolution.get().concreteIndices().toArray(String[]::new)
-                                ).keySet();
-                                // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again
-                                // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies
-                                // again.
-                                // TODO: add a test for this
-                                if (targetClusters.containsAll(newClusters) == false
-                                    // do not bother with a re-resolution if only remotes were requested and all were offline
-                                    && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) {
-                                    enrichPolicyResolver.resolvePolicies(
-                                        newClusters,
-                                        unresolvedPolicies,
-                                        ll.map(
-                                            newEnrichResolution -> action.apply(indexResolution, lookupIndexResolution, newEnrichResolution)
-                                        )
-                                    );
-                                    return;
-                                }
-                            }
-                            ll.onResponse(action.apply(indexResolution, lookupIndexResolution, enrichResolution));
-                        })
-                    )
-                )
+                null,
+                listener.map(indexResolution -> listenerResult.withLookupIndexResolution(indexResolution))
             );
-        }));
+        } else {
+            try {
+                // No lookup indices specified
+                listener.onResponse(
+                    new ListenerResult(
+                        listenerResult.indices,
+                        IndexResolution.invalid("[none specified]"),
+                        listenerResult.enrichResolution,
+                        listenerResult.fieldNames
+                    )
+                );
+            } catch (Exception ex) {
+                listener.onFailure(ex);
+            }
+        }
     }
 
     private void preAnalyzeIndices(
         List<TableInfo> indices,
         EsqlExecutionInfo executionInfo,
-        Map<String, Exception> unavailableClusters, // known to be unavailable from the enrich policy API call
-        Set<String> fieldNames,
-        ActionListener<IndexResolution> listener
+        ListenerResult listenerResult,
+        QueryBuilder requestFilter,
+        ActionListener<ListenerResult> listener
     ) {
         // TODO we plan to support joins in the future when possible, but for now we'll just fail early if we see one
         if (indices.size() > 1) {
             // Note: JOINs are not supported but we detect them when
             listener.onFailure(new MappingException("Queries with multiple indices are not supported"));
         } else if (indices.size() == 1) {
+            // known to be unavailable from the enrich policy API call
+            Map<String, Exception> unavailableClusters = listenerResult.enrichResolution.getUnavailableClusters();
             TableInfo tableInfo = indices.get(0);
             TableIdentifier table = tableInfo.id();
@@ -409,38 +441,116 @@ private void preAnalyzeIndices(
             String indexExpressionToResolve = EsqlSessionCCSUtils.createIndexExpressionFromAvailableClusters(executionInfo);
             if (indexExpressionToResolve.isEmpty()) {
                 // if this was a pure remote CCS request (no local indices) and all remotes are offline, return an empty IndexResolution
-                listener.onResponse(IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of())));
+                listener.onResponse(
+                    new ListenerResult(
+                        IndexResolution.valid(new EsIndex(table.index(), Map.of(), Map.of())),
+                        listenerResult.lookupIndices,
+                        listenerResult.enrichResolution,
+                        listenerResult.fieldNames
+                    )
+                );
             } else {
                 // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types
-                indexResolver.resolveAsMergedMapping(indexExpressionToResolve, fieldNames, listener);
+                indexResolver.resolveAsMergedMapping(
+                    indexExpressionToResolve,
+                    listenerResult.fieldNames,
+                    requestFilter,
+                    listener.map(indexResolution -> listenerResult.withIndexResolution(indexResolution))
+                );
             }
         } else {
             try {
                 // occurs when dealing with local relations (row a = 1)
-                listener.onResponse(IndexResolution.invalid("[none specified]"));
+                listener.onResponse(
+                    new ListenerResult(
+                        IndexResolution.invalid("[none specified]"),
+                        listenerResult.lookupIndices,
+                        listenerResult.enrichResolution,
+                        listenerResult.fieldNames
+                    )
+                );
             } catch (Exception ex) {
                 listener.onFailure(ex);
             }
         }
     }
 
-    private void preAnalyzeLookupIndices(List<TableInfo> indices, Set<String> fieldNames, ActionListener<IndexResolution> listener) {
-        if (indices.size() > 1) {
-            // Note: JOINs on more than one index are not yet supported
-            listener.onFailure(new MappingException("More than one LOOKUP JOIN is not supported"));
-        } else if (indices.size() == 1) {
-            TableInfo tableInfo = indices.get(0);
-            TableIdentifier table = tableInfo.id();
-            // call the EsqlResolveFieldsAction (field-caps) to resolve indices and get field types
-            indexResolver.resolveAsMergedMapping(table.index(), fieldNames, listener);
-        } else {
-            try {
-                // No lookup indices specified
-                listener.onResponse(IndexResolution.invalid("[none specified]"));
-            } catch (Exception ex) {
-                listener.onFailure(ex);
+    private boolean analyzeCCSIndices(
+        EsqlExecutionInfo executionInfo,
+        Set<String> targetClusters,
+        Set<EnrichPolicyResolver.UnresolvedPolicy> unresolvedPolicies,
+        ListenerResult listenerResult,
+        ActionListener<LogicalPlan> logicalPlanListener,
+        ActionListener<ListenerResult> l
+    ) {
+        IndexResolution indexResolution = listenerResult.indices;
+        EsqlSessionCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, indexResolution);
+        EsqlSessionCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, indexResolution.unavailableClusters());
+        if (executionInfo.isCrossClusterSearch() && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) == 0) {
+            // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception
+            // to let the LogicalPlanActionListener decide how to proceed
+            logicalPlanListener.onFailure(new NoClustersToSearchException());
+            return true;
+        }
+
+        Set<String> newClusters = enrichPolicyResolver.groupIndicesPerCluster(
+            indexResolution.get().concreteIndices().toArray(String[]::new)
+        ).keySet();
+        // If new clusters appear when resolving the main indices, we need to resolve the enrich policies again
+        // or exclude main concrete indices. Since this is rare, it's simpler to resolve the enrich policies again.
+        // TODO: add a test for this
+        if (targetClusters.containsAll(newClusters) == false
+            // do not bother with a re-resolution if only remotes were requested and all were offline
+            && executionInfo.getClusterStateCount(EsqlExecutionInfo.Cluster.Status.RUNNING) > 0) {
+            enrichPolicyResolver.resolvePolicies(
+                newClusters,
+                unresolvedPolicies,
+                l.map(enrichResolution -> listenerResult.withEnrichResolution(enrichResolution))
+            );
+            return true;
+        }
+        return false;
+    }
+
+    private static void analyzeAndMaybeRetry(
+        TriFunction<IndexResolution, IndexResolution, EnrichResolution, LogicalPlan> analyzeAction,
+        QueryBuilder requestFilter,
+        ListenerResult listenerResult,
+        ActionListener<LogicalPlan> logicalPlanListener,
+        ActionListener<ListenerResult> l
+    ) {
+        LogicalPlan plan = null;
+        var filterPresentMessage = requestFilter == null ? "without" : "with";
+        var attemptMessage = requestFilter == null ? "the only" : "first";
+        LOGGER.debug("Analyzing the plan ({} attempt, {} filter)", attemptMessage, filterPresentMessage);
+
+        try {
+            plan = analyzeAction.apply(listenerResult.indices, listenerResult.lookupIndices, listenerResult.enrichResolution);
+        } catch (Exception e) {
+            if (e instanceof VerificationException ve) {
+                LOGGER.debug(
+                    "Analyzing the plan ({} attempt, {} filter) failed with {}",
+                    attemptMessage,
+                    filterPresentMessage,
+                    ve.getDetailedMessage()
+                );
+                if (requestFilter == null) {
+                    // if the initial request didn't have a filter, then just pass the exception back to the user
+                    logicalPlanListener.onFailure(ve);
+                } else {
+                    // interested only in a VerificationException, but this time we are taking out the index filter
+                    // to try and make the index resolution work without any index filtering. In the next step... to be continued
+                    l.onResponse(listenerResult);
+                }
+            } else {
+                // if the query failed with any other type of exception, then just pass the exception back to the user
+                logicalPlanListener.onFailure(e);
             }
+            return;
         }
+        LOGGER.debug("Analyzed plan ({} attempt, {} filter):\n{}", attemptMessage, filterPresentMessage, plan);
+        // the analysis succeeded from the first attempt, irrespective if it had a filter or not, just continue with the planning
+        logicalPlanListener.onResponse(plan);
     }
 
     static Set<String> fieldNames(LogicalPlan parsed, Set<String> enrichPolicyMatchFields) {
@@ -591,4 +701,23 @@ public PhysicalPlan optimizedPhysicalPlan(LogicalPlan optimizedPlan) {
         LOGGER.debug("Optimized physical plan:\n{}", plan);
         return plan;
     }
+
+    private record ListenerResult(
+        IndexResolution indices,
+        IndexResolution lookupIndices,
+        EnrichResolution enrichResolution,
+        Set<String> fieldNames
+    ) {
+        ListenerResult withEnrichResolution(EnrichResolution newEnrichResolution) {
+            return new ListenerResult(indices(), lookupIndices(), newEnrichResolution, fieldNames());
+        }
+
+        ListenerResult withIndexResolution(IndexResolution newIndexResolution) {
+            return new ListenerResult(newIndexResolution, lookupIndices(), enrichResolution(), fieldNames());
+        }
+
+        ListenerResult withLookupIndexResolution(IndexResolution newIndexResolution) {
+            return new ListenerResult(indices(), newIndexResolution, enrichResolution(), fieldNames());
+        }
+    };
 }
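The chained control flow above is easier to follow with the SubscribableListener API isolated from the ES|QL specifics. Below is a toy, self-contained Java sketch of the same fork/andThen/addListener pattern with placeholder values (nothing here is taken from the patch itself):

    import org.elasticsearch.action.ActionListener;
    import org.elasticsearch.action.support.SubscribableListener;

    public class ListenerChainSketch {
        public static void main(String[] args) {
            SubscribableListener
                // fork: start the first async step (completed inline in this toy)
                .<String>newForked(l -> l.onResponse("enrich-resolution"))
                // each andThen gets the previous result plus the listener for the next
                // step; throwing or calling l.onFailure short-circuits the whole chain
                .<Integer>andThen((l, enrich) -> l.onResponse(enrich.length()))
                .<String>andThen((l, length) -> l.onResponse("plan built from " + length + " chars"))
                // the terminal listener sees the final result or the first failure
                .addListener(ActionListener.wrap(plan -> System.out.println("success: " + plan), e -> System.err.println("failed: " + e)));
        }
    }

One design point worth calling out: a step that never completes its own listener `l` but instead completes the terminal listener directly ends the pipeline early, because the remaining andThen steps are simply never invoked. That is how analyzeAndMaybeRetry makes the two assert-guarded retry steps reachable only when a filtered first attempt fails.
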
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
index f61be4b59830e..d000b2765e2b1 100644
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/IndexResolver.java
@@ -18,6 +18,7 @@
 import org.elasticsearch.common.util.Maps;
 import org.elasticsearch.index.IndexMode;
 import org.elasticsearch.index.mapper.TimeSeriesParams;
+import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.esql.action.EsqlResolveFieldsAction;
 import org.elasticsearch.xpack.esql.core.type.DataType;
@@ -76,10 +77,15 @@ public IndexResolver(Client client) {
     /**
      * Resolves a pattern to one (potentially compound meaning that spawns multiple indices) mapping.
      */
-    public void resolveAsMergedMapping(String indexWildcard, Set<String> fieldNames, ActionListener<IndexResolution> listener) {
+    public void resolveAsMergedMapping(
+        String indexWildcard,
+        Set<String> fieldNames,
+        QueryBuilder requestFilter,
+        ActionListener<IndexResolution> listener
+    ) {
         client.execute(
             EsqlResolveFieldsAction.TYPE,
-            createFieldCapsRequest(indexWildcard, fieldNames),
+            createFieldCapsRequest(indexWildcard, fieldNames, requestFilter),
             listener.delegateFailureAndWrap((l, response) -> l.onResponse(mergedMappings(indexWildcard, response)))
         );
     }
@@ -252,10 +258,11 @@ private EsField conflictingMetricTypes(String name, String fullName, FieldCapabi
         return new InvalidMappedField(name, "mapped as different metric types in indices: " + indices);
     }
 
-    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames) {
+    private static FieldCapabilitiesRequest createFieldCapsRequest(String index, Set<String> fieldNames, QueryBuilder requestFilter) {
         FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices(Strings.commaDelimitedListToStringArray(index));
         req.fields(fieldNames.toArray(String[]::new));
         req.includeUnmapped(true);
+        req.indexFilter(requestFilter);
         // lenient because we throw our own errors looking at the response e.g. if something was not resolved
         // also because this way security doesn't throw authorization exceptions but rather honors ignore_unavailable
         req.indicesOptions(FIELD_CAPS_INDICES_OPTIONS);
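For reference, the effect of the new indexFilter wiring can be reproduced with a plain FieldCapabilitiesRequest: indices none of whose shards can match the filter drop out of the field-caps response, which is what lets the session resolve a narrower merged mapping. A minimal Java sketch under that assumption, with index and field names invented for illustration:

    import org.elasticsearch.action.fieldcaps.FieldCapabilitiesRequest;
    import org.elasticsearch.index.query.QueryBuilders;

    public class FieldCapsFilterSketch {
        public static void main(String[] args) {
            // Mirrors createFieldCapsRequest above, minus the ES|QL-specific indices options.
            FieldCapabilitiesRequest req = new FieldCapabilitiesRequest().indices("test1", "test2");
            req.fields("number", "geo.dest");
            req.includeUnmapped(true);
            // Same shape as the tests' filter: only indices that can match an
            // `exists` query on "number" contribute to the merged mapping.
            req.indexFilter(QueryBuilders.existsQuery("number"));
            System.out.println(String.join(",", req.indices()) + " filtered by " + req.indexFilter());
        }
    }
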
From f2addbc69a2aa7fb512c1d5ca9a839f5fc7f5134 Mon Sep 17 00:00:00 2001
From: Kostas Krikellas <131142368+kkrik-es@users.noreply.github.com>
Date: Tue, 3 Dec 2024 20:10:30 +0200
Subject: [PATCH 139/139] Parse the contents of dynamic objects for
 [subobjects:false] (#117762)

* Parse the contents of dynamic objects for [subobjects:false]

* Update docs/changelog/117762.yaml

* add tests

* tests

* test dynamic field

* test dynamic field

* fix tests

---
 docs/changelog/117762.yaml                    |   6 +
 .../test/search/330_fetch_fields.yml          | 118 ++++++++++++++++++
 .../index/mapper/DocumentParser.java          |   7 +-
 .../index/mapper/MapperFeatures.java          |   1 +
 .../index/mapper/DocumentParserTests.java     |  63 ++++++++++
 5 files changed, 194 insertions(+), 1 deletion(-)
 create mode 100644 docs/changelog/117762.yaml

diff --git a/docs/changelog/117762.yaml b/docs/changelog/117762.yaml
new file mode 100644
index 0000000000000..123432e0f0507
--- /dev/null
+++ b/docs/changelog/117762.yaml
@@ -0,0 +1,6 @@
+pr: 117762
+summary: "Parse the contents of dynamic objects for [subobjects:false]"
+area: Mapping
+type: bug
+issues:
+ - 117544
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
index 8a8dffda69e20..44d966b76f34e 100644
--- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
+++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/330_fetch_fields.yml
@@ -1177,3 +1177,121 @@ fetch geo_point:
   - is_false: hits.hits.0.fields.message
   - match: { hits.hits.0._source.message.foo: 10 }
   - match: { hits.hits.0._source.message.foo\.bar: 20 }
+
+---
+root with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            subobjects: false
+            dynamic: false
+            properties:
+              id:
+                type: integer
+              my.keyword.field:
+                type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "keyword.field": "abc" } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field still fails (silently)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "random.field": "abc" } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
+
+---
+object with subobjects false and dynamic false:
+  - requires:
+      cluster_features: mapper.fix_parsing_subobjects_false_dynamic_false
+      reason: bug fix
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              my:
+                subobjects: false
+                dynamic: false
+                properties:
+                  id:
+                    type: integer
+                  nested.keyword.field:
+                    type: keyword
+
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 1, "my": { "nested": { "keyword.field": "abc" } } }'
+  - match: { errors: false }
+
+  # indexing a dynamically-mapped field still fails (silently)
+  - do:
+      bulk:
+        index: test
+        refresh: true
+        body:
+          - '{ "index": { } }'
+          - '{ "id": 2, "my": { "nested": { "random.field": "abc" } } }'
+  - match: { errors: false }
+
+  - do:
+      search:
+        index: test
+        body:
+          sort: id
+          fields: [ "*" ]
+
+  - match: { hits.hits.0.fields: { my.nested.keyword.field: [ abc ], id: [ 1 ] } }
+  - match: { hits.hits.1.fields: { id: [ 2 ] } }
+
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            match:
+              my.nested.keyword.field: abc
+
+  - match: { hits.total.value: 1 }
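A quick model of the behavior fixed by the DocumentParser change below may help: under subobjects: false an object like { "my": { "keyword.field": "abc" } } must be flattened to the single path my.keyword.field before the parser can tell whether the field is statically mapped, so skipping the whole subtree as soon as dynamic: false was seen silently dropped mapped fields. The Java toy below only illustrates that guard; it is not the real parser logic:

    // Toy model of the added condition: skip an unmapped object's children only
    // when the parent still allows intermediate subobjects.
    public class SubobjectsFalseSketch {
        enum Dynamic { TRUE, FALSE, RUNTIME }

        static boolean skipChildren(Dynamic dynamic, boolean parentAllowsSubobjects) {
            // old behavior: `dynamic == FALSE` alone was enough to skip the subtree
            return dynamic == Dynamic.FALSE && parentAllowsSubobjects;
        }

        public static void main(String[] args) {
            System.out.println(skipChildren(Dynamic.FALSE, true));  // true: subtree ignored, as before
            System.out.println(skipChildren(Dynamic.FALSE, false)); // false: descend and flatten dotted paths
        }
    }
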
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
index 82004356ceb57..e00e7b2320000 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java
@@ -16,6 +16,7 @@
 import org.elasticsearch.common.regex.Regex;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.core.Nullable;
+import org.elasticsearch.features.NodeFeature;
 import org.elasticsearch.index.IndexVersion;
 import org.elasticsearch.index.IndexVersions;
 import org.elasticsearch.index.fielddata.FieldDataContext;
@@ -53,6 +54,9 @@ public final class DocumentParser {
 
     public static final IndexVersion DYNAMICALLY_MAP_DENSE_VECTORS_INDEX_VERSION = IndexVersions.FIRST_DETACHED_INDEX_VERSION;
+    static final NodeFeature FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE = new NodeFeature(
+        "mapper.fix_parsing_subobjects_false_dynamic_false"
+    );
 
     private final XContentParserConfiguration parserConfiguration;
     private final MappingParserContext mappingParserContext;
@@ -531,7 +535,8 @@ private static void doParseObject(DocumentParserContext context, String currentF
 
     private static void parseObjectDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
         ensureNotStrict(context, currentFieldName);
-        if (context.dynamic() == ObjectMapper.Dynamic.FALSE) {
+        // For [subobjects:false], intermediate objects get flattened so we can't skip parsing children.
+        if (context.dynamic() == ObjectMapper.Dynamic.FALSE && context.parent().subobjects() != ObjectMapper.Subobjects.DISABLED) {
             failIfMatchesRoutingPath(context, currentFieldName);
             if (context.canAddIgnoredField()) {
                 context.addIgnoredField(
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
index bf6c729f95653..ffb38d229078e 100644
--- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
+++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java
@@ -73,6 +73,7 @@ public Set<NodeFeature> getTestFeatures() {
             IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD,
             IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS,
             MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT,
+            DocumentParser.FIX_PARSING_SUBOBJECTS_FALSE_DYNAMIC_FALSE,
             CONSTANT_KEYWORD_SYNTHETIC_SOURCE_WRITE_FIX,
             META_FETCH_FIELDS_ERROR_CODE_CHANGED
         );
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
index 09d57d0e34c3c..d128b25038a59 100644
--- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
+++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserTests.java
@@ -2053,6 +2053,38 @@ public void testSubobjectsFalseWithInnerDottedObject() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots.max"));
     }
 
+    public void testSubobjectsFalseWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(mapping(b -> {
+            b.startObject("metrics").field("type", "object").field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+            b.endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "with.dots": "foo" } } } }"""));
+        assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test.other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service.test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "metrics": { "service": { "test": { "other.dots": "foo" } } } }"""));
+        assertNull(doc.rootDoc().getField("metrics.service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseRoot() throws Exception {
         DocumentMapper mapper = createDocumentMapper(mappingNoSubobjects(xContentBuilder -> {}));
         ParsedDocument doc = mapper.parse(source("""
@@ -2074,6 +2106,37 @@ public void testSubobjectsFalseRoot() throws Exception {
         assertNotNull(doc.rootDoc().getField("metrics.service.test.with.dots"));
     }
 
+    public void testSubobjectsFalseRootWithInnerDottedObjectDynamicFalse() throws Exception {
+        DocumentMapper mapper = createDocumentMapper(topMapping(b -> {
+            b.field("subobjects", false).field("dynamic", randomFrom("false", "runtime"));
+            b.startObject("properties").startObject("service.test.with.dots").field("type", "keyword").endObject().endObject();
+        }));
+
+        ParsedDocument doc = mapper.parse(source("""
+            { "service": { "test.with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "with.dots": "foo" } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "with.dots": "foo" } } }"""));
+        assertNotNull(doc.rootDoc().getField("service.test.with.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test.other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service.test": { "other.dots": "foo" } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+
+        doc = mapper.parse(source("""
+            { "service": { "test": { "other.dots": "foo" } } }"""));
+        assertNull(doc.rootDoc().getField("service.test.other.dots"));
+    }
+
     public void testSubobjectsFalseStructuredPath() throws Exception {
         DocumentMapper mapper = createDocumentMapper(
             mapping(b -> b.startObject("metrics.service").field("type", "object").field("subobjects", false).endObject())