Skip to content

Commit

Permalink
Apply the date histogram rewrite optimization to range aggregation (opensearch-project#13865)
Browse files Browse the repository at this point in the history

* Refactor the ranges representation

Signed-off-by: bowenlan-amzn <[email protected]>

* Refactor try fast filter

Signed-off-by: bowenlan-amzn <[email protected]>

* Main work finished; left the handling of different numeric data types

Signed-off-by: bowenlan-amzn <[email protected]>

* buildRanges accepts field type

Signed-off-by: bowenlan-amzn <[email protected]>

* first working draft probably

Signed-off-by: bowenlan-amzn <[email protected]>

* add change log

Signed-off-by: bowenlan-amzn <[email protected]>

* accommodate geo distance agg

Signed-off-by: bowenlan-amzn <[email protected]>

* Fix test

support all numeric types
minus one on the upper range

Signed-off-by: bowenlan-amzn <[email protected]>

* [Refactor] range is lower inclusive, right exclusive

Signed-off-by: bowenlan-amzn <[email protected]>

* adding test

Signed-off-by: bowenlan-amzn <[email protected]>

* Adding test and refactor

Signed-off-by: bowenlan-amzn <[email protected]>

* refactor

Signed-off-by: bowenlan-amzn <[email protected]>

* add test

Signed-off-by: bowenlan-amzn <[email protected]>

* add test and update the compare logic in tree traversal

Signed-off-by: bowenlan-amzn <[email protected]>

* fix test, add random test

Signed-off-by: bowenlan-amzn <[email protected]>

* refactor to address comments

Signed-off-by: bowenlan-amzn <[email protected]>

* small potential performance update

Signed-off-by: bowenlan-amzn <[email protected]>

* fix precommit

Signed-off-by: bowenlan-amzn <[email protected]>

* refactor

Signed-off-by: bowenlan-amzn <[email protected]>

* refactor

Signed-off-by: bowenlan-amzn <[email protected]>

* set refresh_interval to -1

Signed-off-by: bowenlan-amzn <[email protected]>

* address comment

Signed-off-by: bowenlan-amzn <[email protected]>

* address comment

Signed-off-by: bowenlan-amzn <[email protected]>

* address comment

Signed-off-by: bowenlan-amzn <[email protected]>

* Fix test

To understand fully about the double and bigdecimal usage in scaled float field will take more time.

Signed-off-by: bowenlan-amzn <[email protected]>

---------

Signed-off-by: bowenlan-amzn <[email protected]>
  • Loading branch information
bowenlan-amzn authored and harshavamsi committed Jul 12, 2024
1 parent 9a8a7f6 commit f2edd84
Show file tree
Hide file tree
Showing 16 changed files with 901 additions and 232 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import com.fasterxml.jackson.core.JsonParseException;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
Expand Down Expand Up @@ -165,7 +166,7 @@ public ScaledFloatFieldMapper build(BuilderContext context) {

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.getSettings()));

public static final class ScaledFloatFieldType extends SimpleMappedFieldType {
public static final class ScaledFloatFieldType extends SimpleMappedFieldType implements NumericPointEncoder {

private final double scalingFactor;
private final Double nullValue;
Expand All @@ -188,6 +189,21 @@ public ScaledFloatFieldType(String name, double scalingFactor) {
this(name, true, false, true, Collections.emptyMap(), scalingFactor, null);
}

@Override
public byte[] encodePoint(Number value) {
    assert value instanceof Double;
    double raw = (Double) value;
    // Map the unbounded double endpoints onto the long domain used by the
    // scaled-float index; everything else is scaled then rounded.
    final long encoded;
    if (raw == Double.POSITIVE_INFINITY) {
        encoded = Long.MAX_VALUE;
    } else if (raw == Double.NEGATIVE_INFINITY) {
        encoded = Long.MIN_VALUE;
    } else {
        encoded = Math.round(scale(value));
    }
    final byte[] point = new byte[Long.BYTES];
    LongPoint.encodeDimension(encoded, point, 0);
    return point;
}

public double getScalingFactor() {
return scalingFactor;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ setup:
date:
type: date
format: epoch_second
scaled_field:
type: scaled_float
scaling_factor: 100

- do:
cluster.health:
Expand Down Expand Up @@ -528,3 +531,139 @@ setup:
- is_false: aggregations.unsigned_long_range.buckets.2.to

- match: { aggregations.unsigned_long_range.buckets.2.doc_count: 0 }

---
"Double range profiler shows filter rewrite info":
- skip:
version: " - 2.99.99"
reason: debug info for filter rewrite added in 3.0.0 (to be backported to 2.15.0)

- do:
indices.create:
index: test_profile
body:
settings:
number_of_replicas: 0
refresh_interval: -1
mappings:
properties:
ip:
type: ip
double:
type: double
date:
type: date
format: epoch_second

- do:
bulk:
index: test_profile
refresh: true
body:
- '{"index": {}}'
- '{"double" : 42}'
- '{"index": {}}'
- '{"double" : 100}'
- '{"index": {}}'
- '{"double" : 50}'

- do:
search:
index: test_profile
body:
size: 0
profile: true
aggs:
double_range:
range:
field: double
ranges:
- to: 50
- from: 50
to: 150
- from: 150

- length: { aggregations.double_range.buckets: 3 }

- match: { aggregations.double_range.buckets.0.key: "*-50.0" }
- is_false: aggregations.double_range.buckets.0.from
- match: { aggregations.double_range.buckets.0.to: 50.0 }
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
- match: { aggregations.double_range.buckets.1.key: "50.0-150.0" }
- match: { aggregations.double_range.buckets.1.from: 50.0 }
- match: { aggregations.double_range.buckets.1.to: 150.0 }
- match: { aggregations.double_range.buckets.1.doc_count: 2 }
- match: { aggregations.double_range.buckets.2.key: "150.0-*" }
- match: { aggregations.double_range.buckets.2.from: 150.0 }
- is_false: aggregations.double_range.buckets.2.to
- match: { aggregations.double_range.buckets.2.doc_count: 0 }

- match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 }
- match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 }
- match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 }
- match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 }

---
"Scaled Float Range Aggregation":
- do:
index:
index: test
id: 1
body: { "scaled_field": 1 }

- do:
index:
index: test
id: 2
body: { "scaled_field": 1.53 }

- do:
index:
index: test
id: 3
body: { "scaled_field": -2.1 }

- do:
index:
index: test
id: 4
body: { "scaled_field": 1.53 }

- do:
indices.refresh: { }

- do:
search:
index: test
body:
size: 0
aggs:
my_range:
range:
field: scaled_field
ranges:
- to: 0
- from: 0
to: 1
- from: 1
to: 1.5
- from: 1.5

- length: { aggregations.my_range.buckets: 4 }

- match: { aggregations.my_range.buckets.0.key: "*-0.0" }
- is_false: aggregations.my_range.buckets.0.from
- match: { aggregations.my_range.buckets.0.to: 0.0 }
- match: { aggregations.my_range.buckets.0.doc_count: 1 }
- match: { aggregations.my_range.buckets.1.key: "0.0-1.0" }
- match: { aggregations.my_range.buckets.1.from: 0.0 }
- match: { aggregations.my_range.buckets.1.to: 1.0 }
- match: { aggregations.my_range.buckets.1.doc_count: 0 }
- match: { aggregations.my_range.buckets.2.key: "1.0-1.5" }
- match: { aggregations.my_range.buckets.2.from: 1.0 }
- match: { aggregations.my_range.buckets.2.to: 1.5 }
- match: { aggregations.my_range.buckets.2.doc_count: 1 }
- match: { aggregations.my_range.buckets.3.key: "1.5-*" }
- match: { aggregations.my_range.buckets.3.from: 1.5 }
- is_false: aggregations.my_range.buckets.3.to
- match: { aggregations.my_range.buckets.3.doc_count: 2 }
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ public DateFieldMapper build(BuilderContext context) {
*
* @opensearch.internal
*/
public static final class DateFieldType extends MappedFieldType {
public static final class DateFieldType extends MappedFieldType implements NumericPointEncoder {
protected final DateFormatter dateTimeFormatter;
protected final DateMathParser dateMathParser;
protected final Resolution resolution;
Expand Down Expand Up @@ -588,6 +588,13 @@ public static long parseToLong(
return resolution.convert(dateParser.parse(BytesRefs.toString(value), now, roundUp, zone));
}

/** Encodes {@code value} as the 8-byte long point representation used for date fields. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Long.BYTES];
    LongPoint.encodeDimension(value.longValue(), encoded, 0);
    return encoded;
}

@Override
public Query distanceFeatureQuery(Object origin, String pivot, float boost, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ public NumberFieldMapper build(BuilderContext context) {
*
* @opensearch.internal
*/
public enum NumberType {
public enum NumberType implements NumericPointEncoder {
HALF_FLOAT("half_float", NumericType.HALF_FLOAT) {
@Override
public Float parse(Object value, boolean coerce) {
Expand All @@ -194,6 +194,13 @@ public Number parsePoint(byte[] value) {
return HalfFloatPoint.decodeDimension(value, 0);
}

/** Encodes {@code value} as a half-float point of {@code HalfFloatPoint.BYTES} bytes. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[HalfFloatPoint.BYTES];
    HalfFloatPoint.encodeDimension(value.floatValue(), encoded, 0);
    return encoded;
}

@Override
public Float parse(XContentParser parser, boolean coerce) throws IOException {
float parsed = parser.floatValue(coerce);
Expand Down Expand Up @@ -331,6 +338,13 @@ public Number parsePoint(byte[] value) {
return FloatPoint.decodeDimension(value, 0);
}

/** Encodes {@code value} as a 4-byte float point. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Float.BYTES];
    FloatPoint.encodeDimension(value.floatValue(), encoded, 0);
    return encoded;
}

@Override
public Float parse(XContentParser parser, boolean coerce) throws IOException {
float parsed = parser.floatValue(coerce);
Expand Down Expand Up @@ -457,6 +471,13 @@ public Number parsePoint(byte[] value) {
return DoublePoint.decodeDimension(value, 0);
}

/** Encodes {@code value} as an 8-byte double point. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Double.BYTES];
    DoublePoint.encodeDimension(value.doubleValue(), encoded, 0);
    return encoded;
}

@Override
public Double parse(XContentParser parser, boolean coerce) throws IOException {
double parsed = parser.doubleValue(coerce);
Expand Down Expand Up @@ -582,6 +603,13 @@ public Number parsePoint(byte[] value) {
return INTEGER.parsePoint(value).byteValue();
}

/**
 * Encodes {@code value} as an int point: byte fields are indexed with
 * integer points (see {@code parsePoint}, which decodes via INTEGER).
 */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Integer.BYTES];
    IntPoint.encodeDimension(value.intValue(), encoded, 0);
    return encoded;
}

@Override
public Short parse(XContentParser parser, boolean coerce) throws IOException {
int value = parser.intValue(coerce);
Expand Down Expand Up @@ -654,6 +682,13 @@ public Number parsePoint(byte[] value) {
return INTEGER.parsePoint(value).shortValue();
}

/**
 * Encodes {@code value} as an int point: short fields are indexed with
 * integer points (see {@code parsePoint}, which decodes via INTEGER).
 */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Integer.BYTES];
    IntPoint.encodeDimension(value.intValue(), encoded, 0);
    return encoded;
}

@Override
public Short parse(XContentParser parser, boolean coerce) throws IOException {
return parser.shortValue(coerce);
Expand Down Expand Up @@ -722,6 +757,13 @@ public Number parsePoint(byte[] value) {
return IntPoint.decodeDimension(value, 0);
}

/** Encodes {@code value} as a 4-byte int point. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Integer.BYTES];
    IntPoint.encodeDimension(value.intValue(), encoded, 0);
    return encoded;
}

@Override
public Integer parse(XContentParser parser, boolean coerce) throws IOException {
return parser.intValue(coerce);
Expand Down Expand Up @@ -868,6 +910,13 @@ public Number parsePoint(byte[] value) {
return LongPoint.decodeDimension(value, 0);
}

/** Encodes {@code value} as an 8-byte long point. */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[Long.BYTES];
    LongPoint.encodeDimension(value.longValue(), encoded, 0);
    return encoded;
}

@Override
public Long parse(XContentParser parser, boolean coerce) throws IOException {
return parser.longValue(coerce);
Expand Down Expand Up @@ -988,6 +1037,13 @@ public Number parsePoint(byte[] value) {
return BigIntegerPoint.decodeDimension(value, 0);
}

/**
 * Encodes {@code value} as a {@code BigIntegerPoint}. Conversion is lenient:
 * values outside the unsigned-long range are clamped to the MIN/MAX bound
 * rather than rejected (see {@code objectToUnsignedLong(value, coerce, lenientBound)}).
 */
@Override
public byte[] encodePoint(Number value) {
    final byte[] encoded = new byte[BigIntegerPoint.BYTES];
    BigIntegerPoint.encodeDimension(objectToUnsignedLong(value, false, true), encoded, 0);
    return encoded;
}

@Override
public BigInteger parse(XContentParser parser, boolean coerce) throws IOException {
return parser.bigIntegerValue(coerce);
Expand Down Expand Up @@ -1215,16 +1271,30 @@ public static long objectToLong(Object value, boolean coerce) {
return Numbers.toLong(stringValue, coerce);
}

/**
 * Strict variant of {@link #objectToUnsignedLong(Object, boolean, boolean)}:
 * out-of-range values throw instead of being clamped to the bounds.
 */
public static BigInteger objectToUnsignedLong(Object value, boolean coerce) {
    return objectToUnsignedLong(value, coerce, /* lenientBound */ false);
}

/**
* Converts an Object to a {@code long} by checking it against known
* Converts an Object to a {@code BigInteger} by checking it against known
* types and checking its range.
*
* @param lenientBound if true, use MIN or MAX if the value is out of bound
*/
public static BigInteger objectToUnsignedLong(Object value, boolean coerce) {
public static BigInteger objectToUnsignedLong(Object value, boolean coerce, boolean lenientBound) {
if (value instanceof Long) {
return Numbers.toUnsignedBigInteger(((Long) value).longValue());
}

double doubleValue = objectToDouble(value);
if (lenientBound) {
if (doubleValue < Numbers.MIN_UNSIGNED_LONG_VALUE.doubleValue()) {
return Numbers.MIN_UNSIGNED_LONG_VALUE;
}
if (doubleValue > Numbers.MAX_UNSIGNED_LONG_VALUE.doubleValue()) {
return Numbers.MAX_UNSIGNED_LONG_VALUE;
}
}
if (doubleValue < Numbers.MIN_UNSIGNED_LONG_VALUE.doubleValue()
|| doubleValue > Numbers.MAX_UNSIGNED_LONG_VALUE.doubleValue()) {
throw new IllegalArgumentException("Value [" + value + "] is out of range for an unsigned long");
Expand Down Expand Up @@ -1349,7 +1419,7 @@ public static Query unsignedLongRangeQuery(
*
* @opensearch.internal
*/
public static class NumberFieldType extends SimpleMappedFieldType {
public static class NumberFieldType extends SimpleMappedFieldType implements NumericPointEncoder {

private final NumberType type;
private final boolean coerce;
Expand Down Expand Up @@ -1394,6 +1464,10 @@ public String typeName() {
return type.name;
}

/** Returns the concrete {@link NumberType} backing this field type. */
public NumberType numberType() {
    return type;
}

public NumericType numericType() {
return type.numericType();
}
Expand Down Expand Up @@ -1501,6 +1575,11 @@ public DocValueFormat docValueFormat(String format, ZoneId timeZone) {
public Number parsePoint(byte[] value) {
return type.parsePoint(value);
}

/** Delegates point encoding to the wrapped {@link NumberType}. */
@Override
public byte[] encodePoint(Number value) {
    return type.encodePoint(value);
}
}

private final NumberType type;
Expand Down
Loading

0 comments on commit f2edd84

Please sign in to comment.