Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OTF-2326 push updates for greater than #2

Open
wants to merge 4 commits into
base: feature/otf-1500_column_comparisons_1521
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -284,14 +284,6 @@ public <T> Boolean ltEq(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_CANNOT_MATCH;
}

if (ref.type().typeId() != ref2.type().typeId()) {
return ROWS_MIGHT_MATCH;
}

if (checkLowerToUpperBounds(ref, ref2, id, id2, cmp -> cmp > 0)) {
Copy link
Author

@jenbaldwin jenbaldwin Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When the data is equal, the lower-to-upper bound checks report that rows cannot match when they actually can, so I am removing those checks. I have updated my test bteqs for this. These impact data pruning, not file pruning.

return ROWS_CANNOT_MATCH;
}

return ROWS_MIGHT_MATCH;
}

@@ -331,10 +323,6 @@ public <T> Boolean gt(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_MIGHT_MATCH;
}

if (checkUpperBounds(ref, ref2, id, id2, cmp -> cmp <= 0)) {
return ROWS_CANNOT_MATCH;
}

if (checkUpperToLowerBounds(ref, ref2, id, id2, cmp -> cmp <= 0)) {
return ROWS_CANNOT_MATCH;
}
@@ -374,18 +362,6 @@ public <T> Boolean gtEq(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_CANNOT_MATCH;
}

if (ref.type().typeId() != ref2.type().typeId()) {
return ROWS_MIGHT_MATCH;
}

if (checkUpperBounds(ref, ref2, id, id2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}

if (checkUpperToLowerBounds(ref, ref2, id, id2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}

return ROWS_MIGHT_MATCH;
}

@@ -435,18 +411,6 @@ public <T> Boolean eq(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_CANNOT_MATCH;
}

if (ref.type().typeId() != ref2.type().typeId()) {
return ROWS_MIGHT_MATCH;
}

if (checkLowerToUpperBounds(ref, ref2, id, id2, cmp -> cmp > 0)) {
return ROWS_CANNOT_MATCH;
}

if (checkUpperToLowerBounds(ref, ref2, id, id2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}

return ROWS_MIGHT_MATCH;
}

@@ -464,25 +428,6 @@ public <T> Boolean notEq(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_MIGHT_MATCH;
}

/**
 * Tests the recorded upper bounds of two columns against a caller-supplied predicate.
 *
 * <p>Both upper bounds are decoded with their own reference's type, but they are ordered with
 * the comparator derived from {@code ref}'s type only.
 *
 * @param ref reference to the first column; supplies the decoding type and the comparator
 * @param ref2 reference to the second column; supplies the decoding type of its bound
 * @param id key used to look up the first column's upper bound
 * @param id2 key used to look up the second column's upper bound
 * @param compare predicate applied to the comparator result of (first upper, second upper)
 * @return {@code true} only when both upper bounds are present and {@code compare} accepts the
 *     comparison result; {@code false} otherwise
 */
private <T> boolean checkUpperBounds(
    BoundReference<T> ref,
    BoundReference<T> ref2,
    Integer id,
    Integer id2,
    java.util.function.Predicate<Integer> compare) {
  // Without an upper bound for both columns there is nothing to compare.
  if (upperBounds == null || !upperBounds.containsKey(id) || !upperBounds.containsKey(id2)) {
    return false;
  }

  T firstUpper = Conversions.fromByteBuffer(ref.type(), upperBounds.get(id));
  T secondUpper = Conversions.fromByteBuffer(ref2.type(), upperBounds.get(id2));

  Comparator<Object> typeComparator = Comparators.forType(ref.type().asPrimitiveType());
  return compare.test(typeComparator.compare(firstUpper, secondUpper));
}

private <T> boolean checkLowerToUpperBounds(
BoundReference<T> ref,
BoundReference<T> ref2,
Original file line number Diff line number Diff line change
@@ -329,16 +329,16 @@ public <T> Boolean gtEq(BoundReference<T> ref, BoundReference<T> ref2) {
int pos = Accessors.toPosition(ref.accessor());
int pos2 = Accessors.toPosition(ref2.accessor());
ByteBuffer upperBound = stats.get(pos).upperBound();
ByteBuffer upperBound2 = stats.get(pos2).upperBound();
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correction specific to the greater-than-or-equal (`gtEq`) comparison.

if (upperBound == null || upperBound2 == null) {
ByteBuffer lowerBound = stats.get(pos2).lowerBound();
if (upperBound == null || lowerBound == null) {
return ROWS_CANNOT_MATCH; // values are all null
}

T upper = Conversions.fromByteBuffer(ref.type(), upperBound);
T upper2 = Conversions.fromByteBuffer(ref2.type(), upperBound2);
T lower = Conversions.fromByteBuffer(ref2.type(), lowerBound);

Comparator<Object> comparator = Comparators.forType(ref.type().asPrimitiveType());
int cmp = comparator.compare(upper, upper2);
int cmp = comparator.compare(upper, lower);
if (cmp < 0) {
return ROWS_CANNOT_MATCH;
}
Original file line number Diff line number Diff line change
@@ -553,8 +553,8 @@ SCHEMA, termPredicate(Expression.Operation.LT_EQ, "id", "id2"))
SCHEMA, termPredicate(Expression.Operation.LT_EQ, "id", "id2"))
.eval(FILE_3);
assertThat(shouldRead)
.as("Should not read: id range lower bound (30) is not below upper bound id range (25)")
.isFalse();
.as("Should not read: id range lower bound (30) can be equal to upper bound range (25)")
.isTrue();

shouldRead =
new InclusiveMetricsEvaluator(
@@ -655,9 +655,8 @@ SCHEMA, termPredicate(Expression.Operation.GT_EQ, "id", "id2"))
SCHEMA, termPredicate(Expression.Operation.GT_EQ, "id", "id2"))
.eval(FILE_2);
assertThat(shouldRead)
.as(
"Should not read: id range upper bound (40) is not greater than upper bound id2 range (80)")
.isFalse();
.as("Should read: id range upper bound (40) can be equal to upper range (80)")
.isTrue();

shouldRead =
new InclusiveMetricsEvaluator(
@@ -714,15 +713,15 @@ public void testRefCompareIntegerEq() {
new InclusiveMetricsEvaluator(SCHEMA, termPredicate(Expression.Operation.EQ, "id", "id2"))
.eval(FILE_2);
assertThat(shouldRead)
.as("Should not read: id range (30,40) can not be equal to id2 range (50,80)")
.isFalse();
.as("Should not read: id range (30,40) can be equal to id2 range (50,80)")
.isTrue();

shouldRead =
new InclusiveMetricsEvaluator(SCHEMA, termPredicate(Expression.Operation.EQ, "id", "id2"))
.eval(FILE_3);
assertThat(shouldRead)
.as("Should not read: id range (5,25) can not be equal to id2 range (30,40)")
.isFalse();
.as("Should not read: id range (5,25) can be equal to id2 range (30,40)")
.isTrue();

shouldRead =
new InclusiveMetricsEvaluator(SCHEMA, termPredicate(Expression.Operation.EQ, "id", "id2"))
Original file line number Diff line number Diff line change
@@ -319,10 +319,6 @@ public <T> Boolean ltEq(BoundReference<T> ref, BoundReference<T> ref2) {
if (minMaxUndefined(colStats) || minMaxUndefined(colStats2)) {
return ROWS_MIGHT_MATCH;
}

if (compareLowerToUpperStats(ref, id, id2, colStats, colStats2, cmp -> cmp > 0)) {
return ROWS_CANNOT_MATCH;
}
}

return ROWS_MIGHT_MATCH;
@@ -385,10 +381,6 @@ public <T> Boolean gt(BoundReference<T> ref, BoundReference<T> ref2) {
return ROWS_MIGHT_MATCH;
}

if (compareUpperStats(ref, id, id2, colStats, colStats2, cmp -> cmp <= 0)) {
return ROWS_CANNOT_MATCH;
}

if (compareUpperToLowerStats(ref, id, id2, colStats, colStats2, cmp -> cmp <= 0)) {
return ROWS_CANNOT_MATCH;
}
@@ -453,14 +445,6 @@ public <T> Boolean gtEq(BoundReference<T> ref, BoundReference<T> ref2) {
if (minMaxUndefined(colStats) || minMaxUndefined(colStats2)) {
return ROWS_MIGHT_MATCH;
}

if (compareUpperStats(ref, id, id2, colStats, colStats2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}

if (compareUpperToLowerStats(ref, id, id2, colStats, colStats2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}
}

return ROWS_MIGHT_MATCH;
@@ -565,52 +549,11 @@ private <T> Boolean compareStats(
if (minMaxUndefined(colStats) || minMaxUndefined(colStats2)) {
return ROWS_MIGHT_MATCH;
}

if (compareLowerStats(ref, id, id2, colStats, colStats2, cmp -> cmp > 0)) {
return ROWS_CANNOT_MATCH;
}
if (compareUpperStats(ref, id, id2, colStats, colStats2, cmp -> cmp < 0)) {
return ROWS_CANNOT_MATCH;
}
}
}
return ROWS_MIGHT_MATCH;
}

/**
 * Tests the maximum values of two column statistics against a caller-supplied predicate.
 *
 * @param ref reference whose comparator orders the two maximum values
 * @param id identifier passed to {@code max} together with {@code colStats}
 * @param id2 identifier passed to {@code max} together with {@code colStats2}
 * @param colStats statistics of the first column
 * @param colStats2 statistics of the second column
 * @param compare predicate applied to the comparator result of (first max, second max)
 * @return whatever {@code compare} returns for the comparison result
 */
private <T> boolean compareUpperStats(
    BoundReference<T> ref,
    int id,
    int id2,
    Statistics<?> colStats,
    Statistics<?> colStats2,
    java.util.function.Predicate<Integer> compare) {
  T firstMax = max(colStats, id);
  T secondMax = max(colStats2, id2);
  return compare.test(ref.comparator().compare(firstMax, secondMax));
}

/**
 * Tests the minimum values of two column statistics against a caller-supplied predicate.
 *
 * @param ref reference whose comparator orders the two minimum values
 * @param id identifier passed to {@code min} together with {@code colStats}
 * @param id2 identifier passed to {@code min} together with {@code colStats2}
 * @param colStats statistics of the first column
 * @param colStats2 statistics of the second column
 * @param compare predicate applied to the comparator result of (first min, second min)
 * @return whatever {@code compare} returns for the comparison result
 */
private <T> boolean compareLowerStats(
    BoundReference<T> ref,
    int id,
    int id2,
    Statistics<?> colStats,
    Statistics<?> colStats2,
    java.util.function.Predicate<Integer> compare) {
  T firstMin = min(colStats, id);
  T secondMin = min(colStats2, id2);
  return compare.test(ref.comparator().compare(firstMin, secondMin));
}

private <T> boolean compareLowerToUpperStats(
BoundReference<T> ref,
int id,