Skip to content

Commit

Permalink
[core] Add StringEndsWith predicate (#3714)
Browse files Browse the repository at this point in the history
  • Loading branch information
xuzifu666 authored Jul 11, 2024
1 parent ff64c8c commit 9306dd8
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ public FileIndexResult visitStartsWith(FieldRef fieldRef, Object literal) {
return REMAIN;
}

/**
 * Handles an ends-with leaf predicate on {@code fieldRef} with the given suffix {@code literal}.
 *
 * <p>Unconditionally answers {@code REMAIN}, the same result the neighbouring
 * {@code visitStartsWith} and {@code visitLessThan} overrides in this class return; neither
 * argument is inspected.
 *
 * <p>NOTE(review): presumably {@code REMAIN} means the file cannot be ruled out by this index —
 * confirm against {@code FileIndexResult}.
 */
@Override
public FileIndexResult visitEndsWith(FieldRef fieldRef, Object literal) {
return REMAIN;
}

@Override
public FileIndexResult visitLessThan(FieldRef fieldRef, Object literal) {
return REMAIN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ public FileIndexResult visitStartsWith(FieldRef fieldRef, Object literal) {
return SKIP;
}

/**
 * Handles an ends-with leaf predicate on {@code fieldRef} with the given suffix {@code literal}.
 *
 * <p>Unconditionally answers {@code SKIP}, the same result the neighbouring
 * {@code visitStartsWith} and {@code visitLessThan} overrides in this class return; neither
 * argument is inspected.
 *
 * <p>NOTE(review): presumably {@code SKIP} tells the caller the file can be skipped — confirm
 * against {@code FileIndexResult} and the contract of the enclosing class.
 */
@Override
public FileIndexResult visitEndsWith(FieldRef fieldRef, Object literal) {
return SKIP;
}

@Override
public FileIndexResult visitLessThan(FieldRef fieldRef, Object literal) {
return SKIP;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.predicate;

import org.apache.paimon.data.BinaryString;
import org.apache.paimon.types.DataType;

import java.util.List;
import java.util.Optional;

/**
 * A {@link NullFalseLeafBinaryFunction} that checks whether a string field ends with a given
 * suffix, i.e. the shape of {@code filter like '%abc'}.
 *
 * <p>The class is a stateless singleton; use {@link #INSTANCE}.
 */
public class EndsWith extends NullFalseLeafBinaryFunction {

    /** The only instance of this function. */
    public static final EndsWith INSTANCE = new EndsWith();

    private EndsWith() {}

    /**
     * Row-level evaluation.
     *
     * @param type data type of the field (not consulted here)
     * @param field the field value; cast to {@link BinaryString}
     * @param patternLiteral the suffix to look for; cast to {@link BinaryString}
     * @return whether {@code field} ends with {@code patternLiteral}
     */
    @Override
    public boolean test(DataType type, Object field, Object patternLiteral) {
        final BinaryString value = (BinaryString) field;
        final BinaryString suffix = (BinaryString) patternLiteral;
        return value.endsWith(suffix);
    }

    /**
     * Statistics-level evaluation.
     *
     * <p>None of the supplied statistics are examined: the answer is always {@code true}, so this
     * function never excludes anything based on min/max/null-count information.
     *
     * @return always {@code true}
     */
    @Override
    public boolean test(
            DataType type,
            long rowCount,
            Object min,
            Object max,
            Long nullCount,
            Object patternLiteral) {
        return true;
    }

    /**
     * This function offers no negated counterpart.
     *
     * @return always {@link Optional#empty()}
     */
    @Override
    public Optional<LeafFunction> negate() {
        return Optional.empty();
    }

    /**
     * Dispatches to {@link FunctionVisitor#visitEndsWith}, handing over the first literal as the
     * suffix.
     */
    @Override
    public <T> T visit(FunctionVisitor<T> visitor, FieldRef fieldRef, List<Object> literals) {
        final Object suffixLiteral = literals.get(0);
        return visitor.visitEndsWith(fieldRef, suffixLiteral);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ default T visit(CompoundPredicate predicate) {

T visitStartsWith(FieldRef fieldRef, Object literal);

/**
 * Visits an ends-with leaf predicate.
 *
 * @param fieldRef the field the predicate applies to
 * @param literal the suffix literal; {@code EndsWith#visit} passes the first element of its
 *     literal list here
 * @return the visitor-specific result
 */
T visitEndsWith(FieldRef fieldRef, Object literal);

T visitLessThan(FieldRef fieldRef, Object literal);

T visitGreaterOrEqual(FieldRef fieldRef, Object literal);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ public Boolean visitStartsWith(FieldRef fieldRef, Object literal) {
return false;
}

/**
 * Handles an ends-with leaf predicate on {@code fieldRef} with the given suffix {@code literal}.
 *
 * <p>Unconditionally answers {@code false}, the same result the neighbouring
 * {@code visitStartsWith} and {@code visitLessThan} overrides in this class return; neither
 * argument is inspected.
 */
@Override
public Boolean visitEndsWith(FieldRef fieldRef, Object literal) {
return false;
}

@Override
public Boolean visitLessThan(FieldRef fieldRef, Object literal) {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,10 @@ public Predicate startsWith(int idx, Object patternLiteral) {
return leaf(StartsWith.INSTANCE, idx, patternLiteral);
}

/**
 * Builds a leaf predicate that applies {@link EndsWith#INSTANCE} to the field at position
 * {@code idx} of this builder's row type.
 *
 * @param idx index of the field in the row type
 * @param patternLiteral the suffix literal handed to the {@code EndsWith} function
 * @return the predicate produced by {@code leaf(...)}
 */
public Predicate endsWith(int idx, Object patternLiteral) {
return leaf(EndsWith.INSTANCE, idx, patternLiteral);
}

public Predicate leaf(NullFalseLeafBinaryFunction function, int idx, Object literal) {
DataField field = rowType.getFields().get(idx);
return new LeafPredicate(function, field.type(), idx, field.name(), singletonList(literal));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,20 @@

package org.apache.paimon.predicate;

import org.apache.paimon.data.GenericArray;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.format.SimpleColStats;
import org.apache.paimon.types.IntType;
import org.apache.paimon.types.RowType;
import org.apache.paimon.types.VarCharType;

import org.junit.jupiter.api.Test;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.apache.paimon.data.BinaryString.fromString;
import static org.apache.paimon.predicate.SimpleColStatsTestUtils.test;
import static org.assertj.core.api.Assertions.assertThat;

Expand Down Expand Up @@ -377,6 +380,19 @@ public void testNotInNull() {
.isEqualTo(false);
}

/**
 * Verifies {@code endsWith} at row level (matching and non-matching value) and at statistics
 * level, where the predicate is expected to keep the data regardless of the min/max bounds.
 */
@Test
public void testEndsWith() {
    PredicateBuilder builder = new PredicateBuilder(RowType.of(new VarCharType()));
    Predicate predicate = builder.endsWith(0, fromString("bcc"));

    // Row level: one value that carries the suffix and one that does not, so an
    // implementation that always answers true cannot pass.
    assertThat(predicate.test(GenericRow.of(fromString("aabbcc")))).isEqualTo(true);
    assertThat(predicate.test(GenericRow.of(fromString("aabbcd")))).isEqualTo(false);

    // Stats level: bounds are ordered min <= max ("aaba" sorts before "aabb"); neither bound
    // ends with the suffix, yet the predicate must still answer true because a suffix match
    // cannot be excluded from min/max alone.
    GenericRow min = GenericRow.of(fromString("aaba"));
    GenericRow max = GenericRow.of(fromString("aabb"));
    Integer[] nullCount = {null};
    assertThat(predicate.test(10, min, max, new GenericArray(nullCount))).isEqualTo(true);
}

@Test
public void testLargeIn() {
PredicateBuilder builder = new PredicateBuilder(RowType.of(new IntType()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ public Optional<OrcFilters.Predicate> visitStartsWith(FieldRef fieldRef, Object
return Optional.empty();
}

/**
 * Handles an ends-with leaf predicate on {@code fieldRef} with the given suffix {@code literal}.
 *
 * <p>Unconditionally answers {@link Optional#empty()}, the same result the neighbouring
 * {@code visitStartsWith} override returns; neither argument is inspected.
 *
 * <p>NOTE(review): presumably an empty result means no ORC predicate is produced for this
 * filter — confirm against the caller of this visitor.
 */
@Override
public Optional<OrcFilters.Predicate> visitEndsWith(FieldRef fieldRef, Object literal) {
return Optional.empty();
}

@Override
public Optional<OrcFilters.Predicate> visitLessThan(FieldRef fieldRef, Object literal) {
return convertBinary(fieldRef, literal, OrcFilters.LessThan::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,11 @@ public FilterPredicate visitStartsWith(FieldRef fieldRef, Object literal) {
throw new UnsupportedOperationException();
}

/**
 * Handles an ends-with leaf predicate on {@code fieldRef} with the given suffix {@code literal}.
 *
 * <p>No conversion is implemented: like the neighbouring {@code visitStartsWith} and
 * {@code visitIn} overrides, this method always throws.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public FilterPredicate visitEndsWith(FieldRef fieldRef, Object literal) {
throw new UnsupportedOperationException();
}

@Override
public FilterPredicate visitIn(FieldRef fieldRef, List<Object> literals) {
throw new UnsupportedOperationException();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.spark.sql.sources.LessThanOrEqual;
import org.apache.spark.sql.sources.Not;
import org.apache.spark.sql.sources.Or;
import org.apache.spark.sql.sources.StringEndsWith;
import org.apache.spark.sql.sources.StringStartsWith;

import java.util.Arrays;
Expand All @@ -61,7 +62,8 @@ public class SparkFilterConverter {
"And",
"Or",
"Not",
"StringStartsWith");
"StringStartsWith",
"StringEndsWith");

private final RowType rowType;
private final PredicateBuilder builder;
Expand Down Expand Up @@ -141,6 +143,11 @@ public Predicate convert(Filter filter) {
int index = fieldIndex(startsWith.attribute());
Object literal = convertLiteral(index, startsWith.value());
return builder.startsWith(index, literal);
} else if (filter instanceof StringEndsWith) {
StringEndsWith endsWith = (StringEndsWith) filter;
int index = fieldIndex(endsWith.attribute());
Object literal = convertLiteral(index, endsWith.value());
return builder.endsWith(index, literal);
}

// TODO: AlwaysTrue, AlwaysFalse
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.paimon.spark;

import org.apache.paimon.data.GenericArray;
import org.apache.paimon.data.GenericRow;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.predicate.Predicate;
import org.apache.paimon.predicate.PredicateBuilder;
Expand All @@ -38,6 +40,7 @@
import org.apache.spark.sql.sources.LessThan;
import org.apache.spark.sql.sources.LessThanOrEqual;
import org.apache.spark.sql.sources.Not;
import org.apache.spark.sql.sources.StringEndsWith;
import org.apache.spark.sql.sources.StringStartsWith;
import org.junit.jupiter.api.Test;

Expand All @@ -51,6 +54,7 @@
import java.util.Collections;
import java.util.List;

import static org.apache.paimon.data.BinaryString.fromString;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.catchThrowableOfType;

Expand Down Expand Up @@ -149,6 +153,20 @@ public void testAll() {
Predicate expectedLargeIn = builder.in(0, Arrays.asList(literals));
Predicate actualLargeIn = converter.convert(largeIn);
assertThat(actualLargeIn).isEqualTo(expectedLargeIn);

RowType rowType01 =
new RowType(Collections.singletonList(new DataField(0, "id", new VarCharType())));
SparkFilterConverter converter01 = new SparkFilterConverter(rowType01);
StringEndsWith endsWith = StringEndsWith.apply("id", "abc");
Predicate endsWithPre = converter01.convert(endsWith);
GenericRow row = GenericRow.of(fromString("aabc"));
GenericRow max = GenericRow.of(fromString("xasxwsa"));
GenericRow min = GenericRow.of(fromString("aaaaa"));
boolean test = endsWithPre.test(row);
Integer[] nullCount = {null};
boolean test1 = endsWithPre.test(10, min, max, new GenericArray(nullCount));
assertThat(test).isEqualTo(true);
assertThat(test1).isEqualTo(true);
}

@Test
Expand Down

0 comments on commit 9306dd8

Please sign in to comment.