Skip to content

Commit

Permalink
[CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark li…
Browse files Browse the repository at this point in the history
…braries)
  • Loading branch information
herunkang2018 committed Oct 29, 2023
1 parent 008c553 commit c4c593d
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME;
Expand Down Expand Up @@ -586,6 +587,7 @@ Builder populate() {
defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method,
BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method,
BuiltInMethod.REGEXP_INSTR5.method);
defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.ANY);

map.put(TRIM, new TrimImplementor());

Expand Down
37 changes: 37 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@
@SuppressWarnings("UnnecessaryUnboxing")
@Deterministic
public class SqlFunctions {
private static final char COMMA_DELIMITER = ',';

@SuppressWarnings("unused")
private static final DecimalFormat DOUBLE_FORMAT =
NumberUtil.decimalFormat("0.0E0");
Expand Down Expand Up @@ -1076,6 +1078,41 @@ public static int levenshtein(String string1, String string2) {
return LEVENSHTEIN_DISTANCE.apply(string1, string2);
}

/** Implements the SQL {@code FIND_IN_SET(matchStr, textStr)} function.
 *
 * <p>Treats {@code textStr} as a comma-delimited list and returns the
 * 1-based position of the first element that is exactly equal to
 * {@code matchStr}. Returns 0 if no element matches or if
 * {@code matchStr} itself contains a comma, and null if either
 * argument is null. */
public static @Nullable Integer findInSet(
    @Nullable String matchStr,
    @Nullable String textStr) {
  if (matchStr == null || textStr == null) {
    return null;
  }
  // A value containing the delimiter can never equal a single element.
  if (matchStr.indexOf(COMMA_DELIMITER) >= 0) {
    return 0;
  }
  final int wantedLen = matchStr.length();
  int elementStart = 0;
  for (int position = 1;; position++) {
    final int comma = textStr.indexOf(COMMA_DELIMITER, elementStart);
    // The last element runs to the end of the text rather than to a comma.
    final int elementEnd = comma < 0 ? textStr.length() : comma;
    // Compare lengths first so that a longer element sharing a prefix
    // with matchStr is not reported as a match.
    if (elementEnd - elementStart == wantedLen
        && textStr.regionMatches(elementStart, matchStr, 0, wantedLen)) {
      return position;
    }
    if (comma < 0) {
      // Every element has been examined without finding a match.
      return 0;
    }
    elementStart = comma + 1;
  }
}

/** SQL ASCII(string) function. */
public static int ascii(String s) {
return s.isEmpty()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_STRING_OPTIONAL_STRING,
SqlFunctionCategory.STRING);

/** The "FIND_IN_SET(matchStr, textStr)" function; returns the 1-based
 * index of {@code matchStr} in the comma-delimited list {@code textStr},
 * or 0 if {@code matchStr} is not found or contains a comma.
 *
 * <p>Takes two string operands and returns a nullable INTEGER. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction FIND_IN_SET =
SqlBasicFunction.create("FIND_IN_SET",
ReturnTypes.INTEGER_NULLABLE,
OperandTypes.STRING_STRING,
SqlFunctionCategory.STRING);

/** The "GREATEST(value, value)" function. */
@LibraryOperator(libraries = {BIG_QUERY, ORACLE})
public static final SqlFunction GREATEST =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ public enum BuiltInMethod {
DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
REVERSE(SqlFunctions.class, "reverse", String.class),
LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class),
FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class),
LEFT(SqlFunctions.class, "left", String.class, int.class),
RIGHT(SqlFunctions.class, "right", String.class, int.class),
TO_BASE64(SqlFunctions.class, "toBase64", String.class),
Expand Down
1 change: 1 addition & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2729,6 +2729,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| o | EXTRACT(xml, xpath, [, namespaces ]) | Returns the XML fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
| m | EXTRACTVALUE(xml, xpathExpr) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
| h s | FIND_IN_SET(matchStr, textStr) | Returns the index (1-based) of the given *matchStr* in the comma-delimited *textStr*. Returns 0 if the given *matchStr* is not found or if the *matchStr* contains a comma. For example, `FIND_IN_SET('bc', 'a,bc,def')` returns 2
| b | FLOOR(value) | Similar to standard `FLOOR(value)` except if *value* is an integer type, the return type is a double
| b | FORMAT_DATE(string, date) | Formats *date* according to the specified format *string*
| b | FORMAT_DATETIME(string, timestamp) | Formats *timestamp* according to the specified format *string*
Expand Down
25 changes: 25 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4573,6 +4573,31 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

/** Tests the {@code FIND_IN_SET} function: it must be rejected when no
 * library is enabled, and must return the expected 1-based index, 0, or
 * null under the Hive and Spark libraries. */
@Test void testFindInSetFunc() {
  final SqlOperatorFixture baseFixture =
      fixture().setFor(SqlLibraryOperators.FIND_IN_SET);

  // Without an enabling library the function must not resolve.
  baseFixture.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^",
      "No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)",
      false);

  final String intNotNull = "INTEGER NOT NULL";
  final Consumer<SqlOperatorFixture> libraryChecks = fx -> {
    // Matches, including lists with leading empty elements.
    fx.checkString("find_in_set('ab', 'abc,b,ab,c,def')",
        "3", intNotNull);
    fx.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')",
        "4", intNotNull);
    fx.checkString("find_in_set('def', ',,,ab,abc,c,def')",
        "7", intNotNull);
    // Non-ASCII element.
    fx.checkString("find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')",
        "5", intNotNull);
    // No matching element, and a search value that contains a comma.
    fx.checkString("find_in_set('acd', ',,,ab,abc,c,def')",
        "0", intNotNull);
    fx.checkString("find_in_set('ab,', 'abc,b,ab,c,def')",
        "0", intNotNull);
    // A null in either operand yields null.
    fx.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')");
    fx.checkNull("find_in_set('ab', cast(null as varchar))");
    fx.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))");
  };
  baseFixture.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), libraryChecks);
}

@Test void testIfFunc() {
final SqlOperatorFixture f = fixture();
checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));
Expand Down

0 comments on commit c4c593d

Please sign in to comment.