Skip to content

Commit

Permalink
[CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark li…
Browse files Browse the repository at this point in the history
…braries)
  • Loading branch information
herunkang2018 committed Oct 31, 2023
1 parent c5f3b8d commit e81547f
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FACTORIAL;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME;
Expand Down Expand Up @@ -598,6 +599,7 @@ Builder populate() {
defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method,
BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method,
BuiltInMethod.REGEXP_INSTR5.method);
defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.ANY);

map.put(TRIM, new TrimImplementor());

Expand Down
25 changes: 25 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@
@SuppressWarnings("UnnecessaryUnboxing")
@Deterministic
public class SqlFunctions {
private static final String COMMA_DELIMITER = ",";

@SuppressWarnings("unused")
private static final DecimalFormat DOUBLE_FORMAT =
NumberUtil.decimalFormat("0.0E0");
Expand Down Expand Up @@ -1142,6 +1144,29 @@ public static int levenshtein(String string1, String string2) {
return LEVENSHTEIN_DISTANCE.apply(string1, string2);
}

/** SQL FIND_IN_SET(matchStr, textStr) function.
 *
 * <p>Returns the index (1-based) of the first element equal to
 * {@code matchStr} in the comma-delimited list {@code textStr}.
 * Returns 0 if {@code matchStr} is not found or if {@code matchStr}
 * contains a comma (such a value can never be a single list element).
 * Returns null if either argument is null.
 *
 * <p>Empty elements are significant wherever they occur, including at the
 * end of the list: {@code findInSet("", "a,b,")} returns 3.
 *
 * @param matchStr the element to look for; may be null
 * @param textStr the comma-delimited list to search; may be null
 * @return the 1-based position of the first match, 0 if there is no match,
 *     or null if either argument is null */
public static @Nullable Integer findInSet(
    @Nullable String matchStr,
    @Nullable String textStr) {
  if (matchStr == null || textStr == null) {
    return null;
  }
  if (matchStr.contains(COMMA_DELIMITER)) {
    return 0;
  }
  // A negative limit makes split() keep trailing empty strings. With the
  // default limit of 0 they are discarded, so "a,b," would be seen as two
  // elements and an empty matchStr could never match the last one, even
  // though it matches a leading or middle empty element.
  final String[] splits = textStr.split(COMMA_DELIMITER, -1);
  for (int i = 0; i < splits.length; i++) {
    if (matchStr.equals(splits[i])) {
      return i + 1;
    }
  }
  return 0;
}

/** SQL ASCII(string) function. */
public static int ascii(String s) {
return s.isEmpty()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_STRING_OPTIONAL_STRING,
SqlFunctionCategory.STRING);

/** The "FIND_IN_SET(matchStr, textStr)" function.
 *
 * <p>Returns the index (1-based) of {@code matchStr} in the comma-delimited
 * list {@code textStr}, or 0 if it is not found or if {@code matchStr}
 * contains a comma.
 *
 * <p>Enabled in the Hive and Spark libraries. Declared with two string
 * operands ({@code OperandTypes.STRING_STRING}) and a return type of
 * {@code ReturnTypes.INTEGER_NULLABLE}. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction FIND_IN_SET =
SqlBasicFunction.create("FIND_IN_SET",
ReturnTypes.INTEGER_NULLABLE,
OperandTypes.STRING_STRING,
SqlFunctionCategory.STRING);

/** The "GREATEST(value, value)" function. */
@LibraryOperator(libraries = {BIG_QUERY, ORACLE})
public static final SqlFunction GREATEST =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ public enum BuiltInMethod {
DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
REVERSE(SqlFunctions.class, "reverse", String.class),
LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class),
FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class),
LEFT(SqlFunctions.class, "left", String.class, int.class),
RIGHT(SqlFunctions.class, "right", String.class, int.class),
TO_BASE64(SqlFunctions.class, "toBase64", String.class),
Expand Down
1 change: 1 addition & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2731,6 +2731,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
| h s | FACTORIAL(integer) | Returns the factorial of *integer*, the range of *integer* is [0, 20]. Otherwise, returns NULL
| h s | FIND_IN_SET(matchStr, textStr) | Returns the index (1-based) of the given *matchStr* in the comma-delimited *textStr*. Returns 0 if *matchStr* is not found or if it contains a comma. For example, `FIND_IN_SET('bc', 'a,bc,def')` returns 2
| b | FLOOR(value) | Similar to standard `FLOOR(value)` except if *value* is an integer type, the return type is a double
| b | FORMAT_DATE(string, date) | Formats *date* according to the specified format *string*
| b | FORMAT_DATETIME(string, timestamp) | Formats *timestamp* according to the specified format *string*
Expand Down
25 changes: 25 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4701,6 +4701,31 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

/** Tests the {@code FIND_IN_SET} function: it must not resolve in the
 * default fixture, and must return the expected index, 0, or NULL when the
 * Hive or Spark library is enabled. */
@Test void testFindInSetFunc() {
  final SqlOperatorFixture base = fixture().setFor(SqlLibraryOperators.FIND_IN_SET);

  // With no library enabled, the function signature is unknown.
  base.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^",
      "No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)",
      false);

  // Every non-null check below expects the same result type.
  final String intType = "INTEGER NOT NULL";
  final Consumer<SqlOperatorFixture> checks = lib -> {
    // Matches, including lists that start with empty elements.
    lib.checkString("find_in_set('ab', 'abc,b,ab,c,def')", "3", intType);
    lib.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')", "4", intType);
    lib.checkString("find_in_set('def', ',,,ab,abc,c,def')", "7", intType);
    lib.checkString(
        "find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')",
        "5", intType);

    // No match, and a match string that contains a comma.
    lib.checkString("find_in_set('acd', ',,,ab,abc,c,def')", "0", intType);
    lib.checkString("find_in_set('ab,', 'abc,b,ab,c,def')", "0", intType);

    // A NULL in either position yields NULL.
    lib.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')");
    lib.checkNull("find_in_set('ab', cast(null as varchar))");
    lib.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))");
  };
  base.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), checks);
}

@Test void testIfFunc() {
final SqlOperatorFixture f = fixture();
checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));
Expand Down

0 comments on commit e81547f

Please sign in to comment.