diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index de84ea7d7137..920462bf4ce2 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -174,6 +174,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME; @@ -586,6 +587,7 @@ Builder populate() { defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method, BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method, BuiltInMethod.REGEXP_INSTR5.method); + defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.STRICT); map.put(TRIM, new TrimImplementor()); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index 1b659d6cb388..ea207789f3dc 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -1076,6 +1076,32 @@ public static int levenshtein(String string1, String string2) { return LEVENSHTEIN_DISTANCE.apply(string1, string2); } + /** SQL FIND_IN_SET(string, stringArray) function. 
*/ + public static int findInSet(String matchStr, String textStr) { + if (matchStr.contains(",")) { /* a needle containing the delimiter can never equal a single element */ + return 0; + } + int textStrLen = textStr.length(); + int matchStrLen = matchStr.length(); + int n = 1; /* 1-based ordinal of the element currently being scanned */ + int lastComma = -1; /* index of the most recent comma; -1 until one is seen */ + for (int i = 0; i < textStrLen; i++) { + if (textStr.charAt(i) == ',') { /* a comma closes the element that starts at lastComma + 1 */ + if (i - (lastComma + 1) == matchStrLen /* length check first, so substring is only built for same-length elements */ + && textStr.substring(lastComma + 1, i).equals(matchStr)) { + return n; + } + lastComma = i; + n++; + } + } + if (textStrLen - (lastComma + 1) == matchStrLen /* the final element has no trailing comma, so it is compared after the loop */ + && textStr.substring(lastComma + 1, textStrLen).equals(matchStr)) { + return n; + } + return 0; /* no element matched */ + } + /** SQL ASCII(string) function. */ public static int ascii(String s) { return s.isEmpty() diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index f850ab1cc06d..284b2220f7a5 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -405,6 +405,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, OperandTypes.STRING_STRING_OPTIONAL_STRING, SqlFunctionCategory.STRING); + /** The "FIND_IN_SET(string, stringArray)" function. */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction FIND_IN_SET = + SqlBasicFunction.create("FIND_IN_SET", + ReturnTypes.INTEGER_NULLABLE, + OperandTypes.STRING_STRING, + SqlFunctionCategory.STRING); + /** The "GREATEST(value, value)" function. 
*/ @LibraryOperator(libraries = {BIG_QUERY, ORACLE}) public static final SqlFunction GREATEST = diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index 4652ca6bab87..a38b706fb0e3 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -371,6 +371,7 @@ public enum BuiltInMethod { DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class), REVERSE(SqlFunctions.class, "reverse", String.class), LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class), + FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class), LEFT(SqlFunctions.class, "left", String.class, int.class), RIGHT(SqlFunctions.class, "right", String.class, int.class), TO_BASE64(SqlFunctions.class, "toBase64", String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 857445d27f4e..eb3445e85ef5 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2736,6 +2736,7 @@ BigQuery's type system uses confusingly different names for types and functions: | h s | FORMAT_NUMBER(value, format) | Formats the number *value* to MySQL's FORMAT *format*, like '#,###,###.##0.00' | b | FORMAT_TIME(string, time) | Formats *time* according to the specified format *string* | b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string* +| h s | FIND_IN_SET(string, stringArray) | Returns the index (1-based) of the given *string* in the comma-delimited *stringArray*. Returns 0 if the given *string* was not found or if *string* contains a comma | s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)` | b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions | b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise diff --git 
a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index e4b774ac59cd..aba7e489d7d1 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4573,6 +4573,32 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) { f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); } + /** Tests the {@code FIND_IN_SET} function from Hive and Spark. */ + @Test void testFindInSetFunc() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FIND_IN_SET); + f0.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^", + "No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)", + false); + final Consumer<SqlOperatorFixture> consumer = f -> { + f.checkString("find_in_set('ab', 'abc,b,ab,c,def')", + "3", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')", + "4", "INTEGER NOT NULL"); + f.checkString("find_in_set('def', ',,,ab,abc,c,def')", + "7", "INTEGER NOT NULL"); + f.checkString("find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')", + "5", "INTEGER NOT NULL"); + f.checkString("find_in_set('acd', ',,,ab,abc,c,def')", + "0", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab,', 'abc,b,ab,c,def')", + "0", "INTEGER NOT NULL"); + f.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')"); + f.checkNull("find_in_set('ab', cast(null as varchar))"); + f.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))"); + }; + f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); + } + @Test void testIfFunc() { final SqlOperatorFixture f = fixture(); checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));