From 186695295a2a48bc0ce96389f8f06f83f3105546 Mon Sep 17 00:00:00 2001 From: Runkang He Date: Fri, 14 Jul 2023 09:12:35 +0800 Subject: [PATCH] [CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark library) --- .../adapter/enumerable/RexImpTable.java | 2 ++ .../apache/calcite/runtime/SqlFunctions.java | 26 +++++++++++++++++++ .../calcite/sql/fun/SqlLibraryOperators.java | 8 ++++++ .../apache/calcite/util/BuiltInMethod.java | 1 + site/_docs/reference.md | 1 + .../apache/calcite/test/SqlOperatorTest.java | 25 ++++++++++++++++++ 6 files changed, 63 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index de84ea7d7137..920462bf4ce2 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -174,6 +174,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME; @@ -586,6 +587,7 @@ Builder populate() { defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method, BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method, BuiltInMethod.REGEXP_INSTR5.method); + defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.STRICT); map.put(TRIM, new TrimImplementor()); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index 
1b659d6cb388..e1f93ecf8bd8 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -1076,6 +1076,32 @@ public static int levenshtein(String string1, String string2) { return LEVENSHTEIN_DISTANCE.apply(string1, string2); } + /** SQL FIND_IN_SET(string, stringArray) function; returns a 1-based index, or 0 if absent. */ + public static int findInSet(String matchStr, String textStr) { + if (matchStr.contains(",")) { /* an element can never contain the delimiter */ + return 0; + } + int textStrLen = textStr.length(); + int matchStrLen = matchStr.length(); + int n = 1; /* 1-based index of the element being scanned */ + int lastComma = -1; /* index of the previous comma; -1 before the first element */ + for (int i = 0; i < textStrLen; i++) { + if (textStr.charAt(i) == ',') { /* element n ends just before i */ + if (i - (lastComma + 1) == matchStrLen /* cheap length test before copying */ + && textStr.substring(lastComma + 1, i).equals(matchStr)) { + return n; + } + lastComma = i; + n++; + } + } /* the last element has no trailing comma, so test it separately */ + if (textStrLen - (lastComma + 1) == matchStrLen + && textStr.substring(lastComma + 1, textStrLen).equals(matchStr)) { + return n; + } + return 0; + } + /** SQL ASCII(string) function. */ public static int ascii(String s) { return s.isEmpty() diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index f850ab1cc06d..284b2220f7a5 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -405,6 +405,14 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, OperandTypes.STRING_STRING_OPTIONAL_STRING, SqlFunctionCategory.STRING); + /** The "FIND_IN_SET(string, stringArray)" function. */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction FIND_IN_SET = + SqlBasicFunction.create("FIND_IN_SET", + ReturnTypes.INTEGER_NULLABLE, + OperandTypes.STRING_STRING, + SqlFunctionCategory.STRING); + /** The "GREATEST(value, value)" function. 
*/ @LibraryOperator(libraries = {BIG_QUERY, ORACLE}) public static final SqlFunction GREATEST = diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index 4652ca6bab87..a38b706fb0e3 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -371,6 +371,7 @@ public enum BuiltInMethod { DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class), REVERSE(SqlFunctions.class, "reverse", String.class), LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class), + FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class), LEFT(SqlFunctions.class, "left", String.class, int.class), RIGHT(SqlFunctions.class, "right", String.class, int.class), TO_BASE64(SqlFunctions.class, "toBase64", String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 857445d27f4e..15020cceec7c 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2729,6 +2729,7 @@ BigQuery's type system uses confusingly different names for types and functions: | o | EXTRACT(xml, xpath, [, namespaces ]) | Returns the XML fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression | o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression. 
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression. +| h s | FIND_IN_SET(string, stringArray) | Returns the index (1-based) of the given *string* in the comma-delimited *stringArray*. Returns 0, if the given *string* was not found or if *string* contains a comma | b | FLOOR(value) | Similar to standard `FLOOR(value)` except if *value* is an integer type, the return type is a double | b | FORMAT_DATE(string, date) | Formats *date* according to the specified format *string* | b | FORMAT_DATETIME(string, timestamp) | Formats *timestamp* according to the specified format *string* diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index e4b774ac59cd..aba7e489d7d1 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4573,6 +4573,31 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) { f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); } + /** Tests the {@code FIND_IN_SET} function (Hive, Spark). */ @Test void testFindInSetFunc() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FIND_IN_SET); + f0.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^", + "No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)", + false); + final Consumer<SqlOperatorFixture> consumer = f -> { + f.checkString("find_in_set('ab', 'abc,b,ab,c,def')", + "3", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')", + "4", "INTEGER NOT NULL"); + f.checkString("find_in_set('def', ',,,ab,abc,c,def')", + "7", "INTEGER NOT NULL"); + f.checkString("find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')", + "5", "INTEGER NOT NULL"); + f.checkString("find_in_set('acd', ',,,ab,abc,c,def')", + "0", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab,', 'abc,b,ab,c,def')", + "0", "INTEGER NOT
NULL"); + f.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')"); + f.checkNull("find_in_set('ab', cast(null as varchar))"); + f.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))"); + }; + f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); + } + @Test void testIfFunc() { final SqlOperatorFixture f = fixture(); checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));