From 3c78776037c5670588944da8bda49198c653f497 Mon Sep 17 00:00:00 2001 From: Runkang He Date: Sun, 22 Oct 2023 09:02:37 +0800 Subject: [PATCH] [CALCITE-6065] Add HEX and UNHEX functions (enabled in Hive and Spark libraries) --- .../adapter/enumerable/RexImpTable.java | 4 + .../apache/calcite/runtime/SqlFunctions.java | 33 +++++++- .../calcite/sql/fun/SqlLibraryOperators.java | 18 +++- .../apache/calcite/util/BuiltInMethod.java | 2 + site/_docs/reference.md | 6 +- .../apache/calcite/test/SqlOperatorTest.java | 82 ++++++++++++++++++- 6 files changed, 140 insertions(+), 5 deletions(-) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index b3a5565eb5de..4adc7210fae8 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -187,6 +187,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_HEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.GETBIT; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.HEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.ILIKE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_INF; import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_NAN; @@ -272,6 +273,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRANSLATE3; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRUNC; import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRY_CAST; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNHEX; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS; @@ -523,7 +525,9 @@ 
Builder populate() {
       defineMethod(FROM_BASE64, BuiltInMethod.FROM_BASE64.method, NullPolicy.STRICT);
       defineMethod(TO_BASE32, BuiltInMethod.TO_BASE32.method, NullPolicy.STRICT);
       defineMethod(FROM_BASE32, BuiltInMethod.FROM_BASE32.method, NullPolicy.STRICT);
+      defineMethod(HEX, BuiltInMethod.HEX.method, NullPolicy.STRICT);
       defineMethod(TO_HEX, BuiltInMethod.TO_HEX.method, NullPolicy.STRICT);
+      defineMethod(UNHEX, BuiltInMethod.UNHEX.method, NullPolicy.STRICT);
       defineMethod(FROM_HEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT);
       defineMethod(MD5, BuiltInMethod.MD5.method, NullPolicy.STRICT);
       defineMethod(SHA1, BuiltInMethod.SHA1.method, NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 8a0e04c6d1ce..59741c10bab1 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -313,18 +313,49 @@ public static ByteString fromBase32(String base32) {
   /** SQL FROM_HEX(varchar) function. */
   public static ByteString fromHex(String hex) {
     try {
-      return new ByteString(Hex.decodeHex(hex));
+      return fromHexHelper(hex);
     } catch (DecoderException e) {
       throw new IllegalArgumentException(
           String.format(Locale.ROOT, "Failed to decode hex string: %s", hex), e);
     }
   }
 
+  /** SQL UNHEX(varchar) function; returns null if {@code hex} is not valid hexadecimal. */
+  public static @Nullable ByteString unHex(String hex) {
+    try {
+      return fromHexHelper(hex);
+    } catch (DecoderException ignored) { // deliberate: UNHEX yields NULL where FROM_HEX throws
+      return null;
+    }
+  }
+
+  private static ByteString fromHexHelper(String hex) throws DecoderException {
+    if (hex.length() % 2 == 1) {
+      hex = "0" + hex; // left-pad odd-length input with a zero nibble before decoding
+    }
+    return new ByteString(Hex.decodeHex(hex));
+  }
+
   /** SQL TO_HEX(binary) function. */
   public static String toHex(ByteString byteString) {
     return Hex.encodeHexString(byteString.getBytes());
   }
 
+  /** SQL HEX(binary) function; upper-case variant of {@link #toHex(ByteString)}. */
+  public static String hex(ByteString value) {
+    return toHex(value).toUpperCase(Locale.ROOT);
+  }
+
+  /** SQL HEX(bigint) function; negative values are rendered as unsigned 64-bit hex. */
+  public static String hex(long value) {
+    return Long.toHexString(value).toUpperCase(Locale.ROOT);
+  }
+
+  /** SQL HEX(varchar) function; encodes the UTF-8 bytes of {@code value} in upper case. */
+  public static String hex(String value) {
+    return Hex.encodeHexString(value.getBytes(UTF_8)).toUpperCase(Locale.ROOT);
+  }
+
   /** SQL MD5(string) function. */
   public static String md5(String string) {
     return DigestUtils.md5Hex(string.getBytes(UTF_8));
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index c123c449cd52..d5400853c03b 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -1454,12 +1454,18 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
    * The "FROM_HEX(varchar)" function; converts a hexadecimal-encoded {@code varchar} into bytes.
    */
   @LibraryOperator(libraries = {BIG_QUERY})
-  public static final SqlFunction FROM_HEX =
+  public static final SqlBasicFunction FROM_HEX =
       SqlBasicFunction.create("FROM_HEX",
           ReturnTypes.VARBINARY_NULLABLE,
           OperandTypes.CHARACTER,
           SqlFunctionCategory.STRING);
 
+  /** The "UNHEX(varchar)" function, Hive and Spark's counterpart to
+   * {@link #FROM_HEX}; returns NULL rather than failing on invalid input. */
+  @LibraryOperator(libraries = {HIVE, SPARK})
+  public static final SqlFunction UNHEX =
+      FROM_HEX.withName("UNHEX");
+
   /**
    * The "TO_HEX(binary)" function; converts {@code binary} into a hexadecimal varchar.
    */
@@ -1470,6 +1476,16 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
           OperandTypes.BINARY,
           SqlFunctionCategory.STRING);
 
+  /**
+   * The "HEX(binary or bigint or varchar)" function.
+ */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction HEX = + SqlBasicFunction.create("HEX", + ReturnTypes.VARCHAR_NULLABLE, + OperandTypes.BINARY.or(OperandTypes.INTEGER).or(OperandTypes.CHARACTER), + SqlFunctionCategory.STRING); + /** The "FORMAT_NUMBER(value, decimalOrFormat)" function. */ @LibraryOperator(libraries = {HIVE, SPARK}) public static final SqlFunction FORMAT_NUMBER = diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index f0df06185201..2530e9d345c5 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -383,7 +383,9 @@ public enum BuiltInMethod { FROM_BASE64(SqlFunctions.class, "fromBase64", String.class), TO_BASE32(SqlFunctions.class, "toBase32", String.class), FROM_BASE32(SqlFunctions.class, "fromBase32", String.class), + HEX(SqlFunctions.class, "hex", ByteString.class), TO_HEX(SqlFunctions.class, "toHex", ByteString.class), + UNHEX(SqlFunctions.class, "unHex", String.class), FROM_HEX(SqlFunctions.class, "fromHex", String.class), MD5(SqlFunctions.class, "md5", String.class), SHA1(SqlFunctions.class, "sha1", String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index b2a2483f88de..5d621a9cdd73 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2739,6 +2739,9 @@ BigQuery's type system uses confusingly different names for types and functions: | b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string* | s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)` | b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions +| h s | HEX(binary) | Converts *binary* into a hexadecimal string. For example, hex(x'6162') returns '6162' +| h s | HEX(bigint) | Converts *bigint* into a shortened hexadecimal string without leading zeros. 
For example, hex(10) returns 'A' +| h s | HEX(string) | Converts *string* into a hexadecimal string. It converts each character of *string* into its hexadecimal representation. For example, hex('ab') returns '6162' | b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise | b | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)` | p | string1 ILIKE string2 [ ESCAPE string3 ] | Whether *string1* matches pattern *string2*, ignoring case (similar to `LIKE`) @@ -2768,7 +2771,7 @@ BigQuery's type system uses confusingly different names for types and functions: | m | TO_BASE64(string) | Converts the *string* to base-64 encoded form and returns a encoded string | b m | FROM_BASE64(string) | Returns the decoded result of a base-64 *string* as a string | b | TO_HEX(binary) | Converts *binary* into a hexadecimal varchar -| b | FROM_HEX(varchar) | Converts a hexadecimal-encoded *varchar* into bytes +| b | FROM_HEX(string) | Converts a hexadecimal-encoded *string* into bytes; throws if *string* is not a valid hexadecimal string | b o | LTRIM(string) | Returns *string* with all blanks removed from the start | s | MAP_CONCAT(map [, map]*) | Concatenates one or more maps. If any input argument is `NULL` the function returns `NULL`. Note that calcite is using the LAST_WIN strategy | s | MAP_ENTRIES(map) | Returns the entries of the *map* as an array, the order of the entries is not defined @@ -2852,6 +2855,7 @@ BigQuery's type system uses confusingly different names for types and functions: | b o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. 
Characters in *expr* that are not in *fromString* are not replaced | b | TRUNC(numeric1 [, numeric2 ]) | Truncates *numeric1* to optionally *numeric2* (if not specified 0) places right to the decimal point | q | TRY_CAST(value AS type) | Converts *value* to *type*, returning NULL if conversion fails +| h s | UNHEX(string) | Converts a hexadecimal-encoded *string* into bytes; returns NULL if *string* is not a valid hexadecimal string | b | UNIX_MICROS(timestamp) | Returns the number of microseconds since 1970-01-01 00:00:00 | b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 1970-01-01 00:00:00 | b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00 diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index bfc14daab707..9de1fc0fe6ad 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4520,6 +4520,76 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) { f.checkNull("to_hex(cast(null as varbinary))"); } + /** Test case for + * [CALCITE-6065] + * Add HEX and UNHEX functions (enabled in Hive and Spark libraries). 
+   */
+  @Test void testHex() {
+    final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.HEX);
+    f0.checkFails("^hex(x'')^",
+        "No match found for function signature HEX\\(<BINARY>\\)",
+        false);
+    final Consumer<SqlOperatorFixture> consumer = f -> {
+      // test with binary
+      f.checkString("hex(x'00010203AAEEEFFF')",
+          "00010203AAEEEFFF",
+          "VARCHAR NOT NULL");
+      f.checkString("hex(x'')", "", "VARCHAR NOT NULL");
+      f.checkNull("hex(cast(null as varbinary))");
+
+      // test with bigint
+      f.checkString("hex(0)", "0", "VARCHAR NOT NULL");
+      f.checkString("hex(17)",
+          "11",
+          "VARCHAR NOT NULL");
+      f.checkString("hex(1234567)", "12D687", "VARCHAR NOT NULL");
+      f.checkNull("hex(cast(null as bigint))");
+
+      // test with varchar
+      f.checkString("hex('abcDEF123')",
+          "616263444546313233",
+          "VARCHAR NOT NULL");
+      f.checkString("hex(_UTF8'\u4F60\u597D')",
+          "E4BDA0E5A5BD",
+          "VARCHAR NOT NULL");
+      f.checkString("hex('')", "", "VARCHAR NOT NULL");
+      f.checkNull("hex(cast(null as varchar))");
+    };
+    f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
+  }
+
+  /** Test case for
+   * [CALCITE-6065]
+   * Add HEX and UNHEX functions (enabled in Hive and Spark libraries).
+   */
+  @Test void testUnHex() {
+    final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.UNHEX);
+    f0.checkFails("^unhex('')^",
+        "No match found for function signature UNHEX\\(<CHARACTER>\\)",
+        false);
+    final Consumer<SqlOperatorFixture> consumer = f -> {
+      f.checkString("unhex('00010203aaeeefff')",
+          "00010203aaeeefff",
+          "VARBINARY NOT NULL");
+      f.checkString("unhex('00010203AAEEEFFF')",
+          "00010203aaeeefff",
+          "VARBINARY NOT NULL");
+      f.checkString("unhex('666f6f626172')",
+          "666f6f626172",
+          "VARBINARY NOT NULL");
+      f.checkString("unhex('666F6F626172')",
+          "666f6f626172",
+          "VARBINARY NOT NULL");
+      f.checkString("unhex('')", "", "VARBINARY NOT NULL");
+
+      // test for invalid hexadecimal varchar
+      f.checkNull("unhex('r')");
+
+      f.checkNull("unhex(cast(null as varchar))");
+    };
+    f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
+  }
+
   @Test void testFromHex() {
     final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FROM_HEX);
     f0.checkFails("^from_hex('')^",
@@ -4529,12 +4599,20 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
       f.checkString("from_hex('00010203aaeeefff')",
           "00010203aaeeefff",
           "VARBINARY NOT NULL");
-
+      f.checkString("from_hex('00010203AAEEEFFF')",
+          "00010203aaeeefff",
+          "VARBINARY NOT NULL");
       f.checkString("from_hex('666f6f626172')",
           "666f6f626172",
           "VARBINARY NOT NULL");
-
+      f.checkString("from_hex('666F6F626172')",
+          "666f6f626172",
+          "VARBINARY NOT NULL");
       f.checkString("from_hex('')", "", "VARBINARY NOT NULL");
+
+      // test for invalid hexadecimal varchar
+      f.checkFails("from_hex('r')", "Failed to decode hex string.*", true);
+
       f.checkNull("from_hex(cast(null as varchar))");
     }