Skip to content

Commit

Permalink
[CALCITE-6065] Add HEX and UNHEX functions (enabled in Hive and Spark…
Browse files Browse the repository at this point in the history
… libraries)
  • Loading branch information
herunkang2018 committed Oct 30, 2023
1 parent c83ac69 commit 3c78776
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_BASE64;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.FROM_HEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.GETBIT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.HEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.ILIKE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_INF;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.IS_NAN;
Expand Down Expand Up @@ -272,6 +273,7 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRANSLATE3;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRUNC;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.TRY_CAST;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNHEX;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_DATE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS;
Expand Down Expand Up @@ -523,7 +525,9 @@ Builder populate() {
defineMethod(FROM_BASE64, BuiltInMethod.FROM_BASE64.method, NullPolicy.STRICT);
defineMethod(TO_BASE32, BuiltInMethod.TO_BASE32.method, NullPolicy.STRICT);
defineMethod(FROM_BASE32, BuiltInMethod.FROM_BASE32.method, NullPolicy.STRICT);
defineMethod(HEX, BuiltInMethod.HEX.method, NullPolicy.STRICT);
defineMethod(TO_HEX, BuiltInMethod.TO_HEX.method, NullPolicy.STRICT);
defineMethod(UNHEX, BuiltInMethod.UNHEX.method, NullPolicy.STRICT);
defineMethod(FROM_HEX, BuiltInMethod.FROM_HEX.method, NullPolicy.STRICT);
defineMethod(MD5, BuiltInMethod.MD5.method, NullPolicy.STRICT);
defineMethod(SHA1, BuiltInMethod.SHA1.method, NullPolicy.STRICT);
Expand Down
33 changes: 32 additions & 1 deletion core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -313,18 +313,49 @@ public static ByteString fromBase32(String base32) {
/**
 * SQL FROM_HEX(varchar) function.
 *
 * <p>Decoding is delegated to {@code fromHexHelper}, which prefixes an
 * odd-length input with {@code '0'} before decoding it.
 *
 * @param hex hexadecimal-encoded string
 * @return the decoded bytes
 * @throws IllegalArgumentException if {@code hex} cannot be decoded; the
 *     underlying {@code DecoderException} is preserved as the cause
 */
public static ByteString fromHex(String hex) {
  try {
    // Single return path: go through the shared helper so that FROM_HEX and
    // UNHEX decode their input in exactly the same way.
    return fromHexHelper(hex);
  } catch (DecoderException e) {
    throw new IllegalArgumentException(
        String.format(Locale.ROOT, "Failed to decode hex string: %s", hex), e);
  }
}

/**
 * SQL UNHEX(varchar) function.
 *
 * <p>Uses the same decoding helper as FROM_HEX, but reports an undecodable
 * input by returning null instead of throwing.
 *
 * @param hex hexadecimal-encoded string
 * @return the decoded bytes, or null if decoding fails
 */
public static @Nullable ByteString unHex(String hex) {
  ByteString decoded;
  try {
    decoded = fromHexHelper(hex);
  } catch (DecoderException ignored) {
    // Deliberately not propagated: an undecodable input yields null.
    decoded = null;
  }
  return decoded;
}

/**
 * Decodes a hexadecimal string into bytes; shared by FROM_HEX and UNHEX.
 *
 * <p>An input with an odd number of characters is prefixed with {@code '0'}
 * before it is passed to the decoder.
 *
 * @param hex hexadecimal-encoded string
 * @return the decoded bytes
 * @throws DecoderException if the decoder rejects the (possibly padded) input
 */
private static ByteString fromHexHelper(String hex) throws DecoderException {
  final boolean oddLength = hex.length() % 2 == 1;
  final String padded = oddLength ? "0" + hex : hex;
  final byte[] bytes = Hex.decodeHex(padded);
  return new ByteString(bytes);
}

/**
 * SQL TO_HEX(binary) function.
 *
 * @param byteString binary value to encode
 * @return the hexadecimal encoding of the value's bytes, as produced by
 *     {@code Hex.encodeHexString}
 */
public static String toHex(ByteString byteString) {
  final byte[] rawBytes = byteString.getBytes();
  return Hex.encodeHexString(rawBytes);
}

/**
 * SQL HEX(binary) function.
 *
 * @param value binary value to encode
 * @return the result of {@code toHex(value)}, converted to upper case
 */
public static String hex(ByteString value) {
  final String encoded = toHex(value);
  return encoded.toUpperCase(Locale.ROOT);
}

/**
 * SQL HEX(bigint) function.
 *
 * @param value integer value to encode
 * @return the upper-case form of {@code Long.toHexString(value)}; there are no
 *     leading zeros, so {@code 0} yields {@code "0"}
 */
public static String hex(long value) {
  final String digits = Long.toHexString(value);
  return digits.toUpperCase(Locale.ROOT);
}

/**
 * SQL HEX(varchar) function.
 *
 * @param value string to encode
 * @return the upper-case hexadecimal encoding of the UTF-8 bytes of
 *     {@code value}
 */
public static String hex(String value) {
  final byte[] utf8Bytes = value.getBytes(UTF_8);
  final String encoded = Hex.encodeHexString(utf8Bytes);
  return encoded.toUpperCase(Locale.ROOT);
}

/** SQL MD5(string) function. */
public static String md5(String string) {
return DigestUtils.md5Hex(string.getBytes(UTF_8));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1454,12 +1454,18 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
* The "FROM_HEX(varchar)" function; converts a hexadecimal-encoded {@code varchar} into bytes.
*/
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction FROM_HEX =
public static final SqlBasicFunction FROM_HEX =
SqlBasicFunction.create("FROM_HEX",
ReturnTypes.VARBINARY_NULLABLE,
OperandTypes.CHARACTER,
SqlFunctionCategory.STRING);

/** The "UNHEX(varchar)" function, Hive and Spark's counterpart of
 * {@link #FROM_HEX}; converts a hexadecimal-encoded {@code varchar} into bytes.
 *
 * <p>The operator is a renamed copy of {@link #FROM_HEX}, so it has the same
 * operand and return type inference. The runtime behavior differs: the UNHEX
 * implementation returns NULL for an input that cannot be decoded, whereas
 * FROM_HEX throws.
 *
 * <p>NOTE(review): because the runtime can return NULL for a non-null but
 * invalid argument, the return type inherited from FROM_HEX may be too strict
 * (the operator tests expect "VARBINARY NOT NULL" for literal arguments);
 * confirm whether a force-nullable return type is needed. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction UNHEX =
FROM_HEX.withName("UNHEX");

/**
* The "TO_HEX(binary)" function; converts {@code binary} into a hexadecimal varchar.
*/
Expand All @@ -1470,6 +1476,16 @@ private static RelDataType deriveTypeMapFromEntries(SqlOperatorBinding opBinding
OperandTypes.BINARY,
SqlFunctionCategory.STRING);

/**
 * The "HEX(binary or bigint or varchar)" function; converts its argument into
 * a hexadecimal {@code varchar}.
 *
 * <p>Enabled in the Hive and Spark libraries. Accepts a single operand that is
 * binary, integer or character typed; the operand checker below is the union
 * of those three families.
 */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction HEX =
SqlBasicFunction.create("HEX",
ReturnTypes.VARCHAR_NULLABLE,
OperandTypes.BINARY.or(OperandTypes.INTEGER).or(OperandTypes.CHARACTER),
SqlFunctionCategory.STRING);

/** The "FORMAT_NUMBER(value, decimalOrFormat)" function. */
@LibraryOperator(libraries = {HIVE, SPARK})
public static final SqlFunction FORMAT_NUMBER =
Expand Down
2 changes: 2 additions & 0 deletions core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,9 @@ public enum BuiltInMethod {
FROM_BASE64(SqlFunctions.class, "fromBase64", String.class),
TO_BASE32(SqlFunctions.class, "toBase32", String.class),
FROM_BASE32(SqlFunctions.class, "fromBase32", String.class),
HEX(SqlFunctions.class, "hex", ByteString.class),
TO_HEX(SqlFunctions.class, "toHex", ByteString.class),
UNHEX(SqlFunctions.class, "unHex", String.class),
FROM_HEX(SqlFunctions.class, "fromHex", String.class),
MD5(SqlFunctions.class, "md5", String.class),
SHA1(SqlFunctions.class, "sha1", String.class),
Expand Down
6 changes: 5 additions & 1 deletion site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2739,6 +2739,9 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | FORMAT_TIMESTAMP(string timestamp) | Formats *timestamp* according to the specified format *string*
| s | GETBIT(value, position) | Equivalent to `BIT_GET(value, position)`
| b o | GREATEST(expr [, expr ]*) | Returns the greatest of the expressions
| h s | HEX(binary) | Converts *binary* into a hexadecimal string. For example, hex(x'6162') returns '6162'
| h s | HEX(bigint) | Converts *bigint* into a shortened hexadecimal string without leading zeros. For example, hex(10) returns 'A'
| h s | HEX(string) | Converts *string* into a hexadecimal string. It converts each character of *string* into its hexadecimal representation. For example, hex('ab') returns '6162'
| b h s | IF(condition, value1, value2) | Returns *value1* if *condition* is TRUE, *value2* otherwise
| b | IFNULL(value1, value2) | Equivalent to `NVL(value1, value2)`
| p | string1 ILIKE string2 [ ESCAPE string3 ] | Whether *string1* matches pattern *string2*, ignoring case (similar to `LIKE`)
Expand Down Expand Up @@ -2768,7 +2771,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| m | TO_BASE64(string) | Converts the *string* to base-64 encoded form and returns an encoded string
| b m | FROM_BASE64(string) | Returns the decoded result of a base-64 *string* as a string
| b | TO_HEX(binary) | Converts *binary* into a hexadecimal varchar
| b | FROM_HEX(varchar) | Converts a hexadecimal-encoded *varchar* into bytes
| b | FROM_HEX(string) | Converts a hexadecimal-encoded *string* into bytes; throws if *string* is not a valid hexadecimal string
| b o | LTRIM(string) | Returns *string* with all blanks removed from the start
| s | MAP_CONCAT(map [, map]*) | Concatenates one or more maps. If any input argument is `NULL` the function returns `NULL`. Note that Calcite uses the LAST_WIN strategy
| s | MAP_ENTRIES(map) | Returns the entries of the *map* as an array, the order of the entries is not defined
Expand Down Expand Up @@ -2852,6 +2855,7 @@ BigQuery's type system uses confusingly different names for types and functions:
| b o p | TRANSLATE(expr, fromString, toString) | Returns *expr* with all occurrences of each character in *fromString* replaced by its corresponding character in *toString*. Characters in *expr* that are not in *fromString* are not replaced
| b | TRUNC(numeric1 [, numeric2 ]) | Truncates *numeric1* to optionally *numeric2* (if not specified 0) places to the right of the decimal point
| q | TRY_CAST(value AS type) | Converts *value* to *type*, returning NULL if conversion fails
| h s | UNHEX(string) | Converts a hexadecimal-encoded *string* into bytes; returns NULL if *string* is not a valid hexadecimal string
| b | UNIX_MICROS(timestamp) | Returns the number of microseconds since 1970-01-01 00:00:00
| b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 1970-01-01 00:00:00
| b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00
Expand Down
82 changes: 80 additions & 2 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4520,6 +4520,76 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
f.checkNull("to_hex(cast(null as varbinary))");
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6065">[CALCITE-6065]
 * Add HEX and UNHEX functions (enabled in Hive and Spark libraries)</a>.
 *
 * <p>Exercises the binary, bigint and varchar operand forms of HEX, each
 * including a NULL argument of the matching type.
 */
@Test void testHex() {
  final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.HEX);
  // Without the Hive or Spark library enabled, HEX must not resolve.
  f0.checkFails("^hex(x'')^",
      "No match found for function signature HEX\\(<BINARY>\\)",
      false);
  final Consumer<SqlOperatorFixture> consumer = f -> {
    // test with binary
    f.checkString("hex(x'00010203AAEEEFFF')",
        "00010203AAEEEFFF",
        "VARCHAR NOT NULL");
    f.checkString("hex(x'')", "", "VARCHAR NOT NULL");
    f.checkNull("hex(cast(null as varbinary))");

    // test with bigint
    f.checkString("hex(0)", "0", "VARCHAR NOT NULL");
    f.checkString("hex(17)",
        "11",
        "VARCHAR NOT NULL");
    f.checkString("hex(1234567)", "12D687", "VARCHAR NOT NULL");
    // NULL of the operand type under test (was a copy of the varbinary check)
    f.checkNull("hex(cast(null as bigint))");

    // test with varchar
    f.checkString("hex('abcDEF123')",
        "616263444546313233",
        "VARCHAR NOT NULL");
    f.checkString("hex(_UTF8'\u4F60\u597D')",
        "E4BDA0E5A5BD",
        "VARCHAR NOT NULL");
    f.checkString("hex('')", "", "VARCHAR NOT NULL");
    // NULL of the operand type under test (was a copy of the varbinary check)
    f.checkNull("hex(cast(null as varchar))");
  };
  f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6065">[CALCITE-6065]
 * Add HEX and UNHEX functions (enabled in Hive and Spark libraries)</a>.
 *
 * <p>Covers lower- and upper-case input, the empty string, odd-length input,
 * invalid input (which yields NULL) and a NULL argument.
 */
@Test void testUnHex() {
  final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.UNHEX);
  // Without the Hive or Spark library enabled, UNHEX must not resolve.
  f0.checkFails("^unhex('')^",
      "No match found for function signature UNHEX\\(<CHARACTER>\\)",
      false);
  final Consumer<SqlOperatorFixture> consumer = f -> {
    f.checkString("unhex('00010203aaeeefff')",
        "00010203aaeeefff",
        "VARBINARY NOT NULL");
    f.checkString("unhex('00010203AAEEEFFF')",
        "00010203aaeeefff",
        "VARBINARY NOT NULL");
    f.checkString("unhex('666f6f626172')",
        "666f6f626172",
        "VARBINARY NOT NULL");
    f.checkString("unhex('666F6F626172')",
        "666f6f626172",
        "VARBINARY NOT NULL");
    f.checkString("unhex('')", "", "VARBINARY NOT NULL");

    // odd-length input is decoded as if it had a leading '0'
    f.checkString("unhex('abc')", "0abc", "VARBINARY NOT NULL");

    // test for invalid hexadecimal varchar
    f.checkNull("unhex('r')");

    f.checkNull("unhex(cast(null as varchar))");
  };
  f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
}

@Test void testFromHex() {
final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FROM_HEX);
f0.checkFails("^from_hex('')^",
Expand All @@ -4529,12 +4599,20 @@ void testBitGetFunc(SqlOperatorFixture f, String functionName) {
f.checkString("from_hex('00010203aaeeefff')",
"00010203aaeeefff",
"VARBINARY NOT NULL");

f.checkString("from_hex('00010203AAEEEFFF')",
"00010203aaeeefff",
"VARBINARY NOT NULL");
f.checkString("from_hex('666f6f626172')",
"666f6f626172",
"VARBINARY NOT NULL");

f.checkString("from_hex('666F6F626172')",
"666f6f626172",
"VARBINARY NOT NULL");
f.checkString("from_hex('')", "", "VARBINARY NOT NULL");

// test for invalid hexadecimal varchar
f.checkFails("from_hex('r')", "Failed to decode hex string.*", true);

f.checkNull("from_hex(cast(null as varchar))");
}

Expand Down

0 comments on commit 3c78776

Please sign in to comment.