From ee937400ae7e7003113da1e4a912c452309d24fc Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Sun, 15 Oct 2023 22:23:50 +0200 Subject: [PATCH] [Coral-Trino] Avoid accidental translation of Trino `from_unixtime` SQL call --- .../functions/StaticHiveFunctionRegistry.java | 1 + .../functions/TimestampFromUnixtime.java | 68 +++++++++++++++++++ .../CoralToTrinoSqlCallConverter.java | 2 +- .../FromUtcTimestampOperatorTransformer.java | 4 +- .../rel2trino/HiveToTrinoConverterTest.java | 8 +-- 5 files changed, 76 insertions(+), 7 deletions(-) create mode 100644 coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/TimestampFromUnixtime.java diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java index baba18be3..5f99e7067 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/StaticHiveFunctionRegistry.java @@ -320,6 +320,7 @@ public boolean isOptional(int i) { // Date Functions createAddUserDefinedFunction("from_unixtime", FunctionReturnTypes.STRING, family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING), optionalOrd(1))); + addFunctionEntry("timestamp_from_unixtime", new TimestampFromUnixtime()); createAddUserDefinedFunction("unix_timestamp", BIGINT, family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(0, 1)))); createAddUserDefinedFunction("to_date", FunctionReturnTypes.STRING, or(STRING, DATETIME)); diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/TimestampFromUnixtime.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/TimestampFromUnixtime.java new file mode 100644 index 000000000..832cd5e35 --- /dev/null +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/functions/TimestampFromUnixtime.java @@ -0,0 +1,68 @@ +/** + * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.hive.hive2rel.functions; + +import org.apache.calcite.sql.SqlCallBinding; +import org.apache.calcite.sql.SqlFunctionalOperator; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperandCountRange; +import org.apache.calcite.sql.SqlSyntax; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.SqlOperandCountRanges; +import org.apache.calcite.sql.type.SqlTypeFamily; + +import com.linkedin.coral.common.functions.FunctionReturnTypes; + + +public class TimestampFromUnixtime extends SqlFunctionalOperator { + public TimestampFromUnixtime() { + super("from_unixtime", SqlKind.OTHER_FUNCTION, 100, true, FunctionReturnTypes.TIMESTAMP, null, null); + } + + @Override + public SqlOperandCountRange getOperandCountRange() { + return SqlOperandCountRanges.between(1, 3); + } + + @Override + public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) { + final SqlNode firstOperand = callBinding.operand(0); + if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, firstOperand, 0, + throwOnFailure)) { + return false; + } + + if (callBinding.getOperandCount() == 2) { + final SqlNode secondOperand = callBinding.operand(1); + if (!OperandTypes.family(SqlTypeFamily.STRING).checkSingleOperandType(callBinding, secondOperand, 0, + throwOnFailure)) { + return false; + } + } + + if (callBinding.getOperandCount() == 2) { + final SqlNode secondOperand = callBinding.operand(1); + final SqlNode thirdOperand = callBinding.operand(2); + if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, secondOperand, 0, + throwOnFailure)) { + return false; + } + if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, thirdOperand, 0, + throwOnFailure)) { + return false; + } + } + + // unknown function signature + return false; + } + + @Override + public SqlSyntax getSyntax() { + return SqlSyntax.FUNCTION; + } +} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java index 71626def8..89d5cfc33 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/CoralToTrinoSqlCallConverter.java @@ -37,7 +37,7 @@ import com.linkedin.coral.trino.rel2trino.transformers.ToDateOperatorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.UnnestOperatorTransformer; -import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*; +import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.AVOID_TRANSFORM_TO_DATE_UDF; /** diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java index 7d7535cd1..bb8230da8 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java @@ -54,7 +54,7 @@ public class FromUtcTimestampOperatorTransformer extends SqlCallTransformer { private static final String WITH_TIMEZONE = "with_timezone"; private static final String TO_UNIXTIME = "to_unixtime"; private static final String FROM_UNIXTIME_NANOS = "from_unixtime_nanos"; - private static final String FROM_UNIXTIME = "from_unixtime"; + private static final String TIMESTAMP_FROM_UNIXTIME = "timestamp_from_unixtime"; private static final String CANONICALIZE_HIVE_TIMEZONE_ID = "$canonicalize_hive_timezone_id"; public FromUtcTimestampOperatorTransformer(TypeDerivationUtil typeDerivationUtil) { @@ -86,7 +86,7 @@ protected SqlCall transform(SqlCall sqlCall) { SqlOperator trinoFromUnixtimeNanos = createSqlOperator(FROM_UNIXTIME_NANOS, explicit(TIMESTAMP /* should be WITH TIME ZONE */)); SqlOperator trinoFromUnixTime = - createSqlOperator(FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + createSqlOperator(TIMESTAMP_FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */)); SqlOperator trinoCanonicalizeHiveTimezoneId = createSqlOperator(CANONICALIZE_HIVE_TIMEZONE_ID, explicit(VARCHAR)); SqlCall canonicalizeHiveTimezoneIdSqlCall = trinoCanonicalizeHiveTimezoneId.createCall(ZERO, timezone); diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index 0c5f8c91b..652267d8a 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -159,7 +159,7 @@ public Object[][] viewTestCasesProvider() { { "test", "get_json_object_view", "SELECT \"json_extract\"(\"tablea\".\"b\".\"b1\", '$.name')\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" }, - { "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n" + { "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"" }, { "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n" @@ -560,7 +560,7 @@ public void testCastNestedTimestampToDecimal() { relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT CAST(from_utc_timestamp(a_date, 'America/Los_Angeles') AS DECIMAL(10, 0)) AS d\nFROM test.table_from_utc_timestamp"); targetSql = - "SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n" + "SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\""; expandedSql = relToTrinoConverter.convert(relNode); assertEquals(expandedSql, targetSql); @@ -581,7 +581,7 @@ public void testSubstrWithTimestampOperator() { relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT substring(from_utc_timestamp(a_decimal_three,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp"); targetSql = - "SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n" + "SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\""; expandedSql = relToTrinoConverter.convert(relNode); assertEquals(expandedSql, targetSql); @@ -589,7 +589,7 @@ public void testSubstrWithTimestampOperator() { relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT substring(from_utc_timestamp(a_timestamp,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp"); targetSql = - "SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n" + "SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp1\""; expandedSql = relToTrinoConverter.convert(relNode); assertEquals(expandedSql, targetSql);