Skip to content

Commit

Permalink
[Coral-Trino] Avoid accidental translation of Trino from_unixtime S…
Browse files Browse the repository at this point in the history
…QL call
  • Loading branch information
findinpath committed Oct 20, 2023
1 parent a7e33e6 commit ee93740
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ public boolean isOptional(int i) {
// Date Functions
createAddUserDefinedFunction("from_unixtime", FunctionReturnTypes.STRING,
family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING), optionalOrd(1)));
addFunctionEntry("timestamp_from_unixtime", new TimestampFromUnixtime());
createAddUserDefinedFunction("unix_timestamp", BIGINT,
family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(0, 1))));
createAddUserDefinedFunction("to_date", FunctionReturnTypes.STRING, or(STRING, DATETIME));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.hive.hive2rel.functions;

import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunctionalOperator;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlSyntax;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlTypeFamily;

import com.linkedin.coral.common.functions.FunctionReturnTypes;


public class TimestampFromUnixtime extends SqlFunctionalOperator {
public TimestampFromUnixtime() {
super("from_unixtime", SqlKind.OTHER_FUNCTION, 100, true, FunctionReturnTypes.TIMESTAMP, null, null);
}

@Override
public SqlOperandCountRange getOperandCountRange() {
return SqlOperandCountRanges.between(1, 3);
}

@Override
public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) {
final SqlNode firstOperand = callBinding.operand(0);
if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, firstOperand, 0,
throwOnFailure)) {
return false;
}

if (callBinding.getOperandCount() == 2) {
final SqlNode secondOperand = callBinding.operand(1);
if (!OperandTypes.family(SqlTypeFamily.STRING).checkSingleOperandType(callBinding, secondOperand, 0,
throwOnFailure)) {
return false;
}
}

if (callBinding.getOperandCount() == 2) {
final SqlNode secondOperand = callBinding.operand(1);
final SqlNode thirdOperand = callBinding.operand(2);
if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, secondOperand, 0,
throwOnFailure)) {
return false;
}
if (!OperandTypes.family(SqlTypeFamily.NUMERIC).checkSingleOperandType(callBinding, thirdOperand, 0,
throwOnFailure)) {
return false;
}
}

// unknown function signature
return false;
}

@Override
public SqlSyntax getSyntax() {
return SqlSyntax.FUNCTION;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import com.linkedin.coral.trino.rel2trino.transformers.ToDateOperatorTransformer;
import com.linkedin.coral.trino.rel2trino.transformers.UnnestOperatorTransformer;

import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*;
import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.AVOID_TRANSFORM_TO_DATE_UDF;


/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public class FromUtcTimestampOperatorTransformer extends SqlCallTransformer {
private static final String WITH_TIMEZONE = "with_timezone";
private static final String TO_UNIXTIME = "to_unixtime";
private static final String FROM_UNIXTIME_NANOS = "from_unixtime_nanos";
private static final String FROM_UNIXTIME = "from_unixtime";
private static final String TIMESTAMP_FROM_UNIXTIME = "timestamp_from_unixtime";
private static final String CANONICALIZE_HIVE_TIMEZONE_ID = "$canonicalize_hive_timezone_id";

public FromUtcTimestampOperatorTransformer(TypeDerivationUtil typeDerivationUtil) {
Expand Down Expand Up @@ -86,7 +86,7 @@ protected SqlCall transform(SqlCall sqlCall) {
SqlOperator trinoFromUnixtimeNanos =
createSqlOperator(FROM_UNIXTIME_NANOS, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoFromUnixTime =
createSqlOperator(FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
createSqlOperator(TIMESTAMP_FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoCanonicalizeHiveTimezoneId = createSqlOperator(CANONICALIZE_HIVE_TIMEZONE_ID, explicit(VARCHAR));

SqlCall canonicalizeHiveTimezoneIdSqlCall = trinoCanonicalizeHiveTimezoneId.createCall(ZERO, timezone);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public Object[][] viewTestCasesProvider() {
{ "test", "get_json_object_view", "SELECT \"json_extract\"(\"tablea\".\"b\".\"b1\", '$.name')\n"
+ "FROM \"test\".\"tablea\" AS \"tablea\"" },

{ "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
{ "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"" },

{ "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n"
Expand Down Expand Up @@ -560,7 +560,7 @@ public void testCastNestedTimestampToDecimal() {
relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT CAST(from_utc_timestamp(a_date, 'America/Los_Angeles') AS DECIMAL(10, 0)) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n"
"SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
Expand All @@ -581,15 +581,15 @@ public void testSubstrWithTimestampOperator() {
relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT substring(from_utc_timestamp(a_decimal_three,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);

relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT substring(from_utc_timestamp(a_timestamp,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp1\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
Expand Down

0 comments on commit ee93740

Please sign in to comment.