From e09015e954ab2ff0e0ac6c7e62d6744b5d818f24 Mon Sep 17 00:00:00 2001 From: Kevin Ge Date: Thu, 11 Apr 2024 13:35:09 -0400 Subject: [PATCH] [Coral-Trino] Migrate cast transformation from RelNode layer to SqlNode layer (#491) * Initial commit for migrating Cast * add type derivation changes * type derivation enhance and test debug * remove testing files * spotlessapply + comment out failing unit test * update unit tests * fix regression by TypeDerivationUtil on casts with join on simple aliases * generalize case of casting with join on simple aliases * remove extraneous input to dummy select * temporary removal of type util postprocessor for i-test * revert * spotless checks * update calcite version for linkedin-calcite #98/#99 * remove Calcite2TrinoUDFConverter * clean up tests * unused imports * spotless * improve javadoc/documentation * fix javadoc * improve javadoc * grammar * refactor + clean up --------- Co-authored-by: aastha25 --- .../linkedin/coral/common/ToRelConverter.java | 2 +- .../common/utils/TypeDerivationUtil.java | 7 +- .../rel2trino/Calcite2TrinoUDFConverter.java | 210 ------------------ .../DataTypeDerivedSqlCallConverter.java | 5 +- .../trino/rel2trino/RelToTrinoConverter.java | 9 +- .../transformers/AsOperatorTransformer.java | 2 +- .../transformers/CastOperatorTransformer.java | 104 +++++++++ .../FromUtcTimestampOperatorTransformer.java | 4 +- .../rel2trino/HiveToTrinoConverterTest.java | 17 +- .../coral/trino/rel2trino/TestUtils.java | 3 +- gradle/dependencies.gradle | 2 +- 11 files changed, 137 insertions(+), 228 deletions(-) delete mode 100644 coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java create mode 100644 coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CastOperatorTransformer.java diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index 26475da78..067a502bb 
100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ diff --git a/coral-common/src/main/java/com/linkedin/coral/common/utils/TypeDerivationUtil.java b/coral-common/src/main/java/com/linkedin/coral/common/utils/TypeDerivationUtil.java index e0ba27c51..684ad668c 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/utils/TypeDerivationUtil.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/utils/TypeDerivationUtil.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. 
*/ @@ -59,6 +59,11 @@ public RelDataType getRelDataType(SqlNode sqlNode) { throw new RuntimeException("SqlValidator does not exist to derive the RelDataType for SqlNode: " + sqlNode); } + RelDataType cachedDataType = sqlValidator.getValidatedNodeTypeIfKnown(sqlNode); + if (cachedDataType != null) { + return cachedDataType; + } + for (SqlSelect topSqlSelectNode : topSelectNodes) { final SqlSelect dummySqlSelect = new SqlSelect(topSqlSelectNode.getParserPosition(), null, SqlNodeList.of(sqlNode), topSqlSelectNode.getFrom(), topSqlSelectNode.getWhere(), topSqlSelectNode.getGroup(), diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java deleted file mode 100644 index b8fb269d5..000000000 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/Calcite2TrinoUDFConverter.java +++ /dev/null @@ -1,210 +0,0 @@ -/** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. - * Licensed under the BSD-2 Clause license. - * See LICENSE in the project root for license information. 
- */ -package com.linkedin.coral.trino.rel2trino; - -import java.util.List; -import java.util.Map; -import java.util.Optional; - -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.RelShuttle; -import org.apache.calcite.rel.RelShuttleImpl; -import org.apache.calcite.rel.core.TableFunctionScan; -import org.apache.calcite.rel.core.TableScan; -import org.apache.calcite.rel.logical.LogicalAggregate; -import org.apache.calcite.rel.logical.LogicalCorrelate; -import org.apache.calcite.rel.logical.LogicalExchange; -import org.apache.calcite.rel.logical.LogicalFilter; -import org.apache.calcite.rel.logical.LogicalIntersect; -import org.apache.calcite.rel.logical.LogicalJoin; -import org.apache.calcite.rel.logical.LogicalMatch; -import org.apache.calcite.rel.logical.LogicalMinus; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rel.logical.LogicalSort; -import org.apache.calcite.rel.logical.LogicalUnion; -import org.apache.calcite.rel.logical.LogicalValues; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexCall; -import org.apache.calcite.rex.RexNode; -import org.apache.calcite.rex.RexShuttle; -import org.apache.calcite.sql.SqlIdentifier; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.calcite.sql.type.SqlReturnTypeInference; -import org.apache.calcite.sql.validate.SqlUserDefinedFunction; - -import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*; -import static org.apache.calcite.sql.type.ReturnTypes.explicit; -import static org.apache.calcite.sql.type.SqlTypeName.*; - - -public class Calcite2TrinoUDFConverter { - private Calcite2TrinoUDFConverter() { - } - - /** - * Replaces Calcite SQL operators with Trino UDF to obtain the Trino-compatible Calcite plan. 
- * - * @param calciteNode Original Calcite plan - * @return Trino-compatible Calcite plan - */ - public static RelNode convertRel(RelNode calciteNode, Map configs) { - RelShuttle converter = new RelShuttleImpl() { - @Override - public RelNode visit(LogicalProject project) { - return super.visit(project).accept(getTrinoRexConverter(project)); - } - - @Override - public RelNode visit(LogicalFilter inputFilter) { - return super.visit(inputFilter).accept(getTrinoRexConverter(inputFilter)); - } - - @Override - public RelNode visit(LogicalAggregate aggregate) { - return super.visit(aggregate).accept(getTrinoRexConverter(aggregate)); - } - - @Override - public RelNode visit(LogicalMatch match) { - return super.visit(match).accept(getTrinoRexConverter(match)); - } - - @Override - public RelNode visit(TableScan scan) { - return super.visit(scan).accept(getTrinoRexConverter(scan)); - } - - @Override - public RelNode visit(TableFunctionScan scan) { - return super.visit(scan).accept(getTrinoRexConverter(scan)); - } - - @Override - public RelNode visit(LogicalValues values) { - return super.visit(values).accept(getTrinoRexConverter(values)); - } - - @Override - public RelNode visit(LogicalJoin join) { - return super.visit(join).accept(getTrinoRexConverter(join)); - } - - @Override - public RelNode visit(LogicalCorrelate correlate) { - return super.visit(correlate).accept(getTrinoRexConverter(correlate)); - } - - @Override - public RelNode visit(LogicalUnion union) { - return super.visit(union).accept(getTrinoRexConverter(union)); - } - - @Override - public RelNode visit(LogicalIntersect intersect) { - return super.visit(intersect).accept(getTrinoRexConverter(intersect)); - } - - @Override - public RelNode visit(LogicalMinus minus) { - return super.visit(minus).accept(getTrinoRexConverter(minus)); - } - - @Override - public RelNode visit(LogicalSort sort) { - return super.visit(sort).accept(getTrinoRexConverter(sort)); - } - - @Override - public RelNode visit(LogicalExchange 
exchange) { - return super.visit(exchange).accept(getTrinoRexConverter(exchange)); - } - - @Override - public RelNode visit(RelNode other) { - return super.visit(other).accept(getTrinoRexConverter(other)); - } - - private TrinoRexConverter getTrinoRexConverter(RelNode node) { - return new TrinoRexConverter(node, configs); - } - }; - return calciteNode.accept(converter); - } - - /** - * For replacing a certain Calcite SQL operator with Trino UDFs in a relational expression - */ - public static class TrinoRexConverter extends RexShuttle { - private final RexBuilder rexBuilder; - private final RelDataTypeFactory typeFactory; - private final RelNode node; - private final Map configs; - - public TrinoRexConverter(RelNode node, Map configs) { - this.rexBuilder = node.getCluster().getRexBuilder(); - this.typeFactory = node.getCluster().getTypeFactory(); - this.configs = configs; - this.node = node; - } - - @Override - public RexNode visitCall(RexCall call) { - - final String operatorName = call.getOperator().getName(); - - if (operatorName.equalsIgnoreCase("cast")) { - Optional modifiedCall = visitCast(call); - if (modifiedCall.isPresent()) { - return modifiedCall.get(); - } - } - - return super.visitCall(call); - } - - private Optional visitCast(RexCall call) { - final SqlOperator op = call.getOperator(); - if (op.getKind() != SqlKind.CAST) { - return Optional.empty(); - } - - List convertedOperands = visitList(call.getOperands(), (boolean[]) null); - RexNode leftOperand = convertedOperands.get(0); - - // Hive allows for casting of TIMESTAMP to DECIMAL, which converts it to unix time if the decimal format is valid - // Example: "SELECT cast(current_timestamp() AS decimal(10,0));" -> 1633112585 - // Trino does not allow for such conversion, but we can achieve the same behavior by first calling "to_unixtime" - // on the TIMESTAMP and then casting it to DECIMAL after. 
- if (call.getType().getSqlTypeName() == DECIMAL && leftOperand.getType().getSqlTypeName() == TIMESTAMP) { - SqlOperator trinoToUnixTime = createSqlOperatorOfFunction("to_unixtime", explicit(DOUBLE)); - SqlOperator trinoWithTimeZone = - createSqlOperatorOfFunction("with_timezone", explicit(TIMESTAMP /* should be WITH TIME ZONE */)); - return Optional.of(rexBuilder.makeCast(call.getType(), rexBuilder.makeCall(trinoToUnixTime, - rexBuilder.makeCall(trinoWithTimeZone, leftOperand, rexBuilder.makeLiteral("UTC"))))); - } - - // Trino doesn't allow casting varbinary/binary to varchar/char, we need to use the built-in function `from_utf8` - // to replace the cast, i.e. CAST(binary AS VARCHAR) -> from_utf8(binary) - if ((call.getType().getSqlTypeName() == VARCHAR || call.getType().getSqlTypeName() == CHAR) - && (leftOperand.getType().getSqlTypeName() == VARBINARY - || leftOperand.getType().getSqlTypeName() == BINARY)) { - SqlOperator fromUTF8 = createSqlOperatorOfFunction("from_utf8", explicit(VARCHAR)); - return Optional.of(rexBuilder.makeCall(fromUTF8, leftOperand)); - } - - return Optional.empty(); - } - } - - private static SqlOperator createSqlOperatorOfFunction(String functionName, SqlReturnTypeInference typeInference) { - SqlIdentifier sqlIdentifier = - new SqlIdentifier(com.google.common.collect.ImmutableList.of(functionName), SqlParserPos.ZERO); - return new SqlUserDefinedFunction(sqlIdentifier, typeInference, null, null, null, null); - } -} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/DataTypeDerivedSqlCallConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/DataTypeDerivedSqlCallConverter.java index 056120e91..274a163ad 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/DataTypeDerivedSqlCallConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/DataTypeDerivedSqlCallConverter.java @@ -1,5 +1,5 @@ /** - * Copyright 2022-2023 LinkedIn Corporation. 
All rights reserved. + * Copyright 2022-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -16,6 +16,7 @@ import com.linkedin.coral.common.utils.TypeDerivationUtil; import com.linkedin.coral.hive.hive2rel.HiveToRelConverter; import com.linkedin.coral.hive.hive2rel.functions.VersionedSqlUserDefinedFunction; +import com.linkedin.coral.trino.rel2trino.transformers.CastOperatorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.ConcatOperatorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.FromUtcTimestampOperatorTransformer; import com.linkedin.coral.trino.rel2trino.transformers.GenericProjectTransformer; @@ -44,7 +45,7 @@ public DataTypeDerivedSqlCallConverter(HiveMetastoreClient mscClient, SqlNode to operatorTransformerList = SqlCallTransformers.of(new FromUtcTimestampOperatorTransformer(typeDerivationUtil), new GenericProjectTransformer(typeDerivationUtil), new NamedStructToCastTransformer(typeDerivationUtil), new ConcatOperatorTransformer(typeDerivationUtil), new SubstrOperatorTransformer(typeDerivationUtil), - new UnionSqlCallTransformer(typeDerivationUtil)); + new CastOperatorTransformer(typeDerivationUtil), new UnionSqlCallTransformer(typeDerivationUtil)); } @Override diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java index 416c0299f..9ebc2cf93 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/RelToTrinoConverter.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. 
*/ @@ -19,7 +19,6 @@ import org.apache.calcite.rel.rel2sql.RelToSqlConverter; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; -import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexFieldAccess; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -35,7 +34,6 @@ import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator; import static com.google.common.base.Preconditions.*; -import static com.linkedin.coral.trino.rel2trino.Calcite2TrinoUDFConverter.convertRel; import static com.linkedin.coral.trino.rel2trino.CoralTrinoConfigKeys.*; @@ -48,7 +46,7 @@ public class RelToTrinoConverter extends RelToSqlConverter { * wrapping in {@link RelToTrinoConverter#visit(Uncollect)} * (2) Some internally registered UDFs which should not be converted, like `to_date`. * If the value of key {@link CoralTrinoConfigKeys#AVOID_TRANSFORM_TO_DATE_UDF} is set to true, we don't transform `to_date` UDF - * in {@link com.linkedin.coral.trino.rel2trino.Calcite2TrinoUDFConverter.TrinoRexConverter#visitCall(RexCall)} + * in {@link com.linkedin.coral.trino.rel2trino.transformers.ToDateOperatorTransformer} * (3) We need to adjust the return type for some functions using cast, since the converted Trino function's return type is not * aligned with the Hive function's return type. 
For example, if the value of key {@link CoralTrinoConfigKeys#CAST_DATEADD_TO_STRING} * is set to true, we would cast the converted RexCall to `varchar` type (date_add(xxx) -> cast(date_add(xxx) as varchar)) @@ -84,8 +82,7 @@ public RelToTrinoConverter(HiveMetastoreClient mscClient, Map c * @return SQL string */ public String convert(RelNode relNode) { - RelNode rel = convertRel(relNode, configs); - SqlNode sqlNode = convertToSqlNode(rel); + SqlNode sqlNode = convertToSqlNode(relNode); SqlNode sqlNodeWithRelDataTypeDerivedConversions = sqlNode.accept(new DataTypeDerivedSqlCallConverter(_hiveMetastoreClient, sqlNode)); diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java index ef2bda3e5..a9e17ae4d 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/AsOperatorTransformer.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CastOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CastOperatorTransformer.java new file mode 100644 index 000000000..2edcfad64 --- /dev/null +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/CastOperatorTransformer.java @@ -0,0 +1,104 @@ +/** + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. 
+ */ +package com.linkedin.coral.trino.rel2trino.transformers; + +import java.util.List; +import java.util.Set; + +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlDataTypeSpec; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlOperator; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.SqlTypeName; + +import com.linkedin.coral.com.google.common.collect.ImmutableSet; +import com.linkedin.coral.common.calcite.CalciteUtil; +import com.linkedin.coral.common.transformers.SqlCallTransformer; +import com.linkedin.coral.common.utils.TypeDerivationUtil; + +import static org.apache.calcite.sql.parser.SqlParserPos.*; +import static org.apache.calcite.sql.type.ReturnTypes.*; +import static org.apache.calcite.sql.type.SqlTypeName.*; +import static org.apache.calcite.sql.type.SqlTypeName.DOUBLE; + + +/** + * The CAST operator in Trino has some differences compared to Hive, which are handled by this transformer: + * + * 1) Hive allows for casting of TIMESTAMP to DECIMAL, by converting it to unix time if the specified decimal + * type has enough precision/scale to hold the Unix timestamp value. For example, casting to DECIMAL(10,0) + * for the timestamp 1633112585. + * Trino does not allow such a conversion, but we can achieve the same behavior by first calling + * "to_unixtime" (with timezone specified) on the TIMESTAMP and then casting it to DECIMAL after. + * Hive: SELECT CAST(timestamp AS DECIMAL(10,0)) + * Trino: CAST(to_unixtime(with_timezone(timestamp, 'UTC')) AS DECIMAL(10, 0)) + * + * 2) Hive allows for casting varbinary/binary to varchar/char, while in Trino we need to use the + * built-in function `from_utf8` instead to replace the cast. 
+ * Hive: CAST(binary AS VARCHAR) + * Trino: from_utf8(binary) + * + * Since this transformer introduces an extra iteration of Calcite validation during RelNode to SqlNode transformation (RHS) + * for queries with CAST operators, there is an added (but expected) side effect of implicit casting by Calcite's type coercion rules. + * Consider the following Hive input query: + * "SELECT CAST(from_utc_timestamp(a_date, 'America/Los_Angeles') AS DECIMAL(10, 0)) AS d FROM test.table_from_utc_timestamp" + * + * We add a `with_timezone` operator in this transformation, and with the added layer of Calcite validation, instead + * of just getting: + * "(with_timezone(table_from_utc_timestamp0.a_date, 'UTC'))" + * + * we get an extra implicit cast from Calcite that converts the date-typed input of `with_timezone` to the expected timestamp type: + * "(with_timezone(CAST(table_from_utc_timestamp0.a_date AS TIMESTAMP), 'UTC'))" + */ +public class CastOperatorTransformer extends SqlCallTransformer { + private static final String WITH_TIMEZONE = "with_timezone"; + private static final String TO_UNIXTIME = "to_unixtime"; + private static final Set BINARY_SQL_TYPE_NAMES = + ImmutableSet.of(SqlTypeName.VARBINARY, SqlTypeName.BINARY); + private static final Set CHAR_SQL_TYPE_NAMES = ImmutableSet.of(SqlTypeName.VARCHAR, SqlTypeName.CHAR); + + public CastOperatorTransformer(TypeDerivationUtil typeDerivationUtil) { + super(typeDerivationUtil); + } + + @Override + protected boolean condition(SqlCall sqlCall) { + return sqlCall.getOperator().getKind() == SqlKind.CAST; + } + + @Override + protected SqlCall transform(SqlCall sqlCall) { + List operands = sqlCall.getOperandList(); + + final SqlNode leftOperand = operands.get(0); + final RelDataType sourceType = deriveRelDatatype(leftOperand); + final SqlDataTypeSpec targetSqlDataTypeSpec = (SqlDataTypeSpec) operands.get(1); + final SqlTypeName targetType = SqlTypeName.get(targetSqlDataTypeSpec.getTypeNameSpec().getTypeName().toString()); + + if 
(sourceType.getSqlTypeName() == TIMESTAMP && targetType == DECIMAL) { + SqlOperator trinoWithTimeZone = + createSqlOperator(WITH_TIMEZONE, explicit(TIMESTAMP /* should be WITH TIME ZONE */)); + SqlOperator trinoToUnixTime = createSqlOperator(TO_UNIXTIME, explicit(DOUBLE)); + + SqlCall withTimeZoneCall = + trinoWithTimeZone.createCall(ZERO, leftOperand, CalciteUtil.createStringLiteral("UTC", ZERO)); + SqlCall toUnixTimeCall = trinoToUnixTime.createCall(ZERO, withTimeZoneCall); + + return SqlStdOperatorTable.CAST.createCall(ZERO, toUnixTimeCall, targetSqlDataTypeSpec); + } + + if (BINARY_SQL_TYPE_NAMES.contains(sourceType.getSqlTypeName()) && CHAR_SQL_TYPE_NAMES.contains(targetType)) { + SqlOperator fromUTF8 = createSqlOperator("from_utf8", explicit(VARCHAR)); + + return fromUTF8.createCall(ZERO, leftOperand); + } + + return sqlCall; + } +} diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java index bb8230da8..81e30535f 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java @@ -1,5 +1,5 @@ /** - * Copyright 2023 LinkedIn Corporation. All rights reserved. + * Copyright 2023-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -76,7 +76,7 @@ protected SqlCall transform(SqlCall sqlCall) { RelDataType inputType = deriveRelDatatype(sqlCall.getOperandList().get(0)); SqlTypeName inputSqlTypeName = inputType.getSqlTypeName(); - // In below definitions we should use `TIMESTATMP WITH TIME ZONE`. As calcite is lacking + // In below definitions we should use `TIMESTAMP WITH TIME ZONE`. 
As calcite is lacking // this type we use `TIMESTAMP` instead. It does not have any practical implications as result syntax tree // is not type-checked, and only used for generating output SQL for a view query. SqlOperator trinoAtTimeZone = createSqlOperator(AT_TIMEZONE, explicit(TIMESTAMP /* should be WITH TIME ZONE */)); diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index 8d309802b..8f0cd2042 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -159,7 +159,7 @@ public Object[][] viewTestCasesProvider() { { "test", "get_json_object_view", "SELECT \"json_extract\"(\"tablea\".\"b\".\"b1\", '$.name')\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" }, - { "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 
CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n" + { "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 
CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(CAST(\"table_from_utc_timestamp\".\"a_date\" AS TIMESTAMP), 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"" }, { "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1 + 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n" @@ -572,12 +572,25 @@ public void 
testCastNestedTimestampToDecimal() { relNode = TestUtils.getHiveToRelConverter().convertSql( "SELECT CAST(from_utc_timestamp(a_date, 'America/Los_Angeles') AS DECIMAL(10, 0)) AS d\nFROM test.table_from_utc_timestamp"); targetSql = - "SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n" + "SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(CAST(\"table_from_utc_timestamp0\".\"a_date\" AS TIMESTAMP), 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n" + "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\""; expandedSql = relToTrinoConverter.convert(relNode); assertEquals(expandedSql, targetSql); } + @Test + public void testCastWithJoinOnTableAlias() { + RelNode relNode = TestUtils.getHiveToRelConverter().convertSql( + "SELECT t1.*, CAST('123' AS INT) FROM (SELECT * FROM test.tableA) t1 LEFT OUTER JOIN test.tableB t2 ON t1.a = t2.a"); + String targetSql = "SELECT \"tablea\".\"a\" AS \"a\", \"tablea\".\"b\" AS \"b\", CAST('123' AS INTEGER)\n" + + "FROM \"test\".\"tablea\" AS \"tablea\"\n" + + "LEFT JOIN \"test\".\"tableb\" AS \"tableb\" ON \"tablea\".\"a\" = \"tableb\".\"a\""; + + RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); + String expandedSql = relToTrinoConverter.convert(relNode); + assertEquals(expandedSql, targetSql); + } + @Test public void testSubstrWithTimestampOperator() { RelToTrinoConverter relToTrinoConverter = TestUtils.getRelToTrinoConverter(); diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java index 
0409333ff..148085261 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -70,7 +70,6 @@ static RelNode toRel(String sql, FrameworkConfig config) { RelRoot rel = planner.rel(validate); //RelNode relNode = rel.project(); return rel.project(); - //return Calcite2TrinoUDFConverter.convertRel(relNode); } catch (Exception e) { throw new RuntimeException(e); } diff --git a/gradle/dependencies.gradle b/gradle/dependencies.gradle index 6d585596c..15b54b73f 100644 --- a/gradle/dependencies.gradle +++ b/gradle/dependencies.gradle @@ -9,7 +9,7 @@ def versions = [ 'jetbrains': '16.0.2', 'jline': '0.9.94', 'kryo': '2.22', - 'linkedin-calcite-core': '1.21.0.260', + 'linkedin-calcite-core': '1.21.0.262', 'pig': '0.15.0', 'spark': '2.4.0', 'spark3': '3.1.1',