From d1d5b1ea0925b02c0506e794b061e30bb8976d5f Mon Sep 17 00:00:00 2001 From: Kevin Ge Date: Thu, 25 Jul 2024 12:05:10 -0400 Subject: [PATCH] Add missing numeric precisions in Hive type system (#501) * add precision for int and bigint for hive type system * use static final variables for precisions * spotless check * fix converting type to typespec bug * add UT * add tinyint and smallint precisions * spotless * spotless * spotless * new UTs * add tableInt to rel to trino tests * spotless * remove redundant test --- .../linkedin/coral/common/HiveTypeSystem.java | 16 +++++++- .../hive/hive2rel/HiveToRelConverterTest.java | 40 ++++++++++++++++++- .../coral/hive/hive2rel/TestUtils.java | 2 + .../FromUtcTimestampOperatorTransformer.java | 12 +----- .../coral/trino/rel2trino/TestUtils.java | 3 ++ 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java index 32ecd526d..6372a220b 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveTypeSystem.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -12,7 +12,7 @@ import org.apache.calcite.sql.type.SqlTypeUtil; -// Copied from Hive source code +// Precision and scale values copied from Hive source code public class HiveTypeSystem extends RelDataTypeSystemImpl { // TODO: This should come from type system; Currently there is no definition // in type system for this. @@ -29,6 +29,10 @@ public class HiveTypeSystem extends RelDataTypeSystemImpl { private static final int DEFAULT_CHAR_PRECISION = 255; private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE; private static final int MAX_TIMESTAMP_PRECISION = 9; + private static final int DEFAULT_TINYINT_PRECISION = 3; + private static final int DEFAULT_SMALLINT_PRECISION = 5; + private static final int DEFAULT_INTEGER_PRECISION = 10; + private static final int DEFAULT_BIGINT_PRECISION = 19; @Override public int getMaxScale(SqlTypeName typeName) { @@ -84,6 +88,14 @@ public int getDefaultPrecision(SqlTypeName typeName) { case INTERVAL_MINUTE_SECOND: case INTERVAL_SECOND: return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION; + case TINYINT: + return DEFAULT_TINYINT_PRECISION; + case SMALLINT: + return DEFAULT_SMALLINT_PRECISION; + case INTEGER: + return DEFAULT_INTEGER_PRECISION; + case BIGINT: + return DEFAULT_BIGINT_PRECISION; default: return -1; } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java index d8f047777..e2eafb0e7 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/HiveToRelConverterTest.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -622,6 +622,44 @@ public void testNameSakeColumnNamesShouldGetUniqueIdentifiers() { assertEquals(generated, expected); } + @Test + public void testUnionIntAndBigInt() { + final String sql = "SELECT a FROM test.tableOne UNION ALL SELECT bigint_col FROM test.tableInt"; + RelNode rel = converter.convertSql(sql); + assertEquals(rel.getRowType().getFieldCount(), 1); + // Should be BIGINT since it is a less restrictive type than INT + assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.BIGINT); + } + + @Test + public void testUnionIntAndSmallInt() { + final String sql = "SELECT smallint_col FROM test.tableInt UNION ALL SELECT a FROM test.tableOne"; + RelNode rel = converter.convertSql(sql); + assertEquals(rel.getRowType().getFieldCount(), 1); + // Should be INT since it is a less restrictive type than SMALLINT + assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + } + + @Test + public void testUnionIntAndTinyInt() { + final String sql = "SELECT tinyint_col FROM test.tableInt UNION ALL SELECT a FROM test.tableOne"; + RelNode rel = converter.convertSql(sql); + assertEquals(rel.getRowType().getFieldCount(), 1); + // Should be INT since it is a less restrictive type than TINYINT + assertEquals(rel.getRowType().getFieldList().get(0).getType().getSqlTypeName(), SqlTypeName.INTEGER); + } + + @Test + public void testIntCastToBigIntDuringComparison() { + // We're testing that a comparison between INT and BIGINT sees a cast on the more restrictive type to the + // less restrictive type and not the other way around. In other words, the INT is cast to BIGINT. + final String sql = "SELECT CASE WHEN int_col = bigint_col THEN 'abc' ELSE 'def' END FROM test.tableInt"; + String expected = "LogicalProject(EXPR$0=[CASE(=(CAST($2):BIGINT, $3), 'abc', 'def')])\n" + + " LogicalTableScan(table=[[hive, test, tableint]])\n"; + + assertEquals(relToString(sql), expected); + } + private String relToString(String sql) { return RelOptUtil.toString(converter.convertSql(sql)); } diff --git a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java index 8a1a437c4..5a6a9fe0a 100644 --- a/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java +++ b/coral-hive/src/test/java/com/linkedin/coral/hive/hive2rel/TestUtils.java @@ -82,6 +82,8 @@ public static TestHive setupDefaultHive(HiveConf conf) throws IOException { driver.run("CREATE DATABASE IF NOT EXISTS test"); driver.run("CREATE TABLE IF NOT EXISTS test.tableOne(a int, b varchar(30), c double, d timestamp)"); driver.run("CREATE TABLE IF NOT EXISTS test.tableTwo(x int, y double)"); + driver.run( + "CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)"); driver.run("CREATE DATABASE IF NOT EXISTS fuzzy_union"); diff --git a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java index 81e30535f..eee27b50b 100644 --- a/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java +++ b/coral-trino/src/main/java/com/linkedin/coral/trino/rel2trino/transformers/FromUtcTimestampOperatorTransformer.java @@ -11,15 +11,13 @@ import java.util.Set; import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.sql.SqlBasicTypeNameSpec; import org.apache.calcite.sql.SqlCall; -import org.apache.calcite.sql.SqlDataTypeSpec; import org.apache.calcite.sql.SqlNode; import org.apache.calcite.sql.SqlOperator; -import org.apache.calcite.sql.SqlTypeNameSpec; import org.apache.calcite.sql.fun.SqlStdOperatorTable; import org.apache.calcite.sql.type.BasicSqlType; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; import com.linkedin.coral.common.HiveTypeSystem; import com.linkedin.coral.common.calcite.CalciteUtil; @@ -122,12 +120,6 @@ protected SqlCall transform(SqlCall sqlCall) { } private SqlCall castOperand(SqlNode operand, RelDataType relDataType) { - return SqlStdOperatorTable.CAST.createCall(ZERO, operand, getSqlDataTypeSpecForCasting(relDataType)); - } - - private SqlDataTypeSpec getSqlDataTypeSpecForCasting(RelDataType relDataType) { - final SqlTypeNameSpec typeNameSpec = new SqlBasicTypeNameSpec(relDataType.getSqlTypeName(), - relDataType.getPrecision(), relDataType.getScale(), null, ZERO); - return new SqlDataTypeSpec(typeNameSpec, ZERO); + return SqlStdOperatorTable.CAST.createCall(ZERO, operand, SqlTypeUtil.convertTypeToSpec(relDataType)); } } diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java index 148085261..3e6f088f8 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/TestUtils.java @@ -425,6 +425,9 @@ public static void initializeTablesAndViews(HiveConf conf) throws HiveException, run(driver, "CREATE TABLE IF NOT EXISTS test.tableFour(icol int, scol string, acol array, mcol map)"); + run(driver, + "CREATE TABLE IF NOT EXISTS test.tableInt(tinyint_col tinyint, smallint_col smallint, int_col int, bigint_col bigint)"); + } public static HiveConf loadResourceHiveConf() {