Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace FunctionFieldReferenceOperator with Calcite DOT operator #1

Draft
wants to merge 12 commits into
base: single-unions
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
run: ./gradlew githubRelease publishToSonatype closeAndReleaseStagingRepository --stacktrace
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
SONATYPE_USER: ${{secrets.SONATYPE_USER}}
SONATYPE_PWD: ${{secrets.SONATYPE_PWD}}
SONATYPE_TOKEN_USERNAME: ${{secrets.SONATYPE_TOKEN_USERNAME}}
SONATYPE_TOKEN_PASSWORD: ${{secrets.SONATYPE_TOKEN_PASSWORD}}
PGP_KEY: ${{secrets.PGP_KEY}}
PGP_PWD: ${{secrets.PGP_PWD}}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -237,7 +237,8 @@ private RelDataType getUnionDataType(final List<RelDataType> dataTypes) {
if (baseDataType.isStruct()) {
// Build the common UNION type using the first branch that appears in the query
final RelDataTypeFactory.Builder builder =
new RelDataTypeFactory.Builder(toRelConverter.getRelBuilder().getTypeFactory());
new RelDataTypeFactory.Builder(toRelConverter.getRelBuilder().getTypeFactory())
.kind(baseDataType.getStructKind());

// Build a set of common fields by name in the given dataTypes
Set<String> commonFieldNames =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -12,7 +12,7 @@
import org.apache.calcite.sql.type.SqlTypeUtil;


// Copied from Hive source code
// Precision and scale values copied from Hive source code
public class HiveTypeSystem extends RelDataTypeSystemImpl {
// TODO: This should come from type system; Currently there is no definition
// in type system for this.
Expand All @@ -29,6 +29,10 @@ public class HiveTypeSystem extends RelDataTypeSystemImpl {
private static final int DEFAULT_CHAR_PRECISION = 255;
private static final int MAX_BINARY_PRECISION = Integer.MAX_VALUE;
private static final int MAX_TIMESTAMP_PRECISION = 9;
private static final int DEFAULT_TINYINT_PRECISION = 3;
private static final int DEFAULT_SMALLINT_PRECISION = 5;
private static final int DEFAULT_INTEGER_PRECISION = 10;
private static final int DEFAULT_BIGINT_PRECISION = 19;

@Override
public int getMaxScale(SqlTypeName typeName) {
Expand Down Expand Up @@ -84,6 +88,14 @@ public int getDefaultPrecision(SqlTypeName typeName) {
case INTERVAL_MINUTE_SECOND:
case INTERVAL_SECOND:
return SqlTypeName.DEFAULT_INTERVAL_START_PRECISION;
case TINYINT:
return DEFAULT_TINYINT_PRECISION;
case SMALLINT:
return DEFAULT_SMALLINT_PRECISION;
case INTEGER:
return DEFAULT_INTEGER_PRECISION;
case BIGINT:
return DEFAULT_BIGINT_PRECISION;
default:
return -1;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -140,16 +140,12 @@ public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFa
// The schema of output Struct conforms to https://github.com/trinodb/trino/pull/3483
// except we adopted "integer" for the type of the "tag" field instead of "tinyint" in the Trino patch
// for compatibility with other platforms, since Iceberg currently doesn't support the tinyint type.
// When the field count inside UnionTypeInfo is one, we surface the underlying RelDataType instead.

// Note: this is subject to change in the future pending on the discussion in
// https://mail-archives.apache.org/mod_mbox/iceberg-dev/202112.mbox/browser
public static RelDataType convert(UnionTypeInfo unionType, RelDataTypeFactory dtFactory) {
List<RelDataType> fTypes = unionType.getAllUnionObjectTypeInfos().stream()
.map(typeInfo -> convert(typeInfo, dtFactory)).collect(Collectors.toList());
if (fTypes.size() == 1) {
return dtFactory.createTypeWithNullability(fTypes.get(0), true);
}
List<String> fNames = IntStream.range(0, unionType.getAllUnionObjectTypeInfos().size()).mapToObj(i -> "field" + i)
.collect(Collectors.toList());
fTypes.add(0, dtFactory.createSqlType(SqlTypeName.INTEGER));
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2022-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2022-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -41,6 +41,25 @@ public class RelDataTypeToHiveTypeStringConverter {
private RelDataTypeToHiveTypeStringConverter() {
}

public RelDataTypeToHiveTypeStringConverter(boolean convertUnionTypes) {
this.convertUnionTypes = convertUnionTypes;
}

/**
* If true, Coral will convert single uniontypes back to Hive's native uniontype representation. This is necessary
* because some engines have readers that unwrap Hive single uniontypes to just the underlying data type, causing
* the loss of information that the column was originally a uniontype in Hive. This can be problematic when calling
* the `coalesce_struct` UDF on such columns, as they are expected to be treated as uniontypes. Retaining the
* original uniontype record and passing it into `coalesce_struct` ensures correct handling.
*
* Example:
* RelDataType:
* struct(tag:integer,field0:varchar)
* Hive Type String:
* uniontype&lt;string&gt;
*/
private static boolean convertUnionTypes = false;

/**
* @param relDataType a given RelDataType
* @return a syntactically and semantically correct Hive type string for relDataType
Expand Down Expand Up @@ -110,6 +129,14 @@ public static String convertRelDataType(RelDataType relDataType) {
*/
private static String buildStructDataTypeString(RelRecordType relRecordType) {
List<String> structFieldStrings = new ArrayList<>();

// Convert single uniontypes as structs back to native Hive representation
if (convertUnionTypes && relRecordType.getFieldList().size() == 2
&& relRecordType.getFieldList().get(0).getName().equals("tag")
&& relRecordType.getFieldList().get(1).getName().equals("field0")) {
return String.format("uniontype<%s>", convertRelDataType(relRecordType.getFieldList().get(1).getType()));
}

for (RelDataTypeField fieldRelDataType : relRecordType.getFieldList()) {
structFieldStrings
.add(String.format("%s:%s", fieldRelDataType.getName(), convertRelDataType(fieldRelDataType.getType())));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2019-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand Down Expand Up @@ -178,4 +178,18 @@ public void testCharRelDataType() {

assertEquals(hiveDataTypeSchemaString, expectedHiveDataTypeSchemaString);
}

@Test
public void testSingleUniontypeStructRelDataType() {
String expectedHiveDataTypeSchemaString = "uniontype<string>";

List<RelDataTypeField> fields = new ArrayList<>();
fields.add(new RelDataTypeFieldImpl("tag", 0, new BasicSqlType(RelDataTypeSystem.DEFAULT, SqlTypeName.INTEGER)));
fields.add(new RelDataTypeFieldImpl("field0", 0, new BasicSqlType(RelDataTypeSystem.DEFAULT, SqlTypeName.VARCHAR)));

RelRecordType relRecordType = new RelRecordType(fields);
String hiveDataTypeSchemaString = new RelDataTypeToHiveTypeStringConverter(true).convertRelDataType(relRecordType);

assertEquals(hiveDataTypeSchemaString, expectedHiveDataTypeSchemaString);
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2018-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2018-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -16,31 +16,31 @@
import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.calcite.sql2rel.StandardConvertletTable;

import com.linkedin.coral.common.functions.FunctionFieldReferenceOperator;


/**
* ConvertletTable for Hive Operators
 * ConvertletTable for transformations only relevant to Coral's Intermediate Representation, not specific to
 * any SQL dialect. These transformations keep data parity between the SqlNode and RelNode layers, keeping the IR intact.
* @see ReflectiveConvertletTable documentation for method naming and visibility rules
*/
public class HiveConvertletTable extends ReflectiveConvertletTable {
public class CoralConvertletTable extends ReflectiveConvertletTable {

@SuppressWarnings("unused")
public RexNode convertFunctionFieldReferenceOperator(SqlRexContext cx, FunctionFieldReferenceOperator op,
SqlCall call) {
RexNode funcExpr = cx.convertExpression(call.operand(0));
String fieldName = FunctionFieldReferenceOperator.fieldNameStripQuotes(call.operand(1));
return cx.getRexBuilder().makeFieldAccess(funcExpr, fieldName, false);
}
// @SuppressWarnings("unused")
// public RexNode convertFunctionFieldReferenceOperator(SqlRexContext cx, FunctionFieldReferenceOperator op,
// SqlCall call) {
// RexNode funcExpr = cx.convertExpression(call.operand(0));
// String fieldName = FunctionFieldReferenceOperator.fieldNameStripQuotes(call.operand(1));
// return cx.getRexBuilder().makeFieldAccess(funcExpr, fieldName, false);
// }

/**
* Override {@link StandardConvertletTable#convertCast} to avoid cast optimizations that remove the cast.
*/
@SuppressWarnings("unused")
public RexNode convertCast(SqlRexContext cx, SqlCastFunction cast, SqlCall call) {
final SqlNode left = call.operand(0);
RexNode leftRex = cx.convertExpression(left);
SqlDataTypeSpec dataType = call.operand(1);
RelDataType castType = dataType.deriveType(cx.getValidator(), true);
// can not call RexBuilder.makeCast() since that optimizes to remove the cast
// we don't want to remove the cast
return cx.getRexBuilder().makeAbstractCast(castType, leftRex);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2017-2023 LinkedIn Corporation. All rights reserved.
* Copyright 2017-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
Expand All @@ -13,7 +13,6 @@
import com.linkedin.coral.common.transformers.SqlCallTransformers;
import com.linkedin.coral.common.utils.TypeDerivationUtil;
import com.linkedin.coral.transformers.ShiftArrayIndexTransformer;
import com.linkedin.coral.transformers.SingleUnionFieldReferenceTransformer;


/**
Expand All @@ -24,8 +23,7 @@ public class HiveSqlNodeToCoralSqlNodeConverter extends SqlShuttle {

public HiveSqlNodeToCoralSqlNodeConverter(SqlValidator sqlValidator, SqlNode topSqlNode) {
TypeDerivationUtil typeDerivationUtil = new TypeDerivationUtil(sqlValidator, topSqlNode);
operatorTransformerList = SqlCallTransformers.of(new ShiftArrayIndexTransformer(typeDerivationUtil),
new SingleUnionFieldReferenceTransformer(typeDerivationUtil));
operatorTransformerList = SqlCallTransformers.of(new ShiftArrayIndexTransformer(typeDerivationUtil));
}

@Override
Expand Down
Loading