diff --git a/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java index 022538eaa..d1dc558af 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/TypeConverter.java @@ -140,7 +140,6 @@ public static RelDataType convert(StructTypeInfo structType, final RelDataTypeFa // The schema of output Struct conforms to https://github.com/trinodb/trino/pull/3483 // except we adopted "integer" for the type of "tag" field instead of "tinyint" in the Trino patch // for compatibility with other platforms that Iceberg currently doesn't support tinyint type. - // When the field count inside UnionTypeInfo is one, we surface the underlying RelDataType instead. // Note: this is subject to change in the future pending on the discussion in // https://mail-archives.apache.org/mod_mbox/iceberg-dev/202112.mbox/browser diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java index 0d6f86370..a7165a779 100644 --- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java +++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelToAvroSchemaConverter.java @@ -507,14 +507,7 @@ public RexNode visitFieldAccess(RexFieldAccess rexFieldAccess) { // UDFs calls could potentially be doubly (or more) field-referenced, for example, `extract_union(baz).single.tag_0` // where baz is a struct containing a uniontype field. In this case, we simply need to use derived type of the entire // call. Note that this also takes care of the simple one layer field reference on a UDF call. - String oldFieldName = rexFieldAccess.getField().getName(); - String suggestNewFieldName = suggestedFieldNames.poll(); - String newFieldName = SchemaUtilities.getFieldName(oldFieldName, suggestNewFieldName); - - RelDataType fieldType = rexFieldAccess.getType(); - boolean isNullable = SchemaUtilities.isFieldNullable((RexCall) referenceExpr, inputSchema); - // TODO: add field documentation - SchemaUtilities.appendField(newFieldName, fieldType, null, fieldAssembler, isNullable); + handleUDFFieldAccess(rexFieldAccess, (RexCall) referenceExpr); return rexFieldAccess; } else if (referenceExpr instanceof RexFieldAccess) { // While selecting `int_field` from `struct_col:struct>` using `struct_col.inner_struct_col.int_field`, @@ -527,16 +520,31 @@ public RexNode visitFieldAccess(RexFieldAccess rexFieldAccess) { } } + handleFieldAccess(rexFieldAccess, (RexInputRef) referenceExpr, innerRecordNames); + return rexFieldAccess; + } + + private void handleFieldAccess(RexFieldAccess rexFieldAccess, RexInputRef referenceExpr, + Deque innerRecordNames) { String oldFieldName = rexFieldAccess.getField().getName(); String suggestNewFieldName = suggestedFieldNames.poll(); String newFieldName = SchemaUtilities.getFieldName(oldFieldName, suggestNewFieldName); - Schema topSchema = inputSchema.getFields().get(((RexInputRef) referenceExpr).getIndex()).schema(); + Schema topSchema = inputSchema.getFields().get(referenceExpr.getIndex()).schema(); Schema.Field accessedField = getFieldFromTopSchema(topSchema, oldFieldName, innerRecordNames); assert accessedField != null; SchemaUtilities.appendField(newFieldName, accessedField, fieldAssembler); + } - return rexFieldAccess; + private void handleUDFFieldAccess(RexFieldAccess rexFieldAccess, RexCall referenceExpr) { + String oldFieldName = rexFieldAccess.getField().getName(); + String suggestNewFieldName = suggestedFieldNames.poll(); + String newFieldName = SchemaUtilities.getFieldName(oldFieldName, suggestNewFieldName); + + RelDataType fieldType = rexFieldAccess.getType(); + boolean isNullable = SchemaUtilities.isFieldNullable(referenceExpr, inputSchema); + // TODO: add field documentation + SchemaUtilities.appendField(newFieldName, fieldType, null, fieldAssembler, isNullable); } @Override