From f6392ac0654fc8c472a1fbfac2b4a0bda51ee6f2 Mon Sep 17 00:00:00 2001 From: Jiefan Li Date: Wed, 10 Jan 2024 13:59:38 -0800 Subject: [PATCH 1/3] [Coral-Schema] Add DATE type support in RelDataTypeToAvroType --- .../coral/schema/avro/RelDataTypeToAvroType.java | 9 +++++++-- .../schema/avro/RelDataTypeToAvroTypeTests.java | 15 ++++++++++++++- .../com/linkedin/coral/schema/avro/TestUtils.java | 4 +++- .../src/test/resources/base-date-type-field.avsc | 12 ++++++++++++ .../rel2avro-testDateTypeField-expected.avsc | 13 +++++++++++++ .../trino/rel2trino/HiveToTrinoConverterTest.java | 2 +- 6 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 coral-schema/src/test/resources/base-date-type-field.avsc create mode 100644 coral-schema/src/test/resources/rel2avro-testDateTypeField-expected.avsc diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java index 53963c321..efa57f9d8 100644 --- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java +++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java @@ -1,5 +1,5 @@ /** - * Copyright 2019-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2019-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -90,6 +90,7 @@ private static Schema relDataTypeToAvroType(RelDataType relDataType, String reco } private static Schema basicSqlTypeToAvroType(BasicSqlType relDataType) { + Schema schema; switch (relDataType.getSqlTypeName()) { case BOOLEAN: return Schema.create(Schema.Type.BOOLEAN); @@ -110,9 +111,13 @@ private static Schema basicSqlTypeToAvroType(BasicSqlType relDataType) { case NULL: return Schema.create(Schema.Type.NULL); case TIMESTAMP: - Schema schema = Schema.create(Schema.Type.LONG); + schema = Schema.create(Schema.Type.LONG); schema.addProp("logicalType", "timestamp-millis"); return schema; + case DATE: + schema = Schema.create(Schema.Type.INT); + schema.addProp("logicalType", "date"); + return schema; case DECIMAL: JsonNodeFactory factory = JsonNodeFactory.instance; Schema decimalSchema = Schema.create(Schema.Type.BYTES); diff --git a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroTypeTests.java b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroTypeTests.java index dff021ec8..b2973c916 100644 --- a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroTypeTests.java +++ b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroTypeTests.java @@ -1,5 +1,5 @@ /** - * Copyright 2020-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2020-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -78,4 +78,17 @@ public void testTimestampTypeField() { Assert.assertEquals(actualAvroType.toString(true), TestUtils.loadSchema("rel2avro-testTimestampTypeField-expected.avsc")); } + + @Test + public void testDateTypeField() { + String viewSql = "CREATE VIEW v AS SELECT * FROM basedatetypefield"; + + TestUtils.executeCreateViewQuery("default", "v", viewSql); + RelNode relNode = hiveToRelConverter.convertView("default", "v"); + Schema actualAvroType = + RelDataTypeToAvroType.relDataTypeToAvroTypeNonNullable(relNode.getRowType(), "dateTypeField"); + + Assert.assertEquals(actualAvroType.toString(true), + TestUtils.loadSchema("rel2avro-testDateTypeField-expected.avsc")); + } } diff --git a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java index cb79495f4..56642a828 100644 --- a/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java +++ b/coral-schema/src/test/java/com/linkedin/coral/schema/avro/TestUtils.java @@ -1,5 +1,5 @@ /** - * Copyright 2019-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2019-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -96,6 +96,7 @@ private static void initializeTables() { String baseNestedComplexSchema = loadSchema("base-nested-complex.avsc"); String baseNullTypeFieldSchema = loadSchema("base-null-type-field.avsc"); String baseTimestampTypeFieldSchema = loadSchema("base-timestamp-type-field.avsc"); + String baseDateTypeFieldSchema = loadSchema("base-date-type-field.avsc"); String baseComplexUnionTypeSchema = loadSchema("base-complex-union-type.avsc"); String baseNestedUnionSchema = loadSchema("base-nested-union.avsc"); String baseComplexLowercase = loadSchema("base-complex-lowercase.avsc"); @@ -113,6 +114,7 @@ private static void initializeTables() { executeCreateTableQuery("default", "basenullability", baseNullabilitySchema); executeCreateTableQuery("default", "basenulltypefield", baseNullTypeFieldSchema); executeCreateTableQuery("default", "basetimestamptypefield", baseTimestampTypeFieldSchema); + executeCreateTableQuery("default", "basedatetypefield", baseDateTypeFieldSchema); executeCreateTableQuery("default", "basecomplexuniontype", baseComplexUnionTypeSchema); executeCreateTableQuery("default", "basenestedunion", baseNestedUnionSchema); executeCreateTableQuery("default", "basecomplexlowercase", baseComplexLowercase); diff --git a/coral-schema/src/test/resources/base-date-type-field.avsc b/coral-schema/src/test/resources/base-date-type-field.avsc new file mode 100644 index 000000000..da7d8d71b --- /dev/null +++ b/coral-schema/src/test/resources/base-date-type-field.avsc @@ -0,0 +1,12 @@ +{ + "type" : "record", + "name" : "basedatetypefield", + "namespace" : "coral.schema.avro.base.date.type.field", + "fields" : [ { + "name" : "Date_Field", + "type" : [ "null", { + "type" : "int", + "logicalType" : "date" + } ] + } ] +} \ No newline at end of file diff --git a/coral-schema/src/test/resources/rel2avro-testDateTypeField-expected.avsc b/coral-schema/src/test/resources/rel2avro-testDateTypeField-expected.avsc new file mode 100644 index 000000000..89cf55517 --- /dev/null +++ b/coral-schema/src/test/resources/rel2avro-testDateTypeField-expected.avsc @@ -0,0 +1,13 @@ +{ + "type" : "record", + "name" : "dateTypeField", + "namespace" : "rel_avro", + "fields" : [ { + "name" : "date_field", + "type" : [ "null", { + "type" : "int", + "logicalType" : "date" + } ], + "default" : null + } ] +} \ No newline at end of file diff --git a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java index 328f6a975..855345905 100644 --- a/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java +++ b/coral-trino/src/test/java/com/linkedin/coral/trino/rel2trino/HiveToTrinoConverterTest.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2023 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2024 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ From 9bcd0bfefb1f4c61d197363a891d4174c60ada53 Mon Sep 17 00:00:00 2001 From: Jiefan Li Date: Wed, 10 Jan 2024 14:43:56 -0800 Subject: [PATCH 2/3] Add inline comment --- .../com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java index efa57f9d8..4912a4734 100644 --- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java +++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java @@ -115,6 +115,8 @@ private static Schema basicSqlTypeToAvroType(BasicSqlType relDataType) { schema.addProp("logicalType", "timestamp-millis"); return schema; case DATE: + // Spark recognizes the data type of {"type": "int", "logicalType": "date"} as a date type: + // https://github.com/apache/spark/blob/master/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala#L145 schema = Schema.create(Schema.Type.INT); schema.addProp("logicalType", "date"); return schema; From 98a7abbaeab4ebe2ebb938e039db01970c6bd975 Mon Sep 17 00:00:00 2001 From: Jiefan Li Date: Thu, 11 Jan 2024 09:45:03 -0800 Subject: [PATCH 3/3] Modify comment --- .../com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java index 4912a4734..450754b54 100644 --- a/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java +++ b/coral-schema/src/main/java/com/linkedin/coral/schema/avro/RelDataTypeToAvroType.java @@ -115,8 +115,7 @@ private static Schema basicSqlTypeToAvroType(BasicSqlType relDataType) { schema.addProp("logicalType", "timestamp-millis"); return schema; case DATE: - // Spark recognizes the data type of {"type": "int", "logicalType": "date"} as a date type: - // https://github.com/apache/spark/blob/master/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala#L145 + // In Avro, "date" type is represented as {"type": "int", "logicalType": "date"}. schema = Schema.create(Schema.Type.INT); schema.addProp("logicalType", "date"); return schema;