From 7c4fbe42c4bcd60eb152ab3ac15551ce4766dde0 Mon Sep 17 00:00:00 2001 From: "gang3.yang" Date: Thu, 21 Nov 2024 11:19:13 +0800 Subject: [PATCH 1/3] Relax the Hive DDL vs. Paimon schema field-count check so the Hive DDL may be a subset of the Paimon schema --- .../org/apache/paimon/hive/HiveSchema.java | 5 +- .../paimon/hive/HiveTableSchemaTest.java | 60 ++++++++++++++++++- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java index f637651413ed..108315a96103 100644 --- a/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java +++ b/paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java @@ -233,9 +233,10 @@ private static void checkFieldsMatched( } } - if (schemaFieldNames.size() != hiveFieldNames.size()) { + // It is OK that hive is a subset of paimon + if (schemaFieldNames.size() < hiveFieldNames.size()) { throw new IllegalArgumentException( - "Hive DDL and paimon schema mismatched! " + "Hive DDL is a superset of paimon schema! 
" + "It is recommended not to write any column definition " + "as Paimon external table can read schema from the specified location.\n" + "There are " diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java index 07cd00c8e67e..c0da322702fc 100644 --- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java +++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java @@ -44,7 +44,9 @@ import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.assertj.core.api.Assertions.assertThatThrownBy; -/** Tests for {@link HiveSchema}. */ +/** + * Tests for {@link HiveSchema}. + */ public class HiveTableSchemaTest { private static final RowType ROW_TYPE = @@ -54,7 +56,8 @@ public class HiveTableSchemaTest { new DataField(1, "b", DataTypes.STRING(), "second comment"), new DataField(2, "c", DataTypes.DECIMAL(5, 3), "last comment"))); - @TempDir java.nio.file.Path tempDir; + @TempDir + java.nio.file.Path tempDir; @Test public void testExtractSchemaWithEmptyDDLAndNoPaimonTable() { @@ -153,6 +156,56 @@ public void testMismatchedColumnNameAndType() throws Exception { .hasMessageContaining(expected); } + + @Test + public void testSubsetColumnNameAndType() throws Exception { + createSchema(); + Properties properties = new Properties(); + List columns = Arrays.asList("a","b"); + properties.setProperty("columns", String.join(",",columns)); + properties.setProperty( + "columns.types", + String.join( + ":", + Arrays.asList( + TypeInfoFactory.intTypeInfo.getTypeName(), + TypeInfoFactory.stringTypeInfo.getTypeName(), + TypeInfoFactory.getDecimalTypeInfo(6, 3).getTypeName()))); + properties.setProperty("columns.comments", "\0\0"); + properties.setProperty("location", tempDir.toString()); + List fields = 
HiveSchema.extract(null, properties).fieldNames(); + assertThat(fields).isEqualTo(columns); + } + + @Test + public void testSupersetColumnNameAndType() throws Exception { + createSchema(); + Properties properties = new Properties(); + properties.setProperty("columns", "a,b,c,d"); + properties.setProperty( + "columns.types", + String.join( + ":", + Arrays.asList( + TypeInfoFactory.intTypeInfo.getTypeName(), + TypeInfoFactory.stringTypeInfo.getTypeName(), + TypeInfoFactory.decimalTypeInfo.getTypeName(), + TypeInfoFactory.stringTypeInfo.getTypeName(), + TypeInfoFactory.getDecimalTypeInfo(6, 3).getTypeName()))); + properties.setProperty("columns.comments", "\0\0"); + properties.setProperty("location", tempDir.toString()); + String expected = + "Hive DDL is a superset of paimon schema! " + + "It is recommended not to write any column definition " + + "as Paimon external table can read schema from the specified location.\n" + + "There are 4 fields in Hive DDL: a, b, c, d\n" + + "There are 3 fields in Paimon schema: a, b, c\n"; + assertThatThrownBy(() -> HiveSchema.extract(null, properties)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining(expected); + } + + @Test public void testTooFewColumns() throws Exception { createSchema(); @@ -361,7 +414,8 @@ public void testReadHiveSchemaFromProperties() throws Exception { String dataFieldStr = JsonSerdeUtil.toJson(dataFields); List dataFieldsDeserialized = - JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference>() {}); + JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference>() { + }); HiveSchema newHiveSchema = new HiveSchema(new RowType(dataFieldsDeserialized)); assertThat(newHiveSchema).usingRecursiveComparison().isEqualTo(hiveSchema); } From bbb835b2d547e971c092fb1dcb225a86ed0c74a6 Mon Sep 17 00:00:00 2001 From: "gang3.yang" Date: Thu, 21 Nov 2024 14:49:35 +0800 Subject: [PATCH 2/3] Fix code formatting to pass checkstyle --- .../apache/paimon/hive/HiveTableSchemaTest.java | 16 +++++----------- 1 file 
changed, 5 insertions(+), 11 deletions(-) diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java index c0da322702fc..2d03c56b20a7 100644 --- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java +++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java @@ -44,9 +44,7 @@ import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.assertj.core.api.Assertions.assertThatThrownBy; -/** - * Tests for {@link HiveSchema}. - */ +/** Tests for {@link HiveSchema}. */ public class HiveTableSchemaTest { private static final RowType ROW_TYPE = @@ -56,8 +54,7 @@ public class HiveTableSchemaTest { new DataField(1, "b", DataTypes.STRING(), "second comment"), new DataField(2, "c", DataTypes.DECIMAL(5, 3), "last comment"))); - @TempDir - java.nio.file.Path tempDir; + @TempDir java.nio.file.Path tempDir; @Test public void testExtractSchemaWithEmptyDDLAndNoPaimonTable() { @@ -156,13 +153,12 @@ public void testMismatchedColumnNameAndType() throws Exception { .hasMessageContaining(expected); } - @Test public void testSubsetColumnNameAndType() throws Exception { createSchema(); Properties properties = new Properties(); - List columns = Arrays.asList("a","b"); - properties.setProperty("columns", String.join(",",columns)); + List columns = Arrays.asList("a", "b"); + properties.setProperty("columns", String.join(",", columns)); properties.setProperty( "columns.types", String.join( @@ -205,7 +201,6 @@ public void testSupersetColumnNameAndType() throws Exception { .hasMessageContaining(expected); } - @Test public void testTooFewColumns() throws Exception { createSchema(); @@ -414,8 +409,7 @@ public void testReadHiveSchemaFromProperties() throws Exception { String dataFieldStr = 
JsonSerdeUtil.toJson(dataFields); List dataFieldsDeserialized = - JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference>() { - }); + JsonSerdeUtil.fromJson(dataFieldStr, new TypeReference>() {}); HiveSchema newHiveSchema = new HiveSchema(new RowType(dataFieldsDeserialized)); assertThat(newHiveSchema).usingRecursiveComparison().isEqualTo(hiveSchema); } From f018634c98ece0d6c29cfbae4745ff3c7b4723b4 Mon Sep 17 00:00:00 2001 From: "gang3.yang" Date: Fri, 22 Nov 2024 10:25:03 +0800 Subject: [PATCH 3/3] Update testTooFewColumns and testTooManyColumns unit tests for compatibility with the subset check --- .../org/apache/paimon/hive/HiveTableSchemaTest.java | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java index 2d03c56b20a7..fe7aeac0833a 100644 --- a/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java +++ b/paimon-hive/paimon-hive-connector-common/src/test/java/org/apache/paimon/hive/HiveTableSchemaTest.java @@ -210,16 +210,7 @@ public void testTooFewColumns() throws Exception { properties.setProperty("columns.types", TypeInfoFactory.intTypeInfo.getTypeName()); properties.setProperty("location", tempDir.toString()); properties.setProperty("columns.comments", ""); - - String expected = - "Hive DDL and paimon schema mismatched! 
" - + "It is recommended not to write any column definition " - + "as Paimon external table can read schema from the specified location.\n" - + "There are 1 fields in Hive DDL: a\n" - + "There are 3 fields in Paimon schema: a, b, c"; - assertThatExceptionOfType(IllegalArgumentException.class) - .isThrownBy(() -> HiveSchema.extract(null, properties)) - .withMessageContaining(expected); + assertThat(HiveSchema.extract(null, properties)).isInstanceOf(HiveSchema.class); } @Test @@ -242,7 +233,7 @@ public void testTooManyColumns() throws Exception { properties.setProperty("location", tempDir.toString()); String expected = - "Hive DDL and paimon schema mismatched! " + "Hive DDL is a superset of paimon schema! " + "It is recommended not to write any column definition " + "as Paimon external table can read schema from the specified location.\n" + "There are 5 fields in Hive DDL: a, b, c, d, e\n"