Commit

Yingjianw/fix parent child empty null config (#617)

* Revert "ignore void transform partitions from current partition field list (#606)"

This reverts commit dc3bc13.

* fix parentChildEmptyNullConfig
add test

---------

Co-authored-by: Yingjian Wu <[email protected]>
stevie9868 and Yingjian Wu authored Oct 30, 2024
1 parent dc3bc13 commit 52066ec
Showing 8 changed files with 47 additions and 227 deletions.

@@ -58,9 +58,6 @@ public ParentChildRelationshipProperties(@Nullable final Environment env) {
    * @param configStr configString
    */
   public void setMaxAllowPerTablePerRelType(@Nullable final String configStr) {
-    if (configStr == null || configStr.isEmpty()) {
-      return;
-    }
     try {
       this.maxAllowPerTablePerRelType = parseNestedConfigString(configStr);
     } catch (Exception e) {
@@ -74,9 +71,6 @@ public void setMaxAllowPerTablePerRelType(@Nullable final String configStr) {
    * @param configStr configString
    */
   public void setMaxAllowPerDBPerRelType(@Nullable final String configStr) {
-    if (configStr == null || configStr.isEmpty()) {
-      return;
-    }
     try {
       this.maxAllowPerDBPerRelType = parseNestedConfigString(configStr);
     } catch (Exception e) {
@@ -89,12 +83,10 @@ public void setMaxAllowPerDBPerRelType(@Nullable final String configStr) {
    * @param configStr configString
    */
   public void setDefaultMaxAllowPerRelType(@Nullable final String configStr) {
-    if (configStr == null || configStr.isEmpty()) {
-      return;
-    }
     try {
-      this.defaultMaxAllowPerRelType =
-          Arrays.stream(configStr.split(";"))
+      this.defaultMaxAllowPerRelType = configStr == null || configStr.isEmpty()
+          ? new HashMap<>()
+          : Arrays.stream(configStr.split(";"))
           .map(entry -> entry.split(","))
           .collect(Collectors.toMap(
               parts -> parts[0],
@@ -105,7 +97,10 @@ public void setDefaultMaxAllowPerRelType(@Nullable final String configStr) {
     }
   }

-  private Map<String, Map<String, Integer>> parseNestedConfigString(final String configStr) {
+  private Map<String, Map<String, Integer>> parseNestedConfigString(@Nullable final String configStr) {
+    if (configStr == null || configStr.isEmpty()) {
+      return new HashMap<>();
+    }
     return Arrays.stream(configStr.split(";"))
         .map(entry -> entry.split(","))
         .collect(Collectors.groupingBy(
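
For orientation only (this block is not part of the commit), a minimal Java sketch of the behavior the change above is after, using just the setters and getters that appear in this diff and in the new Spock test further down; the package import and the Spring Environment wiring are omitted because they are not shown in the diff:

    // Hypothetical usage sketch of ParentChildRelationshipProperties as changed above.
    final ParentChildRelationshipProperties props = new ParentChildRelationshipProperties(null);

    // "relType,limit" pairs separated by ';'          -> e.g. {CLONE=5, Other=3}
    props.setDefaultMaxAllowPerRelType("CLONE,5;Other,3");
    // "relType,name,limit" entries grouped by relType -> e.g. {CLONE={test=3, other=2}}
    props.setMaxAllowPerDBPerRelType("CLONE,test,3;CLONE,other,2");

    // Before this fix, a null or empty config string made the setter return early,
    // leaving any previously configured value in place. After this fix, both cases
    // reset the corresponding map to empty, matching the new test added below.
    props.setDefaultMaxAllowPerRelType("");
    props.setMaxAllowPerDBPerRelType(null);
    assert props.getDefaultMaxAllowPerRelType().isEmpty();
    assert props.getMaxAllowPerDBPerRelType().isEmpty();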

@@ -188,7 +188,7 @@ public TableInfo fromIcebergTableToTableInfo(final QualifiedName name,
                                               final TableInfo tableInfo) {
     final org.apache.iceberg.Table table = tableWrapper.getTable();
     final List<FieldInfo> allFields =
-        this.hiveTypeConverter.icebergSchemaTofieldDtos(table.schema(), table.spec().fields());
+        this.hiveTypeConverter.icebergeSchemaTofieldDtos(table.schema(), table.spec().fields());
     final Map<String, String> tableParameters = new HashMap<>();
     tableParameters.put(DirectSqlTable.PARAM_TABLE_TYPE, DirectSqlTable.ICEBERG_TABLE_TYPE);
     tableParameters.put(DirectSqlTable.PARAM_METADATA_LOCATION, tableLoc);

@@ -108,13 +108,12 @@ public Type toMetacatType(final String type) {
    * @param partitionFields partitioned fields
    * @return list of field Info
    */
-  public List<FieldInfo> icebergSchemaTofieldDtos(final Schema schema,
+  public List<FieldInfo> icebergeSchemaTofieldDtos(final Schema schema,
                                                   final List<PartitionField> partitionFields) {
     final List<FieldInfo> fields = Lists.newArrayList();
-    final List<String> partitionNames = partitionFields.stream()
-        .filter(f -> f.transform() != null && !f.transform().toString().equalsIgnoreCase("void"))
-        .map(f -> schema.findField(f.sourceId()).name())
-        .collect(Collectors.toList());
+    final List<String> partitionNames =
+        partitionFields.stream()
+            .map(f -> schema.findField(f.sourceId()).name()).collect(Collectors.toList());

     for (Types.NestedField field : schema.columns()) {
       final FieldInfo fieldInfo = new FieldInfo();

@@ -19,7 +19,6 @@ package com.netflix.metacat.connector.hive.converters
 import org.apache.iceberg.PartitionField
 import org.apache.iceberg.PartitionSpec
 import org.apache.iceberg.Schema
-import org.apache.iceberg.transforms.Identity
 import org.apache.iceberg.types.Type
 import org.apache.iceberg.types.Types
 import com.netflix.metacat.common.QualifiedName
@@ -59,7 +58,7 @@ class HiveConnectorInfoConvertorSpec extends Specification{
     def setup() {
         // Stub this to always return true
         config.isEpochInSeconds() >> true
-        converter = new HiveConnectorInfoConverter(new HiveTypeConverter())
+        converter = new HiveConnectorInfoConverter( new HiveTypeConverter())
     }

     def 'test date to epoch seconds'() {
@@ -513,7 +512,6 @@ class HiveConnectorInfoConvertorSpec extends Specification{
         def tableInfo = converter.fromIcebergTableToTableInfo(QualifiedName.ofTable('c', 'd', 't'),
             icebergTableWrapper, "/tmp/test", TableInfo.builder().build() )
         then:
-        2 * field.transform() >> Mock(Identity)
         1 * icebergTable.properties() >> ["test":"abd"]
         2 * icebergTable.spec() >> partSpec
         1 * partSpec.fields() >> [ field]

@@ -13,11 +13,6 @@

 package com.netflix.metacat.connector.hive.converters

-import org.apache.iceberg.PartitionField
-import org.apache.iceberg.transforms.Identity
-import org.apache.iceberg.transforms.VoidTransform
-import org.apache.iceberg.Schema
-import org.apache.iceberg.types.Types
 import spock.lang.Shared
 import spock.lang.Specification
 import spock.lang.Unroll
@@ -260,43 +255,4 @@ class HiveTypeConverterSpec extends Specification {
         "array<struct<date:string,countryCodes:array<string>,source:string>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}"""
         "array<struct<Date:string,nestedArray:array<struct<date:string,countryCodes:array<string>,source:string>>>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"Date","type":"string"},{"name":"nestedArray","type":{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}}]}}"""
     }
-
-    def "Test treat void transforms partitions as non-partition field"() {
-        given:
-        // Initial schema with three fields
-        def initialSchema = new Schema(
-            Types.NestedField.optional(1, "field1", Types.BooleanType.get(), "added 1st - partition key"),
-            Types.NestedField.optional(2, "field2", Types.StringType.get(), "added 2nd"),
-            Types.NestedField.optional(3, "field3", Types.IntegerType.get(), "added 3rd")
-        )
-
-        // Initial partition fields
-        def initialPartitionFields = [
-            new PartitionField(1, 1, "field1", new Identity()),
-            new PartitionField(2, 2, "field2", new VoidTransform<String>()),
-        ]
-
-        when:
-        def fieldDtos = this.converter.icebergSchemaTofieldDtos(initialSchema, initialPartitionFields)
-
-        then:
-        fieldDtos.size() == 3
-
-        // Validate the first field
-        def field1 = fieldDtos.find { it.name == "field1" }
-        field1 != null
-        field1.partitionKey == true
-
-        // Validate the second field
-        def field2 = fieldDtos.find { it.name == "field2" }
-        field2 != null
-        field2.partitionKey == false
-
-        // Validate the third field
-        def field3 = fieldDtos.find { it.name == "field3" }
-        field3 != null
-        field3.partitionKey == false
-
-        noExceptionThrown()
-    }
 }

This file was deleted.


@@ -41,7 +41,6 @@ import feign.RetryableException
 import feign.Retryer
 import groovy.sql.Sql
 import org.apache.commons.io.FileUtils
-import org.apache.iceberg.PartitionField
 import org.joda.time.Instant
 import org.skyscreamer.jsonassert.JSONAssert
 import spock.lang.Ignore
@@ -832,50 +831,6 @@ class MetacatSmokeSpec extends Specification {
         api.deleteTable(catalogName, databaseName, tableName)
     }

-    @Unroll
-    def "Test ignore void transform as partition fields"() {
-        given:
-        def catalogName = 'embedded-fast-hive-metastore'
-        def databaseName = 'iceberg_db'
-        def tableName = 'iceberg_table_6'
-        def uri = isLocalEnv ? String.format('file:/tmp/data/') : null
-        def tableDto = new TableDto(
-            name: QualifiedName.ofTable(catalogName, databaseName, tableName),
-            serde: new StorageDto(
-                owner: 'metacat-test',
-                inputFormat: 'org.apache.hadoop.mapred.TextInputFormat',
-                outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',
-                serializationLib: 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe',
-                parameters: [
-                    'serialization.format': '1'
-                ],
-                uri: uri
-            ),
-            definitionMetadata: null,
-            dataMetadata: null,
-            fields: null
-        )
-
-        def metadataLocation = String.format('/tmp/data/metadata/00001-ba4b775c-7b1a-4a6f-aec0-03d3c851088c.metadata.json')
-        def metadata = [table_type: 'ICEBERG', metadata_location: metadataLocation]
-        tableDto.setMetadata(metadata)
-
-        when:
-        try {api.createDatabase(catalogName, databaseName, new DatabaseCreateRequestDto())
-        } catch (Exception ignored) {
-        }
-        api.createTable(catalogName, databaseName, tableName, tableDto)
-        def tableDTO = api.getTable(catalogName, databaseName, tableName, true, true, true)
-
-        then:
-        tableDTO.getFields().size() == 4
-        tableDTO.getPartition_keys().size() == 1
-        tableDTO.getPartition_keys()[0] == "field1"
-
-        cleanup:
-        api.deleteTable(catalogName, databaseName, tableName)
-    }
-
     @Unroll
     def "Test get partitions from iceberg table using #filter"() {
         def catalogName = 'embedded-fast-hive-metastore'

@@ -618,4 +618,38 @@ class ParentChildRelMetadataServiceSpec extends Specification{
         1 | "CLONE,5" | "CLONE,test,3;OTHER,other,2"| "CLONE,testhive/test/parent,2" | 2
         1 | "CLONE,5;Other,3" | "CLONE,test,3;CLONE,other,2"| "CLONE,testhive/test/parent,2;CLONE,testhive/test/other,2" | 2
     }
+
+    def "test empty/null input string for config"() {
+        given:
+        def parentChildProps = new ParentChildRelationshipProperties(null)
+
+        when: "Setting properties to non empty string"
+        parentChildProps.setDefaultMaxAllowPerRelType("CLONE,5;Other,3")
+        parentChildProps.setMaxAllowPerDBPerRelType("CLONE,test,3;CLONE,other,2")
+        parentChildProps.setMaxAllowPerTablePerRelType("CLONE,testhive/test/parent,2;CLONE,testhive/test/other,2")
+
+        then:
+        assert parentChildProps.getDefaultMaxAllowPerRelType().size() == 2
+        assert parentChildProps.getMaxAllowPerDBPerRelType().size() == 1
+        assert parentChildProps.getMaxAllowPerTablePerRelType().size() == 1
+
+        when: "Setting properties to empty or null based on the isEmpty flag"
+        if (isEmpty) {
+            parentChildProps.setDefaultMaxAllowPerRelType("")
+            parentChildProps.setMaxAllowPerDBPerRelType("")
+            parentChildProps.setMaxAllowPerTablePerRelType("")
+        } else {
+            parentChildProps.setDefaultMaxAllowPerRelType(null)
+            parentChildProps.setMaxAllowPerDBPerRelType(null)
+            parentChildProps.setMaxAllowPerTablePerRelType(null)
+        }
+
+        then: "The properties should be empty"
+        assert parentChildProps.getDefaultMaxAllowPerRelType().isEmpty()
+        assert parentChildProps.getMaxAllowPerDBPerRelType().isEmpty()
+        assert parentChildProps.getMaxAllowPerTablePerRelType().isEmpty()
+
+        where:
+        isEmpty << [true, false]
+    }
 }
