diff --git a/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConverter.java b/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConverter.java index 8a897056e..1b2f02b0f 100644 --- a/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConverter.java +++ b/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConverter.java @@ -188,7 +188,7 @@ public TableInfo fromIcebergTableToTableInfo(final QualifiedName name, final TableInfo tableInfo) { final org.apache.iceberg.Table table = tableWrapper.getTable(); final List allFields = - this.hiveTypeConverter.icebergSchemaTofieldDtos(table.schema(), table.spec().fields()); + this.hiveTypeConverter.icebergeSchemaTofieldDtos(table.schema(), table.spec().fields()); final Map tableParameters = new HashMap<>(); tableParameters.put(DirectSqlTable.PARAM_TABLE_TYPE, DirectSqlTable.ICEBERG_TABLE_TYPE); tableParameters.put(DirectSqlTable.PARAM_METADATA_LOCATION, tableLoc); diff --git a/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveTypeConverter.java b/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveTypeConverter.java index d069ec7e0..625a0990e 100644 --- a/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveTypeConverter.java +++ b/metacat-connector-hive/src/main/java/com/netflix/metacat/connector/hive/converters/HiveTypeConverter.java @@ -108,13 +108,12 @@ public Type toMetacatType(final String type) { * @param partitionFields partitioned fields * @return list of field Info */ - public List icebergSchemaTofieldDtos(final Schema schema, + public List icebergeSchemaTofieldDtos(final Schema schema, final List partitionFields) { final List fields = Lists.newArrayList(); - final List partitionNames = partitionFields.stream() - .filter(f -> f.transform() != null && !f.transform().toString().equalsIgnoreCase("void")) - .map(f -> schema.findField(f.sourceId()).name()) - .collect(Collectors.toList()); + final List partitionNames = + partitionFields.stream() + .map(f -> schema.findField(f.sourceId()).name()).collect(Collectors.toList()); for (Types.NestedField field : schema.columns()) { final FieldInfo fieldInfo = new FieldInfo(); diff --git a/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConvertorSpec.groovy b/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConvertorSpec.groovy index 6eff0374d..0a611c28f 100644 --- a/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConvertorSpec.groovy +++ b/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveConnectorInfoConvertorSpec.groovy @@ -19,7 +19,6 @@ package com.netflix.metacat.connector.hive.converters import org.apache.iceberg.PartitionField import org.apache.iceberg.PartitionSpec import org.apache.iceberg.Schema -import org.apache.iceberg.transforms.Identity import org.apache.iceberg.types.Type import org.apache.iceberg.types.Types import com.netflix.metacat.common.QualifiedName @@ -59,7 +58,7 @@ class HiveConnectorInfoConvertorSpec extends Specification{ def setup() { // Stub this to always return true config.isEpochInSeconds() >> true - converter = new HiveConnectorInfoConverter(new HiveTypeConverter()) + converter = new HiveConnectorInfoConverter( new HiveTypeConverter()) } def 'test date to epoch seconds'() { @@ -513,7 +512,6 @@ class HiveConnectorInfoConvertorSpec extends Specification{ def tableInfo = converter.fromIcebergTableToTableInfo(QualifiedName.ofTable('c', 'd', 't'), icebergTableWrapper, "/tmp/test", TableInfo.builder().build() ) then: - 2 * field.transform() >> Mock(Identity) 1 * icebergTable.properties() >> ["test":"abd"] 2 * icebergTable.spec() >> partSpec 1 * partSpec.fields() >> [ field] diff --git a/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveTypeConverterSpec.groovy b/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveTypeConverterSpec.groovy index 38122d5ce..9a8282c3d 100644 --- a/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveTypeConverterSpec.groovy +++ b/metacat-connector-hive/src/test/groovy/com/netflix/metacat/connector/hive/converters/HiveTypeConverterSpec.groovy @@ -13,11 +13,6 @@ package com.netflix.metacat.connector.hive.converters -import org.apache.iceberg.PartitionField -import org.apache.iceberg.transforms.Identity -import org.apache.iceberg.transforms.VoidTransform -import org.apache.iceberg.Schema -import org.apache.iceberg.types.Types import spock.lang.Shared import spock.lang.Specification import spock.lang.Unroll @@ -260,36 +255,4 @@ class HiveTypeConverterSpec extends Specification { "array,source:string>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}""" "array,source:string>>>>" | """{"type":"array","elementType":{"type":"row","fields":[{"name":"Date","type":"string"},{"name":"nestedArray","type":{"type":"array","elementType":{"type":"row","fields":[{"name":"date","type":"string"},{"name":"countryCodes","type":{"type":"array","elementType":"string"}},{"name":"source","type":"string"}]}}}]}}""" } - - def "Test treat void transforms partitions as non-partition field"() { - given: - // Initial schema with three fields - def initialSchema = new Schema( - Types.NestedField.optional(1, "field1", Types.BooleanType.get(), "added 1st - partition key"), - Types.NestedField.optional(2, "field2", Types.StringType.get(), "added 2nd"), - Types.NestedField.optional(3, "field3", Types.IntegerType.get(), "added 3rd") - ) - // Initial partition fields - def initialPartitionFields = [ - new PartitionField(1, 1, "field1", new Identity()), - new PartitionField(2, 2, "field2", new VoidTransform()), - ] - when: - def fieldDtos = this.converter.icebergSchemaTofieldDtos(initialSchema, initialPartitionFields) - then: - fieldDtos.size() == 3 - // Validate the first field - def field1 = fieldDtos.find { it.name == "field1" } - field1 != null - field1.partitionKey == true - // Validate the second field - def field2 = fieldDtos.find { it.name == "field2" } - field2 != null - field2.partitionKey == false - // Validate the third field - def field3 = fieldDtos.find { it.name == "field3" } - field3 != null - field3.partitionKey == false - noExceptionThrown() - } } diff --git a/metacat-functional-tests/metacat-test-cluster/etc-metacat/data/metadata/00001-ba4b775c-7b1a-4a6f-aec0-03d3c851088c.metadata.json b/metacat-functional-tests/metacat-test-cluster/etc-metacat/data/metadata/00001-ba4b775c-7b1a-4a6f-aec0-03d3c851088c.metadata.json deleted file mode 100644 index adccffd1b..000000000 --- a/metacat-functional-tests/metacat-test-cluster/etc-metacat/data/metadata/00001-ba4b775c-7b1a-4a6f-aec0-03d3c851088c.metadata.json +++ /dev/null @@ -1,117 +0,0 @@ -{ - "format-version" : 1, - "table-uuid" : "77ea2333-acd1-4d8b-9870-b3dcece00c87", - "location" : "file:/tmp/dat", - "last-updated-ms" : 1725487921787, - "last-column-id" : 4, - "schema" : { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "field1", - "required" : false, - "type" : "long" - }, { - "id" : 2, - "name" : "field2", - "required" : false, - "type" : "string" - }, { - "id" : 3, - "name" : "field3", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "field4", - "required" : false, - "type" : "int" - } ] - }, - "current-schema-id" : 0, - "schemas" : [ { - "type" : "struct", - "schema-id" : 0, - "fields" : [ { - "id" : 1, - "name" : "field1", - "required" : false, - "type" : "long" - }, { - "id" : 2, - "name" : "field2", - "required" : false, - "type" : "string" - }, { - "id" : 3, - "name" : "field3", - "required" : false, - "type" : "long" - }, { - "id" : 4, - "name" : "field4", - "required" : false, - "type" : "int" - } ] - } ], - "partition-spec" : [ { - "name" : "field1", - "transform" : "identity", - "source-id" : 1, - "field-id" : 1000 - }, { - "name" : "field2", - "transform" : "void", - "source-id" : 2, - "field-id" : 1001 - } ], - "default-spec-id" : 1, - "partition-specs" : [ { - "spec-id" : 0, - "fields" : [ { - "name" : "field1", - "transform" : "identity", - "source-id" : 1, - "field-id" : 1000 - }, { - "name" : "field2", - "transform" : "identity", - "source-id" : 2, - "field-id" : 1001 - } ] - }, { - "spec-id" : 1, - "fields" : [ { - "name" : "field1", - "transform" : "identity", - "source-id" : 1, - "field-id" : 1000 - }, { - "name" : "field2", - "transform" : "void", - "source-id" : 2, - "field-id" : 1001 - } ] - } ], - "last-partition-id" : 1001, - "default-sort-order-id" : 0, - "sort-orders" : [ { - "order-id" : 0, - "fields" : [ ] - } ], - "properties" : { - "owner" : "owner", - "acls" : "[{\"format_version\":1,\"principals\":[{\"name\":\"123\",\"principal_type\":\"USER\"}],\"resources\":[{\"resource_type\":\"TABLE\",\"uuid\":\"77ea2333-acd1-4d8b-9870-b3dcece00c87\",\"parent\":{\"resource_type\":\"SCHEMA\",\"name\":\"owner\",\"parent\":{\"resource_type\":\"CATALOG\",\"name\":\"prodhive\",\"parent\":null}}}],\"privileges\":[\"ALL\"],\"grantee\":{\"name\":\"123\",\"principal_type\":\"USER\"},\"with_grant\":true},{\"format_version\":1,\"principals\":[{\"name\":\"123\",\"principal_type\":\"USER\"}],\"resources\":[{\"resource_type\":\"TABLE\",\"uuid\":\"77ea2333-acd1-4d8b-9870-b3dcece00c87\",\"parent\":{\"resource_type\":\"SCHEMA\",\"name\":\"owner\",\"parent\":{\"resource_type\":\"CATALOG\",\"name\":\"prodhive\",\"parent\":null}}}],\"privileges\":[\"ALL\"],\"grantee\":{\"name\":\"123\",\"principal_type\":\"USER\"},\"with_grant\":false}]", - "field.metadata.json" : "{\"1\":{},\"2\":{},\"3\":{},\"4\":{}}" - }, - "current-snapshot-id" : -1, - "refs" : { }, - "snapshots" : [ ], - "statistics" : [ ], - "snapshot-log" : [ ], - "metadata-log" : [ { - "timestamp-ms" : 1725487921770, - "metadata-file" : "file:/tmp/data/metadata/00001-abf48887-aa4f-4bcc-9219-1e1721314ee1.metadata.json" - } ] -} diff --git a/metacat-functional-tests/src/functionalTest/groovy/com/netflix/metacat/MetacatSmokeSpec.groovy b/metacat-functional-tests/src/functionalTest/groovy/com/netflix/metacat/MetacatSmokeSpec.groovy index fbc29d30f..c7193abd6 100644 --- a/metacat-functional-tests/src/functionalTest/groovy/com/netflix/metacat/MetacatSmokeSpec.groovy +++ b/metacat-functional-tests/src/functionalTest/groovy/com/netflix/metacat/MetacatSmokeSpec.groovy @@ -41,7 +41,6 @@ import feign.RetryableException import feign.Retryer import groovy.sql.Sql import org.apache.commons.io.FileUtils -import org.apache.iceberg.PartitionField import org.joda.time.Instant import org.skyscreamer.jsonassert.JSONAssert import spock.lang.Ignore @@ -832,47 +831,6 @@ class MetacatSmokeSpec extends Specification { api.deleteTable(catalogName, databaseName, tableName) } - @Unroll - def "Test ignore void transform as partition fields"() { - given: - def catalogName = 'embedded-fast-hive-metastore' - def databaseName = 'iceberg_db' - def tableName = 'iceberg_table_6' - def uri = isLocalEnv ? String.format('file:/tmp/data/') : null - def tableDto = new TableDto( - name: QualifiedName.ofTable(catalogName, databaseName, tableName), - serde: new StorageDto( - owner: 'metacat-test', - inputFormat: 'org.apache.hadoop.mapred.TextInputFormat', - outputFormat: 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat', - serializationLib: 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe', - parameters: [ - 'serialization.format': '1' - ], - uri: uri - ), - definitionMetadata: null, - dataMetadata: null, - fields: null - ) - def metadataLocation = String.format('/tmp/data/metadata/00001-ba4b775c-7b1a-4a6f-aec0-03d3c851088c.metadata.json') - def metadata = [table_type: 'ICEBERG', metadata_location: metadataLocation] - tableDto.setMetadata(metadata) - when: - try {api.createDatabase(catalogName, databaseName, new DatabaseCreateRequestDto()) - } catch (Exception ignored) { - } - api.createTable(catalogName, databaseName, tableName, tableDto) - def tableDTO = api.getTable(catalogName, databaseName, tableName, true, true, true) - then: - tableDTO.getFields().size() == 4 - tableDTO.getPartition_keys().size() == 1 - tableDTO.getPartition_keys()[0] == "field1" - cleanup: - api.deleteTable(catalogName, databaseName, tableName) - } - - @Unroll def "Test get partitions from iceberg table using #filter"() { def catalogName = 'embedded-fast-hive-metastore'