From 399e9021879e88c050e2ff32de86a1a2da63b509 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergio=20G=C3=B3mez=20Villamor?= Date: Tue, 10 Dec 2024 19:27:30 +0100 Subject: [PATCH] tests(datahub-client): new tests for the AvroSchemaConverter --- .../avro/AvroSchemaConverterTest.java | 810 ++++++++++++++++++ .../avroschemaconverter_complex_arrays.asc | 87 ++ .../avroschemaconverter_complex_maps.asc | 87 ++ .../avroschemaconverter_complex_structs.asc | 76 ++ .../avroschemaconverter_complex_unions.asc | 60 ++ .../avroschemaconverter_logical_types.asc | 72 ++ .../avroschemaconverter_primitives.asc | 62 ++ 7 files changed, 1254 insertions(+) create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_arrays.asc create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_maps.asc create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_structs.asc create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_unions.asc create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_logical_types.asc create mode 100644 metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_primitives.asc diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java new file mode 100644 index 0000000000000..ded5ed7ac7cd7 --- /dev/null +++ 
b/metadata-integration/java/datahub-schematron/lib/src/test/java/io/datahubproject/schematron/converters/avro/AvroSchemaConverterTest.java @@ -0,0 +1,810 @@ +package io.datahubproject.schematron.converters.avro; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.data.template.StringArray; +import com.linkedin.schema.*; +import java.io.File; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Collections; +import org.apache.avro.Schema; +import org.testng.annotations.*; + +@Test(groups = "unit") +class AvroSchemaConverterTest { + + private AvroSchemaConverter avroSchemaConverter = AvroSchemaConverter.builder().build(); + private DataPlatformUrn dataPlatformUrn = + DataPlatformUrn.createFromString("urn:li:dataPlatform:foo"); + + AvroSchemaConverterTest() throws URISyntaxException {} + + @Test(groups = "basic") + void testPrimitiveTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_primitives.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=PrimitiveType].[type=int].intField", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=PrimitiveType].[type=union].intFieldV2", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=PrimitiveType].[type=union].[type=int].intFieldV2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new 
NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=PrimitiveType].[type=null].nullField", + "null", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NullType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=PrimitiveType].[type=union].nullFieldV2", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType() + .setNestedTypes(new StringArray(Collections.singletonList("union")))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=PrimitiveType].[type=long].longField", + "long", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=PrimitiveType].[type=float].floatField", + "float", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=PrimitiveType].[type=double].doubleField", + "double", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=PrimitiveType].[type=string].stringField", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=PrimitiveType].[type=boolean].booleanField", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=PrimitiveType].[type=int].nullableIntField", + "int", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + 
schema.getFields().get(11), + "[version=2.0].[type=PrimitiveType].[type=long].nullableLongField", + "long", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=PrimitiveType].[type=string].nullableStringField", + "string", + true, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=PrimitiveType].[type=enum].status", + "Enum", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new EnumType()))); + } + + @Test(groups = "basic") + void testComplexMaps() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_complex_maps.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 15); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=MapType].[type=map].mapOfString", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("ComplexType")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(3), + 
"[version=2.0].[type=MapType].[type=map].[type=ComplexType].mapOfComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfNullableComplexType", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=ComplexTypeNullable].mapOfNullableComplexType.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + 
schema.getFields().get(10), + "[version=2.0].[type=MapType].[type=map].[type=array].mapOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=MapType].[type=map].[type=map].mapOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("int")))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=MapType].[type=map].[type=union].mapOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("union")))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=string].mapOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=MapType].[type=map].[type=union].[type=int].mapOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexArrays() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_complex_arrays.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 16); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=ArrayType].[type=array].arrayOfString", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + 
assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=ArrayType].[type=array].[type=map].arrayOfMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord", + "ComplexType", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("ComplexType"))))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=ArrayType].[type=array].[type=ComplexType].arrayOfRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=ArrayType].[type=array].[type=array].arrayOfArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfUnion", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + 
assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=int].arrayOfUnion", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=boolean].arrayOfUnion", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableString", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=string].arrayOfNullableString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=ArrayType].[type=array].[type=union].arrayOfNullableRecord", + "union", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord", + "ComplexTypeNullable", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(14), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=string].field1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + 
schema.getFields().get(15), + "[version=2.0].[type=ArrayType].[type=array].[type=union].[type=ComplexTypeNullable].arrayOfNullableRecord.[type=int].field2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testComplexStructs() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_complex_structs.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 13); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField", + "ComplexStruct", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=string].fieldString", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=int].fieldInt", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=boolean].fieldBoolean", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldMap", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + 
schema.getFields().get(5), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=NestedRecord].fieldRecord.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=array].fieldArray", + "array(string)", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new ArrayType().setNestedType(new StringArray("string"))))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].fieldUnion", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=string].fieldUnion", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=union].[type=int].fieldUnion", + "int", + false, + false, + new 
SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=StructType].[type=ComplexStruct].structField.[type=map].fieldNullableMap", + "map", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + } + + @Test(groups = "basic") + void testComplexUnions() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_complex_unions.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 14); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=UnionType].[type=union].fieldUnionNullablePrimitives", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionNullablePrimitives", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=UnionType].[type=union].[type=int].fieldUnionNullablePrimitives", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(3), + "[version=2.0].[type=UnionType].[type=union].[type=boolean].fieldUnionNullablePrimitives", + "boolean", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BooleanType()))); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=UnionType].[type=union].fieldUnionComplexTypes", + "union", + true, + false, + new 
SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes", + "NestedRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=string].nestedField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=UnionType].[type=union].[type=NestedRecord].fieldUnionComplexTypes.[type=int].nestedField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=UnionType].[type=union].[type=map].fieldUnionComplexTypes", + "map", + false, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new MapType().setKeyType("string").setValueType("string")))); + assertSchemaField( + schema.getFields().get(9), + "[version=2.0].[type=UnionType].[type=union].fieldUnionPrimitiveAndComplex", + "union", + true, + false, + new SchemaFieldDataType() + .setType( + SchemaFieldDataType.Type.create( + new UnionType().setNestedTypes(new StringArray("union"))))); + assertSchemaField( + schema.getFields().get(10), + "[version=2.0].[type=UnionType].[type=union].[type=string].fieldUnionPrimitiveAndComplex", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(11), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex", + 
"ComplexTypeRecord", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new RecordType()))); + assertSchemaField( + schema.getFields().get(12), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=string].complexField1", + "string", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType()))); + assertSchemaField( + schema.getFields().get(13), + "[version=2.0].[type=UnionType].[type=union].[type=ComplexTypeRecord].fieldUnionPrimitiveAndComplex.[type=int].complexField2", + "int", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType()))); + } + + @Test(groups = "basic") + void testLogicalTypes() throws IOException { + SchemaMetadata schema = + avroSchemaConverter.toDataHubSchema( + readAvroSchema("avroschemaconverter_logical_types.asc"), + false, + false, + dataPlatformUrn, + null); + + schema.getFields().forEach(System.out::println); + + assertEquals(schema.getFields().size(), 9); + + assertSchemaField( + schema.getFields().get(0), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalField", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"scale\":2,\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(1), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutScale", + "bytes(decimal)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new NumberType())), + "{\"logicalType\":\"decimal\",\"precision\":9}"); + assertSchemaField( + schema.getFields().get(2), + "[version=2.0].[type=LogicalTypes].[type=bytes].decimalFieldWithoutPrecisionAndScale", + "bytes", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new BytesType())), + "{\"logicalType\":\"decimal\"}"); + assertSchemaField( 
+ schema.getFields().get(3), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMillisField", + "long(timestamp-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-millis\"}"); + assertSchemaField( + schema.getFields().get(4), + "[version=2.0].[type=LogicalTypes].[type=long].timestampMicrosField", + "long(timestamp-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"timestamp-micros\"}"); + assertSchemaField( + schema.getFields().get(5), + "[version=2.0].[type=LogicalTypes].[type=int].dateField", + "int(date)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new DateType())), + "{\"logicalType\":\"date\"}"); + assertSchemaField( + schema.getFields().get(6), + "[version=2.0].[type=LogicalTypes].[type=int].timeMillisField", + "int(time-millis)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-millis\"}"); + assertSchemaField( + schema.getFields().get(7), + "[version=2.0].[type=LogicalTypes].[type=long].timeMicrosField", + "long(time-micros)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new TimeType())), + "{\"logicalType\":\"time-micros\"}"); + assertSchemaField( + schema.getFields().get(8), + "[version=2.0].[type=LogicalTypes].[type=string].uuidField", + "string(uuid)", + false, + false, + new SchemaFieldDataType().setType(SchemaFieldDataType.Type.create(new StringType())), + "{\"logicalType\":\"uuid\"}"); + } + + private void assertSchemaField( + SchemaField field, + String expectedPath, + String expectedNativeType, + boolean expectedNullable, + boolean expectedIsPartOfKey, + SchemaFieldDataType expectedType) { + assertSchemaField( + field, + expectedPath, + expectedNativeType, + expectedNullable, + expectedIsPartOfKey, + 
expectedType,
+        null);
+  }
+
+  /**
+   * Asserts the core attributes of a converted schema field.
+   *
+   * <p>The JSON props are only compared when {@code expectedJsonProps} is non-null; passing
+   * {@code null} skips that check.
+   */
+  private void assertSchemaField(
+      SchemaField field,
+      String expectedPath,
+      String expectedNativeType,
+      boolean expectedNullable,
+      boolean expectedIsPartOfKey,
+      SchemaFieldDataType expectedType,
+      String expectedJsonProps) {
+    assertEquals(field.getFieldPath(), expectedPath);
+    assertEquals(field.getNativeDataType(), expectedNativeType);
+    assertEquals(field.isNullable(), expectedNullable);
+    assertEquals(field.isIsPartOfKey(), expectedIsPartOfKey);
+    assertEquals(field.getType(), expectedType);
+    if (expectedJsonProps != null) {
+      assertEquals(field.getJsonProps(), expectedJsonProps);
+    }
+  }
+
+  /**
+   * Loads and parses an Avro schema from a test resource on the classpath.
+   *
+   * @param schemaFileName resource name, resolved through this class's class loader
+   * @return the parsed Avro schema
+   * @throws IOException if the resource is not on the classpath, its location is not a valid
+   *     URI, or the schema file cannot be read
+   */
+  private Schema readAvroSchema(String schemaFileName) throws IOException {
+    java.net.URL schemaUrl = getClass().getClassLoader().getResource(schemaFileName);
+    if (schemaUrl == null) {
+      // Fail with the resource name instead of a bare NullPointerException.
+      throw new IOException("Test resource not found on classpath: " + schemaFileName);
+    }
+    try {
+      // URL#getPath() keeps percent-escapes (a space stays "%20"), so convert through a URI to
+      // get a usable file path when the checkout directory contains spaces or non-ASCII chars.
+      return new Schema.Parser().parse(new File(schemaUrl.toURI()));
+    } catch (URISyntaxException e) {
+      throw new IOException("Invalid location for test resource: " + schemaFileName, e);
+    }
+  }
+}
diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_arrays.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_arrays.asc
new file mode 100644
index 0000000000000..8e8bcdaa0a7dc
--- /dev/null
+++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_arrays.asc
@@ -0,0 +1,87 @@
+{
+  "type": "record",
+  "name": "ArrayType",
+  "fields": [
+    {
+      "name": "arrayOfString",
+      "type": {
+        "type": "array",
+        "items": "string"
+      }
+    },
+    {
+      "name": "arrayOfMap",
+      "type": {
+        "type": "array",
+        "items": {
+          "type": "map",
+          "values": "string"
+        }
+      }
+    },
+    {
+      "name": "arrayOfRecord",
+      "type": {
+        "type": "array",
+        "items": {
+          "type": "record",
+          "name": "ComplexType",
+          "fields": [
+            {
+              "name": "field1",
+              "type": "string"
+            },
+            {
+              "name": "field2",
+              "type": "int"
+            }
+          ]
+        }
+      }
+    },
+    {
+      "name": "arrayOfArray",
+      "type": {
+        "type": "array",
+        "items": {
+          "type": "array",
+          "items": "string"
+        }
+      }
+    },
+    {
+      "name":
"arrayOfUnion", + "type": { + "type": "array", + "items": ["string", "int", "boolean"] + } + }, + { + "name": "arrayOfNullableString", + "type": { + "type": "array", + "items": ["null", "string"] + } + }, + { + "name": "arrayOfNullableRecord", + "type": { + "type": "array", + "items": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_maps.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_maps.asc new file mode 100644 index 0000000000000..baedae1b9dcc1 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_maps.asc @@ -0,0 +1,87 @@ +{ + "type": "record", + "name": "MapType", + "fields": [ + { + "name": "mapOfString", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "mapOfComplexType", + "type": { + "type": "map", + "values": { + "type": "record", + "name": "ComplexType", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + } + } + }, + { + "name": "mapOfNullableString", + "type": { + "type": "map", + "values": ["null", "string"] + } + }, + { + "name": "mapOfNullableComplexType", + "type": { + "type": "map", + "values": ["null", { + "type": "record", + "name": "ComplexTypeNullable", + "fields": [ + { + "name": "field1", + "type": "string" + }, + { + "name": "field2", + "type": "int" + } + ] + }] + } + }, + { + "name": "mapOfArray", + "type": { + "type": "map", + "values": { + "type": "array", + "items": "string" + } + } + }, + { + "name": "mapOfMap", + "type": { + "type": "map", + "values": { + "type": "map", + "values": "int" + } + } + }, + { + "name": "mapOfUnion", + "type": { + "type": "map", + 
"values": ["null", "string", "int"] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_structs.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_structs.asc new file mode 100644 index 0000000000000..7f5824192d306 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_structs.asc @@ -0,0 +1,76 @@ +{ + "type": "record", + "name": "StructType", + "fields": [ + { + "name": "structField", + "type": { + "type": "record", + "name": "ComplexStruct", + "fields": [ + { + "name": "fieldString", + "type": "string" + }, + { + "name": "fieldInt", + "type": "int" + }, + { + "name": "fieldBoolean", + "type": "boolean" + }, + { + "name": "fieldMap", + "type": { + "type": "map", + "values": "string" + } + }, + { + "name": "fieldRecord", + "type": { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + } + }, + { + "name": "fieldArray", + "type": { + "type": "array", + "items": "string" + } + }, + { + "name": "fieldUnion", + "type": [ + "null", + "string", + "int" + ] + }, + { + "name": "fieldNullableMap", + "type": [ + "null", + { + "type": "map", + "values": "string" + } + ] + } + ] + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_unions.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_unions.asc new file mode 100644 index 0000000000000..1a35f1cfa0e6d --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_complex_unions.asc @@ -0,0 +1,60 @@ +{ + "type": "record", + "name": "UnionType", + "fields": [ + { + "name": "fieldUnionNullablePrimitives", + 
"type": [ + "null", + "string", + "int", + "boolean" + ] + }, + { + "name": "fieldUnionComplexTypes", + "type": [ + "null", + { + "type": "record", + "name": "NestedRecord", + "fields": [ + { + "name": "nestedField1", + "type": "string" + }, + { + "name": "nestedField2", + "type": "int" + } + ] + }, + { + "type": "map", + "values": "string" + } + ] + }, + { + "name": "fieldUnionPrimitiveAndComplex", + "type": [ + "null", + "string", + { + "type": "record", + "name": "ComplexTypeRecord", + "fields": [ + { + "name": "complexField1", + "type": "string" + }, + { + "name": "complexField2", + "type": "int" + } + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_logical_types.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_logical_types.asc new file mode 100644 index 0000000000000..24919d8214965 --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_logical_types.asc @@ -0,0 +1,72 @@ +{ + "type": "record", + "name": "LogicalTypes", + "fields": [ + { + "name": "decimalField", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9, + "scale": 2 + } + }, + { + "name": "decimalFieldWithoutScale", + "type": { + "type": "bytes", + "logicalType": "decimal", + "precision": 9 + } + }, + { + "name": "decimalFieldWithoutPrecisionAndScale", + "type": { + "type": "bytes", + "logicalType": "decimal" + } + }, + { + "name": "timestampMillisField", + "type": { + "type": "long", + "logicalType": "timestamp-millis" + } + }, + { + "name": "timestampMicrosField", + "type": { + "type": "long", + "logicalType": "timestamp-micros" + } + }, + { + "name": "dateField", + "type": { + "type": "int", + "logicalType": "date" + } + }, + { + "name": "timeMillisField", + "type": { + "type": "int", + "logicalType": "time-millis" + } + }, + { + "name": "timeMicrosField", + "type": { + 
"type": "long", + "logicalType": "time-micros" + } + }, + { + "name": "uuidField", + "type": { + "type": "string", + "logicalType": "uuid" + } + } + ] +} \ No newline at end of file diff --git a/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_primitives.asc b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_primitives.asc new file mode 100644 index 0000000000000..c618299748fab --- /dev/null +++ b/metadata-integration/java/datahub-schematron/lib/src/test/resources/avroschemaconverter_primitives.asc @@ -0,0 +1,62 @@ +{ + "type": "record", + "name": "PrimitiveType", + "fields": [ + { + "name": "intField", + "type": "int" + }, + { + "name": "intFieldV2", + "type": ["int"] + }, + { + "name": "nullField", + "type": "null" + }, + { + "name": "nullFieldV2", + "type": ["null"] + }, + { + "name": "longField", + "type": "long" + }, + { + "name": "floatField", + "type": "float" + }, + { + "name": "doubleField", + "type": "double" + }, + { + "name": "stringField", + "type": "string" + }, + { + "name": "booleanField", + "type": "boolean" + }, + { + "name": "nullableIntField", + "type": ["null", "int"] + }, + { + "name": "nullableLongField", + "type": ["null", "long"] + }, + { + "name": "nullableStringField", + "type": ["null", "string"] + }, + { + "name": "status", + "type": { + "type": "enum", + "name": "StatusEnum", + "symbols": ["ACTIVE", "INACTIVE", "PENDING"] + } + } + ] +} \ No newline at end of file