From 31357f14bf99d31fc1c732286d2ab2fc1d57e26b Mon Sep 17 00:00:00 2001 From: Eka Winata Date: Tue, 29 Oct 2024 12:05:31 +0700 Subject: [PATCH] feat: get implementation diff from feat branch --- build.gradle | 5 +- src/main/java/com/gotocompany/depot/Main.java | 76 +++ .../depot/config/MaxComputeSinkConfig.java | 71 +++ .../depot/maxcompute/MaxComputeSink.java | 41 ++ .../maxcompute/MaxComputeSinkFactory.java | 86 ++++ .../maxcompute/client/MaxComputeClient.java | 75 +++ .../client/insert/InsertManager.java | 11 + .../insert/NonPartitionedInsertManager.java | 38 ++ .../insert/PartitionedInsertManager.java | 48 ++ .../converter/BaseTypeInfoConverter.java | 29 -- .../converter/ConverterOrchestrator.java | 76 +++ .../converter/TypeInfoConverter.java | 12 - .../payload/DurationPayloadConverter.java | 37 ++ .../payload/MessagePayloadConverter.java | 38 ++ .../converter/payload/PayloadConverter.java | 22 + .../payload/PrimitivePayloadConverter.java | 33 ++ .../payload/StructPayloadConverter.java | 32 ++ .../payload/TimestampPayloadConverter.java | 29 ++ .../record/MessageRecordConverter.java | 10 + .../converter/record/RecordConverter.java | 51 ++ .../{ => type}/DurationTypeInfoConverter.java | 18 +- .../{ => type}/MessageTypeInfoConverter.java | 12 +- .../PrimitiveTypeInfoConverter.java | 9 +- .../{ => type}/StructTypeInfoConverter.java | 4 +- .../TimestampTypeInfoConverter.java | 10 +- .../converter/type/TypeInfoConverter.java | 16 + .../helper/MaxComputeSchemaHelper.java | 81 ++++ .../maxcompute/model/MaxComputeSchema.java | 34 ++ .../depot/maxcompute/model/RecordWrapper.java | 20 + .../maxcompute/model/RecordWrappers.java | 26 ++ .../ProtoDataColumnRecordDecorator.java | 59 +++ .../ProtoMetadataColumnRecordDecorator.java | 70 +++ .../maxcompute/record/RecordDecorator.java | 24 + .../record/RecordDecoratorFactory.java | 29 ++ .../schema/MaxComputeSchemaCache.java | 73 +++ .../DefaultPartitioningStrategy.java | 36 ++ .../partition/PartitioningStrategy.java | 11 + 
.../PartitioningStrategyFactory.java | 55 +++ .../TimestampPartitioningStrategy.java | 53 +++ .../depot/maxcompute/util/MetadataUtil.java | 59 +++ .../message/proto/ProtoMessageParser.java | 4 + src/main/proto/test-dse-dev.proto | 51 ++ .../maxcompute/BaseTypeInfoConverterTest.java | 45 -- .../NonPartitionedInsertManagerTest.java | 53 +++ .../insert/PartitionedInsertManagerTest.java | 67 +++ .../converter/ConverterOrchestratorTest.java | 107 +++++ .../payload/DurationPayloadConverterTest.java | 74 +++ .../payload/MessagePayloadConverterTest.java | 117 +++++ .../PrimitivePayloadConverterTest.java | 438 ++++++++++++++++++ .../payload/StructPayloadConverterTest.java | 58 +++ .../TimestampPayloadConverterTest.java | 66 +++ .../converter/record/RecordConverterTest.java | 209 +++++++++ .../type}/DurationTypeInfoConverterTest.java | 13 +- .../type}/MessageTypeInfoConverterTest.java | 25 +- .../type}/PrimitiveTypeInfoConverterTest.java | 21 +- .../type}/StructTypeInfoConverterTest.java | 7 +- .../type/TimestampPayloadConverterTest.java} | 9 +- .../helper/MaxComputeSchemaHelperTest.java | 181 ++++++++ .../ProtoDataColumnRecordDecoratorTest.java | 120 +++++ ...rotoMetadataColumnRecordDecoratorTest.java | 138 ++++++ .../record/RecordDecoratorFactoryTest.java | 39 ++ .../PartitioningStrategyFactoryTest.java | 78 ++++ .../maxcompute/util/MetadataUtilTest.java | 44 ++ src/test/proto/TestMaxComputePartition.proto | 13 + src/test/proto/TestMaxComputeRecord.proto | 16 + src/test/proto/TestMaxComputeTypeInfo.proto | 49 ++ src/test/proto/TextMaxComputeTable.proto | 26 ++ 67 files changed, 3431 insertions(+), 156 deletions(-) create mode 100644 src/main/java/com/gotocompany/depot/Main.java create mode 100644 src/main/java/com/gotocompany/depot/config/MaxComputeSinkConfig.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSink.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSinkFactory.java create mode 100644 
src/main/java/com/gotocompany/depot/maxcompute/client/MaxComputeClient.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/client/insert/InsertManager.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManager.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManager.java delete mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/BaseTypeInfoConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestrator.java delete mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/TypeInfoConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/record/MessageRecordConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverter.java rename src/main/java/com/gotocompany/depot/maxcompute/converter/{ => type}/DurationTypeInfoConverter.java (59%) rename src/main/java/com/gotocompany/depot/maxcompute/converter/{ => type}/MessageTypeInfoConverter.java (78%) rename src/main/java/com/gotocompany/depot/maxcompute/converter/{ => type}/PrimitiveTypeInfoConverter.java (86%) rename src/main/java/com/gotocompany/depot/maxcompute/converter/{ => 
type}/StructTypeInfoConverter.java (79%) rename src/main/java/com/gotocompany/depot/maxcompute/converter/{ => type}/TimestampTypeInfoConverter.java (55%) create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/converter/type/TypeInfoConverter.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelper.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/model/MaxComputeSchema.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrapper.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrappers.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecorator.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecorator.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecorator.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactory.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/schema/MaxComputeSchemaCache.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/schema/partition/DefaultPartitioningStrategy.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategy.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactory.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/schema/partition/TimestampPartitioningStrategy.java create mode 100644 src/main/java/com/gotocompany/depot/maxcompute/util/MetadataUtil.java create mode 100644 src/main/proto/test-dse-dev.proto delete mode 100644 src/test/java/com/gotocompany/depot/maxcompute/BaseTypeInfoConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManagerTest.java create mode 100644 
src/test/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManagerTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestratorTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverterTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverterTest.java rename src/test/java/com/gotocompany/depot/maxcompute/{ => converter/type}/DurationTypeInfoConverterTest.java (79%) rename src/test/java/com/gotocompany/depot/maxcompute/{ => converter/type}/MessageTypeInfoConverterTest.java (72%) rename src/test/java/com/gotocompany/depot/maxcompute/{ => converter/type}/PrimitiveTypeInfoConverterTest.java (97%) rename src/test/java/com/gotocompany/depot/maxcompute/{ => converter/type}/StructTypeInfoConverterTest.java (81%) rename src/test/java/com/gotocompany/depot/maxcompute/{TimestampTypeInfoConverterTest.java => converter/type/TimestampPayloadConverterTest.java} (80%) create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelperTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecoratorTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecoratorTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactoryTest.java create mode 100644 
src/test/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactoryTest.java create mode 100644 src/test/java/com/gotocompany/depot/maxcompute/util/MetadataUtilTest.java create mode 100644 src/test/proto/TestMaxComputePartition.proto create mode 100644 src/test/proto/TestMaxComputeRecord.proto create mode 100644 src/test/proto/TextMaxComputeTable.proto diff --git a/build.gradle b/build.gradle index c780fafe..d5db86cf 100644 --- a/build.gradle +++ b/build.gradle @@ -44,7 +44,7 @@ dependencies { implementation(group: 'com.google.cloud', name: 'google-cloud-bigtable', version: '2.24.1') { exclude group: "io.grpc" } - implementation group: 'com.aliyun.odps', name: 'odps-sdk-core', version: '0.48.8-public' + implementation group: 'com.aliyun.odps', name: 'odps-sdk-core', version: '0.50.3-public' implementation 'io.grpc:grpc-all:1.55.1' implementation group: 'org.slf4j', name: 'jul-to-slf4j', version: '1.7.35' implementation group: 'redis.clients', name: 'jedis', version: '3.10.0' @@ -54,7 +54,10 @@ dependencies { implementation group: 'com.jayway.jsonpath', name: 'json-path', version: '2.8.0' implementation group: 'com.squareup.okhttp3', name: 'okhttp', version: '4.9.1' implementation group: 'joda-time', name: 'joda-time', version: '2.10.2' + // need to take this out as well + implementation 'ch.qos.logback:logback-classic:1.4.11' testImplementation group: 'junit', name: 'junit', version: '4.13.1' + testImplementation group: 'org.assertj', name: 'assertj-core', version: '3.26.3' testImplementation 'org.hamcrest:hamcrest-all:1.3' testImplementation 'org.mockito:mockito-core:4.5.1' testImplementation 'com.github.tomakehurst:wiremock:2.16.0' diff --git a/src/main/java/com/gotocompany/depot/Main.java b/src/main/java/com/gotocompany/depot/Main.java new file mode 100644 index 00000000..df6508e1 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/Main.java @@ -0,0 +1,76 @@ +package com.gotocompany.depot; + +import com.google.protobuf.Timestamp; 
+import com.gotocompany.depot.common.Tuple; +import com.gotocompany.depot.exception.SinkException; +import com.gotocompany.depot.maxcompute.MaxComputeSinkFactory; +import com.gotocompany.depot.message.Message; +import com.gotocompany.depot.metrics.StatsDReporter; +import com.timgroup.statsd.NoOpStatsDClient; +import deduction.HttpRequest; +import lombok.extern.slf4j.Slf4j; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +@Slf4j +public class Main { + public static void main(String[] args) throws SinkException { + StatsDReporter statsDReporter = new StatsDReporter(new NoOpStatsDClient()); + MaxComputeSinkFactory maxComputeSinkFactory = new MaxComputeSinkFactory(statsDReporter, getMockEnv()); + maxComputeSinkFactory.init(); + Sink sink = maxComputeSinkFactory.create(); + + while (true) { + int batchCount = Math.max(1, new Random().nextInt() % 20); + + List messageList = IntStream.range(0, batchCount) + .mapToObj(index -> { + byte[] messageBytes = HttpRequest.newBuilder() + .setField1(UUID.randomUUID().toString()) + .setField2(UUID.randomUUID().toString()) + .setEventTimestamp(Timestamp.newBuilder() + .setSeconds(System.currentTimeMillis() / 1000) + .setNanos(0) + .build()) + .build().toByteArray(); + return new Message(null, messageBytes, new Tuple<>("topic", "test"), new Tuple<>("partition", 0), new Tuple<>("offset", index), new Tuple<>("timestamp", System.currentTimeMillis())); + }) + .collect(Collectors.toList()); + sink.pushToSink(messageList); + log.info("Pushed {} messages to sink", batchCount); + } + } + + private static Map getMockEnv() { + Map env = new HashMap<>(); + env.put("SINK_CONNECTOR_SCHEMA_MESSAGE_MODE", "LOG_MESSAGE"); + env.put("INPUT_SCHEMA_PROTO_CLASS", "deduction.HttpRequest"); + env.put("SINK_CONNECTOR_SCHEMA_PROTO_MESSAGE_CLASS", "deduction.HttpRequest"); + 
env.put("SCHEMA_REGISTRY_STENCIL_ENABLE", "true"); + env.put("SCHEMA_REGISTRY_STENCIL_URLS", "http://stencil.integration.gtfdata.io/v1beta1/namespaces/gtfn/schemas/depot"); + env.put("SCHEMA_REGISTRY_STENCIL_FETCH_TIMEOUT_MS", "10000"); + env.put("SCHEMA_REGISTRY_STENCIL_CACHE_AUTO_REFRESH", "true"); + env.put("SCHEMA_REGISTRY_STENCIL_REFRESH_STRATEGY", "LONG_POLLING"); + env.put("SINK_MAXCOMPUTE_ODPS_URL", "http://service.ap-southeast-5.maxcompute.aliyun.com/api"); + env.put("SINK_MAXCOMPUTE_TUNNEL_URL", "http://dt.ap-southeast-5.maxcompute.aliyun.com"); + env.put("SINK_MAXCOMPUTE_ACCESS_ID", ""); + env.put("SINK_MAXCOMPUTE_ACCESS_KEY", ""); + env.put("SINK_MAXCOMPUTE_PROJECT_ID", "goto_test"); + env.put("SINK_MAXCOMPUTE_SCHEMA", "default"); + env.put("SINK_MAXCOMPUTE_METADATA_NAMESPACE", "__kafka_metadata"); + env.put("SINK_MAXCOMPUTE_ADD_METADATA_ENABLED", "true"); + env.put("SINK_MAXCOMPUTE_METADATA_COLUMNS_TYPES", "timestamp=timestamp,topic=string,partition=integer,offset=long"); + env.put("SINK_MAXCOMPUTE_TABLE_PARTITIONING_ENABLE", "true"); + env.put("SINK_MAXCOMPUTE_TABLE_PARTITION_KEY", "event_timestamp"); + env.put("SINK_MAXCOMPUTE_TABLE_PARTITION_COLUMN_NAME", "__partition_key"); + env.put("SINK_MAXCOMPUTE_TABLE_NAME", "depot_test_partitioned_1"); + env.put("SINK_MAXCOMPUTE_TABLE_LIFECYCLE_DAYS", "100"); + return env; + } +} diff --git a/src/main/java/com/gotocompany/depot/config/MaxComputeSinkConfig.java b/src/main/java/com/gotocompany/depot/config/MaxComputeSinkConfig.java new file mode 100644 index 00000000..4d52b329 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/config/MaxComputeSinkConfig.java @@ -0,0 +1,71 @@ +package com.gotocompany.depot.config; + +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.converter.ConfToListConverter; +import org.aeonbits.owner.Config; + +import java.util.List; + +public interface MaxComputeSinkConfig extends Config { + @Key("SINK_MAXCOMPUTE_ODPS_URL") + String 
getMaxComputeOdpsUrl(); + + @Key("SINK_MAXCOMPUTE_TUNNEL_URL") + String getMaxComputeTunnelUrl(); + + @Key("SINK_MAXCOMPUTE_ACCESS_ID") + String getMaxComputeAccessId(); + + @Key("SINK_MAXCOMPUTE_ACCESS_KEY") + String getMaxComputeAccessKey(); + + @Key("SINK_MAXCOMPUTE_PROJECT_ID") + String getMaxComputeProjectId(); + + @Key("SINK_MAXCOMPUTE_METADATA_NAMESPACE") + @DefaultValue("") + String getMaxcomputeMetadataNamespace(); + + @DefaultValue("true") + @Key("SINK_MAXCOMPUTE_ADD_METADATA_ENABLED") + boolean shouldAddMetadata(); + + @DefaultValue("") + @Key("SINK_MAXCOMPUTE_METADATA_COLUMNS_TYPES") + @ConverterClass(ConfToListConverter.class) + @Separator(ConfToListConverter.ELEMENT_SEPARATOR) + List getMetadataColumnsTypes(); + + @Key("SINK_MAXCOMPUTE_SCHEMA") + @DefaultValue("default") + String getMaxComputeSchema(); + + @Key("SINK_MAXCOMPUTE_TABLE_PARTITIONING_ENABLE") + @DefaultValue("false") + Boolean isTablePartitioningEnabled(); + + @Key("SINK_MAXCOMPUTE_TABLE_PARTITION_KEY") + String getTablePartitionKey(); + + @Key("SINK_MAXCOMPUTE_TABLE_PARTITION_COLUMN_NAME") + String getTablePartitionColumnName(); + + @Key("SINK_MAXCOMPUTE_TABLE_PARTITION_BY_TIMESTAMP_TIMEZONE") + @DefaultValue("UTC+7") + String getTablePartitionByTimestampTimezone(); + + @Key("SINK_MAX_COMPUTE_TABLE_PARTITION_BY_TIMESTAMP_ZONE_OFFSET") + @DefaultValue("+07:00") + String getTablePartitionByTimestampZoneOffset(); + + @Key("SINK_MAXCOMPUTE_TABLE_NAME") + String getMaxComputeTableName(); + + @Key("SINK_MAXCOMPUTE_TABLE_LIFECYCLE_DAYS") + Long getMaxComputeTableLifecycleDays(); + + @Key("SINK_MAXCOMPUTE_RECORD_PACK_FLUSH_TIMEOUT") + @DefaultValue("-1") + Long getMaxComputeRecordPackFlushTimeout(); + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSink.java b/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSink.java new file mode 100644 index 00000000..f923c526 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSink.java @@ -0,0 +1,41 
@@ +package com.gotocompany.depot.maxcompute; + +import com.gotocompany.depot.Sink; +import com.gotocompany.depot.SinkResponse; +import com.gotocompany.depot.error.ErrorInfo; +import com.gotocompany.depot.error.ErrorType; +import com.gotocompany.depot.exception.SinkException; +import com.gotocompany.depot.maxcompute.client.MaxComputeClient; +import com.gotocompany.depot.maxcompute.converter.record.RecordConverter; +import com.gotocompany.depot.maxcompute.model.RecordWrappers; +import com.gotocompany.depot.message.Message; +import lombok.RequiredArgsConstructor; + +import java.io.IOException; +import java.util.List; + +@RequiredArgsConstructor +public class MaxComputeSink implements Sink { + + private final MaxComputeClient maxComputeClient; + private final RecordConverter recordConverter; + + @Override + public SinkResponse pushToSink(List messages) throws SinkException { + SinkResponse sinkResponse = new SinkResponse(); + RecordWrappers recordWrappers = recordConverter.convert(messages); + recordWrappers.getInvalidRecords() + .forEach(invalidRecord -> sinkResponse.getErrors().put(invalidRecord.getIndex(), invalidRecord.getErrorInfo())); + try { + maxComputeClient.insert(recordWrappers.getValidRecords()); + } catch (Exception e) { + recordWrappers.getValidRecords() + .forEach(validRecord -> sinkResponse.getErrors().put(validRecord.getIndex(), new ErrorInfo(e, ErrorType.DEFAULT_ERROR))); + } + return sinkResponse; + } + + @Override + public void close() throws IOException {} + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSinkFactory.java b/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSinkFactory.java new file mode 100644 index 00000000..b45237ae --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/MaxComputeSinkFactory.java @@ -0,0 +1,86 @@ +package com.gotocompany.depot.maxcompute; + +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.Sink; +import 
com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.maxcompute.client.MaxComputeClient; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.converter.record.RecordConverter; +import com.gotocompany.depot.maxcompute.helper.MaxComputeSchemaHelper; +import com.gotocompany.depot.maxcompute.record.RecordDecorator; +import com.gotocompany.depot.maxcompute.record.RecordDecoratorFactory; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategy; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategyFactory; +import com.gotocompany.depot.message.MessageParser; +import com.gotocompany.depot.message.MessageParserFactory; +import com.gotocompany.depot.message.SinkConnectorSchemaMessageMode; +import com.gotocompany.depot.metrics.StatsDReporter; +import com.gotocompany.depot.utils.StencilUtils; +import com.gotocompany.stencil.StencilClientFactory; +import com.gotocompany.stencil.client.StencilClient; +import com.gotocompany.stencil.config.StencilConfig; +import org.aeonbits.owner.ConfigFactory; + +import java.util.Map; + +public class MaxComputeSinkFactory { + + private final PartitioningStrategyFactory partitioningStrategyFactory; + private final MaxComputeSinkConfig maxComputeSinkConfig; + private final SinkConfig sinkConfig; + private final StatsDReporter statsDReporter; + private final ConverterOrchestrator converterOrchestrator; + private MaxComputeClient maxComputeClient; + private MaxComputeSchemaCache maxComputeSchemaCache; + private PartitioningStrategy partitioningStrategy; + + public MaxComputeSinkFactory(StatsDReporter statsDReporter, Map env) { + this.statsDReporter = statsDReporter; + this.maxComputeSinkConfig = ConfigFactory.create(MaxComputeSinkConfig.class, env); + this.sinkConfig = 
ConfigFactory.create(SinkConfig.class, env); + this.converterOrchestrator = new ConverterOrchestrator(); + this.partitioningStrategyFactory = new PartitioningStrategyFactory(converterOrchestrator, maxComputeSinkConfig); + } + + public void init() { + String schemaClass = SinkConnectorSchemaMessageMode.LOG_MESSAGE == sinkConfig.getSinkConnectorSchemaMessageMode() ? sinkConfig.getSinkConnectorSchemaProtoMessageClass() : sinkConfig.getSinkConnectorSchemaProtoKeyClass(); + StencilClient stencilClient = getStencilClient(statsDReporter); + Descriptors.Descriptor descriptor = stencilClient.get(schemaClass); + this.partitioningStrategy = partitioningStrategyFactory.createPartitioningStrategy(descriptor); + this.maxComputeClient = new MaxComputeClient(maxComputeSinkConfig); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper(converterOrchestrator, maxComputeSinkConfig, partitioningStrategy); + this.maxComputeSchemaCache = new MaxComputeSchemaCache(maxComputeSchemaHelper, sinkConfig, converterOrchestrator, maxComputeClient); + MessageParser messageParser = MessageParserFactory.getParser(sinkConfig, statsDReporter, maxComputeSchemaCache); + this.maxComputeSchemaCache.setMessageParser(messageParser); + this.maxComputeSchemaCache.updateSchema(); + } + + public Sink create() { + RecordConverter recordConverter = new RecordConverter(buildRecordDecorator(), maxComputeSchemaCache); + return new MaxComputeSink(maxComputeClient, recordConverter); + } + + private RecordDecorator buildRecordDecorator() { + MessageParser messageParser = MessageParserFactory.getParser(sinkConfig, statsDReporter, maxComputeSchemaCache); + return RecordDecoratorFactory.createRecordDecorator( + converterOrchestrator, + maxComputeSchemaCache, + messageParser, + partitioningStrategy, + maxComputeSinkConfig, + sinkConfig + ); + } + + private StencilClient getStencilClient(StatsDReporter reporter) { + StencilConfig stencilConfig = StencilUtils.getStencilConfig(sinkConfig, 
reporter.getClient(), null); + if (sinkConfig.isSchemaRegistryStencilEnable()) { + return StencilClientFactory.getClient(sinkConfig.getSchemaRegistryStencilUrls(), stencilConfig); + } else { + return StencilClientFactory.getClient(); + } + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/client/MaxComputeClient.java b/src/main/java/com/gotocompany/depot/maxcompute/client/MaxComputeClient.java new file mode 100644 index 00000000..fbbbaacd --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/client/MaxComputeClient.java @@ -0,0 +1,75 @@ +package com.gotocompany.depot.maxcompute.client; + +import com.aliyun.odps.Odps; +import com.aliyun.odps.OdpsException; +import com.aliyun.odps.TableSchema; +import com.aliyun.odps.account.Account; +import com.aliyun.odps.account.AliyunAccount; +import com.aliyun.odps.tunnel.TableTunnel; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.client.insert.InsertManager; +import com.gotocompany.depot.maxcompute.client.insert.NonPartitionedInsertManager; +import com.gotocompany.depot.maxcompute.client.insert.PartitionedInsertManager; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class MaxComputeClient { + + private final Odps odps; + private final MaxComputeSinkConfig maxComputeSinkConfig; + private final TableTunnel tableTunnel; + private final InsertManager insertManager; + + public MaxComputeClient(MaxComputeSinkConfig maxComputeSinkConfig) { + this.maxComputeSinkConfig = maxComputeSinkConfig; + this.odps = initializeOdps(); + this.tableTunnel = new TableTunnel(odps); + this.tableTunnel.setEndpoint(maxComputeSinkConfig.getMaxComputeTunnelUrl()); + this.insertManager = initializeInsertManager(); + } + + public void upsertTable(TableSchema tableSchema) throws OdpsException { + String tableName = maxComputeSinkConfig.getMaxComputeTableName(); + if 
(!this.odps.tables().exists(tableName)) { + this.odps.tables().create(odps.getDefaultProject(), tableName, tableSchema, "", + false, maxComputeSinkConfig.getMaxComputeTableLifecycleDays(), + null, null); + } + } + + public void insert(List recordWrappers) { + try { + insertManager.insert(recordWrappers); + } catch (Exception e) { + throw new RuntimeException("Failed to insert records into MaxCompute", e); + } + } + + private Odps initializeOdps() { + Account account = new AliyunAccount(maxComputeSinkConfig.getMaxComputeAccessId(), maxComputeSinkConfig.getMaxComputeAccessKey()); + Odps odpsClient = new Odps(account); + odpsClient.setDefaultProject(maxComputeSinkConfig.getMaxComputeProjectId()); + odpsClient.setEndpoint(maxComputeSinkConfig.getMaxComputeOdpsUrl()); + odpsClient.setCurrentSchema(maxComputeSinkConfig.getMaxComputeSchema()); + odpsClient.setGlobalSettings(getGlobalSettings()); + return odpsClient; + } + + private InsertManager initializeInsertManager() { + if (maxComputeSinkConfig.isTablePartitioningEnabled()) { + return new PartitionedInsertManager(tableTunnel, maxComputeSinkConfig); + } + return new NonPartitionedInsertManager(tableTunnel, maxComputeSinkConfig); + } + + private Map getGlobalSettings() { + Map globalSettings = new HashMap<>(); + globalSettings.put("setproject odps.schema.evolution.enable", "true"); + globalSettings.put("odps.namespace.schema", "true"); + return globalSettings; + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/client/insert/InsertManager.java b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/InsertManager.java new file mode 100644 index 00000000..4445c6cb --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/InsertManager.java @@ -0,0 +1,11 @@ +package com.gotocompany.depot.maxcompute.client.insert; + +import com.aliyun.odps.tunnel.TunnelException; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; + +import java.io.IOException; +import 
java.util.List; + +public interface InsertManager { + void insert(List recordWrappers) throws TunnelException, IOException; +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManager.java b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManager.java new file mode 100644 index 00000000..0e5748f7 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManager.java @@ -0,0 +1,38 @@ +package com.gotocompany.depot.maxcompute.client.insert; + +import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.tunnel.TunnelException; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.List; + +@RequiredArgsConstructor +@Slf4j +public class NonPartitionedInsertManager implements InsertManager { + + private final TableTunnel tableTunnel; + private final MaxComputeSinkConfig maxComputeSinkConfig; + + @Override + public void insert(List recordWrappers) throws TunnelException, IOException { + TableTunnel.StreamUploadSession streamUploadSession = getStreamUploadSession(); + TableTunnel.StreamRecordPack recordPack = streamUploadSession.newRecordPack(); + for (RecordWrapper recordWrapper : recordWrappers) { + recordPack.append(recordWrapper.getRecord()); + } + TableTunnel.FlushResult flushResult = recordPack.flush( + new TableTunnel.FlushOption() + .timeout(maxComputeSinkConfig.getMaxComputeRecordPackFlushTimeout())); + log.info("Flushed {} records", flushResult.getRecordCount()); + } + + private TableTunnel.StreamUploadSession getStreamUploadSession() throws TunnelException { + return tableTunnel.buildStreamUploadSession(maxComputeSinkConfig.getMaxComputeProjectId(), + maxComputeSinkConfig.getMaxComputeTableName()) + .build(); + } +} diff --git 
a/src/main/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManager.java b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManager.java new file mode 100644 index 00000000..0b7f5f4f --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManager.java @@ -0,0 +1,48 @@ +package com.gotocompany.depot.maxcompute.client.insert; + +import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.tunnel.TunnelException; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +@Slf4j +public class PartitionedInsertManager implements InsertManager { + + private final TableTunnel tableTunnel; + private final MaxComputeSinkConfig maxComputeSinkConfig; + + @Override + public void insert(List recordWrappers) throws TunnelException, IOException { + Map> partitionSpecRecordWrapperMap = recordWrappers.stream() + .collect(Collectors.groupingBy(record -> record.getPartitionSpec().toString())); + for (Map.Entry> entry : partitionSpecRecordWrapperMap.entrySet()) { + TableTunnel.StreamUploadSession streamUploadSession = getStreamUploadSession(entry.getValue().get(0).getPartitionSpec()); + TableTunnel.StreamRecordPack recordPack = streamUploadSession.newRecordPack(); + for (RecordWrapper recordWrapper : entry.getValue()) { + recordPack.append(recordWrapper.getRecord()); + } + TableTunnel.FlushResult flushResult = recordPack.flush( + new TableTunnel.FlushOption() + .timeout(maxComputeSinkConfig.getMaxComputeRecordPackFlushTimeout())); + log.info("Flushed {} records to partition {}", flushResult.getRecordCount(), entry.getKey()); + } + } + + private TableTunnel.StreamUploadSession 
getStreamUploadSession(PartitionSpec partitionSpec) throws TunnelException { + return tableTunnel.buildStreamUploadSession(maxComputeSinkConfig.getMaxComputeProjectId(), + maxComputeSinkConfig.getMaxComputeTableName()) + .setCreatePartition(true) + .setPartitionSpec(partitionSpec) + .build(); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/BaseTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/BaseTypeInfoConverter.java deleted file mode 100644 index e70b1b32..00000000 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/BaseTypeInfoConverter.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.gotocompany.depot.maxcompute.converter; - -import com.aliyun.odps.type.TypeInfo; -import com.google.protobuf.Descriptors; - -import java.util.ArrayList; -import java.util.List; - -public class BaseTypeInfoConverter { - private final List converters; - - public BaseTypeInfoConverter() { - converters = new ArrayList<>(); - converters.add(new PrimitiveTypeInfoConverter()); - converters.add(new DurationTypeInfoConverter()); - converters.add(new StructTypeInfoConverter()); - converters.add(new TimestampTypeInfoConverter()); - converters.add(new MessageTypeInfoConverter(converters)); - converters.sort((c1, c2) -> Integer.compare(c2.getPriority(), c1.getPriority())); - } - - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { - return converters.stream() - .filter(converter -> converter.canConvert(fieldDescriptor)) - .findFirst() - .map(converter -> converter.convert(fieldDescriptor)) - .orElseThrow(() -> new IllegalArgumentException("Unsupported type: " + fieldDescriptor.getType())); - } -} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestrator.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestrator.java new file mode 100644 index 00000000..4730deba --- /dev/null +++ 
b/src/main/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestrator.java @@ -0,0 +1,76 @@ +package com.gotocompany.depot.maxcompute.converter; + +import com.aliyun.odps.type.TypeInfo; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.maxcompute.converter.payload.DurationPayloadConverter; +import com.gotocompany.depot.maxcompute.converter.payload.MessagePayloadConverter; +import com.gotocompany.depot.maxcompute.converter.payload.PayloadConverter; +import com.gotocompany.depot.maxcompute.converter.payload.PrimitivePayloadConverter; +import com.gotocompany.depot.maxcompute.converter.payload.StructPayloadConverter; +import com.gotocompany.depot.maxcompute.converter.payload.TimestampPayloadConverter; +import com.gotocompany.depot.maxcompute.converter.type.DurationTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.MessageTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.PrimitiveTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.StructTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.TimestampTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.TypeInfoConverter; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class ConverterOrchestrator { + + private final List typeInfoConverters; + private final List payloadConverters; + private final Map typeInfoCache; + + public ConverterOrchestrator() { + typeInfoConverters = new ArrayList<>(); + payloadConverters = new ArrayList<>(); + typeInfoCache = new ConcurrentHashMap<>(); + initializeConverters(); + } + + public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { + return typeInfoCache.computeIfAbsent(fieldDescriptor.getFullName(), key -> typeInfoConverters.stream() + .filter(converter -> converter.canConvert(fieldDescriptor)) + .findFirst() + .map(converter -> 
converter.convert(fieldDescriptor)) + .orElseThrow(() -> new IllegalArgumentException("Unsupported type: " + fieldDescriptor.getType()))); + } + + public Object convert(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + return payloadConverters.stream() + .filter(converter -> converter.canConvert(fieldDescriptor)) + .findFirst() + .map(converter -> converter.convert(fieldDescriptor, object)) + .orElseThrow(() -> new IllegalArgumentException("Unsupported type: " + fieldDescriptor.getType())); + } + + public void clearCache() { + typeInfoCache.clear(); + } + + private void initializeConverters() { + PrimitiveTypeInfoConverter primitiveTypeInfoConverter = new PrimitiveTypeInfoConverter(); + DurationTypeInfoConverter durationTypeInfoConverter = new DurationTypeInfoConverter(); + StructTypeInfoConverter structTypeInfoConverter = new StructTypeInfoConverter(); + TimestampTypeInfoConverter timestampTypeInfoConverter = new TimestampTypeInfoConverter(); + MessageTypeInfoConverter messageTypeInfoConverter = new MessageTypeInfoConverter(typeInfoConverters); + + typeInfoConverters.add(primitiveTypeInfoConverter); + typeInfoConverters.add(durationTypeInfoConverter); + typeInfoConverters.add(structTypeInfoConverter); + typeInfoConverters.add(timestampTypeInfoConverter); + typeInfoConverters.add(messageTypeInfoConverter); + + payloadConverters.add(new PrimitivePayloadConverter(primitiveTypeInfoConverter)); + payloadConverters.add(new DurationPayloadConverter(durationTypeInfoConverter)); + payloadConverters.add(new StructPayloadConverter(structTypeInfoConverter)); + payloadConverters.add(new TimestampPayloadConverter(timestampTypeInfoConverter)); + payloadConverters.add(new MessagePayloadConverter(messageTypeInfoConverter, payloadConverters)); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/TypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/TypeInfoConverter.java deleted file mode 100644 index 
343b4d98..00000000 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/TypeInfoConverter.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.gotocompany.depot.maxcompute.converter; - -import com.aliyun.odps.type.TypeInfo; -import com.google.protobuf.Descriptors; - -public interface TypeInfoConverter { - TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor); - boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor); - default int getPriority() { - return 0; - }; -} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverter.java new file mode 100644 index 00000000..dd3cb628 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverter.java @@ -0,0 +1,37 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.StructTypeInfo; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Duration; +import com.gotocompany.depot.maxcompute.converter.type.DurationTypeInfoConverter; +import lombok.RequiredArgsConstructor; + +import java.util.ArrayList; +import java.util.List; + +@RequiredArgsConstructor +public class DurationPayloadConverter implements PayloadConverter { + + private final DurationTypeInfoConverter durationTypeInfoConverter; + + @Override + public Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + List values = getValues((Duration) object); + + return new SimpleStruct((StructTypeInfo) durationTypeInfoConverter.convertSingular(fieldDescriptor), values); + } + + @Override + public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { + return durationTypeInfoConverter.canConvert(fieldDescriptor); + } + + private static List getValues(Duration object) { + List values = new ArrayList<>(); + 
values.add(object.getSeconds()); + values.add(object.getNanos()); + return values; + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverter.java new file mode 100644 index 00000000..910d53de --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverter.java @@ -0,0 +1,38 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.aliyun.odps.data.SimpleStruct; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.gotocompany.depot.maxcompute.converter.type.MessageTypeInfoConverter; +import lombok.RequiredArgsConstructor; + +import java.util.ArrayList; +import java.util.List; + +@RequiredArgsConstructor +public class MessagePayloadConverter implements PayloadConverter { + + private final MessageTypeInfoConverter messageTypeInfoConverter; + private final List payloadConverters; + + @Override + public Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + Message dynamicMessage = (Message) object; + List values = new ArrayList<>(); + fieldDescriptor.getMessageType().getFields().forEach(innerFieldDescriptor -> { + Object mappedInnerValue = payloadConverters.stream() + .filter(converter -> converter.canConvert(innerFieldDescriptor)) + .findFirst() + .map(converter -> converter.convert(innerFieldDescriptor, dynamicMessage.getField(innerFieldDescriptor))) + .orElse(null); + values.add(mappedInnerValue); + }); + return new SimpleStruct(messageTypeInfoConverter.convertSingular(fieldDescriptor), values); + } + + @Override + public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { + return messageTypeInfoConverter.canConvert(fieldDescriptor); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PayloadConverter.java 
b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PayloadConverter.java new file mode 100644 index 00000000..f284eacc --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PayloadConverter.java @@ -0,0 +1,22 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.Descriptors; + +import java.util.List; +import java.util.stream.Collectors; + +public interface PayloadConverter { + + default Object convert(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + if (!fieldDescriptor.isRepeated()) { + return convertSingular(fieldDescriptor, object); + } + return ((List) object).stream() + .map(o -> convertSingular(fieldDescriptor, o)) + .collect(Collectors.toList()); + } + + Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object); + + boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor); +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverter.java new file mode 100644 index 00000000..d4e0ce06 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverter.java @@ -0,0 +1,33 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.maxcompute.converter.type.PrimitiveTypeInfoConverter; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Function; + +public class PrimitivePayloadConverter implements PayloadConverter { + + private final Map> mappers; + + private final PrimitiveTypeInfoConverter primitiveTypeInfoConverter; + + public PrimitivePayloadConverter(PrimitiveTypeInfoConverter primitiveTypeInfoConverter) { + this.primitiveTypeInfoConverter = primitiveTypeInfoConverter; + this.mappers = new HashMap<>(); 
+ this.mappers.put(Descriptors.FieldDescriptor.Type.BYTES, object -> ((ByteString) object).toByteArray()); + this.mappers.put(Descriptors.FieldDescriptor.Type.ENUM, Object::toString); + } + + @Override + public Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + return mappers.getOrDefault(fieldDescriptor.getType(), Function.identity()).apply(object); + } + + @Override + public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { + return primitiveTypeInfoConverter.canConvert(fieldDescriptor); + } +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverter.java new file mode 100644 index 00000000..22417a79 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverter.java @@ -0,0 +1,32 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.InvalidProtocolBufferException; +import com.google.protobuf.Message; +import com.google.protobuf.util.JsonFormat; +import com.gotocompany.depot.maxcompute.converter.type.StructTypeInfoConverter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class StructPayloadConverter implements PayloadConverter { + + private final StructTypeInfoConverter structTypeInfoConverter; + private final JsonFormat.Printer printer = JsonFormat.printer() + .preservingProtoFieldNames() + .omittingInsignificantWhitespace(); + + @Override + public Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + try { + return printer.print((Message) object); + } catch (InvalidProtocolBufferException e) { + return ""; + } + } + + @Override + public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { + return structTypeInfoConverter.canConvert(fieldDescriptor); + } + +} diff --git 
a/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverter.java new file mode 100644 index 00000000..25e0904c --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverter.java @@ -0,0 +1,29 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.gotocompany.depot.maxcompute.converter.type.TimestampTypeInfoConverter; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class TimestampPayloadConverter implements PayloadConverter { + + public static final int SECOND_TO_MILLIS_MULTIPLIER = 1000; + private final TimestampTypeInfoConverter timestampTypeInfoConverter; + + @Override + public Object convertSingular(Descriptors.FieldDescriptor fieldDescriptor, Object object) { + Message message = (Message) object; + long seconds = (long) message.getField(message.getDescriptorForType().findFieldByName("seconds")); + int nanos = (int) message.getField(message.getDescriptorForType().findFieldByName("nanos")); + java.sql.Timestamp convertedTimestamp = new java.sql.Timestamp(seconds * SECOND_TO_MILLIS_MULTIPLIER); + convertedTimestamp.setNanos(nanos); + return convertedTimestamp; + } + + @Override + public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { + return timestampTypeInfoConverter.canConvert(fieldDescriptor); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/record/MessageRecordConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/record/MessageRecordConverter.java new file mode 100644 index 00000000..0d4d7a88 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/record/MessageRecordConverter.java @@ -0,0 +1,10 @@ +package com.gotocompany.depot.maxcompute.converter.record; + +import 
com.gotocompany.depot.maxcompute.model.RecordWrappers; +import com.gotocompany.depot.message.Message; + +import java.util.List; + +public interface MessageRecordConverter { + RecordWrappers convert(List messages); +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverter.java new file mode 100644 index 00000000..4ae13d7a --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverter.java @@ -0,0 +1,51 @@ +package com.gotocompany.depot.maxcompute.converter.record; + +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Record; +import com.gotocompany.depot.error.ErrorInfo; +import com.gotocompany.depot.error.ErrorType; +import com.gotocompany.depot.exception.UnknownFieldsException; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.model.RecordWrappers; +import com.gotocompany.depot.maxcompute.record.RecordDecorator; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.message.Message; +import lombok.RequiredArgsConstructor; + +import java.io.IOException; +import java.util.List; +import java.util.stream.IntStream; + +@RequiredArgsConstructor +public class RecordConverter implements MessageRecordConverter { + + private final RecordDecorator recordDecorator; + private final MaxComputeSchemaCache maxComputeSchemaCache; + + @Override + public RecordWrappers convert(List messages) { + MaxComputeSchema maxComputeSchema = maxComputeSchemaCache.getMaxComputeSchema(); + RecordWrappers recordWrappers = new RecordWrappers(); + IntStream.range(0, messages.size()) + .forEach(index -> { + Record record = new ArrayRecord(maxComputeSchema.getColumns()); + RecordWrapper recordWrapper = new RecordWrapper(record, index, null, null); + try { + 
recordDecorator.decorate(recordWrapper, messages.get(index)); + recordWrappers.addValidRecord(recordWrapper); + } catch (IOException e) { + handleException(recordWrapper, new ErrorInfo(e, ErrorType.DESERIALIZATION_ERROR), recordWrappers); + } catch (UnknownFieldsException e) { + handleException(recordWrapper, new ErrorInfo(e, ErrorType.UNKNOWN_FIELDS_ERROR), recordWrappers); + } + }); + return recordWrappers; + } + + private void handleException(RecordWrapper recordWrapper, ErrorInfo e, RecordWrappers recordWrappers) { + recordWrapper.setRecord(null); + recordWrapper.setErrorInfo(e); + recordWrappers.addInvalidRecord(recordWrapper); + } +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/DurationTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverter.java similarity index 59% rename from src/main/java/com/gotocompany/depot/maxcompute/converter/DurationTypeInfoConverter.java rename to src/main/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverter.java index c3eb8368..58b7aee0 100644 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/DurationTypeInfoConverter.java +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverter.java @@ -1,33 +1,29 @@ -package com.gotocompany.depot.maxcompute.converter; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; import com.google.protobuf.Descriptors; -import java.util.ArrayList; +import java.util.Arrays; import java.util.List; public class DurationTypeInfoConverter implements TypeInfoConverter { private static final String SECONDS = "seconds"; private static final String NANOS = "nanos"; + private static final String GOOGLE_PROTOBUF_DURATION = "google.protobuf.Duration"; @Override - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { - List fieldNames = new ArrayList<>(); - 
fieldNames.add(SECONDS); - fieldNames.add(NANOS); - List typeInfos = new ArrayList<>(); - typeInfos.add(TypeInfoFactory.BIGINT); - typeInfos.add(TypeInfoFactory.INT); - + public TypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor) { + List fieldNames = Arrays.asList(SECONDS, NANOS); + List typeInfos = Arrays.asList(TypeInfoFactory.BIGINT, TypeInfoFactory.INT); return TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos); } @Override public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { return fieldDescriptor.getType() == Descriptors.FieldDescriptor.Type.MESSAGE - && fieldDescriptor.getMessageType().getFullName().equals("google.protobuf.Duration"); + && fieldDescriptor.getMessageType().getFullName().equals(GOOGLE_PROTOBUF_DURATION); } } diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/MessageTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverter.java similarity index 78% rename from src/main/java/com/gotocompany/depot/maxcompute/converter/MessageTypeInfoConverter.java rename to src/main/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverter.java index 0a427326..2e3ec01b 100644 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/MessageTypeInfoConverter.java +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverter.java @@ -1,5 +1,6 @@ -package com.gotocompany.depot.maxcompute.converter; +package com.gotocompany.depot.maxcompute.converter.type; +import com.aliyun.odps.type.StructTypeInfo; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; import com.google.protobuf.Descriptors; @@ -16,7 +17,7 @@ public MessageTypeInfoConverter(List protoFieldToTypeInfoConv } @Override - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { + public StructTypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor) { List fieldNames = 
fieldDescriptor.getMessageType().getFields().stream() .map(Descriptors.FieldDescriptor::getName) .collect(Collectors.toList()); @@ -27,8 +28,7 @@ public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { .map(converter -> converter.convert(fd)) .orElseThrow(() -> new IllegalArgumentException("Unsupported type: " + fd.getJavaType()))) .collect(Collectors.toList()); - TypeInfo typeInfo = TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos); - return fieldDescriptor.isRepeated() ? TypeInfoFactory.getArrayTypeInfo(typeInfo) : typeInfo; + return TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos); } @Override @@ -36,8 +36,4 @@ public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { return Descriptors.FieldDescriptor.Type.MESSAGE.equals(fieldDescriptor.getType()); } - @Override - public int getPriority() { - return Integer.MIN_VALUE; - } } diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/PrimitiveTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverter.java similarity index 86% rename from src/main/java/com/gotocompany/depot/maxcompute/converter/PrimitiveTypeInfoConverter.java rename to src/main/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverter.java index 732ceb57..61b770d7 100644 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/PrimitiveTypeInfoConverter.java +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverter.java @@ -1,4 +1,4 @@ -package com.gotocompany.depot.maxcompute.converter; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; @@ -10,7 +10,6 @@ public class PrimitiveTypeInfoConverter implements TypeInfoConverter { private static final Map PROTO_TYPE_MAP; - static { PROTO_TYPE_MAP = new HashMap<>(); PROTO_TYPE_MAP.put(Descriptors.FieldDescriptor.Type.BYTES, 
TypeInfoFactory.BINARY); @@ -32,14 +31,12 @@ public class PrimitiveTypeInfoConverter implements TypeInfoConverter { } @Override - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { - TypeInfo typeInfo = PROTO_TYPE_MAP.get(fieldDescriptor.getType()); - return fieldDescriptor.isRepeated() ? TypeInfoFactory.getArrayTypeInfo(typeInfo) : typeInfo; + public TypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor) { + return PROTO_TYPE_MAP.get(fieldDescriptor.getType()); } @Override public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { return PROTO_TYPE_MAP.containsKey(fieldDescriptor.getType()); } - } diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/StructTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverter.java similarity index 79% rename from src/main/java/com/gotocompany/depot/maxcompute/converter/StructTypeInfoConverter.java rename to src/main/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverter.java index 81f4bf38..00e83816 100644 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/StructTypeInfoConverter.java +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverter.java @@ -1,4 +1,4 @@ -package com.gotocompany.depot.maxcompute.converter; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; @@ -7,7 +7,7 @@ public class StructTypeInfoConverter implements TypeInfoConverter { @Override - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { + public TypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor) { return TypeInfoFactory.STRING; } diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/TimestampTypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/TimestampTypeInfoConverter.java similarity index 55% 
rename from src/main/java/com/gotocompany/depot/maxcompute/converter/TimestampTypeInfoConverter.java rename to src/main/java/com/gotocompany/depot/maxcompute/converter/type/TimestampTypeInfoConverter.java index b11c7026..d77e93bb 100644 --- a/src/main/java/com/gotocompany/depot/maxcompute/converter/TimestampTypeInfoConverter.java +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/TimestampTypeInfoConverter.java @@ -1,4 +1,4 @@ -package com.gotocompany.depot.maxcompute.converter; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; @@ -7,14 +7,14 @@ public class TimestampTypeInfoConverter implements TypeInfoConverter { @Override - public TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { - return TypeInfoFactory.TIMESTAMP_NTZ; + public TypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor) { + return TypeInfoFactory.TIMESTAMP; } @Override public boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor) { - return Descriptors.FieldDescriptor.Type.MESSAGE.equals(fieldDescriptor.getType()) && - fieldDescriptor.getMessageType().getFullName().equals("google.protobuf.Timestamp"); + return Descriptors.FieldDescriptor.Type.MESSAGE.equals(fieldDescriptor.getType()) + && fieldDescriptor.getMessageType().getFullName().equals("google.protobuf.Timestamp"); } } diff --git a/src/main/java/com/gotocompany/depot/maxcompute/converter/type/TypeInfoConverter.java b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/TypeInfoConverter.java new file mode 100644 index 00000000..bb536ea3 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/converter/type/TypeInfoConverter.java @@ -0,0 +1,16 @@ +package com.gotocompany.depot.maxcompute.converter.type; + +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; + +public interface TypeInfoConverter { + default 
TypeInfo convert(Descriptors.FieldDescriptor fieldDescriptor) { + return wrap(fieldDescriptor, convertSingular(fieldDescriptor)); + } + TypeInfo convertSingular(Descriptors.FieldDescriptor fieldDescriptor); + boolean canConvert(Descriptors.FieldDescriptor fieldDescriptor); + default TypeInfo wrap(Descriptors.FieldDescriptor fieldDescriptor, TypeInfo typeInfo) { + return fieldDescriptor.isRepeated() ? TypeInfoFactory.getArrayTypeInfo(typeInfo) : typeInfo; + } +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelper.java b/src/main/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelper.java new file mode 100644 index 00000000..30293369 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelper.java @@ -0,0 +1,81 @@ +package com.gotocompany.depot.maxcompute.helper; + +import com.aliyun.odps.Column; +import com.aliyun.odps.TableSchema; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategy; +import com.gotocompany.depot.maxcompute.util.MetadataUtil; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; + +@RequiredArgsConstructor +public class MaxComputeSchemaHelper { + + private final ConverterOrchestrator converterOrchestrator; + private final MaxComputeSinkConfig maxComputeSinkConfig; + private final PartitioningStrategy partitioningStrategy; + + public MaxComputeSchema buildMaxComputeSchema(Descriptors.Descriptor descriptor) { + List dataColumn = buildDataColumns(descriptor, partitioningStrategy); + List metadataColumns = 
buildMetadataColumns(); + Column partitionColumn = maxComputeSinkConfig.isTablePartitioningEnabled() ? buildPartitionColumn(partitioningStrategy) : null; + TableSchema.Builder tableSchemaBuilder = com.aliyun.odps.TableSchema.builder(); + tableSchemaBuilder.withColumns(dataColumn); + tableSchemaBuilder.withColumns(metadataColumns); + if (Objects.nonNull(partitionColumn)) { + tableSchemaBuilder.withPartitionColumn(partitionColumn); + } + + return MaxComputeSchema.builder() + .descriptor(descriptor) + .tableSchema(tableSchemaBuilder.build()) + .dataColumns(dataColumn.stream().collect(Collectors.toMap(Column::getName, Column::getTypeInfo))) + .metadataColumns(metadataColumns.stream().collect(Collectors.toMap(Column::getName, Column::getTypeInfo))) + .partitionColumns(Objects.nonNull(partitionColumn) ? + Collections.singletonMap(partitionColumn.getName(), partitionColumn.getTypeInfo()) : Collections.emptyMap()) + .build(); + + } + + private List buildDataColumns(Descriptors.Descriptor descriptor, + PartitioningStrategy partitioningStrategy) { + return descriptor.getFields() + .stream() + .filter(fieldDescriptor -> { + if (!maxComputeSinkConfig.isTablePartitioningEnabled() || !fieldDescriptor.getName().equals(maxComputeSinkConfig.getTablePartitionKey())) { + return true; + } + return !partitioningStrategy.shouldReplaceOriginalColumn(); + }) + .map(fieldDescriptor -> Column.newBuilder(fieldDescriptor.getName(), + converterOrchestrator.convert(fieldDescriptor)).build()) + .collect(Collectors.toList()); + } + + private Column buildPartitionColumn(PartitioningStrategy partitioningStrategy) { + return partitioningStrategy.getPartitionColumn(); + } + + private List buildMetadataColumns() { + if (!maxComputeSinkConfig.shouldAddMetadata()) { + return new ArrayList<>(); + } + if (StringUtils.isBlank(maxComputeSinkConfig.getMaxcomputeMetadataNamespace())) { + return maxComputeSinkConfig.getMetadataColumnsTypes() + .stream() + .map(tuple -> Column.newBuilder(tuple.getFirst(), 
MetadataUtil.getMetadataTypeInfo(tuple.getSecond())).build()) + .collect(Collectors.toList()); + } + return Collections.singletonList(Column.newBuilder(maxComputeSinkConfig.getMaxcomputeMetadataNamespace(), + MetadataUtil.getMetadataTypeInfo(maxComputeSinkConfig)).build()); + } +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/model/MaxComputeSchema.java b/src/main/java/com/gotocompany/depot/maxcompute/model/MaxComputeSchema.java new file mode 100644 index 00000000..e7d66399 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/model/MaxComputeSchema.java @@ -0,0 +1,34 @@ +package com.gotocompany.depot.maxcompute.model; + +import com.aliyun.odps.Column; +import com.aliyun.odps.TableSchema; +import com.aliyun.odps.type.TypeInfo; +import com.google.protobuf.Descriptors; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Data; +import lombok.RequiredArgsConstructor; + +import java.util.Map; +import java.util.Objects; + +@Data +@AllArgsConstructor +@RequiredArgsConstructor +@Builder +public class MaxComputeSchema { + private final Descriptors.Descriptor descriptor; + private final TableSchema tableSchema; + private final Map dataColumns; + private final Map metadataColumns; + private final Map partitionColumns; + private Column[] columns; + + public Column[] getColumns() { + if (Objects.isNull(columns)) { + columns = tableSchema.getColumns().toArray(new Column[]{}); + } + return columns; + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrapper.java b/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrapper.java new file mode 100644 index 00000000..c21718df --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrapper.java @@ -0,0 +1,20 @@ +package com.gotocompany.depot.maxcompute.model; + +import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.data.Record; +import com.gotocompany.depot.error.ErrorInfo; +import lombok.AllArgsConstructor; +import 
lombok.Data; +import lombok.Getter; +import lombok.Setter; + +@Data +@Getter +@Setter +@AllArgsConstructor +public class RecordWrapper { + private Record record; + private long index; + private ErrorInfo errorInfo; + private PartitionSpec partitionSpec; +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrappers.java b/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrappers.java new file mode 100644 index 00000000..57dbdd89 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/model/RecordWrappers.java @@ -0,0 +1,26 @@ +package com.gotocompany.depot.maxcompute.model; + +import lombok.Data; + +import java.util.ArrayList; +import java.util.List; + +@Data +public class RecordWrappers { + private List validRecords; + private List invalidRecords; + + public RecordWrappers() { + this.validRecords = new ArrayList<>(); + this.invalidRecords = new ArrayList<>(); + } + + public void addValidRecord(RecordWrapper recordWrapper) { + this.validRecords.add(recordWrapper); + } + + public void addInvalidRecord(RecordWrapper recordWrapper) { + this.invalidRecords.add(recordWrapper); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecorator.java b/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecorator.java new file mode 100644 index 00000000..1f90a38b --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecorator.java @@ -0,0 +1,59 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategy; +import com.gotocompany.depot.message.Message; +import 
com.gotocompany.depot.message.MessageParser; +import com.gotocompany.depot.message.ParsedMessage; +import com.gotocompany.depot.message.SinkConnectorSchemaMessageMode; + +import java.io.IOException; + +public class ProtoDataColumnRecordDecorator extends RecordDecorator { + + private final ConverterOrchestrator converterOrchestrator; + private final MaxComputeSchemaCache maxComputeSchemaCache; + private final MessageParser protoMessageParser; + private final PartitioningStrategy partitioningStrategy; + private final SinkConfig sinkConfig; + + public ProtoDataColumnRecordDecorator(RecordDecorator decorator, + ConverterOrchestrator converterOrchestrator, + MaxComputeSchemaCache maxComputeSchemaCache, + MessageParser messageParser, + SinkConfig sinkConfig, + PartitioningStrategy partitioningStrategy) { + super(decorator); + this.converterOrchestrator = converterOrchestrator; + this.maxComputeSchemaCache = maxComputeSchemaCache; + this.protoMessageParser = messageParser; + this.partitioningStrategy = partitioningStrategy; + this.sinkConfig = sinkConfig; + } + + @Override + public void append(RecordWrapper recordWrapper, Message message) throws IOException { + String schemaClass = getSchemaClass(); + ParsedMessage parsedMessage = protoMessageParser.parse(message, sinkConfig.getSinkConnectorSchemaMessageMode(), schemaClass); + parsedMessage.validate(sinkConfig); + com.google.protobuf.Message protoMessage = (com.google.protobuf.Message) parsedMessage.getRaw(); + for (Descriptors.FieldDescriptor fieldDescriptor : protoMessage.getDescriptorForType().getFields()) { + recordWrapper.getRecord() + .set(fieldDescriptor.getName(), converterOrchestrator.convert(fieldDescriptor, protoMessage.getField(fieldDescriptor))); + } + if (partitioningStrategy != null) { + Object object = protoMessage.getField(protoMessage.getDescriptorForType().findFieldByName(partitioningStrategy.getOriginalPartitionColumnName())); + 
recordWrapper.setPartitionSpec(partitioningStrategy.getPartitionSpec(object)); + } + } + + private String getSchemaClass() { + return sinkConfig.getSinkConnectorSchemaMessageMode() == SinkConnectorSchemaMessageMode.LOG_MESSAGE + ? sinkConfig.getSinkConnectorSchemaProtoMessageClass() : sinkConfig.getSinkConnectorSchemaProtoKeyClass(); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecorator.java b/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecorator.java new file mode 100644 index 00000000..a1e5c078 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecorator.java @@ -0,0 +1,70 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.aliyun.odps.data.Record; +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.utils.StringUtils; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.maxcompute.util.MetadataUtil; +import com.gotocompany.depot.message.Message; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class ProtoMetadataColumnRecordDecorator extends RecordDecorator { + + private final MaxComputeSinkConfig maxComputeSinkConfig; + private final MaxComputeSchemaCache maxComputeSchemaCache; + private final Map metadataTypePairs; + public ProtoMetadataColumnRecordDecorator(RecordDecorator recordDecorator, + MaxComputeSinkConfig maxComputeSinkConfig, + MaxComputeSchemaCache maxComputeSchemaCache) { + super(recordDecorator); + 
this.maxComputeSinkConfig = maxComputeSinkConfig; + this.maxComputeSchemaCache = maxComputeSchemaCache; + this.metadataTypePairs = maxComputeSinkConfig.getMetadataColumnsTypes() + .stream() + .collect(Collectors.toMap(TupleString::getFirst, TupleString::getSecond)); + } + + @Override + public void append(RecordWrapper recordWrapper, Message message) throws IOException { + if (StringUtils.isNotBlank(maxComputeSinkConfig.getMaxcomputeMetadataNamespace())) { + appendNamespacedMetadata(recordWrapper.getRecord(), message); + return; + } + appendMetadata(recordWrapper.getRecord(), message); + } + + private void appendNamespacedMetadata(Record record, Message message) { + Map metadata = message.getMetadata(maxComputeSinkConfig.getMetadataColumnsTypes()); + MaxComputeSchema maxComputeSchema = maxComputeSchemaCache.getMaxComputeSchema(); + StructTypeInfo typeInfo = (StructTypeInfo) maxComputeSchema.getMetadataColumns().get(maxComputeSinkConfig.getMaxcomputeMetadataNamespace()); + List values = IntStream.range(0, typeInfo.getFieldCount()) + .mapToObj(index -> { + Object metadataValue = metadata.get(typeInfo.getFieldNames().get(index)); + return MetadataUtil.getValidMetadataValue(metadataTypePairs.get(typeInfo.getFieldNames().get(index)), metadataValue); + }).collect(Collectors.toList()); + record.set(maxComputeSinkConfig.getMaxcomputeMetadataNamespace(), new SimpleStruct(typeInfo, values)); + } + + private void appendMetadata(Record record, Message message) { + Map metadata = message.getMetadata(maxComputeSinkConfig.getMetadataColumnsTypes()); + + for (Map.Entry entry : maxComputeSchemaCache.getMaxComputeSchema() + .getMetadataColumns() + .entrySet()) { + Object value = metadata.get(entry.getKey()); + record.set(entry.getKey(), MetadataUtil.getValidMetadataValue(metadataTypePairs.get(entry.getKey()), value)); + } + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecorator.java 
b/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecorator.java new file mode 100644 index 00000000..06613890 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecorator.java @@ -0,0 +1,24 @@ +package com.gotocompany.depot.maxcompute.record; + + +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.message.Message; + +import java.io.IOException; + +public abstract class RecordDecorator { + private final RecordDecorator decorator; + + public RecordDecorator(RecordDecorator decorator) { + this.decorator = decorator; + } + + public void decorate(RecordWrapper recordWrapper, Message message) throws IOException { + append(recordWrapper, message); + if (decorator != null) { + decorator.decorate(recordWrapper, message); + } + } + + public abstract void append(RecordWrapper recordWrapper, Message message) throws IOException; +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactory.java b/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactory.java new file mode 100644 index 00000000..4bdf3db6 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactory.java @@ -0,0 +1,29 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategy; +import com.gotocompany.depot.message.MessageParser; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class RecordDecoratorFactory { + + public static RecordDecorator createRecordDecorator( + ConverterOrchestrator converterOrchestrator, + MaxComputeSchemaCache maxComputeSchemaCache, + MessageParser messageParser, + 
PartitioningStrategy partitioningStrategy, + MaxComputeSinkConfig maxComputeSinkConfig, + SinkConfig sinkConfig + ) { + RecordDecorator dataColumnRecordDecorator = new ProtoDataColumnRecordDecorator(null, converterOrchestrator, maxComputeSchemaCache, messageParser, sinkConfig, partitioningStrategy); + if (!maxComputeSinkConfig.shouldAddMetadata()) { + return dataColumnRecordDecorator; + } + return new ProtoMetadataColumnRecordDecorator(dataColumnRecordDecorator, maxComputeSinkConfig, maxComputeSchemaCache); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/schema/MaxComputeSchemaCache.java b/src/main/java/com/gotocompany/depot/maxcompute/schema/MaxComputeSchemaCache.java new file mode 100644 index 00000000..61af2b8b --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/schema/MaxComputeSchemaCache.java @@ -0,0 +1,73 @@ +package com.gotocompany.depot.maxcompute.schema; + +import com.aliyun.odps.OdpsException; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.maxcompute.client.MaxComputeClient; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.helper.MaxComputeSchemaHelper; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.message.SinkConnectorSchemaMessageMode; +import com.gotocompany.depot.message.proto.ProtoMessageParser; +import com.gotocompany.depot.stencil.DepotStencilUpdateListener; +import lombok.extern.slf4j.Slf4j; + +import java.util.Map; + +@Slf4j +public class MaxComputeSchemaCache extends DepotStencilUpdateListener { + + private final MaxComputeSchemaHelper maxComputeSchemaHelper; + private final SinkConfig sinkConfig; + private final ConverterOrchestrator converterOrchestrator; + private final MaxComputeClient maxComputeClient; + private MaxComputeSchema maxComputeSchema; + + public MaxComputeSchemaCache(MaxComputeSchemaHelper 
maxComputeSchemaHelper, + SinkConfig sinkConfig, + ConverterOrchestrator converterOrchestrator, + MaxComputeClient maxComputeClient) { + this.maxComputeSchemaHelper = maxComputeSchemaHelper; + this.sinkConfig = sinkConfig; + this.converterOrchestrator = converterOrchestrator; + this.maxComputeClient = maxComputeClient; + } + + public MaxComputeSchema getMaxComputeSchema() { + synchronized (this) { + if (maxComputeSchema == null) { + updateSchema(); + } + } + return maxComputeSchema; + } + + @Override + public void onSchemaUpdate(Map newDescriptor) { + Descriptors.Descriptor descriptor; + if (newDescriptor == null) { + Map descriptorMap = ((ProtoMessageParser) getMessageParser()).getDescriptorMap(); + descriptor = descriptorMap.get(getSchemaClass()); + } else { + descriptor = newDescriptor.get(getSchemaClass()); + } + maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + converterOrchestrator.clearCache(); + try { + maxComputeClient.upsertTable(maxComputeSchema.getTableSchema()); + log.info("MaxCompute table upserted successfully"); + } catch (OdpsException e) { + throw new RuntimeException("Error while updating maxcompute table on callback", e); + } + } + + @Override + public void updateSchema() { + onSchemaUpdate(null); + } + + private String getSchemaClass() { + return sinkConfig.getSinkConnectorSchemaMessageMode() == SinkConnectorSchemaMessageMode.LOG_MESSAGE + ? 
sinkConfig.getSinkConnectorSchemaProtoMessageClass() : sinkConfig.getSinkConnectorSchemaProtoKeyClass(); + } +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/DefaultPartitioningStrategy.java b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/DefaultPartitioningStrategy.java new file mode 100644 index 00000000..4a6bcb47 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/DefaultPartitioningStrategy.java @@ -0,0 +1,36 @@ +package com.gotocompany.depot.maxcompute.schema.partition; + +import com.aliyun.odps.Column; +import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.type.TypeInfo; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class DefaultPartitioningStrategy implements PartitioningStrategy { + + private final TypeInfo typeInfo; + private final MaxComputeSinkConfig maxComputeSinkConfig; + + @Override + public String getOriginalPartitionColumnName() { + return maxComputeSinkConfig.getTablePartitionKey(); + } + + @Override + public boolean shouldReplaceOriginalColumn() { + return true; + } + + @Override + public Column getPartitionColumn() { + return Column.newBuilder(maxComputeSinkConfig.getTablePartitionColumnName(), typeInfo) + .build(); + } + + @Override + public PartitionSpec getPartitionSpec(Object object) { + return new PartitionSpec(String.format("%s=%s", maxComputeSinkConfig.getTablePartitionColumnName(), object.toString())); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategy.java b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategy.java new file mode 100644 index 00000000..1c9e0b22 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategy.java @@ -0,0 +1,11 @@ +package com.gotocompany.depot.maxcompute.schema.partition; + +import com.aliyun.odps.Column; 
+import com.aliyun.odps.PartitionSpec; + +public interface PartitioningStrategy { + String getOriginalPartitionColumnName(); + boolean shouldReplaceOriginalColumn(); + Column getPartitionColumn(); + PartitionSpec getPartitionSpec(Object object); +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactory.java b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactory.java new file mode 100644 index 00000000..3763c860 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactory.java @@ -0,0 +1,55 @@ +package com.gotocompany.depot.maxcompute.schema.partition; + +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import lombok.RequiredArgsConstructor; + +import java.util.HashSet; +import java.util.Set; + +@RequiredArgsConstructor +public class PartitioningStrategyFactory { + + private final ConverterOrchestrator converterOrchestrator; + private final MaxComputeSinkConfig maxComputeSinkConfig; + private static final Set allowedPartitionKeyTypeInfo; + + static { + allowedPartitionKeyTypeInfo = new HashSet<>(); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.TIMESTAMP); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.STRING); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.TINYINT); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.SMALLINT); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.INT); + allowedPartitionKeyTypeInfo.add(TypeInfoFactory.BIGINT); + } + + public PartitioningStrategy createPartitioningStrategy(Descriptors.Descriptor descriptor) { + if (!maxComputeSinkConfig.isTablePartitioningEnabled()) { + return null; + } + String partitionKey = maxComputeSinkConfig.getTablePartitionKey(); + Descriptors.FieldDescriptor 
fieldDescriptor = descriptor + .findFieldByName(partitionKey); + if (fieldDescriptor == null) { + throw new IllegalArgumentException("Partition key not found in the descriptor: " + partitionKey); + } + TypeInfo partitionKeyTypeInfo = converterOrchestrator.convert(fieldDescriptor); + checkPartitionTypePrecondition(partitionKeyTypeInfo); + if (TypeInfoFactory.TIMESTAMP.equals(partitionKeyTypeInfo)) { + return new TimestampPartitioningStrategy(maxComputeSinkConfig); + } else { + return new DefaultPartitioningStrategy(partitionKeyTypeInfo, maxComputeSinkConfig); + } + } + + private void checkPartitionTypePrecondition(TypeInfo typeInfo) { + if (!allowedPartitionKeyTypeInfo.contains(typeInfo)) { + throw new IllegalArgumentException("Partition key type not supported: " + typeInfo.getTypeName()); + } + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/TimestampPartitioningStrategy.java b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/TimestampPartitioningStrategy.java new file mode 100644 index 00000000..ddc78710 --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/schema/partition/TimestampPartitioningStrategy.java @@ -0,0 +1,53 @@ +package com.gotocompany.depot.maxcompute.schema.partition; + +import com.aliyun.odps.Column; +import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Message; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import lombok.RequiredArgsConstructor; + +import java.time.Instant; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.ZoneOffset; + +@RequiredArgsConstructor +public class TimestampPartitioningStrategy implements PartitioningStrategy { + + private final MaxComputeSinkConfig maxComputeSinkConfig; + + @Override + public String getOriginalPartitionColumnName() { + return maxComputeSinkConfig.getTablePartitionKey(); + } + + @Override + public boolean shouldReplaceOriginalColumn() { + 
return false; + } + + @Override + public Column getPartitionColumn() { + return Column.newBuilder(maxComputeSinkConfig.getTablePartitionColumnName(), TypeInfoFactory.STRING) + .build(); + } + + @Override + public PartitionSpec getPartitionSpec(Object object) { + Message message = (Message) object; + long seconds = (long) message.getField(message.getDescriptorForType().findFieldByName("seconds")); + int nanos = (int) message.getField(message.getDescriptorForType().findFieldByName("nanos")); + return new PartitionSpec(String.format("%s=%d", maxComputeSinkConfig.getTablePartitionColumnName(), getStartOfDayEpoch(seconds, nanos))); + } + + private long getStartOfDayEpoch(long seconds, int nanos) { + Instant instant = Instant.ofEpochSecond(seconds, nanos); + ZoneId zoneId = ZoneId.of(maxComputeSinkConfig.getTablePartitionByTimestampTimezone()); + ZoneOffset zoneOffset = ZoneOffset.of(maxComputeSinkConfig.getTablePartitionByTimestampZoneOffset()); + LocalDateTime localDateTime = LocalDateTime.ofInstant(instant, zoneId); + LocalDateTime startOfDay = localDateTime.toLocalDate().atStartOfDay(); + return startOfDay.toInstant(zoneOffset).getEpochSecond(); + } + +} diff --git a/src/main/java/com/gotocompany/depot/maxcompute/util/MetadataUtil.java b/src/main/java/com/gotocompany/depot/maxcompute/util/MetadataUtil.java new file mode 100644 index 00000000..6053a54a --- /dev/null +++ b/src/main/java/com/gotocompany/depot/maxcompute/util/MetadataUtil.java @@ -0,0 +1,59 @@ +package com.gotocompany.depot.maxcompute.util; + +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; + +import java.sql.Timestamp; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; + +public class MetadataUtil { + + private static final Map 
METADATA_TYPE_MAP; + private static final Map> METADATA_MAPPER_MAP; + + static { + METADATA_TYPE_MAP = new HashMap<>(); + METADATA_TYPE_MAP.put("integer", TypeInfoFactory.INT); + METADATA_TYPE_MAP.put("long", TypeInfoFactory.BIGINT); + METADATA_TYPE_MAP.put("float", TypeInfoFactory.FLOAT); + METADATA_TYPE_MAP.put("double", TypeInfoFactory.DOUBLE); + METADATA_TYPE_MAP.put("string", TypeInfoFactory.STRING); + METADATA_TYPE_MAP.put("boolean", TypeInfoFactory.BOOLEAN); + METADATA_TYPE_MAP.put("timestamp", TypeInfoFactory.TIMESTAMP); + + METADATA_MAPPER_MAP = new HashMap<>(); + METADATA_MAPPER_MAP.put("integer", obj -> ((Number) obj).intValue()); + METADATA_MAPPER_MAP.put("long", obj -> ((Number) obj).longValue()); + METADATA_MAPPER_MAP.put("float", obj -> ((Number) obj).floatValue()); + METADATA_MAPPER_MAP.put("double", obj -> ((Number) obj).doubleValue()); + METADATA_MAPPER_MAP.put("string", Function.identity()); + METADATA_MAPPER_MAP.put("boolean", Function.identity()); + METADATA_MAPPER_MAP.put("timestamp", obj -> new Timestamp(((Number) obj).longValue())); + } + + public static TypeInfo getMetadataTypeInfo(String type) { + return METADATA_TYPE_MAP.get(type.toLowerCase()); + } + + public static Object getValidMetadataValue(String type, Object value) { + return METADATA_MAPPER_MAP.get(type.toLowerCase()).apply(value); + } + + public static StructTypeInfo getMetadataTypeInfo(MaxComputeSinkConfig maxComputeSinkConfig) { + return TypeInfoFactory.getStructTypeInfo(maxComputeSinkConfig.getMetadataColumnsTypes() + .stream() + .map(TupleString::getFirst) + .collect(Collectors.toList()), + maxComputeSinkConfig.getMetadataColumnsTypes() + .stream() + .map(tuple -> METADATA_TYPE_MAP.get(tuple.getSecond().toLowerCase())) + .collect(Collectors.toList())); + } + +} diff --git a/src/main/java/com/gotocompany/depot/message/proto/ProtoMessageParser.java b/src/main/java/com/gotocompany/depot/message/proto/ProtoMessageParser.java index fdb58072..19fe5644 100644 --- 
a/src/main/java/com/gotocompany/depot/message/proto/ProtoMessageParser.java +++ b/src/main/java/com/gotocompany/depot/message/proto/ProtoMessageParser.java @@ -114,4 +114,8 @@ public ProtoField getProtoField(String schemaClass) throws IOException { getTypeNameToPackageNameMap(getDescriptorMap())); return protoField; } + + public Descriptors.Descriptor getDescriptor(String schemaClass) { + return stencilClient.get(schemaClass); + } } diff --git a/src/main/proto/test-dse-dev.proto b/src/main/proto/test-dse-dev.proto new file mode 100644 index 00000000..d0aa79b6 --- /dev/null +++ b/src/main/proto/test-dse-dev.proto @@ -0,0 +1,51 @@ +syntax = "proto3"; + +package deduction; + +option java_multiple_files = true; +option java_package = "deduction"; +option java_outer_classname = "Deduction"; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/struct.proto"; + +message DeductionEvent { + string deduction_id = 1; // Unique identifier for deduction. + string subscription_id = 2; // Subscription id associated to the deduction. + string status = 3; // State of the deduction. + google.protobuf.Timestamp trigger_date_time = 4; // Date time used trigger schedule deductions + google.protobuf.Timestamp scheduled_at = 5; // Timestamp at which deduction was scheduled for deduction. + uint32 attempt_count = 6; // Number of times this deduction processing was attempted. + google.protobuf.Timestamp created_at = 7; // Timestamp at which deduction was created. + google.protobuf.Timestamp updated_at = 8; // Timestamp at which deduction was updated. + google.protobuf.Timestamp published_at = 9; // Timestamp at which deduction was published. + uint32 version = 10; // Version of deduction. + string iteration_id = 11; // Unique identifier for each iteration of deduction associated with a subscription. + uint32 retry_attempt_count = 12; // Number of times deductions have been retried for a particular iteration. 
+} + +message Error { + string code = 1 [json_name = "code"]; + string entity = 2 [json_name = "entity"]; + string cause = 3 [json_name = "cause"]; + google.protobuf.Struct details = 4 [json_name = "details"]; +} + +message GenericResponse { + string correlation_id = 1; + bool success = 2; + repeated Error errors = 3; +} + +message HttpRequest { + string field1 = 1; + string field2 = 2; + repeated NestedRequest request = 3; + google.protobuf.Timestamp event_timestamp = 4; +} + +message NestedRequest { + string id = 1; + string correlation_id = 2; + int64 count = 3; +} \ No newline at end of file diff --git a/src/test/java/com/gotocompany/depot/maxcompute/BaseTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/BaseTypeInfoConverterTest.java deleted file mode 100644 index 74631f43..00000000 --- a/src/test/java/com/gotocompany/depot/maxcompute/BaseTypeInfoConverterTest.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.gotocompany.depot.maxcompute; - -import com.aliyun.odps.type.TypeInfo; -import com.google.protobuf.Descriptors; -import com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.BaseTypeInfoConverter; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; - -public class BaseTypeInfoConverterTest { - - private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); - private final BaseTypeInfoConverter baseTypeInfoConverter = new BaseTypeInfoConverter(); - - @Test - public void shouldConvertPayloadToTypeInfo() { - String expectedStringTypeInfoRepresentation = "STRING"; - String expectedMessageTypeRepresentation = "STRUCT,another_inner_list_field:ARRAY>>"; - String expectedRepeatedMessageTypeRepresentation = String.format("ARRAY<%s>", expectedMessageTypeRepresentation); - String expectedTimestampTypeInfoRepresentation = "TIMESTAMP_NTZ"; - String expectedDurationTypeInfoRepresentation = "STRUCT"; - String expectedStructTypeInfoRepresentation = 
"STRING"; - - TypeInfo stringTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("string_field")); - TypeInfo messageTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("inner_field")); - TypeInfo repeatedTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("inner_list_field")); - TypeInfo timestampTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("timestamp_field")); - TypeInfo durationTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("duration_field")); - TypeInfo structTypeInfo = baseTypeInfoConverter.convert(descriptor.findFieldByName("struct_field")); - - Assertions.assertEquals(expectedStringTypeInfoRepresentation, stringTypeInfo.toString()); - Assertions.assertEquals(expectedMessageTypeRepresentation, messageTypeInfo.toString()); - Assertions.assertEquals(expectedRepeatedMessageTypeRepresentation, repeatedTypeInfo.toString()); - Assertions.assertEquals(expectedTimestampTypeInfoRepresentation, timestampTypeInfo.toString()); - Assertions.assertEquals(expectedDurationTypeInfoRepresentation, durationTypeInfo.toString()); - Assertions.assertEquals(expectedStructTypeInfoRepresentation, structTypeInfo.toString()); - } - - @Test(expected = IllegalArgumentException.class) - public void shouldThrowIllegalArgumentExceptionForUnsupportedType() { - Descriptors.FieldDescriptor unsupportedFieldDescriptor = descriptor.findFieldByName("empty_field"); - baseTypeInfoConverter.convert(unsupportedFieldDescriptor); - } - -} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManagerTest.java b/src/test/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManagerTest.java new file mode 100644 index 00000000..289ea080 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/client/insert/NonPartitionedInsertManagerTest.java @@ -0,0 +1,53 @@ +package com.gotocompany.depot.maxcompute.client.insert; + + +import 
com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.tunnel.TunnelException; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; + +public class NonPartitionedInsertManagerTest { + + @Test + public void shouldFlushAllTheRecords() throws IOException, TunnelException { + TableTunnel.FlushResult flushResult = Mockito.mock(TableTunnel.FlushResult.class); + Mockito.when(flushResult.getRecordCount()) + .thenReturn(2L); + TableTunnel.StreamRecordPack streamRecordPack = Mockito.mock(TableTunnel.StreamRecordPack.class); + TableTunnel.StreamUploadSession streamUploadSession = Mockito.spy(TableTunnel.StreamUploadSession.class); + Mockito.when(streamRecordPack.flush(Mockito.any(TableTunnel.FlushOption.class))) + .thenReturn(flushResult); + Mockito.when(streamUploadSession.newRecordPack()) + .thenReturn(streamRecordPack); + Mockito.when(streamRecordPack.flush()) + .thenReturn("traceId"); + TableTunnel tableTunnel = Mockito.mock(TableTunnel.class); + TableTunnel.StreamUploadSession.Builder builder = Mockito.mock(TableTunnel.StreamUploadSession.Builder.class); + Mockito.when(tableTunnel.buildStreamUploadSession(Mockito.anyString(), Mockito.anyString())) + .thenReturn(builder); + Mockito.when(builder.build()) + .thenReturn(streamUploadSession); + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.getMaxComputeProjectId()) + .thenReturn("project"); + Mockito.when(maxComputeSinkConfig.getMaxComputeTableName()) + .thenReturn("table"); + Mockito.when(maxComputeSinkConfig.getMaxComputeRecordPackFlushTimeout()) + .thenReturn(1000L); + NonPartitionedInsertManager nonPartitionedInsertManager = new NonPartitionedInsertManager(tableTunnel, maxComputeSinkConfig); + List recordWrappers = 
Collections.singletonList( + Mockito.mock(RecordWrapper.class) + ); + + nonPartitionedInsertManager.insert(recordWrappers); + + Mockito.verify(streamRecordPack, Mockito.times(1)) + .flush(Mockito.any(TableTunnel.FlushOption.class)); + } +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManagerTest.java b/src/test/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManagerTest.java new file mode 100644 index 00000000..0d37ff34 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/client/insert/PartitionedInsertManagerTest.java @@ -0,0 +1,67 @@ +package com.gotocompany.depot.maxcompute.client.insert; + +import com.aliyun.odps.PartitionSpec; +import com.aliyun.odps.tunnel.TableTunnel; +import com.aliyun.odps.tunnel.TunnelException; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +public class PartitionedInsertManagerTest { + + @Test + public void shouldGroupRecordsBasedOnPartitionSpecAndFlushAll() throws IOException, TunnelException { + TableTunnel.FlushResult flushResult = Mockito.mock(TableTunnel.FlushResult.class); + Mockito.when(flushResult.getRecordCount()) + .thenReturn(2L); + TableTunnel.StreamRecordPack streamRecordPack = Mockito.mock(TableTunnel.StreamRecordPack.class); + TableTunnel.StreamUploadSession streamUploadSession = Mockito.spy(TableTunnel.StreamUploadSession.class); + Mockito.when(streamRecordPack.flush(Mockito.any(TableTunnel.FlushOption.class))) + .thenReturn(flushResult); + Mockito.when(streamUploadSession.newRecordPack()) + .thenReturn(streamRecordPack); + Mockito.when(streamRecordPack.flush()) + .thenReturn("traceId"); + TableTunnel tableTunnel = Mockito.mock(TableTunnel.class); + TableTunnel.StreamUploadSession.Builder builder = 
Mockito.mock(TableTunnel.StreamUploadSession.Builder.class); + Mockito.when(tableTunnel.buildStreamUploadSession(Mockito.anyString(), Mockito.anyString())) + .thenReturn(builder); + Mockito.when(builder.setCreatePartition(Mockito.anyBoolean())) + .thenReturn(builder); + Mockito.when(builder.setPartitionSpec(Mockito.any(PartitionSpec.class))) + .thenReturn(builder); + Mockito.when(builder.build()) + .thenReturn(streamUploadSession); + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.getMaxComputeProjectId()) + .thenReturn("project"); + Mockito.when(maxComputeSinkConfig.getMaxComputeTableName()) + .thenReturn("table"); + Mockito.when(maxComputeSinkConfig.getMaxComputeRecordPackFlushTimeout()) + .thenReturn(1000L); + RecordWrapper firstPartitionRecordWrapper = Mockito.mock(RecordWrapper.class); + Mockito.when(firstPartitionRecordWrapper.getPartitionSpec()) + .thenReturn(new PartitionSpec("ds=1")); + RecordWrapper secondPartitionRecordWrapper = Mockito.mock(RecordWrapper.class); + Mockito.when(secondPartitionRecordWrapper.getPartitionSpec()) + .thenReturn(new PartitionSpec("ds=2")); + List recordWrappers = Arrays.asList( + firstPartitionRecordWrapper, + secondPartitionRecordWrapper + ); + PartitionedInsertManager partitionedInsertManager = new PartitionedInsertManager(tableTunnel, maxComputeSinkConfig); + int expectedPartitionFlushInvocation = 2; + + partitionedInsertManager.insert(recordWrappers); + + Mockito.verify(streamRecordPack, Mockito.times(expectedPartitionFlushInvocation)) + .flush(Mockito.any(TableTunnel.FlushOption.class)); + } + + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestratorTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestratorTest.java new file mode 100644 index 00000000..87ba35cb --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/ConverterOrchestratorTest.java @@ -0,0 
+1,107 @@ +package com.gotocompany.depot.maxcompute.converter; + +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Duration; +import com.google.protobuf.Message; +import com.google.protobuf.Struct; +import com.google.protobuf.Timestamp; +import com.google.protobuf.Value; + +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +public class ConverterOrchestratorTest { + + private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private final ConverterOrchestrator converterOrchestrator = new ConverterOrchestrator(); + + @Test + public void shouldConvertPayloadToTypeInfo() { + String expectedStringTypeInfoRepresentation = "STRING"; + String expectedMessageTypeRepresentation = "STRUCT,another_inner_list_field:ARRAY>>"; + String expectedRepeatedMessageTypeRepresentation = String.format("ARRAY<%s>", expectedMessageTypeRepresentation); + String expectedTimestampTypeInfoRepresentation = "TIMESTAMP"; + String expectedDurationTypeInfoRepresentation = "STRUCT"; + String expectedStructTypeInfoRepresentation = "STRING"; + + TypeInfo stringTypeInfo = converterOrchestrator.convert(descriptor.findFieldByName("string_field")); + TypeInfo messageTypeInfo = converterOrchestrator.convert(descriptor.findFieldByName("inner_field")); + TypeInfo repeatedTypeInfo = converterOrchestrator.convert(descriptor.findFieldByName("inner_list_field")); + TypeInfo timestampTypeInfo = converterOrchestrator.convert(descriptor.findFieldByName("timestamp_field")); + TypeInfo durationTypeInfo = converterOrchestrator.convert(descriptor.findFieldByName("duration_field")); + TypeInfo structTypeInfo = 
converterOrchestrator.convert(descriptor.findFieldByName("struct_field")); + + Assertions.assertEquals(expectedStringTypeInfoRepresentation, stringTypeInfo.toString()); + Assertions.assertEquals(expectedMessageTypeRepresentation, messageTypeInfo.toString()); + Assertions.assertEquals(expectedRepeatedMessageTypeRepresentation, repeatedTypeInfo.toString()); + Assertions.assertEquals(expectedTimestampTypeInfoRepresentation, timestampTypeInfo.toString()); + Assertions.assertEquals(expectedDurationTypeInfoRepresentation, durationTypeInfo.toString()); + Assertions.assertEquals(expectedStructTypeInfoRepresentation, structTypeInfo.toString()); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldThrowIllegalArgumentExceptionForUnsupportedType() { + Descriptors.FieldDescriptor unsupportedFieldDescriptor = descriptor.findFieldByName("empty_field"); + converterOrchestrator.convert(unsupportedFieldDescriptor); + } + + @Test + public void shouldConvertPayloadToRecord() { + Struct.Builder structBuilder = Struct.newBuilder(); + structBuilder.putFields("intField", Value.newBuilder().setNumberValue(1.0).build()); + structBuilder.putFields("stringField", Value.newBuilder().setStringValue("String").build()); + TestMaxComputeTypeInfo.TestAnotherInner testAnotherInner = TestMaxComputeTypeInfo.TestAnotherInner.newBuilder() + .setStringField("inner_string_field") + .build(); + TestMaxComputeTypeInfo.TestInner testInner = TestMaxComputeTypeInfo.TestInner.newBuilder() + .setAnotherInnerField(testAnotherInner) + .addAllAnotherInnerListField(Collections.singletonList(testAnotherInner)) + .setStringField("string_field") + .build(); + Message messagePayload = TestMaxComputeTypeInfo.TestRoot.newBuilder() + .setStringField("string_field") + .setTimestampField(Timestamp.newBuilder() + .setSeconds(100) + .setNanos(0) + .build()) + .setDurationField(Duration.newBuilder() + .setSeconds(100) + .setNanos(0) + .build()) + .setStructField(structBuilder.build()) + 
.setInnerField(testInner) + .addAllInnerListField(Collections.singletonList(testInner)) + .build(); + StructTypeInfo messageTypeInfo = TypeInfoFactory.getStructTypeInfo( + Arrays.asList("string_field", "another_inner_field", "another_inner_list_field"), + Arrays.asList(TypeInfoFactory.STRING, TypeInfoFactory.getStructTypeInfo(Collections.singletonList("string_field"), Collections.singletonList(TypeInfoFactory.STRING)), + TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo(Collections.singletonList("string_field"), Collections.singletonList(TypeInfoFactory.STRING)))) + ); + List messageValues = Arrays.asList("string_field", new SimpleStruct(TypeInfoFactory.getStructTypeInfo(Collections.singletonList("string_field"), Collections.singletonList(TypeInfoFactory.STRING)), Collections.singletonList("inner_string_field")), + Collections.singletonList(new SimpleStruct(TypeInfoFactory.getStructTypeInfo(Collections.singletonList("string_field"), Collections.singletonList(TypeInfoFactory.STRING)), Collections.singletonList("inner_string_field")))); + SimpleStruct expectedMessage = new SimpleStruct(messageTypeInfo, messageValues); + + Object stringRecord = converterOrchestrator.convert(descriptor.findFieldByName("string_field"), messagePayload.getField(descriptor.findFieldByName("string_field"))); + Object messageRecord = converterOrchestrator.convert(descriptor.findFieldByName("inner_field"), messagePayload.getField(descriptor.findFieldByName("inner_field"))); + Object repeatedMessageRecord = converterOrchestrator.convert(descriptor.findFieldByName("inner_list_field"), messagePayload.getField(descriptor.findFieldByName("inner_list_field"))); + Object timestampRecord = converterOrchestrator.convert(descriptor.findFieldByName("timestamp_field"), messagePayload.getField(descriptor.findFieldByName("timestamp_field"))); + Object durationRecord = converterOrchestrator.convert(descriptor.findFieldByName("duration_field"), 
messagePayload.getField(descriptor.findFieldByName("duration_field"))); + Object structRecord = converterOrchestrator.convert(descriptor.findFieldByName("struct_field"), messagePayload.getField(descriptor.findFieldByName("struct_field"))); + + Assertions.assertEquals("string_field", stringRecord); + Assertions.assertEquals(new java.sql.Timestamp(100 * 1000), timestampRecord); + Assertions.assertEquals(new SimpleStruct(TypeInfoFactory.getStructTypeInfo(Arrays.asList("seconds", "nanos"), Arrays.asList(TypeInfoFactory.BIGINT, TypeInfoFactory.INT)), Arrays.asList(100L, 0)), durationRecord); + Assertions.assertEquals(expectedMessage, messageRecord); + Assertions.assertEquals(Collections.singletonList(expectedMessage), repeatedMessageRecord); + Assertions.assertEquals("{\"intField\":1.0,\"stringField\":\"String\"}", structRecord); + } +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverterTest.java new file mode 100644 index 00000000..f274a43a --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/DurationPayloadConverterTest.java @@ -0,0 +1,74 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.aliyun.odps.type.TypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Duration; +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import com.gotocompany.depot.maxcompute.converter.type.DurationTypeInfoConverter; +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +public class DurationPayloadConverterTest { + + private final DurationTypeInfoConverter durationTypeInfoConverter = new DurationTypeInfoConverter(); + private final DurationPayloadConverter durationPayloadConverter = new DurationPayloadConverter(durationTypeInfoConverter); + 
private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private final Descriptors.Descriptor repeatedDescriptor = TestMaxComputeTypeInfo.TestRootRepeated.getDescriptor(); + + @Test + public void shouldConvertDurationPayloadToStruct() { + Duration duration = Duration.newBuilder() + .setSeconds(1) + .setNanos(1) + .build(); + TestMaxComputeTypeInfo.TestRoot message = TestMaxComputeTypeInfo.TestRoot.newBuilder() + .setDurationField(duration) + .build(); + List expectedFieldNames = Arrays.asList("seconds", "nanos"); + List expectedTypeInfos = Arrays.asList(TypeInfoFactory.BIGINT, TypeInfoFactory.INT); + List values = Arrays.asList(1L, 1); + Object result = durationPayloadConverter.convert(descriptor.getFields().get(5), message.getField(descriptor.getFields().get(5))); + + Assertions.assertThat(result) + .isInstanceOf(com.aliyun.odps.data.SimpleStruct.class) + .extracting("typeInfo", "values") + .containsExactly(TypeInfoFactory.getStructTypeInfo(expectedFieldNames, expectedTypeInfos), values); + } + + @Test + public void shouldConvertRepeatedDurationPayloadToStructList() { + Duration duration1 = Duration.newBuilder() + .setSeconds(1) + .setNanos(1) + .build(); + Duration duration2 = Duration.newBuilder() + .setSeconds(2) + .setNanos(2) + .build(); + TestMaxComputeTypeInfo.TestRootRepeated message = TestMaxComputeTypeInfo.TestRootRepeated.newBuilder() + .addAllDurationFields(Arrays.asList(duration1, duration2)) + .build(); + List expectedFieldNames = Arrays.asList("seconds", "nanos"); + List expectedTypeInfos = Arrays.asList(TypeInfoFactory.BIGINT, TypeInfoFactory.INT); + List values1 = Arrays.asList(1L, 1); + List values2 = Arrays.asList(2L, 2); + + Object result = durationPayloadConverter.convert(repeatedDescriptor.getFields().get(5), message.getField(repeatedDescriptor.getFields().get(5))); + + Assertions.assertThat(result) + .isInstanceOf(List.class); + Assertions.assertThat((List) result) + .hasSize(2) + .allMatch(element 
-> element instanceof com.aliyun.odps.data.SimpleStruct) + .extracting("typeInfo", "values") + .containsExactly( + Assertions.tuple(TypeInfoFactory.getStructTypeInfo(expectedFieldNames, expectedTypeInfos), values1), + Assertions.tuple(TypeInfoFactory.getStructTypeInfo(expectedFieldNames, expectedTypeInfos), values2) + ); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverterTest.java new file mode 100644 index 00000000..17dabe67 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/MessagePayloadConverterTest.java @@ -0,0 +1,117 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Duration; +import com.google.protobuf.Timestamp; +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import com.gotocompany.depot.maxcompute.converter.type.DurationTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.MessageTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.PrimitiveTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.StructTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.TimestampTypeInfoConverter; +import com.gotocompany.depot.maxcompute.converter.type.TypeInfoConverter; +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class MessagePayloadConverterTest { + + private MessagePayloadConverter messagePayloadConverter; + private Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestBuyerWrapper.getDescriptor(); + + @Before + public void init() { 
+ MessageTypeInfoConverter messageTypeInfoConverter = initializeTypeInfoConverters(); + List payloadConverters = initializePayloadConverter(messageTypeInfoConverter); + messagePayloadConverter = new MessagePayloadConverter(messageTypeInfoConverter, payloadConverters); + } + + @Test + public void shouldConvertToStruct() { + Timestamp timestamp = Timestamp.newBuilder() + .setSeconds(1704067200) + .setNanos(0) + .build(); + Duration duration = Duration.newBuilder() + .setSeconds(100) + .build(); + TestMaxComputeTypeInfo.TestBuyer message = TestMaxComputeTypeInfo.TestBuyer.newBuilder() + .setName("buyerName") + .setCart(TestMaxComputeTypeInfo.TestCart.newBuilder() + .setCartId("cart_id") + .addAllItems(Arrays.asList( + TestMaxComputeTypeInfo.TestItem.newBuilder() + .setId("item1") + .setQuantity(1) + .build(), + TestMaxComputeTypeInfo.TestItem.newBuilder() + .setId("item2") + .setQuantity(2) + .build())) + .setCreatedAt(timestamp) + .setCartAge(duration) + ) + .setCreatedAt(timestamp) + .build(); + TestMaxComputeTypeInfo.TestBuyerWrapper wrapper = TestMaxComputeTypeInfo.TestBuyerWrapper + .newBuilder() + .setBuyer(message) + .build(); + StructTypeInfo durationTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList("seconds", "nanos"), Arrays.asList(TypeInfoFactory.BIGINT, TypeInfoFactory.INT)); + StructTypeInfo itemTypeInfo = TypeInfoFactory.getStructTypeInfo(Arrays.asList("id", "quantity"), Arrays.asList(TypeInfoFactory.STRING, TypeInfoFactory.INT)); + StructTypeInfo cartTypeInfo = TypeInfoFactory.getStructTypeInfo( + Arrays.asList("cart_id", "items", "created_at", "cart_age"), + Arrays.asList(TypeInfoFactory.STRING, TypeInfoFactory.getArrayTypeInfo(itemTypeInfo), TypeInfoFactory.TIMESTAMP, durationTypeInfo) + ); + StructTypeInfo expectedStructTypeInfo = TypeInfoFactory.getStructTypeInfo( + Arrays.asList("name", "cart", "created_at"), + Arrays.asList(TypeInfoFactory.STRING, cartTypeInfo, TypeInfoFactory.TIMESTAMP) + ); + List expectedStructValues = Arrays.asList( + 
"buyerName", + new SimpleStruct(cartTypeInfo, + Arrays.asList( + "cart_id", + Arrays.asList(new SimpleStruct(itemTypeInfo, Arrays.asList("item1", 1)), new SimpleStruct(itemTypeInfo, Arrays.asList("item2", 2))), + new java.sql.Timestamp(timestamp.getSeconds() * 1000), + new SimpleStruct(durationTypeInfo, Arrays.asList(duration.getSeconds(), duration.getNanos())))), + new java.sql.Timestamp(timestamp.getSeconds() * 1000) + ); + + Object object = messagePayloadConverter.convert(descriptor.getFields().get(0), wrapper.getField(descriptor.getFields().get(0))); + + Assertions.assertThat(object) + .extracting("typeInfo", "values") + .containsExactly(expectedStructTypeInfo, expectedStructValues); + } + + + private MessageTypeInfoConverter initializeTypeInfoConverters() { + List converters = new ArrayList<>(); + converters.add(new PrimitiveTypeInfoConverter()); + converters.add(new DurationTypeInfoConverter()); + converters.add(new StructTypeInfoConverter()); + converters.add(new TimestampTypeInfoConverter()); + MessageTypeInfoConverter messageTypeInfoConverter = new MessageTypeInfoConverter(converters); + converters.add(messageTypeInfoConverter); + return messageTypeInfoConverter; + } + + private List initializePayloadConverter(MessageTypeInfoConverter messageTypeInfoConverter) { + List payloadConverters = new ArrayList<>(); + payloadConverters.add(new DurationPayloadConverter(new DurationTypeInfoConverter())); + payloadConverters.add(new PrimitivePayloadConverter(new PrimitiveTypeInfoConverter())); + payloadConverters.add(new StructPayloadConverter(new StructTypeInfoConverter())); + payloadConverters.add(new TimestampPayloadConverter(new TimestampTypeInfoConverter())); + payloadConverters.add(new MessagePayloadConverter(messageTypeInfoConverter, payloadConverters)); + return payloadConverters; + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverterTest.java 
b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverterTest.java new file mode 100644 index 00000000..c4151e6b --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/PrimitivePayloadConverterTest.java @@ -0,0 +1,438 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import com.gotocompany.depot.maxcompute.converter.type.PrimitiveTypeInfoConverter; +import org.junit.Test; +import org.junit.jupiter.api.Assertions; + +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.List; + +public class PrimitivePayloadConverterTest { + + private final PrimitiveTypeInfoConverter primitiveTypeInfoConverter = new PrimitiveTypeInfoConverter(); + private final PrimitivePayloadConverter primitivePayloadConverter = new PrimitivePayloadConverter(primitiveTypeInfoConverter); + private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestFields.getDescriptor(); + private final Descriptors.Descriptor descriptorRepeated = TestMaxComputeTypeInfo.TestFieldsRepeated.getDescriptor(); + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsByteArray() { + byte[] bytes = "bytes".getBytes(StandardCharsets.UTF_8); + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setBytesField(ByteString.copyFrom(bytes)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(0), message.getField(descriptor.getFields().get(0))); + + Assertions.assertTrue(result instanceof byte[]); + Assertions.assertArrayEquals(bytes, (byte[]) result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsString() { + String value = "test"; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setStringField(value) + .build(); + + Object result = 
primitivePayloadConverter.convert(descriptor.getFields().get(1), message.getField(descriptor.getFields().get(1))); + + Assertions.assertTrue(result instanceof String); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsEnum() { + TestMaxComputeTypeInfo.TestEnum value = TestMaxComputeTypeInfo.TestEnum.TEST_1; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setEnumField(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(2), message.getField(descriptor.getFields().get(2))); + + Assertions.assertTrue(result instanceof String); + Assertions.assertEquals(value.name(), result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsDouble() { + double value = 1.23; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setDoubleField(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(3), message.getField(descriptor.getFields().get(3))); + + Assertions.assertTrue(result instanceof Double); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsFloat() { + float value = 1.23f; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setFloatField(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(4), message.getField(descriptor.getFields().get(4))); + + Assertions.assertTrue(result instanceof Float); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsBoolean() { + boolean value = true; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setBoolField(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(5), message.getField(descriptor.getFields().get(5))); + + Assertions.assertTrue(result instanceof Boolean); + Assertions.assertEquals(value, result); + } + + 
@Test + public void shouldReturnObjectAsItIsWhenTypeIsInt64() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setInt64Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(6), message.getField(descriptor.getFields().get(6))); + + Assertions.assertTrue(result instanceof Long); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsUInt64() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setUint64Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(7), message.getField(descriptor.getFields().get(7))); + + Assertions.assertTrue(result instanceof Long); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsInt32() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setInt32Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(8), message.getField(descriptor.getFields().get(8))); + + Assertions.assertTrue(result instanceof Integer); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsUInt32() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setUint32Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(9), message.getField(descriptor.getFields().get(9))); + + Assertions.assertTrue(result instanceof Integer); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsFixed64() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setFixed64Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(10), 
message.getField(descriptor.getFields().get(10))); + + Assertions.assertTrue(result instanceof Long); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsFixed32() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setFixed32Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(11), message.getField(descriptor.getFields().get(11))); + + Assertions.assertTrue(result instanceof Integer); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsSFixed32() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setSfixed32Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(12), message.getField(descriptor.getFields().get(12))); + + Assertions.assertTrue(result instanceof Integer); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsSFixed64() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setSfixed64Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(13), message.getField(descriptor.getFields().get(13))); + + Assertions.assertTrue(result instanceof Long); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsSInt32() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFields.newBuilder() + .setSint32Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(14), message.getField(descriptor.getFields().get(14))); + + Assertions.assertTrue(result instanceof Integer); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnObjectAsItIsWhenTypeIsSInt64() { + long value = 123L; + Message message = 
TestMaxComputeTypeInfo.TestFields.newBuilder() + .setSint64Field(value) + .build(); + + Object result = primitivePayloadConverter.convert(descriptor.getFields().get(15), message.getField(descriptor.getFields().get(15))); + + Assertions.assertTrue(result instanceof Long); + Assertions.assertEquals(value, result); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsByteArrayList() { + byte[] bytes = "bytes".getBytes(StandardCharsets.UTF_8); + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllBytesFields(Collections.singletonList(ByteString.copyFrom(bytes))) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(0), message.getField(descriptorRepeated.getFields().get(0))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof byte[])); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsStringList() { + String value = "test"; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllStringFields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(1), message.getField(descriptorRepeated.getFields().get(1))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof String)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsEnumList() { + TestMaxComputeTypeInfo.TestEnum value = TestMaxComputeTypeInfo.TestEnum.TEST_1; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllEnumFields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(2), message.getField(descriptorRepeated.getFields().get(2))); + + Assertions.assertTrue(result instanceof List); + 
Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof String)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsDoubleList() { + double value = 1.23; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllDoubleFields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(3), message.getField(descriptorRepeated.getFields().get(3))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Double)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsFloatList() { + float value = 1.23f; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllFloatFields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(4), message.getField(descriptorRepeated.getFields().get(4))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Float)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsBooleanList() { + boolean value = true; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllBoolFields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(5), message.getField(descriptorRepeated.getFields().get(5))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Boolean)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsInt64List() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllInt64Fields(Collections.singletonList(value)) + .build(); + + Object result = 
primitivePayloadConverter.convert(descriptorRepeated.getFields().get(6), message.getField(descriptorRepeated.getFields().get(6))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Long)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsUInt64List() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllUint64Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(7), message.getField(descriptorRepeated.getFields().get(7))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Long)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsInt32List() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllInt32Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(8), message.getField(descriptorRepeated.getFields().get(8))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Integer)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsUInt32List() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllUint32Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(9), message.getField(descriptorRepeated.getFields().get(9))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Integer)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsFixed64List() { + long 
value = 123L; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllFixed64Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(10), message.getField(descriptorRepeated.getFields().get(10))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Long)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsFixed32List() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllFixed32Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(11), message.getField(descriptorRepeated.getFields().get(11))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Integer)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsSFixed32List() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllSfixed32Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(12), message.getField(descriptorRepeated.getFields().get(12))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Integer)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsSFixed64List() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllSfixed64Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(13), message.getField(descriptorRepeated.getFields().get(13))); + + Assertions.assertTrue(result 
instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Long)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsSInt32List() { + int value = 123; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllSint32Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(14), message.getField(descriptorRepeated.getFields().get(14))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Integer)); + } + + @Test + public void shouldReturnListObjectAsItIsWhenTypeIsSInt64List() { + long value = 123L; + Message message = TestMaxComputeTypeInfo.TestFieldsRepeated.newBuilder() + .addAllSint64Fields(Collections.singletonList(value)) + .build(); + + Object result = primitivePayloadConverter.convert(descriptorRepeated.getFields().get(15), message.getField(descriptorRepeated.getFields().get(15))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(element -> element instanceof Long)); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverterTest.java new file mode 100644 index 00000000..94a68d77 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/StructPayloadConverterTest.java @@ -0,0 +1,58 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.Message; +import com.google.protobuf.Struct; +import com.google.protobuf.Value; +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import com.gotocompany.depot.maxcompute.converter.type.StructTypeInfoConverter; +import org.junit.Test; +import 
org.junit.jupiter.api.Assertions; + +import java.util.ArrayList; +import java.util.List; + +/** Tests StructPayloadConverter against the field at descriptor index 4 of TestRoot (singular) and TestRootRepeated (repeated). */public class StructPayloadConverterTest { + + private final StructTypeInfoConverter structTypeInfoConverter = new StructTypeInfoConverter(); + private final StructPayloadConverter structPayloadConverter = new StructPayloadConverter(structTypeInfoConverter); + private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private final Descriptors.Descriptor repeatedDescriptor = TestMaxComputeTypeInfo.TestRootRepeated.getDescriptor(); + + /** A Struct holding one number entry and one string entry must convert to a String equal to the JSON text in the expected variable. */@Test + public void shouldConvertStructPayloadToJsonString() { + Struct.Builder structBuilder = Struct.newBuilder(); + structBuilder.putFields("intField", Value.newBuilder().setNumberValue(1.0).build()); + structBuilder.putFields("stringField", Value.newBuilder().setStringValue("String").build()); + Message message = TestMaxComputeTypeInfo.TestRoot.newBuilder() + .setStructField(structBuilder.build()) + .build(); + String expected = "{\"intField\":1.0,\"stringField\":\"String\"}"; + + Object result = structPayloadConverter.convert(descriptor.getFields().get(4), message.getField(descriptor.getFields().get(4))); + + Assertions.assertTrue(result instanceof String); + Assertions.assertEquals(expected, result); + } + + /** Two identical Structs in the repeated field must convert to a List of String elements whose toString() equals the expected variable. */@Test + public void shouldConvertRepeatedStructPayloadToJsonString() { + Struct.Builder structBuilder = Struct.newBuilder(); + structBuilder.putFields("intField", Value.newBuilder().setNumberValue(1.0).build()); + structBuilder.putFields("stringField", Value.newBuilder().setStringValue("String").build()); + List structs = new ArrayList<>(); + structs.add(structBuilder.build()); + structs.add(structBuilder.build()); + Message message = TestMaxComputeTypeInfo.TestRootRepeated.newBuilder() + .addAllStructFields(structs) + .build(); + String expected = "[{\"intField\":1.0,\"stringField\":\"String\"}, {\"intField\":1.0,\"stringField\":\"String\"}]"; + + Object result = 
structPayloadConverter.convert(repeatedDescriptor.getFields().get(4), message.getField(repeatedDescriptor.getFields().get(4))); + + Assertions.assertTrue(result instanceof List); + Assertions.assertTrue(((List) result).stream().allMatch(e -> e instanceof String)); + Assertions.assertEquals(expected, result.toString()); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverterTest.java new file mode 100644 index 00000000..c164eefe --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/payload/TimestampPayloadConverterTest.java @@ -0,0 +1,66 @@ +package com.gotocompany.depot.maxcompute.converter.payload; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.Timestamp; +import com.gotocompany.depot.TestMaxComputeTypeInfo; +import com.gotocompany.depot.maxcompute.converter.type.TimestampTypeInfoConverter; +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +/** Tests TimestampPayloadConverter against the field at descriptor index 3 of TestRoot (singular) and TestRootRepeated (repeated). */public class TimestampPayloadConverterTest { + + private final TimestampTypeInfoConverter timestampTypeInfoConverter = new TimestampTypeInfoConverter(); + private final TimestampPayloadConverter timestampPayloadConverter = new TimestampPayloadConverter(timestampTypeInfoConverter); + private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private final Descriptors.Descriptor repeatedDescriptor = TestMaxComputeTypeInfo.TestRootRepeated.getDescriptor(); + + + /** convertSingular must return a value equal to a java.sql.Timestamp built from the proto seconds (as millis) with the proto nanos applied through setNanos. NOTE(review): the Ntz suffix in the test name may be stale, the assertion only compares java.sql.Timestamp values; confirm the intended name. */@Test + public void shouldConvertToTimestampNtz() { + Timestamp timestamp = Timestamp.newBuilder() + .setSeconds(2500) + .setNanos(100) + .build(); + TestMaxComputeTypeInfo.TestRoot message = TestMaxComputeTypeInfo.TestRoot.newBuilder() + .setTimestampField(timestamp) + .build(); + java.sql.Timestamp expectedTimestamp = new 
java.sql.Timestamp(timestamp.getSeconds() * 1000); + expectedTimestamp.setNanos(timestamp.getNanos()); + + Object result = timestampPayloadConverter.convertSingular(descriptor.getFields().get(3), message.getField(descriptor.getFields().get(3))); + + Assertions.assertThat(result) + .isEqualTo(expectedTimestamp); + } + + /** convert on the repeated field must return a List holding exactly the two expected java.sql.Timestamp values, in insertion order. */@Test + public void shouldConvertRepeatedTimestampPayloadToTimestampList() { + Timestamp timestamp1 = Timestamp.newBuilder() + .setSeconds(2500) + .setNanos(100) + .build(); + Timestamp timestamp2 = Timestamp.newBuilder() + .setSeconds(3600) + .setNanos(200) + .build(); + TestMaxComputeTypeInfo.TestRootRepeated message = TestMaxComputeTypeInfo.TestRootRepeated.newBuilder() + .addAllTimestampFields(Arrays.asList(timestamp1, timestamp2)) + .build(); + java.sql.Timestamp expectedTimestamp1 = new java.sql.Timestamp(timestamp1.getSeconds() * 1000); + expectedTimestamp1.setNanos(timestamp1.getNanos()); + java.sql.Timestamp expectedTimestamp2 = new java.sql.Timestamp(timestamp2.getSeconds() * 1000); + expectedTimestamp2.setNanos(timestamp2.getNanos()); + + Object result = timestampPayloadConverter.convert(repeatedDescriptor.getFields().get(3), message.getField(repeatedDescriptor.getFields().get(3))); + + Assertions.assertThat(result) + .isInstanceOf(List.class); + Assertions.assertThat(((List) result).stream().map(java.sql.Timestamp.class::cast)) + .hasSize(2) + .containsExactly(expectedTimestamp1, expectedTimestamp2); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverterTest.java new file mode 100644 index 00000000..f7258f98 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/record/RecordConverterTest.java @@ -0,0 +1,209 @@ +package com.gotocompany.depot.maxcompute.converter.record; + +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.TypeInfoFactory; +import 
com.google.protobuf.Descriptors; +import com.google.protobuf.Timestamp; +import com.gotocompany.depot.TestMaxComputeRecord; +import com.gotocompany.depot.common.Tuple; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.error.ErrorInfo; +import com.gotocompany.depot.error.ErrorType; +import com.gotocompany.depot.exception.UnknownFieldsException; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.helper.MaxComputeSchemaHelper; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.model.RecordWrappers; +import com.gotocompany.depot.maxcompute.record.ProtoDataColumnRecordDecorator; +import com.gotocompany.depot.maxcompute.record.ProtoMetadataColumnRecordDecorator; +import com.gotocompany.depot.maxcompute.record.RecordDecorator; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategy; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategyFactory; +import com.gotocompany.depot.message.Message; +import com.gotocompany.depot.message.ParsedMessage; +import com.gotocompany.depot.message.SinkConnectorSchemaMessageMode; +import com.gotocompany.depot.message.proto.ProtoMessageParser; +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; + +/** Tests RecordConverter.convert: once with real data and metadata column decorators, and twice with a mocked RecordDecorator that throws. */public class RecordConverterTest { + + private final Descriptors.Descriptor descriptor = TestMaxComputeRecord.MaxComputeRecord.getDescriptor(); + private MaxComputeSinkConfig maxComputeSinkConfig; + private 
ConverterOrchestrator converterOrchestrator; + private ProtoMessageParser protoMessageParser; + private MaxComputeSchemaHelper maxComputeSchemaHelper; + private SinkConfig sinkConfig; + private MaxComputeSchemaCache maxComputeSchemaCache; + private RecordConverter recordConverter; + + /** Builds the shared fixture: a mocked sink config (metadata enabled with three metadata columns, partitioning enabled on key timestamp), a mocked parser whose ParsedMessage always returns getMockedMessage(), a mocked schema cache serving the schema built from the descriptor, and a RecordConverter over the metadata decorator, which is constructed with the data-column decorator as its first argument. */@Before + public void setup() throws IOException { + maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getMetadataColumnsTypes()).thenReturn( + Arrays.asList(new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + ) + ); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn("timestamp"); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn("__partition_column"); + Mockito.when(maxComputeSinkConfig.getTablePartitionByTimestampTimezone()).thenReturn("UTC+7"); + Mockito.when(maxComputeSinkConfig.getTablePartitionByTimestampZoneOffset()).thenReturn("+07:00"); + converterOrchestrator = new ConverterOrchestrator(); + protoMessageParser = Mockito.mock(ProtoMessageParser.class); + ParsedMessage parsedMessage = Mockito.mock(ParsedMessage.class); + Mockito.when(parsedMessage.getRaw()).thenReturn(getMockedMessage()); + Mockito.when(protoMessageParser.parse(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(parsedMessage); + sinkConfig = Mockito.mock(SinkConfig.class); + Mockito.when(sinkConfig.getSinkConnectorSchemaMessageMode()) + .thenReturn(SinkConnectorSchemaMessageMode.LOG_MESSAGE); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(converterOrchestrator, maxComputeSinkConfig); + PartitioningStrategy partitioningStrategy = partitioningStrategyFactory.createPartitioningStrategy( + descriptor 
+ ); + maxComputeSchemaHelper = new MaxComputeSchemaHelper(converterOrchestrator, maxComputeSinkConfig, partitioningStrategy); + maxComputeSchemaCache = Mockito.mock(MaxComputeSchemaCache.class); + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + Mockito.when(maxComputeSchemaCache.getMaxComputeSchema()).thenReturn(maxComputeSchema); + + RecordDecorator protoDataColumnRecordDecorator = new ProtoDataColumnRecordDecorator(null, + converterOrchestrator, maxComputeSchemaCache, + protoMessageParser, sinkConfig, partitioningStrategy); + RecordDecorator metadataColumnRecordDecorator = new ProtoMetadataColumnRecordDecorator( + protoDataColumnRecordDecorator, maxComputeSinkConfig, maxComputeSchemaCache); + recordConverter = new RecordConverter(metadataColumnRecordDecorator, maxComputeSchemaCache); + } + + /** A single message must yield one valid RecordWrapper at index 0 with a null ErrorInfo, whose record values are: the id, the list of two inner structs, the payload timestamp, then the message timestamp, topic and offset metadata. */@Test + public void shouldConvertMessageToRecordWrapper() { + Message message = new Message( + null, + getMockedMessage().toByteArray(), + new Tuple<>("__message_timestamp", 123012311L), + new Tuple<>("__kafka_topic", "topic"), + new Tuple<>("__kafka_offset", 100L) + ); + java.sql.Timestamp expectedTimestamp = new java.sql.Timestamp(10002010L * 1000); + expectedTimestamp.setNanos(1000); + RecordWrappers recordWrappers = recordConverter.convert(Collections.singletonList(message)); + + Assertions.assertThat(recordWrappers.getValidRecords()).size().isEqualTo(1); + RecordWrapper recordWrapper = recordWrappers.getValidRecords().get(0); + Assertions.assertThat(recordWrapper.getIndex()).isEqualTo(0); + Assertions.assertThat(recordWrapper.getRecord()) + .extracting("values") + .isEqualTo(new Serializable[]{ + "id", + new ArrayList<>(Arrays.asList( + new SimpleStruct( + TypeInfoFactory.getStructTypeInfo( + Arrays.asList("name", "balance"), + Arrays.asList(TypeInfoFactory.STRING, TypeInfoFactory.FLOAT) + ), + Arrays.asList("name_1", 100.2f) + ), + new SimpleStruct( + TypeInfoFactory.getStructTypeInfo( + Arrays.asList("name", "balance"), 
+ Arrays.asList(TypeInfoFactory.STRING, TypeInfoFactory.FLOAT) + ), + Arrays.asList("name_2", 50f) + ) + )), + expectedTimestamp, + new java.sql.Timestamp(123012311L), + "topic", + 100L + }); + Assertions.assertThat(recordWrapper.getErrorInfo()).isNull(); + } + + /** When the mocked decorator throws IOException, the message must be reported as one invalid record at index 0 with a null record and an ErrorInfo of type DESERIALIZATION_ERROR. */@Test + public void shouldReturnRecordWrapperWithDeserializationErrorWhenIOExceptionIsThrown() throws IOException { + RecordDecorator recordDecorator = Mockito.mock(RecordDecorator.class); + Mockito.doThrow(new IOException()).when(recordDecorator) + .decorate(Mockito.any(), Mockito.any()); + RecordConverter recordConverter = new RecordConverter(recordDecorator, maxComputeSchemaCache); + Message message = new Message( + null, + getMockedMessage().toByteArray(), + new Tuple<>("__message_timestamp", 123012311L), + new Tuple<>("__kafka_topic", "topic"), + new Tuple<>("__kafka_offset", 100L) + ); + + RecordWrappers recordWrappers = recordConverter.convert(Collections.singletonList(message)); + + Assertions.assertThat(recordWrappers.getInvalidRecords()).size().isEqualTo(1); + RecordWrapper recordWrapper = recordWrappers.getInvalidRecords().get(0); + Assertions.assertThat(recordWrapper.getIndex()).isEqualTo(0); + Assertions.assertThat(recordWrapper.getRecord()) + .isNull(); + Assertions.assertThat(recordWrapper.getErrorInfo()) + .isEqualTo(new ErrorInfo(new IOException(), ErrorType.DESERIALIZATION_ERROR)); + } + + /** When the mocked decorator throws UnknownFieldsException, the message must be reported as one invalid record at index 0 with a null record and an ErrorInfo of type UNKNOWN_FIELDS_ERROR. */@Test + public void shouldReturnRecordWrapperWithUnknownFieldsErrorWhenUnknownFieldExceptionIsThrown() throws IOException { + RecordDecorator recordDecorator = Mockito.mock(RecordDecorator.class); + com.google.protobuf.Message mockedMessage = getMockedMessage(); + Mockito.doThrow(new UnknownFieldsException(mockedMessage)).when(recordDecorator) + .decorate(Mockito.any(), Mockito.any()); + RecordConverter recordConverter = new RecordConverter(recordDecorator, maxComputeSchemaCache); + Message message = new Message( + null, + getMockedMessage().toByteArray(), + new Tuple<>("__message_timestamp", 
123012311L), + new Tuple<>("__kafka_topic", "topic"), + new Tuple<>("__kafka_offset", 100L) + ); + + RecordWrappers recordWrappers = recordConverter.convert(Collections.singletonList(message)); + + Assertions.assertThat(recordWrappers.getInvalidRecords()).size().isEqualTo(1); + RecordWrapper recordWrapper = recordWrappers.getInvalidRecords().get(0); + Assertions.assertThat(recordWrapper.getIndex()).isEqualTo(0); + Assertions.assertThat(recordWrapper.getRecord()) + .isNull(); + Assertions.assertThat(recordWrapper.getErrorInfo()) + .isEqualTo(new ErrorInfo(new UnknownFieldsException(mockedMessage), ErrorType.UNKNOWN_FIELDS_ERROR)); + } + + /** Builds the fixture MaxComputeRecord used by every test: id set to id, two InnerRecords (name_1 with balance 100.2f, name_2 with balance 50f) and a timestamp of 10002010 seconds and 1000 nanos. */private static TestMaxComputeRecord.MaxComputeRecord getMockedMessage() { + return TestMaxComputeRecord.MaxComputeRecord + .newBuilder() + .setId("id") + .addAllInnerRecord(Arrays.asList( + TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_1") + .setBalance(100.2f) + .build(), + TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_2") + .setBalance(50f) + .build() + )) + .setTimestamp(Timestamp.newBuilder() + .setSeconds(10002010) + .setNanos(1000) + .build()) + .build(); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/DurationTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverterTest.java similarity index 79% rename from src/test/java/com/gotocompany/depot/maxcompute/DurationTypeInfoConverterTest.java rename to src/test/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverterTest.java index f13caa25..8acf28a1 100644 --- a/src/test/java/com/gotocompany/depot/maxcompute/DurationTypeInfoConverterTest.java +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/DurationTypeInfoConverterTest.java @@ -1,20 +1,19 @@ -package com.gotocompany.depot.maxcompute; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.google.protobuf.Descriptors; import 
com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.DurationTypeInfoConverter; import org.junit.Test; import org.junit.jupiter.api.Assertions; public class DurationTypeInfoConverterTest { - private final int DURATION_INDEX = 5; + private static final int DURATION_INDEX = 5; private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); private final DurationTypeInfoConverter durationTypeInfoConverter = new DurationTypeInfoConverter(); @Test - public void shouldConvertToStruct(){ + public void shouldConvertToStruct() { Descriptors.FieldDescriptor fieldDescriptor = descriptor.getFields().get(DURATION_INDEX); TypeInfo typeInfo = durationTypeInfoConverter.convert(fieldDescriptor); @@ -23,7 +22,7 @@ public void shouldConvertToStruct(){ } @Test - public void shouldReturnTrueForDuration(){ + public void shouldReturnTrueForDuration() { Descriptors.FieldDescriptor fieldDescriptor = descriptor.getFields().get(DURATION_INDEX); boolean canConvert = durationTypeInfoConverter.canConvert(fieldDescriptor); @@ -32,12 +31,12 @@ public void shouldReturnTrueForDuration(){ } @Test - public void shouldReturnFalseForNonDuration(){ + public void shouldReturnFalseForNonDuration() { Descriptors.FieldDescriptor fieldDescriptor = descriptor.getFields().get(0); boolean canConvert = durationTypeInfoConverter.canConvert(fieldDescriptor); Assertions.assertFalse(canConvert); } - + } diff --git a/src/test/java/com/gotocompany/depot/maxcompute/MessageTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverterTest.java similarity index 72% rename from src/test/java/com/gotocompany/depot/maxcompute/MessageTypeInfoConverterTest.java rename to src/test/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverterTest.java index 9d42021a..d9ea9486 100644 --- a/src/test/java/com/gotocompany/depot/maxcompute/MessageTypeInfoConverterTest.java +++ 
b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/MessageTypeInfoConverterTest.java @@ -1,14 +1,8 @@ -package com.gotocompany.depot.maxcompute; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.google.protobuf.Descriptors; import com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.DurationTypeInfoConverter; -import com.gotocompany.depot.maxcompute.converter.MessageTypeInfoConverter; -import com.gotocompany.depot.maxcompute.converter.PrimitiveTypeInfoConverter; -import com.gotocompany.depot.maxcompute.converter.StructTypeInfoConverter; -import com.gotocompany.depot.maxcompute.converter.TimestampTypeInfoConverter; -import com.gotocompany.depot.maxcompute.converter.TypeInfoConverter; import org.junit.Before; import org.junit.Test; import org.junit.jupiter.api.Assertions; @@ -18,7 +12,7 @@ public class MessageTypeInfoConverterTest { - private final Descriptors.Descriptor DESCRIPTOR = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); private MessageTypeInfoConverter messageTypeInfoConverter; @Before @@ -28,8 +22,8 @@ public void initialize() { @Test public void shouldConvertMessageToProperTypeInfo() { - TypeInfo firstMessageFieldTypeInfo = messageTypeInfoConverter.convert(DESCRIPTOR.getFields().get(1)); - TypeInfo secondMessageFieldTypeInfo = messageTypeInfoConverter.convert(DESCRIPTOR.getFields().get(2)); + TypeInfo firstMessageFieldTypeInfo = messageTypeInfoConverter.convert(descriptor.getFields().get(1)); + TypeInfo secondMessageFieldTypeInfo = messageTypeInfoConverter.convert(descriptor.getFields().get(2)); String expectedFirstMessageTypeRepresentation = "STRUCT,another_inner_list_field:ARRAY>>"; String expectedSecondMessageTypeRepresentation = String.format("ARRAY<%s>", expectedFirstMessageTypeRepresentation); @@ -41,24 +35,19 @@ public void 
shouldConvertMessageToProperTypeInfo() { @Test(expected = IllegalArgumentException.class) public void shouldThrowIllegalArgumentExceptionWhenUnsupportedTypeIsGiven() { messageTypeInfoConverter = new MessageTypeInfoConverter(new ArrayList<>()); - Descriptors.FieldDescriptor unsupportedFieldDescriptor = DESCRIPTOR.getFields().get(1); + Descriptors.FieldDescriptor unsupportedFieldDescriptor = descriptor.getFields().get(1); messageTypeInfoConverter.convert(unsupportedFieldDescriptor); } @Test public void shouldReturnTrueWhenCanConvertIsCalledWithMessageFieldDescriptor() { - Assertions.assertTrue(messageTypeInfoConverter.canConvert(DESCRIPTOR.getFields().get(1))); + Assertions.assertTrue(messageTypeInfoConverter.canConvert(descriptor.getFields().get(1))); } @Test public void shouldReturnFalseWhenCanConvertIsCalledWithNonMessageFieldDescriptor() { - Assertions.assertFalse(messageTypeInfoConverter.canConvert(DESCRIPTOR.getFields().get(0))); - } - - @Test - public void shouldReturnMinIntegerAsPriority() { - Assertions.assertEquals(Integer.MIN_VALUE, messageTypeInfoConverter.getPriority()); + Assertions.assertFalse(messageTypeInfoConverter.canConvert(descriptor.getFields().get(0))); } private void initializeConverters() { diff --git a/src/test/java/com/gotocompany/depot/maxcompute/PrimitiveTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverterTest.java similarity index 97% rename from src/test/java/com/gotocompany/depot/maxcompute/PrimitiveTypeInfoConverterTest.java rename to src/test/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverterTest.java index 58fc611e..6fb972be 100644 --- a/src/test/java/com/gotocompany/depot/maxcompute/PrimitiveTypeInfoConverterTest.java +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/PrimitiveTypeInfoConverterTest.java @@ -1,10 +1,9 @@ -package com.gotocompany.depot.maxcompute; +package com.gotocompany.depot.maxcompute.converter.type; 
import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; import com.google.protobuf.Descriptors; import com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.PrimitiveTypeInfoConverter; import org.junit.jupiter.api.Assertions; import org.junit.Test; @@ -16,96 +15,112 @@ public class PrimitiveTypeInfoConverterTest { @Test public void shouldConvertToBinary() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("bytes_field")); + Assertions.assertEquals(TypeInfoFactory.BINARY, typeInfo); } @Test public void shouldConvertToString() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("string_field")); + Assertions.assertEquals(TypeInfoFactory.STRING, typeInfo); } @Test public void shouldConvertEnumToString() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("enum_field")); + Assertions.assertEquals(TypeInfoFactory.STRING, typeInfo); } @Test public void shouldConvertToDouble() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("double_field")); + Assertions.assertEquals(TypeInfoFactory.DOUBLE, typeInfo); } @Test public void shouldConvertToFloat() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("float_field")); + Assertions.assertEquals(TypeInfoFactory.FLOAT, typeInfo); } @Test public void shouldConvertToBoolean() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("bool_field")); + Assertions.assertEquals(TypeInfoFactory.BOOLEAN, typeInfo); } @Test public void shouldConvertToBigInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("int64_field")); + Assertions.assertEquals(TypeInfoFactory.BIGINT, typeInfo); } @Test public void shouldConvertUInt64ToBigInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("uint64_field")); + 
Assertions.assertEquals(TypeInfoFactory.BIGINT, typeInfo); } @Test public void shouldConvertToInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("int32_field")); + Assertions.assertEquals(TypeInfoFactory.INT, typeInfo); } @Test public void shouldConvertUInt32ToInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("uint32_field")); + Assertions.assertEquals(TypeInfoFactory.INT, typeInfo); } @Test public void shouldConvertFixed64ToBigInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("fixed64_field")); + Assertions.assertEquals(TypeInfoFactory.BIGINT, typeInfo); } @Test public void shouldConvertFixed32ToInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("fixed32_field")); + Assertions.assertEquals(TypeInfoFactory.INT, typeInfo); } @Test public void shouldConvertSFixed32ToInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("sfixed32_field")); + Assertions.assertEquals(TypeInfoFactory.INT, typeInfo); } @Test public void shouldConvertSFixed64ToBigInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("sfixed64_field")); + Assertions.assertEquals(TypeInfoFactory.BIGINT, typeInfo); } @Test public void shouldConvertSInt32ToInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("sint32_field")); + Assertions.assertEquals(TypeInfoFactory.INT, typeInfo); } @Test public void shouldConvertSInt64ToBigInt() { TypeInfo typeInfo = primitiveTypeInfoConverter.convert(descriptor.findFieldByName("sint64_field")); + Assertions.assertEquals(TypeInfoFactory.BIGINT, typeInfo); } -} \ No newline at end of file +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/StructTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverterTest.java similarity index 81% rename from 
src/test/java/com/gotocompany/depot/maxcompute/StructTypeInfoConverterTest.java rename to src/test/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverterTest.java index 0ec5b253..5286a381 100644 --- a/src/test/java/com/gotocompany/depot/maxcompute/StructTypeInfoConverterTest.java +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/StructTypeInfoConverterTest.java @@ -1,17 +1,16 @@ -package com.gotocompany.depot.maxcompute; +package com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; import com.google.protobuf.Descriptors; import com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.StructTypeInfoConverter; import org.junit.Test; import org.junit.jupiter.api.Assertions; public class StructTypeInfoConverterTest { - private final int STRUCT_INDEX = 4; - private final Descriptors.Descriptor DESCRIPTOR = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); + private static final int STRUCT_INDEX = 4; + private static final Descriptors.Descriptor DESCRIPTOR = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); private final StructTypeInfoConverter structTypeInfoConverter = new StructTypeInfoConverter(); @Test diff --git a/src/test/java/com/gotocompany/depot/maxcompute/TimestampTypeInfoConverterTest.java b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/TimestampPayloadConverterTest.java similarity index 80% rename from src/test/java/com/gotocompany/depot/maxcompute/TimestampTypeInfoConverterTest.java rename to src/test/java/com/gotocompany/depot/maxcompute/converter/type/TimestampPayloadConverterTest.java index df2bc5c1..ee7e24a1 100644 --- a/src/test/java/com/gotocompany/depot/maxcompute/TimestampTypeInfoConverterTest.java +++ b/src/test/java/com/gotocompany/depot/maxcompute/converter/type/TimestampPayloadConverterTest.java @@ -1,16 +1,15 @@ -package com.gotocompany.depot.maxcompute; +package 
com.gotocompany.depot.maxcompute.converter.type; import com.aliyun.odps.type.TypeInfo; import com.aliyun.odps.type.TypeInfoFactory; import com.google.protobuf.Descriptors; import com.gotocompany.depot.TestMaxComputeTypeInfo; -import com.gotocompany.depot.maxcompute.converter.TimestampTypeInfoConverter; import org.junit.Test; import org.junit.jupiter.api.Assertions; -public class TimestampTypeInfoConverterTest { +public class TimestampPayloadConverterTest { - private final int TIMESTAMP_INDEX = 3; + private static final int TIMESTAMP_INDEX = 3; private final Descriptors.Descriptor descriptor = TestMaxComputeTypeInfo.TestRoot.getDescriptor(); private final TimestampTypeInfoConverter timestampTypeInfoConverter = new TimestampTypeInfoConverter(); @@ -18,7 +17,7 @@ public class TimestampTypeInfoConverterTest { public void shouldConvertToTimestampNtz() { TypeInfo typeInfo = timestampTypeInfoConverter.convert(descriptor.getFields().get(TIMESTAMP_INDEX)); - Assertions.assertEquals(TypeInfoFactory.TIMESTAMP_NTZ, typeInfo); + Assertions.assertEquals(TypeInfoFactory.TIMESTAMP, typeInfo); } @Test diff --git a/src/test/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelperTest.java b/src/test/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelperTest.java new file mode 100644 index 00000000..48402109 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/helper/MaxComputeSchemaHelperTest.java @@ -0,0 +1,181 @@ +package com.gotocompany.depot.maxcompute.helper; + +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.TextMaxComputeTable; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.schema.partition.PartitioningStrategyFactory; +import 
com.sun.tools.javac.util.List;/* NOTE(review): this is the javac-internal List class, not java.util.List; presumably an accidental auto-import. Consider java.util.Arrays.asList, as the other tests in this patch use, and confirm the build does not depend on compiler internals. */ +import org.assertj.core.api.Assertions; +import org.assertj.core.groups.Tuple; +import org.junit.Test; +import org.mockito.Mockito; + + +/** Tests MaxComputeSchemaHelper.buildMaxComputeSchema against the TextMaxComputeTable.Table descriptor under different mocked sink configurations. */public class MaxComputeSchemaHelperTest { + + private final Descriptors.Descriptor descriptor = TextMaxComputeTable.Table.getDescriptor(); + + /** Metadata and partitioning enabled, metadata namespace left unstubbed: expects 7 regular columns (id, user, items, event_timestamp plus the three metadata columns at root level) and 1 STRING partition column. */@Test + public void shouldBuildPartitionedTableSchemaWithRootLevelMetadata() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getMetadataColumnsTypes()).thenReturn( + List.of(new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + ) + ); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn("event_timestamp"); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn("__partitioning_column"); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory( + new ConverterOrchestrator(), maxComputeSinkConfig + ); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper(new ConverterOrchestrator(), + maxComputeSinkConfig, partitioningStrategyFactory.createPartitioningStrategy(descriptor)); + int expectedNonPartitionColumnCount = 7; + int expectedPartitionColumnCount = 1; + + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns().size()).isEqualTo(expectedNonPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getPartitionColumns().size()).isEqualTo(expectedPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns()) + .extracting("name", "typeInfo") + 
.containsExactlyInAnyOrder( + Tuple.tuple("id", TypeInfoFactory.STRING), + Tuple.tuple("user", TypeInfoFactory.getStructTypeInfo( + List.of("id", "contacts"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("number"), + List.of(TypeInfoFactory.STRING) + ))) + )), + Tuple.tuple("items", TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("id", "name"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.STRING) + ))), + Tuple.tuple("event_timestamp", TypeInfoFactory.TIMESTAMP), + Tuple.tuple("__message_timestamp", TypeInfoFactory.TIMESTAMP), + Tuple.tuple("__kafka_topic", TypeInfoFactory.STRING), + Tuple.tuple("__kafka_offset", TypeInfoFactory.BIGINT) + ); + Assertions.assertThat(maxComputeSchema.getTableSchema().getPartitionColumns()) + .extracting("name", "typeInfo") + .contains(Tuple.tuple("__partitioning_column", TypeInfoFactory.STRING)); + } + + /** Metadata namespace stubbed as meta: expects 5 regular columns (id, user, items, event_timestamp plus one meta struct holding the three metadata columns) and 1 STRING partition column. */@Test + public void shouldBuildPartitionedTableSchemaWithNestedMetadata() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getMaxcomputeMetadataNamespace()).thenReturn("meta"); + Mockito.when(maxComputeSinkConfig.getMetadataColumnsTypes()).thenReturn( + List.of(new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + ) + ); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn("event_timestamp"); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn("__partitioning_column"); + int expectedNonPartitionColumnCount = 5; + int expectedPartitionColumnCount = 1; + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory( + new 
ConverterOrchestrator(), maxComputeSinkConfig + ); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper( + new ConverterOrchestrator(), maxComputeSinkConfig, partitioningStrategyFactory.createPartitioningStrategy(descriptor)); + + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns().size()).isEqualTo(expectedNonPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getPartitionColumns().size()).isEqualTo(expectedPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns()) + .extracting("name", "typeInfo") + .containsExactlyInAnyOrder( + Tuple.tuple("id", TypeInfoFactory.STRING), + Tuple.tuple("user", TypeInfoFactory.getStructTypeInfo( + List.of("id", "contacts"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("number"), + List.of(TypeInfoFactory.STRING) + ))) + )), + Tuple.tuple("items", TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("id", "name"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.STRING) + ))), + Tuple.tuple("event_timestamp", TypeInfoFactory.TIMESTAMP), + Tuple.tuple("meta", TypeInfoFactory.getStructTypeInfo( + List.of("__message_timestamp", "__kafka_topic", "__kafka_offset"), + List.of(TypeInfoFactory.TIMESTAMP, TypeInfoFactory.STRING, TypeInfoFactory.BIGINT) + )) + ); + Assertions.assertThat(maxComputeSchema.getTableSchema().getPartitionColumns()) + .extracting("name", "typeInfo") + .contains(Tuple.tuple("__partitioning_column", TypeInfoFactory.STRING)); + } + + /** Metadata and partitioning both disabled: expects only the 4 columns id, user, items and event_timestamp, and 0 partition columns. */@Test + public void shouldBuildTableSchemaWithoutPartitionAndMeta() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.FALSE); + 
Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.FALSE); + int expectedNonPartitionColumnCount = 4; + int expectedPartitionColumnCount = 0; + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory( + new ConverterOrchestrator(), maxComputeSinkConfig + ); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper(new ConverterOrchestrator(), + maxComputeSinkConfig, partitioningStrategyFactory.createPartitioningStrategy(descriptor)); + + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns().size()).isEqualTo(expectedNonPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getPartitionColumns().size()).isEqualTo(expectedPartitionColumnCount); + Assertions.assertThat(maxComputeSchema.getTableSchema().getColumns()) + .extracting("name", "typeInfo") + .containsExactlyInAnyOrder( + Tuple.tuple("id", TypeInfoFactory.STRING), + Tuple.tuple("user", TypeInfoFactory.getStructTypeInfo( + List.of("id", "contacts"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("number"), + List.of(TypeInfoFactory.STRING) + ))) + )), + Tuple.tuple("items", TypeInfoFactory.getArrayTypeInfo(TypeInfoFactory.getStructTypeInfo( + List.of("id", "name"), + List.of(TypeInfoFactory.STRING, TypeInfoFactory.STRING) + ))), + Tuple.tuple("event_timestamp", TypeInfoFactory.TIMESTAMP) + ); + } + + @Test(expected = IllegalArgumentException.class) + public void shouldThrowIllegalArgumentExceptionWhenPartitionKeyIsNotFound() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getMetadataColumnsTypes()).thenReturn( + List.of(new TupleString("__message_timestamp", 
"timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + ) + ); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.TRUE); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn("non_existent_partition_key"); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory( + new ConverterOrchestrator(), maxComputeSinkConfig + ); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper(new ConverterOrchestrator(), + maxComputeSinkConfig, partitioningStrategyFactory.createPartitioningStrategy(descriptor)); + + maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecoratorTest.java b/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecoratorTest.java new file mode 100644 index 00000000..fef4b813 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoDataColumnRecordDecoratorTest.java @@ -0,0 +1,120 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Record; +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.ArrayTypeInfo; +import com.aliyun.odps.type.StructTypeInfo; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Timestamp; +import com.gotocompany.depot.TestMaxComputeRecord; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.config.SinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.helper.MaxComputeSchemaHelper; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import 
com.gotocompany.depot.message.Message; +import com.gotocompany.depot.message.ParsedMessage; +import com.gotocompany.depot.message.SinkConnectorSchemaMessageMode; +import com.gotocompany.depot.message.proto.ProtoMessageParser; +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Arrays; +/** Tests that ProtoDataColumnRecordDecorator copies the parsed proto message into the record's data columns. */ +public class ProtoDataColumnRecordDecoratorTest { + + private static final Descriptors.Descriptor DESCRIPTOR = TestMaxComputeRecord.MaxComputeRecord.getDescriptor(); + + private MaxComputeSchemaHelper maxComputeSchemaHelper; + private ProtoDataColumnRecordDecorator protoDataColumnRecordDecorator; + /** Builds the decorator with partitioning and metadata stubbed off and the schema message mode stubbed to LOG_MESSAGE. */ + @Before + public void setup() throws IOException { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.getMaxcomputeMetadataNamespace()).thenReturn("__kafka_metadata"); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.FALSE); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.FALSE); + + SinkConfig sinkConfig = Mockito.mock(SinkConfig.class); + Mockito.when(sinkConfig.getSinkConnectorSchemaMessageMode()).thenReturn(SinkConnectorSchemaMessageMode.LOG_MESSAGE); + + instantiateProtoDataColumnRecordDecorator(sinkConfig, maxComputeSinkConfig); + } + /** After decorate, the record values should be the id, a list of two inner_record structs, and the proto timestamp as java.sql.Timestamp. */ + @Test + public void decorateShouldAppendDataColumnToRecord() throws IOException { + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(DESCRIPTOR); + Record record = new ArrayRecord(maxComputeSchema.getTableSchema()); + RecordWrapper recordWrapper = new RecordWrapper(record, 0, null, null); + TestMaxComputeRecord.MaxComputeRecord maxComputeRecord = getMockedMessage(); + Message message = new Message(null, maxComputeRecord.toByteArray()); + java.sql.Timestamp expectedTimestamp = new java.sql.Timestamp(10002010L * 1000); + expectedTimestamp.setNanos(1000); +
StructTypeInfo expectedArrayStructElementTypeInfo = (StructTypeInfo) ((ArrayTypeInfo) maxComputeSchema.getDataColumns().get("inner_record")).getElementTypeInfo(); + protoDataColumnRecordDecorator.decorate(recordWrapper, message); + + Assertions.assertThat(record) + .extracting("values") + .isEqualTo(new Object[]{"id", + Arrays.asList( + new SimpleStruct(expectedArrayStructElementTypeInfo, Arrays.asList("name_1", 100.2f)), + new SimpleStruct(expectedArrayStructElementTypeInfo, Arrays.asList("name_2", 50f)) + ), + expectedTimestamp}); + } + /** Wires a real schema helper and orchestrator with a mocked schema cache and a mocked parser whose parsed message returns getMockedMessage(). */ + private void instantiateProtoDataColumnRecordDecorator(SinkConfig sinkConfig, MaxComputeSinkConfig maxComputeSinkConfig) throws IOException { + ConverterOrchestrator converterOrchestrator = new ConverterOrchestrator(); + maxComputeSchemaHelper = new MaxComputeSchemaHelper( + converterOrchestrator, + maxComputeSinkConfig, + null + ); + + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(DESCRIPTOR); + MaxComputeSchemaCache maxComputeSchemaCache = Mockito.mock(MaxComputeSchemaCache.class); + Mockito.when(maxComputeSchemaCache.getMaxComputeSchema()).thenReturn(maxComputeSchema); + + ProtoMessageParser protoMessageParser = Mockito.mock(ProtoMessageParser.class); + ParsedMessage parsedMessage = Mockito.mock(ParsedMessage.class); + Mockito.when(parsedMessage.getRaw()).thenReturn(getMockedMessage()); + Mockito.when(protoMessageParser.parse(Mockito.any(), Mockito.any(), Mockito.any())) + .thenReturn(parsedMessage); + + protoDataColumnRecordDecorator = new ProtoDataColumnRecordDecorator( + null, + converterOrchestrator, + maxComputeSchemaCache, + protoMessageParser, + sinkConfig, + null + ); + } + /** Fixture message: id "id", two inner records, and a timestamp of 10002010 seconds and 1000 nanos. */ + private static TestMaxComputeRecord.MaxComputeRecord getMockedMessage() { + return TestMaxComputeRecord.MaxComputeRecord + .newBuilder() + .setId("id") + .addAllInnerRecord(Arrays.asList( + TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_1") + .setBalance(100.2f) + .build(), +
TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_2") + .setBalance(50f) + .build() + )) + .setTimestamp(Timestamp.newBuilder() + .setSeconds(10002010) + .setNanos(1000) + .build()) + .build(); + } +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecoratorTest.java b/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecoratorTest.java new file mode 100644 index 00000000..c1920331 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/record/ProtoMetadataColumnRecordDecoratorTest.java @@ -0,0 +1,138 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.aliyun.odps.data.ArrayRecord; +import com.aliyun.odps.data.Record; +import com.aliyun.odps.data.SimpleStruct; +import com.aliyun.odps.type.TypeInfoFactory; +import com.google.protobuf.Descriptors; +import com.google.protobuf.Timestamp; +import com.gotocompany.depot.TestMaxComputeRecord; +import com.gotocompany.depot.common.Tuple; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import com.gotocompany.depot.maxcompute.helper.MaxComputeSchemaHelper; +import com.gotocompany.depot.maxcompute.model.MaxComputeSchema; +import com.gotocompany.depot.maxcompute.model.RecordWrapper; +import com.gotocompany.depot.maxcompute.schema.MaxComputeSchemaCache; +import com.gotocompany.depot.message.Message; +import com.gotocompany.depot.message.proto.ProtoParsedMessage; +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.IOException; +import java.util.Arrays; +/** Tests that ProtoMetadataColumnRecordDecorator writes message metadata into the record, either namespaced or at root level. */ +public class ProtoMetadataColumnRecordDecoratorTest { + + private final Descriptors.Descriptor descriptor = TestMaxComputeRecord.MaxComputeRecord.getDescriptor(); + + private MaxComputeSinkConfig maxComputeSinkConfig; + private MaxComputeSchemaCache
maxComputeSchemaCache; + private ProtoMetadataColumnRecordDecorator protoMetadataColumnRecordDecorator; + /** Default fixture: metadata enabled under namespace "__kafka_metadata" with timestamp, topic and offset columns; partitioning off. */ + @Before + public void setup() { + MaxComputeSinkConfig config = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(config.isTablePartitioningEnabled()).thenReturn(Boolean.FALSE); + Mockito.when(config.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(config.getMaxcomputeMetadataNamespace()).thenReturn("__kafka_metadata"); + Mockito.when(config.getMetadataColumnsTypes()).thenReturn(Arrays.asList( + new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + )); + initializeDecorator(config); + } + /** Rebuilds the schema from the given config, exposes it through a mocked schema cache, and creates the decorator under test. */ + private void initializeDecorator(MaxComputeSinkConfig maxComputeSinkConfig) { + this.maxComputeSinkConfig = maxComputeSinkConfig; + ConverterOrchestrator converterOrchestrator = new ConverterOrchestrator(); + MaxComputeSchemaHelper maxComputeSchemaHelper = new MaxComputeSchemaHelper(converterOrchestrator, maxComputeSinkConfig, null); + MaxComputeSchema maxComputeSchema = maxComputeSchemaHelper.buildMaxComputeSchema(descriptor); + maxComputeSchemaCache = Mockito.mock(MaxComputeSchemaCache.class); + Mockito.when(maxComputeSchemaCache.getMaxComputeSchema()).thenReturn(maxComputeSchema); + protoMetadataColumnRecordDecorator = new ProtoMetadataColumnRecordDecorator(null, maxComputeSinkConfig, maxComputeSchemaCache); + } + /** With a namespace configured, the metadata should be stored as one struct under the namespace column. */ + @Test + public void shouldPopulateRecordWithNamespacedMetadata() throws IOException { + Message message = new Message( + null, + new ProtoParsedMessage(getMockedMessage(), null), + new Tuple<>("__message_timestamp", 10002010L), + new Tuple<>("__kafka_topic", "topic"), + new Tuple<>("__kafka_offset", 100L) + ); + Record record = new ArrayRecord(maxComputeSchemaCache.getMaxComputeSchema().getColumns()); + RecordWrapper recordWrapper = new RecordWrapper(record, 0, null, null); + java.sql.Timestamp expectedTimestamp = new java.sql.Timestamp(10002010L);
+ + protoMetadataColumnRecordDecorator.decorate(recordWrapper, message); + /* NOTE(review): this expects TIMESTAMP_NTZ for "timestamp" while MetadataUtilTest expects TypeInfoFactory.TIMESTAMP; confirm which is intended. */ + Assertions.assertThat(record.get(maxComputeSinkConfig.getMaxcomputeMetadataNamespace())) + .isEqualTo(new SimpleStruct( + TypeInfoFactory.getStructTypeInfo(Arrays.asList("__message_timestamp", "__kafka_topic", "__kafka_offset"), + Arrays.asList(TypeInfoFactory.TIMESTAMP_NTZ, TypeInfoFactory.STRING, TypeInfoFactory.BIGINT)), + Arrays.asList(expectedTimestamp, "topic", 100L) + )); + } + /** With no namespace stubbed, each metadata value should be readable from its own root-level column. */ + @Test + public void shouldPopulateRecordWithNonNamespacedMetadata() throws IOException { + MaxComputeSinkConfig mcSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(mcSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.FALSE); + Mockito.when(mcSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + Mockito.when(mcSinkConfig.getMetadataColumnsTypes()).thenReturn(Arrays.asList( + new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + )); + initializeDecorator(mcSinkConfig); + Message message = new Message( + null, + new ProtoParsedMessage(getMockedMessage(), null), + new Tuple<>("__message_timestamp", 10002010L), + new Tuple<>("__kafka_topic", "topic"), + new Tuple<>("__kafka_offset", 100L) + ); + Record record = new ArrayRecord(maxComputeSchemaCache.getMaxComputeSchema().getColumns()); + RecordWrapper recordWrapper = new RecordWrapper(record, 0, null, null); + java.sql.Timestamp expectedTimestamp = new java.sql.Timestamp(10002010L); + + protoMetadataColumnRecordDecorator.decorate(recordWrapper, message); + + Assertions.assertThat(record) + .satisfies(r -> { + Assertions.assertThat(r.get("__message_timestamp")) + .isEqualTo(expectedTimestamp); + Assertions.assertThat(r.get("__kafka_topic")) + .isEqualTo("topic"); + Assertions.assertThat(r.get("__kafka_offset")) + .isEqualTo(100L); + }); + } + /** Fixture message: id "id", two inner records, and a timestamp of 10002010 seconds and 1000 nanos. */ + private static TestMaxComputeRecord.MaxComputeRecord getMockedMessage() { + return
TestMaxComputeRecord.MaxComputeRecord + .newBuilder() + .setId("id") + .addAllInnerRecord(Arrays.asList( + TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_1") + .setBalance(100.2f) + .build(), + TestMaxComputeRecord.InnerRecord.newBuilder() + .setName("name_2") + .setBalance(50f) + .build() + )) + .setTimestamp(Timestamp.newBuilder() + .setSeconds(10002010) + .setNanos(1000) + .build()) + .build(); + } +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactoryTest.java b/src/test/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactoryTest.java new file mode 100644 index 00000000..1320df5b --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/record/RecordDecoratorFactoryTest.java @@ -0,0 +1,39 @@ +package com.gotocompany.depot.maxcompute.record; + +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; +/** Tests which decorator chain RecordDecoratorFactory.createRecordDecorator builds for the shouldAddMetadata flag. */ +public class RecordDecoratorFactoryTest { + /** Metadata disabled: expects a ProtoDataColumnRecordDecorator whose "decorator" field is null. */ + @Test + public void shouldCreateDataRecordDecorator() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.FALSE); + + RecordDecorator recordDecorator = RecordDecoratorFactory.createRecordDecorator( + null, null, null, null, maxComputeSinkConfig, null + ); + + Assertions.assertThat(recordDecorator) + .isInstanceOf(ProtoDataColumnRecordDecorator.class) + .extracting("decorator") + .isNull(); + } + /** Metadata enabled: expects a ProtoMetadataColumnRecordDecorator whose "decorator" field is a ProtoDataColumnRecordDecorator. */ + @Test + public void shouldCreateDataRecordDecoratorWithNamespaceDecorator() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.shouldAddMetadata()).thenReturn(Boolean.TRUE); + + RecordDecorator recordDecorator = RecordDecoratorFactory.createRecordDecorator( + null, null, null, null, maxComputeSinkConfig, null + ); + Assertions.assertThat(recordDecorator) +
.isInstanceOf(ProtoMetadataColumnRecordDecorator.class) + .extracting("decorator") + .isNotNull() + .isInstanceOf(ProtoDataColumnRecordDecorator.class); + } +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactoryTest.java b/src/test/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactoryTest.java new file mode 100644 index 00000000..d71c1626 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/schema/partition/PartitioningStrategyFactoryTest.java @@ -0,0 +1,78 @@ +package com.gotocompany.depot.maxcompute.schema.partition; + +import com.google.protobuf.Descriptors; +import com.gotocompany.depot.TestMaxComputePartition; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import com.gotocompany.depot.maxcompute.converter.ConverterOrchestrator; +import org.junit.Assert; +import org.junit.Test; +import org.mockito.Mockito; +/** Tests which PartitioningStrategy the factory creates for different partition-key fields of MaxComputePartition. */ +public class PartitioningStrategyFactoryTest { + + private final Descriptors.Descriptor descriptor = TestMaxComputePartition.MaxComputePartition.getDescriptor(); + /** Partition key "string_field": expects a DefaultPartitioningStrategy. */ + @Test + public void shouldReturnDefaultPartitionStrategy() { + String stringFieldName = "string_field"; + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(true); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn(stringFieldName); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn(stringFieldName); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(new ConverterOrchestrator(), maxComputeSinkConfig); + + PartitioningStrategy partitioningStrategy = partitioningStrategyFactory.createPartitioningStrategy(descriptor); + + Assert.assertTrue(partitioningStrategy instanceof DefaultPartitioningStrategy); + } + /** Partition key "timestamp_field": expects a TimestampPartitioningStrategy. */ + @Test + public void shouldReturnTimestampPartitionStrategy() { + String
timestampFieldName = "timestamp_field"; + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(true); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn(timestampFieldName); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn(timestampFieldName); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(new ConverterOrchestrator(), maxComputeSinkConfig); + + PartitioningStrategy partitioningStrategy = partitioningStrategyFactory.createPartitioningStrategy(descriptor); + + Assert.assertTrue(partitioningStrategy instanceof TimestampPartitioningStrategy); + } + /** Partitioning disabled: the factory is expected to return null. */ + @Test + public void shouldReturnNull() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(Boolean.FALSE); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(new ConverterOrchestrator(), maxComputeSinkConfig); + + PartitioningStrategy partitioningStrategy = partitioningStrategyFactory.createPartitioningStrategy(descriptor); + + Assert.assertNull(partitioningStrategy); + } + /** Partition key "float_field": expects an IllegalArgumentException. */ + @Test(expected = IllegalArgumentException.class) + public void shouldThrowIllegalArgumentExceptionWhenTypeInfoIsNotSupported() { + String unsupportedTypeFieldName = "float_field"; + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(true); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn(unsupportedTypeFieldName); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn(unsupportedTypeFieldName); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(new ConverterOrchestrator(), maxComputeSinkConfig); + +
partitioningStrategyFactory.createPartitioningStrategy(descriptor); + } + /** Partition key "non_existent_field": expects an IllegalArgumentException. */ + @Test(expected = IllegalArgumentException.class) + public void shouldThrowIllegalArgumentExceptionWhenFieldIsNotFoundInDescriptor() { + String fieldName = "non_existent_field"; + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.isTablePartitioningEnabled()).thenReturn(true); + Mockito.when(maxComputeSinkConfig.getTablePartitionKey()).thenReturn(fieldName); + Mockito.when(maxComputeSinkConfig.getTablePartitionColumnName()).thenReturn(fieldName); + PartitioningStrategyFactory partitioningStrategyFactory = new PartitioningStrategyFactory(new ConverterOrchestrator(), maxComputeSinkConfig); + + partitioningStrategyFactory.createPartitioningStrategy(descriptor); + } + +} diff --git a/src/test/java/com/gotocompany/depot/maxcompute/util/MetadataUtilTest.java b/src/test/java/com/gotocompany/depot/maxcompute/util/MetadataUtilTest.java new file mode 100644 index 00000000..4f027081 --- /dev/null +++ b/src/test/java/com/gotocompany/depot/maxcompute/util/MetadataUtilTest.java @@ -0,0 +1,44 @@ +package com.gotocompany.depot.maxcompute.util; + +import com.aliyun.odps.type.StructTypeInfo; +import com.aliyun.odps.type.TypeInfoFactory; +import com.gotocompany.depot.common.TupleString; +import com.gotocompany.depot.config.MaxComputeSinkConfig; +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; + +import java.util.Arrays; +/** Tests MetadataUtil.getMetadataTypeInfo for both a sink config and a single metadata type name. */ +public class MetadataUtilTest { + /** The struct built from the configured metadata columns should carry their names and the mapped TIMESTAMP, STRING and BIGINT types. */ + @Test + public void shouldReturnAppropriateStructTypeInfoForNamespacedMetadata() { + MaxComputeSinkConfig maxComputeSinkConfig = Mockito.mock(MaxComputeSinkConfig.class); + Mockito.when(maxComputeSinkConfig.getMetadataColumnsTypes()).thenReturn( + Arrays.asList(new TupleString("__message_timestamp", "timestamp"), + new TupleString("__kafka_topic", "string"), + new TupleString("__kafka_offset", "long") + ) + ); + + StructTypeInfo
structTypeInfo = MetadataUtil.getMetadataTypeInfo(maxComputeSinkConfig); + + Assertions.assertThat(structTypeInfo.getFieldNames()).containsExactlyInAnyOrder("__message_timestamp", "__kafka_topic", "__kafka_offset"); + Assertions.assertThat(structTypeInfo.getFieldTypeInfos()).containsExactlyInAnyOrder( + TypeInfoFactory.TIMESTAMP, TypeInfoFactory.STRING, TypeInfoFactory.BIGINT + ); + } + /** Each supported metadata type name should map to the corresponding TypeInfoFactory constant. */ + @Test + public void shouldReturnAppropriateTypeInfoForMetadataType() { + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("integer")).isEqualTo(TypeInfoFactory.INT); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("long")).isEqualTo(TypeInfoFactory.BIGINT); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("float")).isEqualTo(TypeInfoFactory.FLOAT); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("double")).isEqualTo(TypeInfoFactory.DOUBLE); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("string")).isEqualTo(TypeInfoFactory.STRING); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("boolean")).isEqualTo(TypeInfoFactory.BOOLEAN); + Assertions.assertThat(MetadataUtil.getMetadataTypeInfo("timestamp")).isEqualTo(TypeInfoFactory.TIMESTAMP); + } + +} diff --git a/src/test/proto/TestMaxComputePartition.proto b/src/test/proto/TestMaxComputePartition.proto new file mode 100644 index 00000000..805ce5cb --- /dev/null +++ b/src/test/proto/TestMaxComputePartition.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +package com.gotocompany.depot; +/* Partition-key fixture; PartitioningStrategyFactoryTest uses string_field, timestamp_field and float_field. */ +message MaxComputePartition { + string string_field = 1; + int32 int32_field = 2; + int64 int64_field = 3; + google.protobuf.Timestamp timestamp_field = 4; + float float_field = 5; +} \ No newline at end of file diff --git a/src/test/proto/TestMaxComputeRecord.proto b/src/test/proto/TestMaxComputeRecord.proto new file mode 100644 index 00000000..93e8a30f --- /dev/null +++ b/src/test/proto/TestMaxComputeRecord.proto @@ -0,0 +1,16 @@ +syntax = "proto3"; + +import
"google/protobuf/timestamp.proto"; + +package com.gotocompany.depot; +/* Record fixture used by the record decorator tests: an id, repeated inner records and a timestamp. */ +message MaxComputeRecord { + string id = 1; + repeated InnerRecord inner_record = 2; + google.protobuf.Timestamp timestamp = 3; +} + +message InnerRecord { + string name = 1; + float balance = 2; +} \ No newline at end of file diff --git a/src/test/proto/TestMaxComputeTypeInfo.proto b/src/test/proto/TestMaxComputeTypeInfo.proto index fa3e28d0..00eab5b2 100644 --- a/src/test/proto/TestMaxComputeTypeInfo.proto +++ b/src/test/proto/TestMaxComputeTypeInfo.proto @@ -26,6 +26,25 @@ message TestFields { sint64 sint64_field = 16; } +message TestFieldsRepeated { + repeated bytes bytes_fields = 1; + repeated string string_fields = 2; + repeated TestEnum enum_fields = 3; + repeated double double_fields = 4; + repeated float float_fields = 5; + repeated bool bool_fields = 6; + repeated int64 int64_fields = 7; + repeated uint64 uint64_fields = 8; + repeated int32 int32_fields = 9; + repeated uint32 uint32_fields = 10; + repeated fixed64 fixed64_fields = 11; + repeated fixed32 fixed32_fields = 12; + repeated sfixed32 sfixed32_fields = 13; + repeated sfixed64 sfixed64_fields = 14; + repeated sint32 sint32_fields = 15; + repeated sint64 sint64_fields = 16; +} + message TestRoot { string string_field = 1; TestInner inner_field = 2; @@ -36,6 +55,16 @@ message TestRoot { google.protobuf.Empty empty_field = 7; } +message TestRootRepeated { + repeated string string_fields = 1; + repeated TestInner inner_fields = 2; + repeated TestInner inner_list_fields = 3; + repeated google.protobuf.Timestamp timestamp_fields = 4; + repeated google.protobuf.Struct struct_fields = 5; + repeated google.protobuf.Duration duration_fields = 6; + repeated google.protobuf.Empty empty_fields = 7; +} + message TestInner { string string_field = 1; TestAnotherInner another_inner_field = 2; @@ -46,7 +75,27 @@ message TestAnotherInner { string string_field = 1; } +message TestBuyerWrapper { + TestBuyer buyer = 1; +} + +message TestBuyer { +
string name = 1; + TestCart cart = 2; + google.protobuf.Timestamp created_at = 3; +} +message TestCart { + string cart_id = 1; + repeated TestItem items = 2; + google.protobuf.Timestamp created_at = 3; + google.protobuf.Duration cart_age = 4; +} + +message TestItem { + string id = 1; + int32 quantity = 2; +} enum TestEnum { TEST_1 = 0; TEST_2 = 1; diff --git a/src/test/proto/TextMaxComputeTable.proto b/src/test/proto/TextMaxComputeTable.proto new file mode 100644 index 00000000..6db1cd72 --- /dev/null +++ b/src/test/proto/TextMaxComputeTable.proto @@ -0,0 +1,26 @@ +syntax = "proto3"; + +import "google/protobuf/timestamp.proto"; + +package com.gotocompany.depot; +/* Table fixture used by MaxComputeSchemaHelperTest: id, nested user, repeated items and event_timestamp. */ +message Table { + string id = 1; + User user = 2; + repeated Item items = 3; + google.protobuf.Timestamp event_timestamp = 4; +} + +message User { + string id = 1; + repeated Contact contacts = 2; +} + +message Contact { + string number = 1; +} + +message Item { + string id = 1; + string name = 2; +} \ No newline at end of file