diff --git a/build.gradle b/build.gradle index bdd91140b..a95dd513b 100644 --- a/build.gradle +++ b/build.gradle @@ -33,7 +33,7 @@ lombok { } group 'com.gotocompany' -version '0.10.7' +version '0.11.1' def projName = "firehose" @@ -100,7 +100,7 @@ dependencies { implementation platform('com.google.cloud:libraries-bom:20.5.0') implementation 'com.google.cloud:google-cloud-storage:2.20.1' implementation 'org.apache.logging.log4j:log4j-core:2.20.0' - implementation group: 'com.gotocompany', name: 'depot', version: '0.9.2' + implementation group: 'com.gotocompany', name: 'depot', version: '0.10.1' implementation group: 'com.networknt', name: 'json-schema-validator', version: '1.0.59' exclude group: 'org.slf4j' implementation 'dev.cel:cel:0.5.2' diff --git a/docs/docs/sinks/maxcompute-sink.md b/docs/docs/sinks/maxcompute-sink.md new file mode 100644 index 000000000..46398f81c --- /dev/null +++ b/docs/docs/sinks/maxcompute-sink.md @@ -0,0 +1,54 @@ +# MaxCompute sink + +### Datatype Protobuf + +MaxCompute sink has several responsibilities, including: + +1. Creating the MaxCompute table if it does not exist. +2. Updating the MaxCompute table schema based on the latest protobuf schema. +3. Translating protobuf messages into MaxCompute-compatible records and inserting them into MaxCompute tables. + +## MaxCompute Table Schema Update + +### Protobuf + +MaxCompute Sink updates the MaxCompute table schema in a separate table update operation. MaxCompute Sink +utilises [Stencil](https://github.com/goto/stencil) to parse protobuf messages, generate the schema, and update MaxCompute +tables with the latest schema. +The Stencil client periodically reloads the descriptor cache. The table schema update happens after the descriptor cache +has been reloaded.
+ +#### Supported Protobuf - MaxCompute Table Type Mapping + +| Protobuf Type | MaxCompute Type | +|------------------------------------------------------------------------------------|-----------------------------| +| bytes | BINARY | +| string | STRING | +| enum | STRING | +| float | FLOAT | +| double | DOUBLE | +| bool | BOOLEAN | +| int64, uint64, int32, uint32, fixed64, fixed32, sfixed64, sfixed32, sint64, sint32 | BIGINT | +| message | STRUCT | +| .google.protobuf.Timestamp | TIMESTAMP_NTZ | +| .google.protobuf.Struct | STRING (Json Serialised) | +| .google.protobuf.Duration | STRUCT | +| map | ARRAY