From f4ad58881200bcae06eafa41c46f42bdf542cb99 Mon Sep 17 00:00:00 2001 From: Johnny Schmidt Date: Wed, 20 Nov 2024 16:55:07 -0800 Subject: [PATCH] Bulk Load CDK: S3V2: Spec Aligns Perfectly with V1 (#48584) --- .../load/command/DestinationConfiguration.kt | 3 + .../command/aws/AWSAccessKeySpecification.kt | 10 ++- .../command/aws/AWSArnRoleSpecification.kt | 4 - .../ObjectStorageCompressionSpecification.kt | 4 +- .../ObjectStorageFormatSpecification.kt | 49 ++++++----- .../load/command/s3/S3BucketSpecification.kt | 79 +++++++++-------- .../load/command/s3/S3PathSpecification.kt | 2 +- .../connectors/destination-s3-v2/build.gradle | 7 ++ .../destination-s3-v2/metadata.yaml | 2 +- .../src/main/kotlin/S3V2Specification.kt | 43 +++++++++- .../resources/expected-spec-cloud.json | 84 ++++++++++++------- .../resources/expected-spec-oss.json | 84 ++++++++++++------- 12 files changed, 239 insertions(+), 132 deletions(-) diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt index 34be7c37d5b7..d8b64143a8d5 100644 --- a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt @@ -20,6 +20,9 @@ import java.nio.file.Path * * - Add any required custom fields to the spec w/ jackson annotations * + * - Add annotation overrides (note that this will replace the original annotation, so to extend an + * existing annotation, you must copy the original annotation and add the new fields). + * * - Create a class `{MyDestination}Configuration` extending [DestinationConfiguration] * * - Add the corresponding mixin `...ConfigurationProvider`s for any added spec mixins diff --git a/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSAccessKeySpecification.kt b/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSAccessKeySpecification.kt index 524899919643..f599a8806487 100644 --- a/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSAccessKeySpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSAccessKeySpecification.kt @@ -21,7 +21,10 @@ interface AWSAccessKeySpecification { "The access key ID to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket. Read more here." ) @get:JsonProperty("access_key_id") - @get:JsonSchemaInject(json = """{"examples":["A012345678910EXAMPLE"]}""") + @get:JsonSchemaInject( + json = + """{"examples":["A012345678910EXAMPLE"],"airbyte_secret": true,"always_show": true}""" + ) val accessKeyId: String? @get:JsonSchemaTitle("S3 Access Key") @@ -29,7 +32,10 @@ interface AWSAccessKeySpecification { "The corresponding secret to the access key ID. Read more here" ) @get:JsonProperty("secret_access_key") - @get:JsonSchemaInject(json = """{"examples":["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"]}""") + @get:JsonSchemaInject( + json = + """{"examples":["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"],"airbyte_secret": true,"always_show": true}""" + ) val secretAccessKey: String? fun toAWSAccessKeyConfiguration(): AWSAccessKeyConfiguration { diff --git a/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSArnRoleSpecification.kt b/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSArnRoleSpecification.kt index 51689a2f5825..7c48b7997672 100644 --- a/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSArnRoleSpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-aws/src/main/kotlin/io/airbyte/cdk/load/command/aws/AWSArnRoleSpecification.kt @@ -6,16 +6,12 @@ package io.airbyte.cdk.load.command.aws import com.fasterxml.jackson.annotation.JsonProperty import com.fasterxml.jackson.annotation.JsonPropertyDescription -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle interface AWSArnRoleSpecification { @get:JsonSchemaTitle("Role ARN") @get:JsonPropertyDescription("The Role ARN.") @get:JsonProperty("role_arn") - @get:JsonSchemaInject( - json = """{"examples":["arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId"]}""" - ) val roleArn: String? fun toAWSArnRoleConfiguration(): AWSArnRoleConfiguration { diff --git a/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageCompressionSpecification.kt b/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageCompressionSpecification.kt index e1a5e6c64f52..7f5761174f4f 100644 --- a/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageCompressionSpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageCompressionSpecification.kt @@ -32,10 +32,10 @@ interface ObjectStorageCompressionSpecificationProvider { "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", ) @get:JsonProperty("compression") - val compression: ObjectStorageCompressionSpecification + val compression: ObjectStorageCompressionSpecification? fun toCompressionConfiguration(): ObjectStorageCompressionConfiguration<*> { - return when (compression) { + return when (compression ?: NoCompressionSpecification()) { is NoCompressionSpecification -> ObjectStorageCompressionConfiguration(NoopProcessor) is GZIPCompressionSpecification -> ObjectStorageCompressionConfiguration(GZIPProcessor) } diff --git a/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageFormatSpecification.kt b/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageFormatSpecification.kt index 0833649e91e2..852818140589 100644 --- a/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageFormatSpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-object-storage/src/main/kotlin/io/airbyte/cdk/load/command/object_storage/ObjectStorageFormatSpecification.kt @@ -9,6 +9,7 @@ import com.fasterxml.jackson.annotation.JsonPropertyDescription import com.fasterxml.jackson.annotation.JsonSubTypes import com.fasterxml.jackson.annotation.JsonTypeInfo import com.fasterxml.jackson.annotation.JsonValue +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import io.airbyte.cdk.load.command.avro.AvroCompressionConfiguration import io.airbyte.cdk.load.command.avro.AvroCompressionConfigurationProvider @@ -89,8 +90,8 @@ interface ObjectStorageFormatSpecificationProvider { property = "format_type" ) @JsonSubTypes( - JsonSubTypes.Type(value = JsonFormatSpecification::class, name = "JSONL"), JsonSubTypes.Type(value = CSVFormatSpecification::class, name = "CSV"), + JsonSubTypes.Type(value = JsonFormatSpecification::class, name = "JSONL"), JsonSubTypes.Type(value = AvroFormatSpecification::class, name = "Avro"), JsonSubTypes.Type(value = ParquetFormatSpecification::class, name = "Parquet") ) @@ -98,8 +99,8 @@ sealed class ObjectStorageFormatSpecification( @JsonSchemaTitle("Format Type") open val formatType: Type ) { enum class Type(@get:JsonValue val typeName: String) { - JSONL("JSONL"), CSV("CSV"), + JSONL("JSONL"), AVRO("Avro"), PARQUET("Parquet") } @@ -109,39 +110,39 @@ interface FlatteningSpecificationProvider { @get:JsonSchemaTitle("Flattening") @get:JsonProperty("flattening") val flattening: Flattening? enum class Flattening(@get:JsonValue val flatteningName: String) { - NO_FLATTENING("No Flattening"), + NO_FLATTENING("No flattening"), ROOT_LEVEL_FLATTENING("Root level flattening") } } -/** JSONL */ -@JsonSchemaTitle("JSON Lines: Newline-delimited JSON") -class JsonFormatSpecification( +/** CSV */ +@JsonSchemaTitle("CSV: Comma-Separated Values") +class CSVFormatSpecification( @JsonSchemaTitle("Format Type") @JsonProperty("format_type") - override val formatType: Type = Type.JSONL + override val formatType: Type = Type.CSV ) : ObjectStorageFormatSpecification(formatType), - ObjectStorageCompressionSpecificationProvider, - FlatteningSpecificationProvider { - override val compression: ObjectStorageCompressionSpecification = NoCompressionSpecification() - override val flattening: FlatteningSpecificationProvider.Flattening? = + FlatteningSpecificationProvider, + ObjectStorageCompressionSpecificationProvider { + override val flattening: FlatteningSpecificationProvider.Flattening = FlatteningSpecificationProvider.Flattening.NO_FLATTENING + override val compression: ObjectStorageCompressionSpecification? = NoCompressionSpecification() } -/** CSV */ -@JsonSchemaTitle("CSV: Comma-Separated Values") -class CSVFormatSpecification( +/** JSONL */ +@JsonSchemaTitle("JSON Lines: Newline-delimited JSON") +class JsonFormatSpecification( @JsonSchemaTitle("Format Type") @JsonProperty("format_type") - override val formatType: Type = Type.CSV + override val formatType: Type = Type.JSONL ) : ObjectStorageFormatSpecification(formatType), - ObjectStorageCompressionSpecificationProvider, - FlatteningSpecificationProvider { - override val compression: ObjectStorageCompressionSpecification = NoCompressionSpecification() + FlatteningSpecificationProvider, + ObjectStorageCompressionSpecificationProvider { override val flattening: FlatteningSpecificationProvider.Flattening? = FlatteningSpecificationProvider.Flattening.NO_FLATTENING + override val compression: ObjectStorageCompressionSpecification? = NoCompressionSpecification() } /** AVRO */ @@ -149,6 +150,7 @@ class CSVFormatSpecification( class AvroFormatSpecification( @JsonSchemaTitle("Format Type") @JsonProperty("format_type") + @JsonSchemaInject(json = """{"order":0}""") override val formatType: Type = Type.AVRO ) : ObjectStorageFormatSpecification(formatType) { @@ -157,6 +159,7 @@ class AvroFormatSpecification( "The compression algorithm used to compress data. Default to no compression." ) @JsonProperty("compression_codec") + @JsonSchemaInject(json = """{"order":1}""") val compressionCodec: AvroFormatCompressionCodecSpecification = AvroFormatNoCompressionCodecSpecification() } @@ -180,7 +183,7 @@ class ParquetFormatSpecification( @JsonSchemaTitle("Compression Codec") @JsonPropertyDescription("The compression algorithm used to compress data pages.") - @JsonProperty("compression_codec") + @JsonProperty("compression_codec", defaultValue = "UNCOMPRESSED") val compressionCodec: ParquetFormatCompressionCodec? = ParquetFormatCompressionCodec.UNCOMPRESSED @@ -188,28 +191,28 @@ class ParquetFormatSpecification( @JsonPropertyDescription( "This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. Default: 128 MB." ) - @JsonProperty("block_size_mb") + @JsonProperty("block_size_mb", defaultValue = "128") val blockSizeMb: Int? = 128 @JsonSchemaTitle("Max Padding Size (MB)") @JsonPropertyDescription( "Maximum size allowed as padding to align row groups. This is also the minimum size of a row group. Default: 8 MB." ) - @JsonProperty("max_padding_size_mb") + @JsonProperty("max_padding_size_mb", defaultValue = "8") val maxPaddingSizeMb: Int? = 8 @JsonSchemaTitle("Page Size (KB)") @JsonPropertyDescription( "The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. Default: 1024 KB." ) - @JsonProperty("page_size_kb") + @JsonProperty("page_size_kb", defaultValue = "1024") val pageSizeKb: Int? = 1024 @JsonSchemaTitle("Dictionary Page Size (KB)") @JsonPropertyDescription( "There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. Default: 1024 KB." ) - @JsonProperty("dictionary_page_size_kb") + @JsonProperty("dictionary_page_size_kb", defaultValue = "1024") val dictionaryPageSizeKb: Int? = 1024 @JsonSchemaTitle("Dictionary Encoding") diff --git a/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3BucketSpecification.kt b/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3BucketSpecification.kt index 8b537653175a..033bd92d8392 100644 --- a/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3BucketSpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3BucketSpecification.kt @@ -6,43 +6,46 @@ package io.airbyte.cdk.load.command.s3 import com.fasterxml.jackson.annotation.JsonProperty import com.fasterxml.jackson.annotation.JsonPropertyDescription +import com.fasterxml.jackson.annotation.JsonValue import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle -enum class S3BucketRegion { - `af-south-1`, - `ap-east-1`, - `ap-northeast-1`, - `ap-northeast-2`, - `ap-northeast-3`, - `ap-south-1`, - `ap-south-2`, - `ap-southeast-1`, - `ap-southeast-2`, - `ap-southeast-3`, - `ap-southeast-4`, - `ca-central-1`, - `ca-west-1`, - `cn-north-1`, - `cn-northwest-1`, - `eu-central-1`, - `eu-central-2`, - `eu-north-1`, - `eu-south-1`, - `eu-south-2`, - `eu-west-1`, - `eu-west-2`, - `eu-west-3`, - `il-central-1`, - `me-central-1`, - `me-south-1`, - `sa-east-1`, - `us-east-1`, - `us-east-2`, - `us-gov-east-1`, - `us-gov-west-1`, - `us-west-1`, - `us-west-2` +// First region is a kotlin-legal empty string +enum class S3BucketRegion(@get:JsonValue val region: String) { + NO_REGION(""), + `af-south-1`("af-south-1"), + `ap-east-1`("ap-east-1"), + `ap-northeast-1`("ap-northeast-1"), + `ap-northeast-2`("ap-northeast-2"), + `ap-northeast-3`("ap-northeast-3"), + `ap-south-1`("ap-south-1"), + `ap-south-2`("ap-south-2"), + `ap-southeast-1`("ap-southeast-1"), + `ap-southeast-2`("ap-southeast-2"), + `ap-southeast-3`("ap-southeast-3"), + `ap-southeast-4`("ap-southeast-4"), + `ca-central-1`("ca-central-1"), + `ca-west-1`("ca-west-1"), + `cn-north-1`("cn-north-1"), + `cn-northwest-1`("cn-northwest-1"), + `eu-central-1`("eu-central-1"), + `eu-central-2`("eu-central-2"), + `eu-north-1`("eu-north-1"), + `eu-south-1`("eu-south-1"), + `eu-south-2`("eu-south-2"), + `eu-west-1`("eu-west-1"), + `eu-west-2`("eu-west-2"), + `eu-west-3`("eu-west-3"), + `il-central-1`("il-central-1"), + `me-central-1`("me-central-1"), + `me-south-1`("me-south-1"), + `sa-east-1`("sa-east-1"), + `us-east-1`("us-east-1"), + `us-east-2`("us-east-2"), + `us-gov-east-1`("us-gov-east-1"), + `us-gov-west-1`("us-gov-west-1"), + `us-west-1`("us-west-1"), + `us-west-2`("us-west-2") } /** @@ -66,7 +69,7 @@ interface S3BucketSpecification { ) @get:JsonProperty("s3_bucket_region", defaultValue = "") @get:JsonSchemaInject(json = """{"examples":["us-east-1"]}""") - val s3BucketRegion: S3BucketRegion + val s3BucketRegion: S3BucketRegion? @get:JsonSchemaTitle("S3 Endpoint") @get:JsonPropertyDescription( @@ -77,7 +80,11 @@ interface S3BucketSpecification { val s3Endpoint: String? fun toS3BucketConfiguration(): S3BucketConfiguration { - return S3BucketConfiguration(s3BucketName, s3BucketRegion, s3Endpoint) + return S3BucketConfiguration( + s3BucketName, + s3BucketRegion ?: S3BucketRegion.NO_REGION, + s3Endpoint + ) } } diff --git a/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3PathSpecification.kt b/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3PathSpecification.kt index ba973732a375..1c99bd9caa1d 100644 --- a/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3PathSpecification.kt +++ b/airbyte-cdk/bulk/toolkits/load-s3/src/main/kotlin/io/airbyte/cdk/load/command/s3/S3PathSpecification.kt @@ -61,7 +61,7 @@ interface S3PathSpecification { @get:JsonPropertyDescription( "Path to use when staging data in the bucket directory. Airbyte will stage data here during sync and/or write small manifest/recovery files." ) - @get:JsonProperty("s3_staging_prefix", defaultValue = "{s3_bucket_path}/__airbyte_tmp") + @get:JsonProperty("s3_staging_prefix") @get:JsonSchemaInject(json = """{"examples":["__staging/data_sync/test"]}""") val s3StagingPrefix: String? diff --git a/airbyte-integrations/connectors/destination-s3-v2/build.gradle b/airbyte-integrations/connectors/destination-s3-v2/build.gradle index 82be645fb445..14abdd09d0ff 100644 --- a/airbyte-integrations/connectors/destination-s3-v2/build.gradle +++ b/airbyte-integrations/connectors/destination-s3-v2/build.gradle @@ -29,3 +29,10 @@ dependencies { integrationTestLegacyImplementation testFixtures(project(":airbyte-cdk:java:airbyte-cdk:airbyte-cdk-s3-destinations")) // integrationTestLegacyImplementation testFixtures("io.airbyte.cdk:airbyte-cdk-db-destinations:0.47.0") } + +// Exclude conflicting log4j-over-slf4j dependency +configurations { + all { + exclude group: 'log4j-over-slf4j', module: 'log4j-over-slf4j' + } +} diff --git a/airbyte-integrations/connectors/destination-s3-v2/metadata.yaml b/airbyte-integrations/connectors/destination-s3-v2/metadata.yaml index bdcf01acb8c6..95ac5b49550c 100644 --- a/airbyte-integrations/connectors/destination-s3-v2/metadata.yaml +++ b/airbyte-integrations/connectors/destination-s3-v2/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: file connectorType: destination definitionId: d6116991-e809-4c7c-ae09-c64712df5b66 - dockerImageTag: 0.2.3 + dockerImageTag: 0.2.4 dockerRepository: airbyte/destination-s3-v2 githubIssueLabel: destination-s3-v2 icon: s3.svg diff --git a/airbyte-integrations/connectors/destination-s3-v2/src/main/kotlin/S3V2Specification.kt b/airbyte-integrations/connectors/destination-s3-v2/src/main/kotlin/S3V2Specification.kt index 191d85a75056..f283c71c61a4 100644 --- a/airbyte-integrations/connectors/destination-s3-v2/src/main/kotlin/S3V2Specification.kt +++ b/airbyte-integrations/connectors/destination-s3-v2/src/main/kotlin/S3V2Specification.kt @@ -5,6 +5,7 @@ package io.airbyte.integrations.destination.s3_v2 import com.fasterxml.jackson.annotation.JsonProperty +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaInject import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import io.airbyte.cdk.command.ConfigurationSpecification import io.airbyte.cdk.load.command.aws.AWSAccessKeySpecification @@ -22,6 +23,7 @@ import jakarta.inject.Singleton @Singleton @JsonSchemaTitle("S3 V2 Destination Spec") +@JsonSchemaInject() class S3V2Specification : ConfigurationSpecification(), AWSAccessKeySpecification, @@ -29,17 +31,56 @@ class S3V2Specification : S3BucketSpecification, S3PathSpecification, ObjectStorageFormatSpecificationProvider { + + @get:JsonSchemaInject( + json = + """{"examples":["A012345678910EXAMPLE"],"airbyte_secret": true,"always_show": true,"order":0}""" + ) override val accessKeyId: String? = null + + @get:JsonSchemaInject( + json = + """{"examples":["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"],"airbyte_secret": true,"always_show": true,"order":1}""" + ) override val secretAccessKey: String? = null + + @get:JsonSchemaInject( + json = + """{"examples":["arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId"],"order":2}""" + ) override val roleArn: String? = null + + @get:JsonSchemaInject(json = """{"examples":["airbyte_sync"],"order":3}""") override val s3BucketName: String = "" + + @get:JsonSchemaInject(json = """{"examples":["data_sync/test"],"order":4}""") override val s3BucketPath: String = "" - override val s3BucketRegion: S3BucketRegion = S3BucketRegion.`us-west-1` + + @get:JsonSchemaInject(json = """{"examples":["us-east-1"],"order":5,"default":""}""") + override val s3BucketRegion: S3BucketRegion = S3BucketRegion.NO_REGION + + @get:JsonSchemaInject(json = """{"order":6}""") override val format: ObjectStorageFormatSpecification = JsonFormatSpecification() + + @get:JsonSchemaInject(json = """{"examples":["http://localhost:9000"],"order":7}""") override val s3Endpoint: String? = null + + @get:JsonSchemaInject( + json = + "{\"examples\":[\"\${NAMESPACE}/\${STREAM_NAME}/\${YEAR}_\${MONTH}_\${DAY}_\${EPOCH}_\"],\"order\":8}" + ) override val s3PathFormat: String? = null + + @get:JsonSchemaInject( + json = + "{\"examples\":[\"{date}\",\"{date:yyyy_MM}\",\"{timestamp}\",\"{part_number}\",\"{sync_id}\"],\"order\":9}" + ) override val fileNamePattern: String? = null + + @get:JsonSchemaInject(json = """{"order":10}""") override val useStagingDirectory: Boolean? = null + + @get:JsonSchemaInject(json = """{"examples":["__staging/data_sync/test"],"order":11}""") override val s3StagingPrefix: String? = null @JsonProperty("max_concurrent_uploads") diff --git a/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-cloud.json b/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-cloud.json index 473bb1818854..5b9f1ba2c8e9 100644 --- a/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-cloud.json +++ b/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-cloud.json @@ -10,49 +10,65 @@ "type" : "string", "description" : "The access key ID to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket. Read more here.", "title" : "S3 Key ID", - "examples" : [ "A012345678910EXAMPLE" ] + "examples" : [ "A012345678910EXAMPLE" ], + "airbyte_secret" : true, + "always_show" : true, + "order" : 0 }, "secret_access_key" : { "type" : "string", "description" : "The corresponding secret to the access key ID. Read more here", "title" : "S3 Access Key", - "examples" : [ "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" ] + "examples" : [ "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" ], + "airbyte_secret" : true, + "always_show" : true, + "order" : 1 }, "role_arn" : { "type" : "string", "description" : "The Role ARN.", "title" : "Role ARN", - "examples" : [ "arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId" ] + "examples" : [ "arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId" ], + "order" : 2 }, "s3_bucket_name" : { "type" : "string", "description" : "The name of the S3 bucket. Read more here.", "title" : "S3 Bucket Name", - "examples" : [ "airbyte_sync" ] + "examples" : [ "airbyte_sync" ], + "order" : 3 }, "s3_bucket_path" : { "type" : "string", "description" : "Directory under the S3 bucket where data will be written. Read more here", "title" : "S3 Bucket Path", - "examples" : [ "data_sync/test" ] + "examples" : [ "data_sync/test" ], + "order" : 4 }, "s3_bucket_region" : { "type" : "string", - "enum" : [ "af-south-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1", "ap-south-2", "ap-southeast-1", "ap-southeast-2", "ap-southeast-3", "ap-southeast-4", "ca-central-1", "ca-west-1", "cn-north-1", "cn-northwest-1", "eu-central-1", "eu-central-2", "eu-north-1", "eu-south-1", "eu-south-2", "eu-west-1", "eu-west-2", "eu-west-3", "il-central-1", "me-central-1", "me-south-1", "sa-east-1", "us-east-1", "us-east-2", "us-gov-east-1", "us-gov-west-1", "us-west-1", "us-west-2" ], + "enum" : [ "", "af-south-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1", "ap-south-2", "ap-southeast-1", "ap-southeast-2", "ap-southeast-3", "ap-southeast-4", "ca-central-1", "ca-west-1", "cn-north-1", "cn-northwest-1", "eu-central-1", "eu-central-2", "eu-north-1", "eu-south-1", "eu-south-2", "eu-west-1", "eu-west-2", "eu-west-3", "il-central-1", "me-central-1", "me-south-1", "sa-east-1", "us-east-1", "us-east-2", "us-gov-east-1", "us-gov-west-1", "us-west-1", "us-west-2" ], "description" : "The region of the S3 bucket. See here for all region codes.", "title" : "S3 Bucket Region", - "examples" : [ "us-east-1" ] + "examples" : [ "us-east-1" ], + "order" : 5, + "default" : "" }, "format" : { "oneOf" : [ { - "title" : "JSON Lines: Newline-delimited JSON", + "title" : "CSV: Comma-Separated Values", "type" : "object", "additionalProperties" : true, "properties" : { "format_type" : { "type" : "string", - "enum" : [ "JSONL" ], - "default" : "JSONL" + "enum" : [ "CSV" ], + "default" : "CSV" + }, + "flattening" : { + "type" : "string", + "enum" : [ "No flattening", "Root level flattening" ], + "title" : "Flattening" }, "compression" : { "oneOf" : [ { @@ -83,23 +99,23 @@ "description" : "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", "title" : "Compression", "type" : "object" - }, - "flattening" : { - "type" : "string", - "enum" : [ "No Flattening", "Root level flattening" ], - "title" : "Flattening" } }, - "required" : [ "format_type", "compression" ] + "required" : [ "format_type", "flattening" ] }, { - "title" : "CSV: Comma-Separated Values", + "title" : "JSON Lines: Newline-delimited JSON", "type" : "object", "additionalProperties" : true, "properties" : { "format_type" : { "type" : "string", - "enum" : [ "CSV" ], - "default" : "CSV" + "enum" : [ "JSONL" ], + "default" : "JSONL" + }, + "flattening" : { + "type" : "string", + "enum" : [ "No flattening", "Root level flattening" ], + "title" : "Flattening" }, "compression" : { "oneOf" : [ { @@ -130,14 +146,9 @@ "description" : "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", "title" : "Compression", "type" : "object" - }, - "flattening" : { - "type" : "string", - "enum" : [ "No Flattening", "Root level flattening" ], - "title" : "Flattening" } }, - "required" : [ "format_type", "compression" ] + "required" : [ "format_type" ] }, { "title" : "Avro: Apache Avro", "type" : "object", @@ -240,6 +251,7 @@ } ], "description" : "The compression algorithm used to compress data. Default to no compression.", "title" : "Compression Codec", + "order" : 1, "type" : "object" } }, @@ -256,27 +268,32 @@ }, "compression_codec" : { "type" : "string", + "default" : "UNCOMPRESSED", "enum" : [ "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO", "BROTLI", "LZ4", "ZSTD" ], "description" : "The compression algorithm used to compress data pages.", "title" : "Compression Codec" }, "block_size_mb" : { "type" : "integer", + "default" : 128, "description" : "This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. Default: 128 MB.", "title" : "Block Size (Row Group Size) (MB)" }, "max_padding_size_mb" : { "type" : "integer", + "default" : 8, "description" : "Maximum size allowed as padding to align row groups. This is also the minimum size of a row group. Default: 8 MB.", "title" : "Max Padding Size (MB)" }, "page_size_kb" : { "type" : "integer", + "default" : 1024, "description" : "The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. Default: 1024 KB.", "title" : "Page Size (KB)" }, "dictionary_page_size_kb" : { "type" : "integer", + "default" : 1024, "description" : "There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. Default: 1024 KB.", "title" : "Dictionary Page Size (KB)" }, @@ -290,38 +307,43 @@ } ], "description" : "Format of the data output. See here for more details", "title" : "Output Format", + "order" : 6, "type" : "object" }, "s3_endpoint" : { "type" : "string", "description" : "Your S3 endpoint url. Read more here", "title" : "S3 Endpoint", - "examples" : [ "http://localhost:9000" ] + "examples" : [ "http://localhost:9000" ], + "order" : 7 }, "s3_path_format" : { "type" : "string", "description" : "Format string on how data will be organized inside the bucket directory. Read more here", "title" : "S3 Path Format", - "examples" : [ "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" ] + "examples" : [ "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" ], + "order" : 8 }, "file_name_pattern" : { "type" : "string", "description" : "Pattern to match file names in the bucket directory. Read more here", "title" : "File Name Pattern", - "examples" : [ "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" ] + "examples" : [ "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" ], + "order" : 9 }, "use_staging_directory" : { "type" : "boolean", "default" : false, "description" : "Whether to use a staging directory in the bucket based on the s3_staging_prefix. If this is not set, airbyte will maintain sync integrity by adding metadata to each object.", - "title" : "Use a Staging Directory" + "title" : "Use a Staging Directory", + "order" : 10 }, "s3_staging_prefix" : { "type" : "string", - "default" : "{s3_bucket_path}/__airbyte_tmp", "description" : "Path to use when staging data in the bucket directory. Airbyte will stage data here during sync and/or write small manifest/recovery files.", "title" : "S3 Staging Prefix", - "examples" : [ "__staging/data_sync/test" ] + "examples" : [ "__staging/data_sync/test" ], + "order" : 11 }, "max_concurrent_uploads" : { "type" : "integer" diff --git a/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-oss.json b/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-oss.json index 473bb1818854..5b9f1ba2c8e9 100644 --- a/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-oss.json +++ b/airbyte-integrations/connectors/destination-s3-v2/src/test-integration/resources/expected-spec-oss.json @@ -10,49 +10,65 @@ "type" : "string", "description" : "The access key ID to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket. Read more here.", "title" : "S3 Key ID", - "examples" : [ "A012345678910EXAMPLE" ] + "examples" : [ "A012345678910EXAMPLE" ], + "airbyte_secret" : true, + "always_show" : true, + "order" : 0 }, "secret_access_key" : { "type" : "string", "description" : "The corresponding secret to the access key ID. Read more here", "title" : "S3 Access Key", - "examples" : [ "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" ] + "examples" : [ "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" ], + "airbyte_secret" : true, + "always_show" : true, + "order" : 1 }, "role_arn" : { "type" : "string", "description" : "The Role ARN.", "title" : "Role ARN", - "examples" : [ "arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId" ] + "examples" : [ "arn:aws:iam::123456789:role/ExternalIdIsYourWorkspaceId" ], + "order" : 2 }, "s3_bucket_name" : { "type" : "string", "description" : "The name of the S3 bucket. Read more here.", "title" : "S3 Bucket Name", - "examples" : [ "airbyte_sync" ] + "examples" : [ "airbyte_sync" ], + "order" : 3 }, "s3_bucket_path" : { "type" : "string", "description" : "Directory under the S3 bucket where data will be written. Read more here", "title" : "S3 Bucket Path", - "examples" : [ "data_sync/test" ] + "examples" : [ "data_sync/test" ], + "order" : 4 }, "s3_bucket_region" : { "type" : "string", - "enum" : [ "af-south-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1", "ap-south-2", "ap-southeast-1", "ap-southeast-2", "ap-southeast-3", "ap-southeast-4", "ca-central-1", "ca-west-1", "cn-north-1", "cn-northwest-1", "eu-central-1", "eu-central-2", "eu-north-1", "eu-south-1", "eu-south-2", "eu-west-1", "eu-west-2", "eu-west-3", "il-central-1", "me-central-1", "me-south-1", "sa-east-1", "us-east-1", "us-east-2", "us-gov-east-1", "us-gov-west-1", "us-west-1", "us-west-2" ], + "enum" : [ "", "af-south-1", "ap-east-1", "ap-northeast-1", "ap-northeast-2", "ap-northeast-3", "ap-south-1", "ap-south-2", "ap-southeast-1", "ap-southeast-2", "ap-southeast-3", "ap-southeast-4", "ca-central-1", "ca-west-1", "cn-north-1", "cn-northwest-1", "eu-central-1", "eu-central-2", "eu-north-1", "eu-south-1", "eu-south-2", "eu-west-1", "eu-west-2", "eu-west-3", "il-central-1", "me-central-1", "me-south-1", "sa-east-1", "us-east-1", "us-east-2", "us-gov-east-1", "us-gov-west-1", "us-west-1", "us-west-2" ], "description" : "The region of the S3 bucket. See here for all region codes.", "title" : "S3 Bucket Region", - "examples" : [ "us-east-1" ] + "examples" : [ "us-east-1" ], + "order" : 5, + "default" : "" }, "format" : { "oneOf" : [ { - "title" : "JSON Lines: Newline-delimited JSON", + "title" : "CSV: Comma-Separated Values", "type" : "object", "additionalProperties" : true, "properties" : { "format_type" : { "type" : "string", - "enum" : [ "JSONL" ], - "default" : "JSONL" + "enum" : [ "CSV" ], + "default" : "CSV" + }, + "flattening" : { + "type" : "string", + "enum" : [ "No flattening", "Root level flattening" ], + "title" : "Flattening" }, "compression" : { "oneOf" : [ { @@ -83,23 +99,23 @@ "description" : "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", "title" : "Compression", "type" : "object" - }, - "flattening" : { - "type" : "string", - "enum" : [ "No Flattening", "Root level flattening" ], - "title" : "Flattening" } }, - "required" : [ "format_type", "compression" ] + "required" : [ "format_type", "flattening" ] }, { - "title" : "CSV: Comma-Separated Values", + "title" : "JSON Lines: Newline-delimited JSON", "type" : "object", "additionalProperties" : true, "properties" : { "format_type" : { "type" : "string", - "enum" : [ "CSV" ], - "default" : "CSV" + "enum" : [ "JSONL" ], + "default" : "JSONL" + }, + "flattening" : { + "type" : "string", + "enum" : [ "No flattening", "Root level flattening" ], + "title" : "Flattening" }, "compression" : { "oneOf" : [ { @@ -130,14 +146,9 @@ "description" : "Whether the output files should be compressed. If compression is selected, the output filename will have an extra extension (GZIP: \".jsonl.gz\").", "title" : "Compression", "type" : "object" - }, - "flattening" : { - "type" : "string", - "enum" : [ "No Flattening", "Root level flattening" ], - "title" : "Flattening" } }, - "required" : [ "format_type", "compression" ] + "required" : [ "format_type" ] }, { "title" : "Avro: Apache Avro", "type" : "object", @@ -240,6 +251,7 @@ } ], "description" : "The compression algorithm used to compress data. Default to no compression.", "title" : "Compression Codec", + "order" : 1, "type" : "object" } }, @@ -256,27 +268,32 @@ }, "compression_codec" : { "type" : "string", + "default" : "UNCOMPRESSED", "enum" : [ "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO", "BROTLI", "LZ4", "ZSTD" ], "description" : "The compression algorithm used to compress data pages.", "title" : "Compression Codec" }, "block_size_mb" : { "type" : "integer", + "default" : 128, "description" : "This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. Default: 128 MB.", "title" : "Block Size (Row Group Size) (MB)" }, "max_padding_size_mb" : { "type" : "integer", + "default" : 8, "description" : "Maximum size allowed as padding to align row groups. This is also the minimum size of a row group. Default: 8 MB.", "title" : "Max Padding Size (MB)" }, "page_size_kb" : { "type" : "integer", + "default" : 1024, "description" : "The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. Default: 1024 KB.", "title" : "Page Size (KB)" }, "dictionary_page_size_kb" : { "type" : "integer", + "default" : 1024, "description" : "There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. Default: 1024 KB.", "title" : "Dictionary Page Size (KB)" }, @@ -290,38 +307,43 @@ } ], "description" : "Format of the data output. See here for more details", "title" : "Output Format", + "order" : 6, "type" : "object" }, "s3_endpoint" : { "type" : "string", "description" : "Your S3 endpoint url. Read more here", "title" : "S3 Endpoint", - "examples" : [ "http://localhost:9000" ] + "examples" : [ "http://localhost:9000" ], + "order" : 7 }, "s3_path_format" : { "type" : "string", "description" : "Format string on how data will be organized inside the bucket directory. Read more here", "title" : "S3 Path Format", - "examples" : [ "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" ] + "examples" : [ "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" ], + "order" : 8 }, "file_name_pattern" : { "type" : "string", "description" : "Pattern to match file names in the bucket directory. Read more here", "title" : "File Name Pattern", - "examples" : [ "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" ] + "examples" : [ "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" ], + "order" : 9 }, "use_staging_directory" : { "type" : "boolean", "default" : false, "description" : "Whether to use a staging directory in the bucket based on the s3_staging_prefix. If this is not set, airbyte will maintain sync integrity by adding metadata to each object.", - "title" : "Use a Staging Directory" + "title" : "Use a Staging Directory", + "order" : 10 }, "s3_staging_prefix" : { "type" : "string", - "default" : "{s3_bucket_path}/__airbyte_tmp", "description" : "Path to use when staging data in the bucket directory. Airbyte will stage data here during sync and/or write small manifest/recovery files.", "title" : "S3 Staging Prefix", - "examples" : [ "__staging/data_sync/test" ] + "examples" : [ "__staging/data_sync/test" ], + "order" : 11 }, "max_concurrent_uploads" : { "type" : "integer"