From e6ac79839685a7445d5423ccff63d9b56334a402 Mon Sep 17 00:00:00 2001 From: spenes Date: Wed, 13 Nov 2024 15:08:35 +0300 Subject: [PATCH 1/4] Add slash at the end of the load path (#1366) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shredder groups entities with the same schema model-revision-addition in the same batch under the same folder. Let’s say you have events with versions `1-0-0`, `1-0-1` and `1-0-2` of `com.acme.test` in the same batch. In that case, the resulting run folder will have the following subfolders: ``` output=good/vendor=com.acme/name=test/format=tsv/model=1/revision=0/addition=0 output=good/vendor=com.acme/name=test/format=tsv/model=1/revision=0/addition=1 output=good/vendor=com.acme/name=test/format=tsv/model=1/revision=0/addition=2 ``` Before the fix, Loader was using S3 paths without a trailing slash (/) in the generated COPY statements. This works fine in most cases. However, a problem arises when the same batch contains events with `1-0-1` and `1-0-11`. In that case, the run folder will have the following subfolders: ``` output=good/vendor=com.acme/name=test/format=tsv/model=1/revision=0/addition=1 output=good/vendor=com.acme/name=test/format=tsv/model=1/revision=0/addition=11 ``` When Loader tries to copy the entities in `/model=1/revision=0/addition=1` to the respective table with a COPY statement, Redshift tries to copy the entities under `/model=1/revision=0/addition=11` as well, since both paths share the same prefix. The copy then fails because the data under `/model=1/revision=0/addition=11` doesn’t have the same structure as `1-0-1`. Putting a slash at the end of the path solves the problem. After that change, only entities under `model=1/revision=0/addition=1` are copied, as expected. 
--- .../snowplow/rdbloader/discovery/ShreddedType.scala | 4 ++-- .../snowplow/loader/redshift/RedshiftSpec.scala | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/discovery/ShreddedType.scala b/modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/discovery/ShreddedType.scala index 733022e65..b3839bc41 100644 --- a/modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/discovery/ShreddedType.scala +++ b/modules/loader/src/main/scala/com/snowplowanalytics/snowplow/rdbloader/discovery/ShreddedType.scala @@ -64,7 +64,7 @@ object ShreddedType { */ final case class Json(info: Info, jsonPaths: BlobStorage.Key) extends ShreddedType { def getLoadPath: String = - s"${info.base}${Common.GoodPrefix}/vendor=${info.vendor}/name=${info.name}/format=json/model=${info.version.model}/revision=${info.version.revision}/addition=${info.version.addition}" + s"${info.base}${Common.GoodPrefix}/vendor=${info.vendor}/name=${info.name}/format=json/model=${info.version.model}/revision=${info.version.revision}/addition=${info.version.addition}/" def show: String = s"${info.toCriterion.asString} ($jsonPaths)" } @@ -78,7 +78,7 @@ object ShreddedType { */ final case class Tabular(info: Info) extends ShreddedType { def getLoadPath: String = - s"${info.base}${Common.GoodPrefix}/vendor=${info.vendor}/name=${info.name}/format=tsv/model=${info.version.model}/revision=${info.version.revision}/addition=${info.version.addition}" + s"${info.base}${Common.GoodPrefix}/vendor=${info.vendor}/name=${info.name}/format=tsv/model=${info.version.model}/revision=${info.version.revision}/addition=${info.version.addition}/" def show: String = s"${info.toCriterion.asString} TSV" } diff --git a/modules/redshift-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/redshift/RedshiftSpec.scala 
b/modules/redshift-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/redshift/RedshiftSpec.scala index 6b37db250..eb1094610 100644 --- a/modules/redshift-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/redshift/RedshiftSpec.scala +++ b/modules/redshift-loader/src/test/scala/com/snowplowanalytics/snowplow/loader/redshift/RedshiftSpec.scala @@ -144,8 +144,8 @@ class RedshiftSpec extends Specification { result.toList must containTheSameElementsAs( List( "COPY events FROM s3://my-bucket/my-path/", // atomic - "COPY com_acme_event_2 FROM s3://my-bucket/my-path/output=good/vendor=com.acme/name=event/format=tsv/model=2/revision=0/addition=0", - "COPY com_acme_event_3 FROM s3://my-bucket/my-path/output=good/vendor=com.acme/name=event/format=tsv/model=3/revision=0/addition=0" + "COPY com_acme_event_2 FROM s3://my-bucket/my-path/output=good/vendor=com.acme/name=event/format=tsv/model=2/revision=0/addition=0/", + "COPY com_acme_event_3 FROM s3://my-bucket/my-path/output=good/vendor=com.acme/name=event/format=tsv/model=3/revision=0/addition=0/" ) ) } From 92db8599c2e768c01eb8a20adf7f79a3ee900d03 Mon Sep 17 00:00:00 2001 From: spenes Date: Thu, 21 Nov 2024 13:53:10 +0300 Subject: [PATCH 2/4] Bump Iglu Scala Client to 4.0.1 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 8b15f5433..9e6134e83 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -15,7 +15,7 @@ object Dependencies { object V { // Scala (Loader) val decline = "2.4.1" - val igluClient = "4.0.0" + val igluClient = "4.0.1" val igluCore = "1.1.1" val badrows = "2.2.0" val analyticsSdk = "3.1.0" From a1e782f13630a1d00608d671f32b8a1b163ae235 Mon Sep 17 00:00:00 2001 From: spenes Date: Thu, 21 Nov 2024 14:16:39 +0300 Subject: [PATCH 3/4] Upgrade dependencies to fix some vulnerabilities --- project/Dependencies.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 
deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 9e6134e83..c06ef0727 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -64,7 +64,7 @@ object Dependencies { val slf4j = "2.0.3" val redshiftJdbc = "1.2.55.1083" - val snowflakeJdbc = "3.13.30" + val snowflakeJdbc = "3.20.0" val enumeratum = "1.7.0" val aws = "1.12.677" val aws2 = "2.23.17" @@ -77,6 +77,7 @@ object Dependencies { val jettison = "1.5.4" // Fix CVE val reactorNetty = "1.0.39" // Fix CVE val jettyHttp = "9.4.51.v20230217" // Fix CVE + val jettyServer = "9.4.56.v20240826" // Fix CVE val zookeeper = "3.9.2" // Fix CVE val dnsjava = "3.6.1" // Fix CVE @@ -187,6 +188,7 @@ object Dependencies { val jettison = "org.codehaus.jettison" % "jettison" % V.jettison val reactorNetty = "io.projectreactor.netty" % "reactor-netty-http" % V.reactorNetty val jettyHttp = "org.eclipse.jetty" % "jetty-http" % V.jettyHttp + val jettyServer = "org.eclipse.jetty" % "jetty-server" % V.jettyServer // Java (Shredder) val dynamodb = "com.amazonaws" % "aws-java-sdk-dynamodb" % V.aws @@ -261,6 +263,7 @@ object Dependencies { avro, reactorNetty, jettyHttp, + jettyServer, dnsjava ) From deca7147b1af087474ee4809ba72c8c0aeb56aff Mon Sep 17 00:00:00 2001 From: spenes Date: Thu, 21 Nov 2024 16:55:50 +0300 Subject: [PATCH 4/4] Prepare for 6.1.2 release --- CHANGELOG | 6 ++++++ README.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index 2a47a9734..3bd271b5b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Version 6.1.2 (2024-11-21) +-------------------------- +Add slash at the end of the load path (#1366) +Bump Iglu Scala Client to 4.0.1 (#1367) +Upgrade dependencies to fix some vulnerabilities (#1367) + Version 6.1.1 (2024-10-09) -------------------------- Upgrade dependencies to fix some vulnerabilites (#1364) diff --git a/README.md b/README.md index ff8fccd9b..1d1585ce6 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 
@@ Licensed under the [Snowplow Limited Use License Agreement][license]. _(If you a [build-image]: https://github.com/snowplow/snowplow-rdb-loader/workflows/CI/badge.svg [build]: https://github.com/snowplow/snowplow-rdb-loader/actions/workflows/ci.yml -[release-image]: https://img.shields.io/badge/release-6.1.1-blue.svg?style=flat +[release-image]: https://img.shields.io/badge/release-6.1.2-blue.svg?style=flat [releases]: https://github.com/snowplow/snowplow-rdb-loader/releases [license]: https://docs.snowplow.io/limited-use-license-1.0