From 65722b22af9f28ebadc07a25c72f1bd7a3d733ce Mon Sep 17 00:00:00 2001 From: Zhihao Zhang <12044174+izhangzhihao@users.noreply.github.com> Date: Sun, 1 Oct 2023 19:08:51 +0800 Subject: [PATCH] Add support for Spark 3.5 (#16) --- .github/workflows/build.yml | 3 ++ .github/workflows/release.yml | 3 ++ .gitignore | 1 + README.md | 1 + core/build.gradle | 18 +++++++++-- spark/build.gradle | 32 +++++++++++++------ .../spark/extension/UDFExtension.scala | 2 +- .../spark/end2end/delta/DeltaLakeSpec.scala | 4 +-- .../spark/end2end/delta/FlyDeltaSpec.scala | 4 +-- 9 files changed, 51 insertions(+), 17 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55a3c52..ce04872 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,6 +43,9 @@ jobs: - scalaVersion: 2.13 sparkVersion: 3.4.0 scalaCompt: 2.13.10 + - scalaVersion: 2.13 + sparkVersion: 3.5.0 + scalaCompt: 2.13.12 steps: - uses: actions/checkout@v2 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index aecb2b6..803ba57 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -41,6 +41,9 @@ jobs: - scalaVersion: 2.13 sparkVersion: 3.4.0 scalaCompt: 2.13.10 + - scalaVersion: 2.13 + sparkVersion: 3.5.0 + scalaCompt: 2.13.12 steps: - uses: actions/checkout@v2 diff --git a/.gitignore b/.gitignore index be473aa..ae381a3 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,4 @@ metastore_db *.jar ~$*.xlsx spark-warehouse +**/bin diff --git a/README.md b/README.md index 64313f5..2806e05 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ The compatible versions of [Spark](http://spark.apache.org/) are as follows: | 3.2.x | 2.12 / 2.13 | 3.3.x | 2.12 / 2.13 | 3.4.x | 2.12 / 2.13 +| 3.5.x | 2.13 diff --git a/core/build.gradle b/core/build.gradle index ff1905a..4c9ae29 100644 --- a/core/build.gradle +++ b/core/build.gradle @@ -45,9 +45,15 @@ dependencies { implementation 'org.apache.commons:commons-lang3:3.10' implementation 'info.picocli:picocli:4.6.3' - implementation "io.circe:circe-yaml_$scalaVersion:0.11.0-M1" - implementation "io.circe:circe-generic_$scalaVersion:0.12.0-M3" - implementation "io.circe:circe-generic-extras_$scalaVersion:0.12.0-M3" + if (sparkVersion.startsWith("3.5")) { + implementation "io.circe:circe-yaml_$scalaVersion:0.15.0-RC1" + implementation "io.circe:circe-generic_$scalaVersion:0.15.0-M1" + implementation "io.circe:circe-generic-extras_$scalaVersion:0.14.3" + } else { + implementation "io.circe:circe-yaml_$scalaVersion:0.11.0-M1" + implementation "io.circe:circe-generic_$scalaVersion:0.12.0-M3" + implementation "io.circe:circe-generic-extras_$scalaVersion:0.12.0-M3" + } compileOnly 'org.projectlombok:lombok:1.18.22' annotationProcessor 'org.projectlombok:lombok:1.18.22' // https://mvnrepository.com/artifact/com.jcraft/jsch @@ -83,6 +89,12 @@ dependencies { strictly '2.14.2' } } + } else if (sparkVersion.startsWith("3.5")) { + implementation("com.fasterxml.jackson.module:jackson-module-scala_$scalaVersion") { + version { + strictly '2.15.2' + } + } } else { implementation("com.fasterxml.jackson.module:jackson-module-scala_$scalaVersion") { version { diff --git a/spark/build.gradle b/spark/build.gradle index 7e84dbe..3fef2b9 100644 --- a/spark/build.gradle +++ b/spark/build.gradle @@ -78,7 +78,9 @@ dependencies { implementation "org.apache.spark:spark-streaming_$scalaVersion:$sparkVersion" implementation "org.apache.spark:spark-streaming-kafka-0-10_$scalaVersion:$sparkVersion" implementation "org.apache.kafka:kafka-clients:2.0.0" - if (sparkVersion.startsWith("3.4")) { + if (sparkVersion.startsWith("3.5")) { + implementation "io.delta:delta-core_$scalaVersion:2.4.0" + } else if (sparkVersion.startsWith("3.4")) { implementation "io.delta:delta-core_$scalaVersion:2.4.0" } else if (sparkVersion.startsWith("3.3")) { implementation "io.delta:delta-core_$scalaVersion:2.3.0" @@ -147,6 +149,12 @@ dependencies { strictly '2.14.2' } } + } else if (sparkVersion.startsWith("3.5")) { + implementation("com.fasterxml.jackson.core:jackson-databind") { + version { + strictly '2.15.2' + } + } } else { implementation("com.fasterxml.jackson.core:jackson-databind") { version { @@ -318,12 +326,16 @@ shadowJar { } else if (sparkVersion.startsWith("3")) { include dependency("org.apache.kudu:kudu-spark3_$scalaVersion:1.15.0") } - include dependency("io.circe:circe-yaml_$scalaVersion:0.11.0-M1") - include dependency("io.circe:circe-core_$scalaVersion:0.12.0-M3") - include dependency("io.circe:circe-generic_$scalaVersion:0.12.0-M3") - include dependency("io.circe:circe-generic-extras_$scalaVersion:0.12.0-M3") - include dependency("org.typelevel:cats-core_$scalaVersion:2.0.0-M4") - include dependency("org.typelevel:cats-kernel_$scalaVersion:2.0.0-M4") + if (sparkVersion.startsWith("3.5")) { + addDeps("io.circe:circe-yaml_$scalaVersion:0.15.0-RC1") + addDeps("io.circe:circe-generic_$scalaVersion:0.15.0-M1") + addDeps("io.circe:circe-generic-extras_$scalaVersion:0.14.3") + } else { + addDeps("io.circe:circe-yaml_$scalaVersion:0.11.0-M1") + addDeps("io.circe:circe-generic_$scalaVersion:0.12.0-M3") + addDeps("io.circe:circe-generic-extras_$scalaVersion:0.12.0-M3") + } + include dependency("org.postgresql:postgresql:42.2.14") include dependency("com.jcraft:jsch:0.1.55") include dependency("com.cloudera:ImpalaJDBC41:2.6.3") @@ -364,7 +376,9 @@ shadowJar { include dependency('info.picocli:picocli:4.6.3') include dependency("net.liftweb:lift-json_$scalaVersion:3.4.3") - if (sparkVersion.startsWith("3.4")) { + if (sparkVersion.startsWith("3.5")) { + addDeps("io.delta:delta-core_$scalaVersion:2.4.0") + } else if (sparkVersion.startsWith("3.4")) { addDeps("io.delta:delta-core_$scalaVersion:2.4.0") } else if (sparkVersion.startsWith("3.3")) { addDeps("io.delta:delta-core_$scalaVersion:2.3.0") @@ -603,7 +617,7 @@ if (scalaVersion.startsWith('2.11')) { } } } - } else if (sparkVersion.startsWith("3.2") || sparkVersion.startsWith("3.3") || sparkVersion.startsWith("3.4")) { + } else if (sparkVersion.startsWith("3.2") || sparkVersion.startsWith("3.3") || sparkVersion.startsWith("3.4")|| sparkVersion.startsWith("3.5")) { sourceSets { main { scala { diff --git a/spark/src/main/spark_3.2_scala_212/com/sharpdata/sharpetl/spark/extension/UDFExtension.scala b/spark/src/main/spark_3.2_scala_212/com/sharpdata/sharpetl/spark/extension/UDFExtension.scala index 10c2aae..9947be0 100644 --- a/spark/src/main/spark_3.2_scala_212/com/sharpdata/sharpetl/spark/extension/UDFExtension.scala +++ b/spark/src/main/spark_3.2_scala_212/com/sharpdata/sharpetl/spark/extension/UDFExtension.scala @@ -57,7 +57,7 @@ object UDFExtension { })) //TODO: 可能不工作 - val inputEncoders = Seq(Option(RowEncoder.apply(schema).resolveAndBind())) + //val inputEncoders = Seq(Option(RowEncoder.apply(schema).resolveAndBind())) val returnType = ScalaReflection.schemaFor(methodSymbol.returnType).dataType val fun = generateFunction( diff --git a/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/DeltaLakeSpec.scala b/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/DeltaLakeSpec.scala index 32ec537..58e6b12 100644 --- a/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/DeltaLakeSpec.scala +++ b/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/DeltaLakeSpec.scala @@ -38,8 +38,8 @@ class DeltaLakeSpec extends DeltaSuit { ) it("delta should works") { - if (spark.version.startsWith("2.3")) { - ETLLogger.error("Delta Lake does NOT support Spark 2.3.x") + if (spark.version.startsWith("2.3") || spark.version.startsWith("3.5")) { + ETLLogger.error("Delta Lake does NOT support Spark 2.3.x and Spark 3.5.x") } else if (spark.version.startsWith("2.4") || spark.version.startsWith("3.0") || spark.version.startsWith("3.1")) { ETLLogger.error("Delta Lake does not works well on Spark 2.4.x, " + "CREATE TABLE USING delta is not supported by Spark before 3.0.0 and Delta Lake before 0.7.0.") diff --git a/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/FlyDeltaSpec.scala b/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/FlyDeltaSpec.scala index fb3ef5d..fc34aa8 100644 --- a/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/FlyDeltaSpec.scala +++ b/spark/src/test/scala/com/github/sharpdata/sharpetl/spark/end2end/delta/FlyDeltaSpec.scala @@ -15,8 +15,8 @@ class FlyDeltaSpec extends AnyFunSpec with BeforeAndAfterEach { it("should just run with delta") { - if (spark.version.startsWith("2.3")) { - ETLLogger.error("Delta Lake does NOT support Spark 2.3.x") + if (spark.version.startsWith("2.3") || spark.version.startsWith("3.5")) { + ETLLogger.error("Delta Lake does NOT support Spark 2.3.x and Spark 3.5.x") } else if (spark.version.startsWith("2.4") || spark.version.startsWith("3.0") || spark.version.startsWith("3.1")) { ETLLogger.error("Delta Lake does not works well on Spark 2.4.x, " + "CREATE TABLE USING delta is not supported by Spark before 3.0.0 and Delta Lake before 0.7.0.")