From 408b78d0dfdd95263436e25a3312b482de714ecd Mon Sep 17 00:00:00 2001 From: Yann Byron Date: Tue, 26 Nov 2024 15:53:06 +0800 Subject: [PATCH] [spark] adjust paimon spark structure (#4573) --- .github/workflows/utitcase-spark-3.x.yml | 2 +- .github/workflows/utitcase-spark-4.x.yml | 2 +- .github/workflows/utitcase.yml | 2 +- paimon-spark/paimon-spark-3.2/pom.xml | 10 +- paimon-spark/paimon-spark-3.3/pom.xml | 10 +- .../spark/sql/InsertOverwriteTest.scala | 4 +- paimon-spark/paimon-spark-3.4/pom.xml | 10 +- paimon-spark/paimon-spark-3.5/pom.xml | 10 +- paimon-spark/paimon-spark-4.0/pom.xml | 10 +- paimon-spark/paimon-spark-common/pom.xml | 68 ------- ...Row.java => AbstractSparkInternalRow.java} | 103 ++-------- .../apache/paimon/spark/DataConverter.java | 117 ++++++++++++ .../apache/paimon/spark/SparkArrayData.java | 172 ----------------- .../paimon/spark/SparkGenericCatalog.java | 5 +- .../spark/PaimonPartitionManagement.scala | 3 +- .../paimon/spark/PaimonPartitionReader.scala | 1 + .../spark/PaimonPartitionReaderFactory.scala | 7 +- .../paimon/spark/PaimonStatistics.scala | 7 +- .../spark/aggregate/LocalAggregator.scala | 5 +- .../expressions/ExpressionHelper.scala | 11 +- .../MergePaimonScalarSubqueriesBase.scala | 4 +- .../spark/commands/BucketProcessor.scala | 4 +- .../spark/commands/MergeIntoPaimonTable.scala | 13 +- .../commands/UpdatePaimonTableCommand.scala | 7 +- .../paimon/spark/data/SparkArrayData.scala | 118 ++++++++++++ .../paimon/spark/data/SparkInternalRow.scala | 36 ++++ .../PaimonSparkSessionExtensions.scala | 6 +- ...tractPaimonSparkSqlExtensionsParser.scala} | 4 +- .../catalog/PaimonCatalogUtils.scala | 25 +-- .../spark/sql/paimon/ReflectUtils.scala | 43 +++++ .../spark/sql/paimon/shims/SparkShim.scala | 61 ++++++ .../sql/paimon/shims/SparkShimLoader.scala | 43 +++++ paimon-spark/paimon-spark-ut/pom.xml | 180 ++++++++++++++++++ .../spark/SparkCatalogWithHiveTest.java | 0 .../paimon/spark/SparkFileIndexITCase.java | 0 .../spark/SparkFilterConverterTest.java | 0 .../paimon/spark/SparkGenericCatalogTest.java | 0 .../SparkGenericCatalogWithHiveTest.java | 0 .../paimon/spark/SparkInternalRowTest.java | 6 +- .../apache/paimon/spark/SparkReadITCase.java | 0 .../paimon/spark/SparkReadTestBase.java | 0 .../apache/paimon/spark/SparkS3ITCase.java | 0 .../spark/SparkSchemaEvolutionITCase.java | 0 .../paimon/spark/SparkTimeTravelITCase.java | 0 .../SparkTimeTravelWithDataFrameITCase.java | 0 .../apache/paimon/spark/SparkTypeTest.java | 0 .../apache/paimon/spark/SparkWriteITCase.java | 0 .../spark/SparkWriteWithKyroITCase.java | 0 .../extensions/CallStatementParserTest.java | 0 .../org.junit.jupiter.api.extension.Extension | 0 .../src/test/resources/hive-site.xml | 0 .../src/test/resources/log4j2-test.properties | 0 .../paimon/spark/PaimonCDCSourceTest.scala | 0 .../paimon/spark/PaimonCommitTest.scala | 0 .../paimon/spark/PaimonHiveTestBase.scala | 2 +- .../apache/paimon/spark/PaimonSinkTest.scala | 0 .../paimon/spark/PaimonSourceTest.scala | 0 .../paimon/spark/PaimonSparkTestBase.scala | 39 +++- .../apache/paimon/spark/PaimonTableTest.scala | 0 .../apache/paimon/spark/ScanHelperTest.scala | 3 +- .../procedure/AlterBranchProcedureTest.scala | 0 .../spark/procedure/BranchProcedureTest.scala | 0 .../CompactManifestProcedureTest.scala | 0 .../procedure/CompactProcedureTestBase.scala | 0 .../CreateAndDeleteTagProcedureTest.scala | 0 .../CreateTagFromTimestampProcedureTest.scala | 0 .../ExpirePartitionsProcedureTest.scala | 0 .../ExpireSnapshotsProcedureTest.scala | 0 .../procedure/ExpireTagsProcedureTest.scala | 0 .../procedure/FastForwardProcedureTest.scala | 0 .../MarkPartitionDoneProcedureTest.scala | 0 .../MigrateDatabaseProcedureTest.scala | 0 .../procedure/MigrateFileProcedureTest.scala | 0 .../procedure/MigrateTableProcedureTest.scala | 0 .../spark/procedure/ProcedureTestBase.scala | 0 .../RemoveOrphanFilesProcedureTest.scala | 0 .../procedure/ReplaceTagProcedureTest.scala | 0 .../procedure/RollbackProcedureTest.scala | 0 .../spark/sql/AnalyzeTableTestBase.scala | 14 +- .../spark/sql/BucketedTableQueryTest.scala | 8 +- .../apache/paimon/spark/sql/DDLTestBase.scala | 14 +- .../sql/DDLWithHiveCatalogTestBase.scala | 2 +- .../paimon/spark/sql/DataFrameWriteTest.scala | 0 .../spark/sql/DeleteFromTableTestBase.scala | 0 .../paimon/spark/sql/DeletionVectorTest.scala | 2 +- .../paimon/spark/sql/DescribeTableTest.scala | 0 ...leUnnecessaryPaimonBucketedScanSuite.scala | 6 +- .../spark/sql/DynamicBucketTableTest.scala | 0 .../sql/InsertOverwriteTableTestBase.scala | 4 +- .../spark/sql/LookupCompactionTest.scala | 0 .../sql/MergeIntoNotMatchedBySourceTest.scala | 0 .../spark/sql/MergeIntoTableTestBase.scala | 0 .../paimon/spark/sql/ObjectTableTest.scala | 0 .../PaimonCompositePartitionKeyTestBase.scala | 0 .../paimon/spark/sql/PaimonFunctionTest.scala | 0 .../paimon/spark/sql/PaimonMetricTest.scala | 0 .../sql/PaimonOptimizationTestBase.scala | 35 ++-- .../paimon/spark/sql/PaimonOptionTest.scala | 26 +-- .../sql/PaimonPartitionManagementTest.scala | 0 .../paimon/spark/sql/PaimonPushDownTest.scala | 0 .../paimon/spark/sql/PaimonQueryTest.scala | 2 +- .../spark/sql/PaimonShowColumnsTestBase.scala | 0 .../spark/sql/PaimonSystemTableTest.scala | 0 .../spark/sql/PaimonTagDdlTestBase.scala | 0 .../paimon/spark/sql/PaimonViewTestBase.scala | 0 .../spark/sql/PushDownAggregatesTest.scala | 0 .../spark/sql/SparkVersionSupport.scala | 0 .../spark/sql/TableValuedFunctionsTest.scala | 0 .../spark/sql/UpdateTableTestBase.scala | 0 .../paimon/spark/sql/WithTableOptions.scala | 0 .../org/apache/spark/sql}/paimon/Utils.scala | 12 +- paimon-spark/paimon-spark3-common/pom.xml | 32 +++- ...rg.apache.spark.sql.paimon.shims.SparkShim | 16 ++ .../PaimonSpark3SqlExtensionsParser.scala | 25 +++ .../paimon/spark/data/Spark3ArrayData.scala} | 15 +- .../spark/data/Spark3InternalRow.scala} | 16 +- .../{shims.scala => shims/Spark3Shim.scala} | 54 +++--- paimon-spark/paimon-spark4-common/pom.xml | 51 ++++- ...rg.apache.spark.sql.paimon.shims.SparkShim | 16 ++ .../PaimonSpark4SqlExtensionsParser.scala | 28 +++ .../paimon/spark/data/Spark4ArrayData.scala} | 13 +- .../paimon/spark/data/Spark4InternalRow.scala | 28 +++ .../org/apache/spark/sql/paimon/shims.scala | 86 --------- .../spark/sql/paimon/shims/Spark4Shim.scala | 69 +++++++ paimon-spark/pom.xml | 38 ++-- 125 files changed, 1138 insertions(+), 627 deletions(-) rename paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/{SparkInternalRow.java => AbstractSparkInternalRow.java} (67%) create mode 100644 paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/DataConverter.java delete mode 100644 paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkArrayData.java create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkArrayData.scala create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkInternalRow.scala rename paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/{PaimonSparkSqlExtensionsParser.scala => AbstractPaimonSparkSqlExtensionsParser.scala} (98%) create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/ReflectUtils.scala create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala create mode 100644 paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShimLoader.scala create mode 100644 paimon-spark/paimon-spark-ut/pom.xml rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkCatalogWithHiveTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkGenericCatalogTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkGenericCatalogWithHiveTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java (95%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkReadITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkReadTestBase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkS3ITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkSchemaEvolutionITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkTimeTravelITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkTimeTravelWithDataFrameITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkTypeTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/SparkWriteWithKyroITCase.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/java/org/apache/paimon/spark/extensions/CallStatementParserTest.java (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/resources/hive-site.xml (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/resources/log4j2-test.properties (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonCommitTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala (98%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonSourceTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala (79%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/PaimonTableTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala (97%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/CompactManifestProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/ExpireTagsProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/FastForwardProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/MarkPartitionDoneProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/MigrateDatabaseProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/MigrateFileProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/MigrateTableProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/RemoveOrphanFilesProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/ReplaceTagProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala (97%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala (97%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala (97%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala (99%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala (99%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala (97%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/DynamicBucketTableTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala (99%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/LookupCompactionTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/MergeIntoNotMatchedBySourceTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/ObjectTableTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonFunctionTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonMetricTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala (90%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala (91%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonPartitionManagementTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala (99%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonShowColumnsTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonTagDdlTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/PushDownAggregatesTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/SparkVersionSupport.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTestBase.scala (100%) rename paimon-spark/{paimon-spark-common => paimon-spark-ut}/src/test/scala/org/apache/paimon/spark/sql/WithTableOptions.scala (100%) rename paimon-spark/{paimon-spark-common/src/test/scala/org/apache/spark => paimon-spark-ut/src/test/scala/org/apache/spark/sql}/paimon/Utils.scala (74%) create mode 100644 paimon-spark/paimon-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim create mode 100644 paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark3SqlExtensionsParser.scala rename paimon-spark/{paimon-spark-3.2/src/test/scala/org/apache/spark/paimon/Utils.scala => paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3ArrayData.scala} (73%) rename paimon-spark/{paimon-spark-3.3/src/test/scala/org/apache/spark/paimon/Utils.scala => paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3InternalRow.scala} (73%) rename paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/{shims.scala => shims/Spark3Shim.scala} (51%) create mode 100644 paimon-spark/paimon-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim create mode 100644 paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark4SqlExtensionsParser.scala rename paimon-spark/{paimon-spark-3.4/src/test/scala/org/apache/spark/paimon/Utils.scala => paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala} (72%) create mode 100644 paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala delete mode 100644 paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala create mode 100644 paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala diff --git a/.github/workflows/utitcase-spark-3.x.yml b/.github/workflows/utitcase-spark-3.x.yml index 5edcfe49007a..2d3df5f4d005 100644 --- a/.github/workflows/utitcase-spark-3.x.yml +++ b/.github/workflows/utitcase-spark-3.x.yml @@ -54,7 +54,7 @@ jobs: jvm_timezone=$(random_timezone) echo "JVM timezone is set to $jvm_timezone" test_modules="" - for suffix in common_2.12 3.5 3.4 3.3 3.2; do + for suffix in ut 3.5 3.4 3.3 3.2; do test_modules+="org.apache.paimon:paimon-spark-${suffix}," done test_modules="${test_modules%,}" diff --git a/.github/workflows/utitcase-spark-4.x.yml b/.github/workflows/utitcase-spark-4.x.yml index 7fbac23dda4f..c58fd7c03be2 100644 --- a/.github/workflows/utitcase-spark-4.x.yml +++ b/.github/workflows/utitcase-spark-4.x.yml @@ -54,7 +54,7 @@ jobs: jvm_timezone=$(random_timezone) echo "JVM timezone is set to $jvm_timezone" test_modules="" - for suffix in common_2.13 4.0; do + for suffix in ut 4.0; do test_modules+="org.apache.paimon:paimon-spark-${suffix}," done test_modules="${test_modules%,}" diff --git a/.github/workflows/utitcase.yml b/.github/workflows/utitcase.yml index bde67cb4c203..8aa33f5b8218 100644 --- a/.github/workflows/utitcase.yml +++ b/.github/workflows/utitcase.yml @@ -54,7 +54,7 @@ jobs: jvm_timezone=$(random_timezone) echo "JVM timezone is set to $jvm_timezone" test_modules="!paimon-e2e-tests," - for suffix in 3.5 3.4 3.3 3.2 common_2.12; do + for suffix in 3.5 3.4 3.3 3.2 ut; do test_modules+="!org.apache.paimon:paimon-spark-${suffix}," done test_modules="${test_modules%,}" diff --git a/paimon-spark/paimon-spark-3.2/pom.xml b/paimon-spark/paimon-spark-3.2/pom.xml index 626bb5bae833..957319b47dab 100644 --- a/paimon-spark/paimon-spark-3.2/pom.xml +++ b/paimon-spark/paimon-spark-3.2/pom.xml @@ -36,6 +36,12 @@ under the License. + + org.apache.paimon + paimon-spark3-common + ${project.version} + + org.apache.paimon paimon-spark-common_${scala.binary.version} @@ -63,7 +69,7 @@ under the License. org.apache.paimon - paimon-spark-common_${scala.binary.version} + paimon-spark-ut ${project.version} tests test @@ -126,7 +132,7 @@ under the License. - org.apache.paimon:paimon-spark-common_${scala.binary.version} + org.apache.paimon:paimon-spark3-common diff --git a/paimon-spark/paimon-spark-3.3/pom.xml b/paimon-spark/paimon-spark-3.3/pom.xml index 689e4131ccd9..0a390d926789 100644 --- a/paimon-spark/paimon-spark-3.3/pom.xml +++ b/paimon-spark/paimon-spark-3.3/pom.xml @@ -36,6 +36,12 @@ under the License. + + org.apache.paimon + paimon-spark3-common + ${project.version} + + org.apache.paimon paimon-spark-common_${scala.binary.version} @@ -63,7 +69,7 @@ under the License. org.apache.paimon - paimon-spark-common_${scala.binary.version} + paimon-spark-ut ${project.version} tests test @@ -126,7 +132,7 @@ under the License. - org.apache.paimon:paimon-spark-common_${scala.binary.version} + org.apache.paimon:paimon-spark3-common diff --git a/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTest.scala b/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTest.scala index 304b814b33d3..219d57c865c8 100644 --- a/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTest.scala +++ b/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTest.scala @@ -248,7 +248,7 @@ class InsertOverwriteTest extends PaimonSparkTestBase { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, 3, "3") :: Row(2, 4, "4") :: Nil) - withSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { + withSparkSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { // dynamic overwrite the a=1 partition spark.sql("INSERT OVERWRITE T VALUES (1, 5, '5'), (1, 7, '7')") checkAnswer( @@ -289,7 +289,7 @@ class InsertOverwriteTest extends PaimonSparkTestBase { "ptv2", 22) :: Nil) - withSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { + withSparkSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { // dynamic overwrite the pt2=22 partition spark.sql( "INSERT OVERWRITE T PARTITION (pt2 = 22) VALUES (3, 'c2', 'ptv1'), (4, 'd2', 'ptv3')") diff --git a/paimon-spark/paimon-spark-3.4/pom.xml b/paimon-spark/paimon-spark-3.4/pom.xml index d1ded508a927..0f4cb30e4f7f 100644 --- a/paimon-spark/paimon-spark-3.4/pom.xml +++ b/paimon-spark/paimon-spark-3.4/pom.xml @@ -36,6 +36,12 @@ under the License. + + org.apache.paimon + paimon-spark3-common + ${project.version} + + org.apache.paimon paimon-spark-common_${scala.binary.version} @@ -63,7 +69,7 @@ under the License. org.apache.paimon - paimon-spark-common_${scala.binary.version} + paimon-spark-ut ${project.version} tests test @@ -126,7 +132,7 @@ under the License. - org.apache.paimon:paimon-spark-common_${scala.binary.version} + org.apache.paimon:paimon-spark3-common diff --git a/paimon-spark/paimon-spark-3.5/pom.xml b/paimon-spark/paimon-spark-3.5/pom.xml index 92803cda540e..1b9c96888908 100644 --- a/paimon-spark/paimon-spark-3.5/pom.xml +++ b/paimon-spark/paimon-spark-3.5/pom.xml @@ -36,6 +36,12 @@ under the License. + + org.apache.paimon + paimon-spark3-common + ${project.version} + + org.apache.paimon paimon-spark-common_${scala.binary.version} @@ -63,7 +69,7 @@ under the License. org.apache.paimon - paimon-spark-common_${scala.binary.version} + paimon-spark-ut ${project.version} tests test @@ -126,7 +132,7 @@ under the License. - org.apache.paimon:paimon-spark-common_${scala.binary.version} + org.apache.paimon:paimon-spark3-common diff --git a/paimon-spark/paimon-spark-4.0/pom.xml b/paimon-spark/paimon-spark-4.0/pom.xml index 9f819f820ce2..8e7d166dc55b 100644 --- a/paimon-spark/paimon-spark-4.0/pom.xml +++ b/paimon-spark/paimon-spark-4.0/pom.xml @@ -36,6 +36,12 @@ under the License. + + org.apache.paimon + paimon-spark4-common + ${project.version} + + org.apache.paimon paimon-spark-common_${scala.binary.version} @@ -63,7 +69,7 @@ under the License. org.apache.paimon - paimon-spark-common_${scala.binary.version} + paimon-spark-ut ${project.version} tests test @@ -126,7 +132,7 @@ under the License. - org.apache.paimon:paimon-spark-common_${scala.binary.version} + org.apache.paimon:paimon-spark4-common diff --git a/paimon-spark/paimon-spark-common/pom.xml b/paimon-spark/paimon-spark-common/pom.xml index 1cfc53f42d48..052c4c4265fc 100644 --- a/paimon-spark/paimon-spark-common/pom.xml +++ b/paimon-spark/paimon-spark-common/pom.xml @@ -38,18 +38,6 @@ under the License. - - org.apache.paimon - ${paimon-sparkx-common} - ${project.version} - - - * - * - - - - org.apache.spark spark-sql_${scala.binary.version} @@ -72,46 +60,6 @@ under the License. org.apache.paimon paimon-bundle - - - - - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} - tests - test - - - - org.apache.spark - spark-catalyst_${scala.binary.version} - ${spark.version} - tests - test - - - - org.apache.spark - spark-core_${scala.binary.version} - ${spark.version} - tests - test - - - - org.apache.spark - spark-hive_${scala.binary.version} - ${spark.version} - test - - - - org.apache.spark - spark-avro_${scala.binary.version} - ${spark.version} - test - @@ -130,7 +78,6 @@ under the License. org.apache.paimon:paimon-bundle - org.apache.paimon:${paimon-sparkx-common} @@ -155,21 +102,6 @@ under the License. src/main/antlr4 - - - - org.apache.maven.plugins - maven-jar-plugin - - - prepare-test-jar - test-compile - - test-jar - - - - diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkInternalRow.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/AbstractSparkInternalRow.java similarity index 67% rename from paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkInternalRow.java rename to paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/AbstractSparkInternalRow.java index 147c6c2d77c8..28604a6d6293 100644 --- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkInternalRow.java +++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/AbstractSparkInternalRow.java @@ -18,24 +18,15 @@ package org.apache.paimon.spark; -import org.apache.paimon.data.BinaryString; -import org.apache.paimon.data.InternalArray; -import org.apache.paimon.data.InternalMap; import org.apache.paimon.data.InternalRow; -import org.apache.paimon.data.Timestamp; -import org.apache.paimon.spark.util.shim.TypeUtils; +import org.apache.paimon.spark.data.SparkInternalRow; import org.apache.paimon.types.ArrayType; import org.apache.paimon.types.BigIntType; import org.apache.paimon.types.DataType; import org.apache.paimon.types.DataTypeChecks; -import org.apache.paimon.types.IntType; -import org.apache.paimon.types.MapType; -import org.apache.paimon.types.MultisetType; import org.apache.paimon.types.RowType; -import org.apache.spark.sql.catalyst.util.ArrayBasedMapData; import org.apache.spark.sql.catalyst.util.ArrayData; -import org.apache.spark.sql.catalyst.util.DateTimeUtils; import org.apache.spark.sql.catalyst.util.MapData; import org.apache.spark.sql.types.BinaryType; import org.apache.spark.sql.types.BooleanType; @@ -61,19 +52,23 @@ import java.util.Objects; +import static org.apache.paimon.spark.DataConverter.fromPaimon; import static org.apache.paimon.utils.InternalRowUtils.copyInternalRow; -/** Spark {@link org.apache.spark.sql.catalyst.InternalRow} to wrap {@link InternalRow}. */ -public class SparkInternalRow extends org.apache.spark.sql.paimon.shims.InternalRow { +/** + * An abstract {@link SparkInternalRow} that overwrite all the common methods in spark3 and spark4. + */ +public abstract class AbstractSparkInternalRow extends SparkInternalRow { - private final RowType rowType; + protected RowType rowType; - private InternalRow row; + protected InternalRow row; - public SparkInternalRow(RowType rowType) { + public AbstractSparkInternalRow(RowType rowType) { this.rowType = rowType; } + @Override public SparkInternalRow replace(InternalRow row) { this.row = row; return this; @@ -96,7 +91,7 @@ public void update(int i, Object value) { @Override public org.apache.spark.sql.catalyst.InternalRow copy() { - return new SparkInternalRow(rowType).replace(copyInternalRow(row, rowType)); + return SparkInternalRow.create(rowType).replace(copyInternalRow(row, rowType)); } @Override @@ -255,7 +250,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) { return false; } - SparkInternalRow that = (SparkInternalRow) o; + AbstractSparkInternalRow that = (AbstractSparkInternalRow) o; return Objects.equals(rowType, that.rowType) && Objects.equals(row, that.row); } @@ -263,78 +258,4 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(rowType, row); } - - // ================== static methods ========================================= - - public static Object fromPaimon(Object o, DataType type) { - if (o == null) { - return null; - } - switch (type.getTypeRoot()) { - case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - return fromPaimon((Timestamp) o); - case CHAR: - case VARCHAR: - return fromPaimon((BinaryString) o); - case DECIMAL: - return fromPaimon((org.apache.paimon.data.Decimal) o); - case ARRAY: - return fromPaimon((InternalArray) o, (ArrayType) type); - case MAP: - case MULTISET: - return fromPaimon((InternalMap) o, type); - case ROW: - return fromPaimon((InternalRow) o, (RowType) type); - default: - return o; - } - } - - public static UTF8String fromPaimon(BinaryString string) { - return UTF8String.fromBytes(string.toBytes()); - } - - public static Decimal fromPaimon(org.apache.paimon.data.Decimal decimal) { - return Decimal.apply(decimal.toBigDecimal()); - } - - public static org.apache.spark.sql.catalyst.InternalRow fromPaimon( - InternalRow row, RowType rowType) { - return new SparkInternalRow(rowType).replace(row); - } - - public static long fromPaimon(Timestamp timestamp) { - if (TypeUtils.treatPaimonTimestampTypeAsSparkTimestampType()) { - return DateTimeUtils.fromJavaTimestamp(timestamp.toSQLTimestamp()); - } else { - return timestamp.toMicros(); - } - } - - public static ArrayData fromPaimon(InternalArray array, ArrayType arrayType) { - return fromPaimonArrayElementType(array, arrayType.getElementType()); - } - - private static ArrayData fromPaimonArrayElementType(InternalArray array, DataType elementType) { - return new SparkArrayData(elementType).replace(array); - } - - public static MapData fromPaimon(InternalMap map, DataType mapType) { - DataType keyType; - DataType valueType; - if (mapType instanceof MapType) { - keyType = ((MapType) mapType).getKeyType(); - valueType = ((MapType) mapType).getValueType(); - } else if (mapType instanceof MultisetType) { - keyType = ((MultisetType) mapType).getElementType(); - valueType = new IntType(); - } else { - throw new UnsupportedOperationException("Unsupported type: " + mapType); - } - - return new ArrayBasedMapData( - fromPaimonArrayElementType(map.keyArray(), keyType), - fromPaimonArrayElementType(map.valueArray(), valueType)); - } } diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/DataConverter.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/DataConverter.java new file mode 100644 index 000000000000..0b5ea899476e --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/DataConverter.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.InternalArray; +import org.apache.paimon.data.InternalMap; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.spark.data.SparkArrayData; +import org.apache.paimon.spark.data.SparkInternalRow; +import org.apache.paimon.spark.util.shim.TypeUtils; +import org.apache.paimon.types.ArrayType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.IntType; +import org.apache.paimon.types.MapType; +import org.apache.paimon.types.MultisetType; +import org.apache.paimon.types.RowType; + +import org.apache.spark.sql.catalyst.util.ArrayBasedMapData; +import org.apache.spark.sql.catalyst.util.ArrayData; +import org.apache.spark.sql.catalyst.util.DateTimeUtils; +import org.apache.spark.sql.catalyst.util.MapData; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.unsafe.types.UTF8String; + +/** A data converter that convert Paimon data to Spark Data. */ +public class DataConverter { + + public static Object fromPaimon(Object o, DataType type) { + if (o == null) { + return null; + } + switch (type.getTypeRoot()) { + case TIMESTAMP_WITHOUT_TIME_ZONE: + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + return fromPaimon((Timestamp) o); + case CHAR: + case VARCHAR: + return fromPaimon((BinaryString) o); + case DECIMAL: + return fromPaimon((org.apache.paimon.data.Decimal) o); + case ARRAY: + return fromPaimon((InternalArray) o, (ArrayType) type); + case MAP: + case MULTISET: + return fromPaimon((InternalMap) o, type); + case ROW: + return fromPaimon((InternalRow) o, (RowType) type); + default: + return o; + } + } + + public static UTF8String fromPaimon(BinaryString string) { + return UTF8String.fromBytes(string.toBytes()); + } + + public static Decimal fromPaimon(org.apache.paimon.data.Decimal decimal) { + return Decimal.apply(decimal.toBigDecimal()); + } + + public static org.apache.spark.sql.catalyst.InternalRow fromPaimon( + InternalRow row, RowType rowType) { + return SparkInternalRow.create(rowType).replace(row); + } + + public static long fromPaimon(Timestamp timestamp) { + if (TypeUtils.treatPaimonTimestampTypeAsSparkTimestampType()) { + return DateTimeUtils.fromJavaTimestamp(timestamp.toSQLTimestamp()); + } else { + return timestamp.toMicros(); + } + } + + public static ArrayData fromPaimon(InternalArray array, ArrayType arrayType) { + return fromPaimonArrayElementType(array, arrayType.getElementType()); + } + + private static ArrayData fromPaimonArrayElementType(InternalArray array, DataType elementType) { + return SparkArrayData.create(elementType).replace(array); + } + + public static MapData fromPaimon(InternalMap map, DataType mapType) { + DataType keyType; + DataType valueType; + if (mapType instanceof MapType) { + keyType = ((MapType) mapType).getKeyType(); + valueType = ((MapType) mapType).getValueType(); + } else if (mapType instanceof MultisetType) { + keyType = ((MultisetType) mapType).getElementType(); + valueType = new IntType(); + } else { + throw new UnsupportedOperationException("Unsupported type: " + mapType); + } + + return new ArrayBasedMapData( + fromPaimonArrayElementType(map.keyArray(), keyType), + fromPaimonArrayElementType(map.valueArray(), valueType)); + } +} diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkArrayData.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkArrayData.java deleted file mode 100644 index 9934047a1825..000000000000 --- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkArrayData.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.paimon.spark; - -import org.apache.paimon.data.InternalArray; -import org.apache.paimon.types.ArrayType; -import org.apache.paimon.types.BigIntType; -import org.apache.paimon.types.DataType; -import org.apache.paimon.types.DataTypeChecks; -import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.InternalRowUtils; - -import org.apache.spark.sql.catalyst.InternalRow; -import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader; -import org.apache.spark.sql.catalyst.util.ArrayData; -import org.apache.spark.sql.catalyst.util.MapData; -import org.apache.spark.sql.types.Decimal; -import org.apache.spark.unsafe.types.CalendarInterval; -import org.apache.spark.unsafe.types.UTF8String; - -import static org.apache.paimon.spark.SparkInternalRow.fromPaimon; -import static org.apache.paimon.utils.InternalRowUtils.copyArray; - -/** Spark {@link ArrayData} to wrap Paimon {@link InternalArray}. */ -public class SparkArrayData extends org.apache.spark.sql.paimon.shims.ArrayData { - - private final DataType elementType; - - private InternalArray array; - - public SparkArrayData(DataType elementType) { - this.elementType = elementType; - } - - public SparkArrayData replace(InternalArray array) { - this.array = array; - return this; - } - - @Override - public int numElements() { - return array.size(); - } - - @Override - public ArrayData copy() { - return new SparkArrayData(elementType).replace(copyArray(array, elementType)); - } - - @Override - public Object[] array() { - Object[] objects = new Object[numElements()]; - for (int i = 0; i < objects.length; i++) { - objects[i] = fromPaimon(InternalRowUtils.get(array, i, elementType), elementType); - } - return objects; - } - - @Override - public void setNullAt(int i) { - throw new UnsupportedOperationException(); - } - - @Override - public void update(int i, Object value) { - throw new UnsupportedOperationException(); - } - - @Override - public boolean isNullAt(int ordinal) { - return array.isNullAt(ordinal); - } - - @Override - public boolean getBoolean(int ordinal) { - return array.getBoolean(ordinal); - } - - @Override - public byte getByte(int ordinal) { - return array.getByte(ordinal); - } - - @Override - public short getShort(int ordinal) { - return array.getShort(ordinal); - } - - @Override - public int getInt(int ordinal) { - return array.getInt(ordinal); - } - - @Override - public long getLong(int ordinal) { - if (elementType instanceof BigIntType) { - return array.getLong(ordinal); - } - - return getTimestampMicros(ordinal); - } - - private long getTimestampMicros(int ordinal) { - return fromPaimon(array.getTimestamp(ordinal, DataTypeChecks.getPrecision(elementType))); - } - - @Override - public float getFloat(int ordinal) { - return array.getFloat(ordinal); - } - - @Override - public double getDouble(int ordinal) { - return array.getDouble(ordinal); - } - - @Override - public Decimal getDecimal(int ordinal, int precision, int scale) { - return fromPaimon(array.getDecimal(ordinal, precision, scale)); - } - - @Override - public UTF8String getUTF8String(int ordinal) { - return fromPaimon(array.getString(ordinal)); - } - - @Override - public byte[] getBinary(int ordinal) { - return array.getBinary(ordinal); - } - - @Override - public CalendarInterval getInterval(int ordinal) { - throw new UnsupportedOperationException(); - } - - @Override - public InternalRow getStruct(int ordinal, int numFields) { - return fromPaimon(array.getRow(ordinal, numFields), (RowType) elementType); - } - - @Override - public ArrayData getArray(int ordinal) { - return fromPaimon(array.getArray(ordinal), (ArrayType) elementType); - } - - @Override - public MapData getMap(int ordinal) { - return fromPaimon(array.getMap(ordinal), elementType); - } - - @Override - public Object get(int ordinal, org.apache.spark.sql.types.DataType dataType) { - return SpecializedGettersReader.read(this, ordinal, dataType, true, true); - } -} diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkGenericCatalog.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkGenericCatalog.java index d4b712fcb8ee..9957f0cdf91f 100644 --- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkGenericCatalog.java +++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/SparkGenericCatalog.java @@ -52,7 +52,7 @@ import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.internal.SessionState; import org.apache.spark.sql.internal.StaticSQLConf; -import org.apache.spark.sql.paimon.shims; +import org.apache.spark.sql.paimon.shims.SparkShimLoader; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; import org.slf4j.Logger; @@ -203,7 +203,8 @@ public Table createTable( return sparkCatalog.createTable(ident, schema, partitions, properties); } else { // delegate to the session catalog - return shims.createTable(asTableCatalog(), ident, schema, partitions, properties); + return SparkShimLoader.getSparkShim() + .createTable(asTableCatalog(), ident, schema, partitions, properties); } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionManagement.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionManagement.scala index 54970bfe3cb2..9a305ca59a0f 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionManagement.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionManagement.scala @@ -21,6 +21,7 @@ package org.apache.paimon.spark import org.apache.paimon.CoreOptions import org.apache.paimon.metastore.MetastoreClient import org.apache.paimon.operation.FileStoreCommit +import org.apache.paimon.spark.data.SparkInternalRow import org.apache.paimon.table.FileStoreTable import org.apache.paimon.table.sink.BatchWriteBuilder import org.apache.paimon.types.RowType @@ -116,7 +117,7 @@ trait PaimonPartitionManagement extends SupportsAtomicPartitionManagement { s"the partition schema '${partitionSchema.sql}'." ) table.newReadBuilder.newScan.listPartitions.asScala - .map(binaryRow => SparkInternalRow.fromPaimon(binaryRow, partitionRowType)) + .map(binaryRow => DataConverter.fromPaimon(binaryRow, partitionRowType)) .filter( sparkInternalRow => { partitionCols.zipWithIndex diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReader.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReader.scala index fa9072df3149..526178e28ec3 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReader.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReader.scala @@ -20,6 +20,7 @@ package org.apache.paimon.spark import org.apache.paimon.data.{InternalRow => PaimonInternalRow} import org.apache.paimon.reader.RecordReader +import org.apache.paimon.spark.data.SparkInternalRow import org.apache.paimon.spark.schema.PaimonMetadataColumn import org.apache.paimon.table.source.{DataSplit, Split} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReaderFactory.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReaderFactory.scala index 94de0bec3b50..59b07a794481 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReaderFactory.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonPartitionReaderFactory.scala @@ -18,10 +18,11 @@ package org.apache.paimon.spark -import org.apache.paimon.data +import org.apache.paimon.data.{InternalRow => PaimonInternalRow} import org.apache.paimon.disk.IOManager import org.apache.paimon.reader.RecordReader import org.apache.paimon.spark.SparkUtils.createIOManager +import org.apache.paimon.spark.data.SparkInternalRow import org.apache.paimon.spark.schema.PaimonMetadataColumn import org.apache.paimon.table.source.{ReadBuilder, Split} import org.apache.paimon.types.RowType @@ -45,13 +46,13 @@ case class PaimonPartitionReaderFactory( val dataFields = new JList(readBuilder.readType().getFields) dataFields.addAll(metadataColumns.map(_.toPaimonDataField).asJava) val rowType = new RowType(dataFields) - new SparkInternalRow(rowType) + SparkInternalRow.create(rowType) } override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { partition match { case paimonInputPartition: PaimonInputPartition => - val readFunc: Split => RecordReader[data.InternalRow] = + val readFunc: Split => RecordReader[PaimonInternalRow] = (split: Split) => readBuilder.newRead().withIOManager(ioManager).createReader(split) PaimonPartitionReader(readFunc, paimonInputPartition, row, metadataColumns) case _ => diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonStatistics.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonStatistics.scala index 28af4ac0a4fd..8dd464933032 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonStatistics.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/PaimonStatistics.scala @@ -18,6 +18,7 @@ package org.apache.paimon.spark +import org.apache.paimon.spark.data.SparkInternalRow import org.apache.paimon.stats.ColStats import org.apache.paimon.types.{DataField, DataType, RowType} @@ -118,8 +119,10 @@ object PaimonColumnStats { def apply(dateType: DataType, paimonColStats: ColStats[_]): PaimonColumnStats = { PaimonColumnStats( paimonColStats.nullCount, - Optional.ofNullable(SparkInternalRow.fromPaimon(paimonColStats.min().orElse(null), dateType)), - Optional.ofNullable(SparkInternalRow.fromPaimon(paimonColStats.max().orElse(null), dateType)), + Optional.ofNullable( + DataConverter + .fromPaimon(paimonColStats.min().orElse(null), dateType)), + Optional.ofNullable(DataConverter.fromPaimon(paimonColStats.max().orElse(null), dateType)), paimonColStats.distinctCount, paimonColStats.avgLen, paimonColStats.maxLen diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/aggregate/LocalAggregator.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/aggregate/LocalAggregator.scala index cd9718cf44eb..41e7fd3c3ce9 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/aggregate/LocalAggregator.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/aggregate/LocalAggregator.scala @@ -20,7 +20,8 @@ package org.apache.paimon.spark.aggregate import org.apache.paimon.data.BinaryRow import org.apache.paimon.manifest.PartitionEntry -import org.apache.paimon.spark.{SparkInternalRow, SparkTypeUtils} +import org.apache.paimon.spark.SparkTypeUtils +import org.apache.paimon.spark.data.SparkInternalRow import org.apache.paimon.table.{DataTable, Table} import org.apache.paimon.utils.{InternalRowUtils, ProjectedRow} @@ -104,7 +105,7 @@ class LocalAggregator(table: Table) { ProjectedRow.from(requiredGroupByIndexMapping.toArray).replaceRow(partitionRow) // `ProjectedRow` does not support `hashCode`, so do a deep copy val genericRow = InternalRowUtils.copyInternalRow(projectedRow, partitionType) - new SparkInternalRow(partitionType).replace(genericRow) + SparkInternalRow.create(partitionType).replace(genericRow) } def update(partitionEntry: PartitionEntry): Unit = { diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/expressions/ExpressionHelper.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/expressions/ExpressionHelper.scala index c008819fb0cc..d4010ea33811 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/expressions/ExpressionHelper.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/analysis/expressions/ExpressionHelper.scala @@ -23,12 +23,13 @@ import org.apache.paimon.spark.SparkFilterConverter import org.apache.paimon.spark.catalyst.Compatibility import org.apache.paimon.types.RowType +import org.apache.spark.sql.{Column, SparkSession} import org.apache.spark.sql.PaimonUtils.{normalizeExprs, translateFilter} -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, Cast, Expression, GetStructField, Literal, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.types.{DataType, NullType} /** An expression helper. */ @@ -36,6 +37,14 @@ trait ExpressionHelper extends PredicateHelper { import ExpressionHelper._ + def toColumn(expr: Expression): Column = { + SparkShimLoader.getSparkShim.column(expr) + } + + def toExpression(spark: SparkSession, col: Column): Expression = { + SparkShimLoader.getSparkShim.convertToExpression(spark, col) + } + protected def resolveExpression( spark: SparkSession)(expr: Expression, plan: LogicalPlan): Expression = { if (expr.resolved) { diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueriesBase.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueriesBase.scala index b0b1a76e7a1f..3428ed89f004 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueriesBase.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/catalyst/optimizer/MergePaimonScalarSubqueriesBase.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{SCALAR_SUBQUERY, SCALAR_SUBQUERY_REFERENCE, TreePattern} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.paimon.shims +import org.apache.spark.sql.paimon.shims.SparkShimLoader import org.apache.spark.sql.types.{DataType, StructType} import scala.collection.mutable.ArrayBuffer @@ -344,7 +344,7 @@ trait MergePaimonScalarSubqueriesBase extends Rule[LogicalPlan] with PredicateHe val Seq(newPlanSupportsHashAggregate, cachedPlanSupportsHashAggregate) = aggregateExpressionsSeq.zip(groupByExpressionSeq).map { case (aggregateExpressions, groupByExpressions) => - shims.Aggregate.supportsHashAggregate( + SparkShimLoader.getSparkShim.supportsHashAggregate( aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes), groupByExpressions) } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/BucketProcessor.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/BucketProcessor.scala index f252b3bb130b..57a8a8e4abfd 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/BucketProcessor.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/BucketProcessor.scala @@ -22,7 +22,7 @@ import org.apache.paimon.crosspartition.{GlobalIndexAssigner, KeyPartOrRow} import org.apache.paimon.data.{BinaryRow, GenericRow, InternalRow => PaimonInternalRow, JoinedRow} import org.apache.paimon.disk.IOManager import org.apache.paimon.index.HashBucketAssigner -import org.apache.paimon.spark.{SparkInternalRow, SparkRow} +import org.apache.paimon.spark.{DataConverter, SparkRow} import org.apache.paimon.spark.SparkUtils.createIOManager import org.apache.paimon.spark.util.EncoderUtils import org.apache.paimon.table.FileStoreTable @@ -179,7 +179,7 @@ class GlobalIndexAssignerIterator( extraRow.setField(1, bucket) queue.enqueue( encoderGroup.internalToRow( - SparkInternalRow.fromPaimon(new JoinedRow(row, extraRow), rowType))) + DataConverter.fromPaimon(new JoinedRow(row, extraRow), rowType))) } ) rowIterator.foreach { diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala index f557a0cf38ee..52e704172fc8 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/MergeIntoPaimonTable.scala @@ -38,7 +38,6 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.functions.{col, lit, monotonically_increasing_id, sum} -import org.apache.spark.sql.paimon.shims.ExpressionUtils.{column, convertToExpression} import org.apache.spark.sql.types.{ByteType, StructField, StructType} import scala.collection.mutable @@ -153,12 +152,12 @@ case class MergeIntoPaimonTable( } if (hasUpdate(matchedActions)) { touchedFilePathsSet ++= findTouchedFiles( - targetDS.join(sourceDS, column(mergeCondition), "inner"), + targetDS.join(sourceDS, toColumn(mergeCondition), "inner"), sparkSession) } if (hasUpdate(notMatchedBySourceActions)) { touchedFilePathsSet ++= findTouchedFiles( - targetDS.join(sourceDS, column(mergeCondition), "left_anti"), + targetDS.join(sourceDS, toColumn(mergeCondition), "left_anti"), sparkSession) } @@ -200,7 +199,7 @@ case class MergeIntoPaimonTable( val sourceDS = createDataset(sparkSession, sourceTable) .withColumn(SOURCE_ROW_COL, lit(true)) - val joinedDS = sourceDS.join(targetDS, column(mergeCondition), "fullOuter") + val joinedDS = sourceDS.join(targetDS, toColumn(mergeCondition), "fullOuter") val joinedPlan = joinedDS.queryExecution.analyzed def resolveOnJoinedPlan(exprs: Seq[Expression]): Seq[Expression] = { @@ -209,9 +208,9 @@ case class MergeIntoPaimonTable( val targetOutput = filteredTargetPlan.output val targetRowNotMatched = resolveOnJoinedPlan( - Seq(convertToExpression(sparkSession, col(SOURCE_ROW_COL).isNull))).head + Seq(toExpression(sparkSession, col(SOURCE_ROW_COL).isNull))).head val sourceRowNotMatched = resolveOnJoinedPlan( - Seq(convertToExpression(sparkSession, col(TARGET_ROW_COL).isNull))).head + Seq(toExpression(sparkSession, col(TARGET_ROW_COL).isNull))).head val matchedExprs = matchedActions.map(_.condition.getOrElse(TrueLiteral)) val notMatchedExprs = notMatchedActions.map(_.condition.getOrElse(TrueLiteral)) val notMatchedBySourceExprs = notMatchedBySourceActions.map(_.condition.getOrElse(TrueLiteral)) @@ -275,7 +274,7 @@ case class MergeIntoPaimonTable( .withColumn(ROW_ID_COL, monotonically_increasing_id()) val sourceDS = createDataset(sparkSession, sourceTable) val count = sourceDS - .join(targetDS, column(mergeCondition), "inner") + .join(targetDS, toColumn(mergeCondition), "inner") .select(col(ROW_ID_COL), lit(1).as("one")) .groupBy(ROW_ID_COL) .agg(sum("one").as("count")) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/UpdatePaimonTableCommand.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/UpdatePaimonTableCommand.scala index f2ea965d1407..47e3f77d0e2c 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/UpdatePaimonTableCommand.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/UpdatePaimonTableCommand.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral import org.apache.spark.sql.catalyst.plans.logical.{Assignment, Filter, Project, SupportsSubquery} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.paimon.shims.ExpressionUtils.column +import org.apache.spark.sql.paimon.shims.SparkShimLoader case class UpdatePaimonTableCommand( relation: DataSourceV2Relation, @@ -133,7 +133,8 @@ case class UpdatePaimonTableCommand( sparkSession: SparkSession, touchedDataSplits: Array[DataSplit]): Seq[CommitMessage] = { val updateColumns = updateExpressions.zip(relation.output).map { - case (update, origin) => column(update).as(origin.name, origin.metadata) + case (update, origin) => + SparkShimLoader.getSparkShim.column(update).as(origin.name, origin.metadata) } val toUpdateScanRelation = createNewRelation(touchedDataSplits, relation) @@ -156,7 +157,7 @@ case class UpdatePaimonTableCommand( } else { If(condition, update, origin) } - column(updated).as(origin.name, origin.metadata) + SparkShimLoader.getSparkShim.column(updated).as(origin.name, origin.metadata) } val data = createDataset(sparkSession, toUpdateScanRelation).select(updateColumns: _*) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkArrayData.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkArrayData.scala new file mode 100644 index 000000000000..c6539a493cee --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkArrayData.scala @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.data.InternalArray +import org.apache.paimon.spark.DataConverter +import org.apache.paimon.types.{ArrayType => PaimonArrayType, BigIntType, DataType => PaimonDataType, DataTypeChecks, RowType} +import org.apache.paimon.utils.InternalRowUtils + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.SpecializedGettersReader +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} +import org.apache.spark.sql.paimon.shims.SparkShimLoader +import org.apache.spark.sql.types.{DataType, Decimal} +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} + +abstract class SparkArrayData extends org.apache.spark.sql.catalyst.util.ArrayData { + + def replace(array: InternalArray): SparkArrayData +} + +abstract class AbstractSparkArrayData extends SparkArrayData { + + val elementType: PaimonDataType + + var paimonArray: InternalArray = _ + + override def replace(array: InternalArray): SparkArrayData = { + this.paimonArray = array + this + } + + override def numElements(): Int = paimonArray.size() + + override def copy(): ArrayData = { + SparkArrayData.create(elementType).replace(InternalRowUtils.copyArray(paimonArray, elementType)) + } + + override def array: Array[Any] = { + Array.range(0, numElements()).map { + i => + DataConverter + .fromPaimon(InternalRowUtils.get(paimonArray, i, elementType), elementType) + } + } + + override def setNullAt(i: Int): Unit = throw new UnsupportedOperationException() + + override def update(i: Int, value: Any): Unit = throw new UnsupportedOperationException() + + override def isNullAt(ordinal: Int): Boolean = paimonArray.isNullAt(ordinal) + + override def getBoolean(ordinal: Int): Boolean = paimonArray.getBoolean(ordinal) + + override def getByte(ordinal: Int): Byte = paimonArray.getByte(ordinal) + + override def getShort(ordinal: Int): Short = paimonArray.getShort(ordinal) + + override def getInt(ordinal: Int): Int = paimonArray.getInt(ordinal) + + override def getLong(ordinal: Int): Long = elementType match { + case _: BigIntType => paimonArray.getLong(ordinal) + case _ => + DataConverter.fromPaimon( + paimonArray.getTimestamp(ordinal, DataTypeChecks.getPrecision(elementType))) + } + + override def getFloat(ordinal: Int): Float = paimonArray.getFloat(ordinal) + + override def getDouble(ordinal: Int): Double = paimonArray.getDouble(ordinal) + + override def getDecimal(ordinal: Int, precision: Int, scale: Int): Decimal = + DataConverter.fromPaimon(paimonArray.getDecimal(ordinal, precision, scale)) + + override def getUTF8String(ordinal: Int): UTF8String = + DataConverter.fromPaimon(paimonArray.getString(ordinal)) + + override def getBinary(ordinal: Int): Array[Byte] = paimonArray.getBinary(ordinal) + + override def getInterval(ordinal: Int): CalendarInterval = + throw new UnsupportedOperationException() + + override def getStruct(ordinal: Int, numFields: Int): InternalRow = DataConverter + .fromPaimon(paimonArray.getRow(ordinal, numFields), elementType.asInstanceOf[RowType]) + + override def getArray(ordinal: Int): ArrayData = DataConverter.fromPaimon( + paimonArray.getArray(ordinal), + elementType.asInstanceOf[PaimonArrayType]) + + override def getMap(ordinal: Int): MapData = + DataConverter.fromPaimon(paimonArray.getMap(ordinal), elementType) + + override def get(ordinal: Int, dataType: DataType): AnyRef = + SpecializedGettersReader.read(this, ordinal, dataType, true, true) + +} + +object SparkArrayData { + def create(elementType: PaimonDataType): SparkArrayData = { + SparkShimLoader.getSparkShim.createSparkArrayData(elementType) + } +} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkInternalRow.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkInternalRow.scala new file mode 100644 index 000000000000..f3e607e9d7d2 --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/data/SparkInternalRow.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.types.RowType + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.paimon.shims.SparkShimLoader + +abstract class SparkInternalRow extends InternalRow { + def replace(row: org.apache.paimon.data.InternalRow): SparkInternalRow +} + +object SparkInternalRow { + + def create(rowType: RowType): SparkInternalRow = { + SparkShimLoader.getSparkShim.createSparkInternalRow(rowType) + } + +} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/extensions/PaimonSparkSessionExtensions.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/extensions/PaimonSparkSessionExtensions.scala index 6f47a77ef308..e8f75d394a81 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/extensions/PaimonSparkSessionExtensions.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/extensions/PaimonSparkSessionExtensions.scala @@ -25,14 +25,16 @@ import org.apache.paimon.spark.execution.PaimonStrategy import org.apache.paimon.spark.execution.adaptive.DisableUnnecessaryPaimonBucketedScan import org.apache.spark.sql.SparkSessionExtensions -import org.apache.spark.sql.catalyst.parser.extensions.PaimonSparkSqlExtensionsParser +import org.apache.spark.sql.paimon.shims.SparkShimLoader /** Spark session extension to extends the syntax and adds the rules. */ class PaimonSparkSessionExtensions extends (SparkSessionExtensions => Unit) { override def apply(extensions: SparkSessionExtensions): Unit = { // parser extensions - extensions.injectParser { case (_, parser) => new PaimonSparkSqlExtensionsParser(parser) } + extensions.injectParser { + case (_, parser) => SparkShimLoader.getSparkShim.createSparkParser(parser) + } // analyzer extensions extensions.injectResolutionRule(spark => new PaimonAnalysis(spark)) diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/PaimonSparkSqlExtensionsParser.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/AbstractPaimonSparkSqlExtensionsParser.scala similarity index 98% rename from paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/PaimonSparkSqlExtensionsParser.scala rename to paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/AbstractPaimonSparkSqlExtensionsParser.scala index 9ece186930d7..c1d61e973834 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/PaimonSparkSqlExtensionsParser.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/AbstractPaimonSparkSqlExtensionsParser.scala @@ -47,8 +47,8 @@ import java.util.Locale * @param delegate * The extension parser. */ -class PaimonSparkSqlExtensionsParser(val delegate: ParserInterface) - extends org.apache.spark.sql.paimon.shims.ParserInterface +abstract class AbstractPaimonSparkSqlExtensionsParser(val delegate: ParserInterface) + extends org.apache.spark.sql.catalyst.parser.ParserInterface with Logging { private lazy val substitutor = new VariableSubstitution() diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/connector/catalog/PaimonCatalogUtils.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/connector/catalog/PaimonCatalogUtils.scala index 265c82866195..2ab3dc494524 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/connector/catalog/PaimonCatalogUtils.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/connector/catalog/PaimonCatalogUtils.scala @@ -23,10 +23,7 @@ import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.catalog.ExternalCatalog import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION -import org.apache.spark.util.Utils - -import scala.reflect.ClassTag -import scala.util.control.NonFatal +import org.apache.spark.sql.paimon.ReflectUtils object PaimonCatalogUtils { @@ -37,22 +34,10 @@ object PaimonCatalogUtils { } else { "org.apache.spark.sql.catalyst.catalog.InMemoryCatalog" } - reflect[ExternalCatalog, SparkConf, Configuration](externalCatalogClassName, conf, hadoopConf) - } - - private def reflect[T, Arg1 <: AnyRef, Arg2 <: AnyRef]( - className: String, - ctorArg1: Arg1, - ctorArg2: Arg2)(implicit ctorArgTag1: ClassTag[Arg1], ctorArgTag2: ClassTag[Arg2]): T = { - try { - val clazz = Utils.classForName(className) - val ctor = clazz.getDeclaredConstructor(ctorArgTag1.runtimeClass, ctorArgTag2.runtimeClass) - val args = Array[AnyRef](ctorArg1, ctorArg2) - ctor.newInstance(args: _*).asInstanceOf[T] - } catch { - case NonFatal(e) => - throw new IllegalArgumentException(s"Error while instantiating '$className':", e) - } + ReflectUtils.reflect[ExternalCatalog, SparkConf, Configuration]( + externalCatalogClassName, + conf, + hadoopConf) } } diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/ReflectUtils.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/ReflectUtils.scala new file mode 100644 index 000000000000..bedac542ab8b --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/ReflectUtils.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.paimon + +import org.apache.spark.util.Utils + +import scala.reflect.ClassTag +import scala.util.control.NonFatal + +object ReflectUtils { + + def reflect[T, Arg1 <: AnyRef, Arg2 <: AnyRef](className: String, ctorArg1: Arg1, ctorArg2: Arg2)( + implicit + ctorArgTag1: ClassTag[Arg1], + ctorArgTag2: ClassTag[Arg2]): T = { + try { + val clazz = Utils.classForName(className) + val ctor = clazz.getDeclaredConstructor(ctorArgTag1.runtimeClass, ctorArgTag2.runtimeClass) + val args = Array[AnyRef](ctorArg1, ctorArg2) + ctor.newInstance(args: _*).asInstanceOf[T] + } catch { + case NonFatal(e) => + throw new IllegalArgumentException(s"Error while instantiating '$className':", e) + } + } + +} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala new file mode 100644 index 000000000000..bd85282737e9 --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShim.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.paimon.shims + +import org.apache.paimon.spark.data.{SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} + +import org.apache.spark.sql.{Column, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.types.StructType + +import java.util.{Map => JMap} + +/** + * A spark shim trait. It declare methods which have incompatible implementations between Spark 3 + * and Spark 4. The specific SparkShim implementation will be loaded through Service Provider + * Interface. + */ +trait SparkShim { + + def createSparkParser(delegate: ParserInterface): ParserInterface + + def createSparkInternalRow(rowType: RowType): SparkInternalRow + + def createSparkArrayData(elementType: DataType): SparkArrayData + + def supportsHashAggregate( + aggregateBufferAttributes: Seq[Attribute], + groupingExpression: Seq[Expression]): Boolean + + def createTable( + tableCatalog: TableCatalog, + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: JMap[String, String]): Table + + def column(expr: Expression): Column + + def convertToExpression(spark: SparkSession, column: Column): Expression + +} diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShimLoader.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShimLoader.scala new file mode 100644 index 000000000000..920896547a1e --- /dev/null +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/paimon/shims/SparkShimLoader.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.paimon.shims + +import java.util.ServiceLoader + +import scala.collection.JavaConverters._ + +/** Load a [[SparkShim]]'s implementation. */ +object SparkShimLoader { + + private lazy val sparkShim: SparkShim = loadSparkShim() + + def getSparkShim: SparkShim = { + sparkShim + } + + private def loadSparkShim(): SparkShim = { + val shims = ServiceLoader.load(classOf[SparkShim]).asScala + if (shims.isEmpty) { + throw new IllegalStateException("No available spark shim here.") + } else if (shims.size > 1) { + throw new IllegalStateException("Found more than one spark shim here.") + } + shims.head + } +} diff --git a/paimon-spark/paimon-spark-ut/pom.xml b/paimon-spark/paimon-spark-ut/pom.xml new file mode 100644 index 000000000000..0a1840596487 --- /dev/null +++ b/paimon-spark/paimon-spark-ut/pom.xml @@ -0,0 +1,180 @@ + + + + 4.0.0 + + + org.apache.paimon + paimon-spark + 1.0-SNAPSHOT + + + paimon-spark-ut + Paimon : Spark : UT + + + ${paimon-spark-common.spark.version} + ${paimon.shade.jackson.version} + + + + + org.apache.paimon + ${paimon-sparkx-common} + ${project.version} + test + + + + org.apache.paimon + paimon-spark-common_${scala.binary.version} + ${project.version} + test + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + test + + + com.fasterxml.jackson.core + * + + + com.fasterxml.jackson.module + * + + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + test + + + com.fasterxml.jackson.core + * + + + + + + org.apache.spark + spark-sql_${scala.binary.version} + ${spark.version} + tests + test + + + com.fasterxml.jackson.core + * + + + + + + org.apache.spark + spark-catalyst_${scala.binary.version} + ${spark.version} + tests + test + + + + org.apache.spark + spark-core_${scala.binary.version} + ${spark.version} + tests + test + + + com.fasterxml.jackson.core + * + + + com.fasterxml.jackson.module + * + + + + + + org.apache.spark + spark-hive_${scala.binary.version} + ${spark.version} + test + + + com.fasterxml.jackson.core + * + + + com.google.protobuf + protobuf-java + + + + + + org.apache.spark + spark-avro_${scala.binary.version} + ${spark.version} + test + + + + com.fasterxml.jackson.module + jackson-module-scala_${scala.binary.version} + ${jackson.version} + test + + + + com.google.protobuf + protobuf-java + ${protobuf-java.version} + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + prepare-test-jar + test-compile + + test-jar + + + + + + + + diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkCatalogWithHiveTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkCatalogWithHiveTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkCatalogWithHiveTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkCatalogWithHiveTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFileIndexITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkFilterConverterTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkGenericCatalogTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkGenericCatalogTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkGenericCatalogTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkGenericCatalogTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkGenericCatalogWithHiveTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkGenericCatalogWithHiveTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkGenericCatalogWithHiveTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkGenericCatalogWithHiveTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java similarity index 95% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java index b98213c0e662..1117ad58c737 100644 --- a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java +++ b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkInternalRowTest.java @@ -25,6 +25,7 @@ import org.apache.paimon.data.GenericRow; import org.apache.paimon.data.InternalRow; import org.apache.paimon.data.Timestamp; +import org.apache.paimon.spark.data.SparkInternalRow; import org.apache.paimon.utils.DateTimeUtils; import org.apache.spark.sql.catalyst.CatalystTypeConverters; @@ -95,7 +96,7 @@ public void test() { SparkTypeUtils.fromPaimonType(ALL_TYPES))); org.apache.spark.sql.Row sparkRow = (org.apache.spark.sql.Row) - sparkConverter.apply(new SparkInternalRow(ALL_TYPES).replace(rowData)); + sparkConverter.apply(SparkInternalRow.create(ALL_TYPES).replace(rowData)); String expected = "1," @@ -122,7 +123,8 @@ public void test() { SparkRow sparkRowData = new SparkRow(ALL_TYPES, sparkRow); sparkRow = (org.apache.spark.sql.Row) - sparkConverter.apply(new SparkInternalRow(ALL_TYPES).replace(sparkRowData)); + sparkConverter.apply( + SparkInternalRow.create(ALL_TYPES).replace(sparkRowData)); assertThat(sparkRowToString(sparkRow)).isEqualTo(expected); TimeZone.setDefault(tz); } diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkReadITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkReadITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkReadITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkReadITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkReadTestBase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkReadTestBase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkReadTestBase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkReadTestBase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkS3ITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkS3ITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkS3ITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkS3ITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkSchemaEvolutionITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkSchemaEvolutionITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkSchemaEvolutionITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkSchemaEvolutionITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTimeTravelITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTimeTravelITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTimeTravelITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTimeTravelITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTimeTravelWithDataFrameITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTimeTravelWithDataFrameITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTimeTravelWithDataFrameITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTimeTravelWithDataFrameITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTypeTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTypeTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkTypeTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkTypeTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkWriteWithKyroITCase.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteWithKyroITCase.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/SparkWriteWithKyroITCase.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/SparkWriteWithKyroITCase.java diff --git a/paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/extensions/CallStatementParserTest.java b/paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/extensions/CallStatementParserTest.java similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/java/org/apache/paimon/spark/extensions/CallStatementParserTest.java rename to paimon-spark/paimon-spark-ut/src/test/java/org/apache/paimon/spark/extensions/CallStatementParserTest.java diff --git a/paimon-spark/paimon-spark-common/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension b/paimon-spark/paimon-spark-ut/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension rename to paimon-spark/paimon-spark-ut/src/test/resources/META-INF/services/org.junit.jupiter.api.extension.Extension diff --git a/paimon-spark/paimon-spark-common/src/test/resources/hive-site.xml b/paimon-spark/paimon-spark-ut/src/test/resources/hive-site.xml similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/resources/hive-site.xml rename to paimon-spark/paimon-spark-ut/src/test/resources/hive-site.xml diff --git a/paimon-spark/paimon-spark-common/src/test/resources/log4j2-test.properties b/paimon-spark/paimon-spark-ut/src/test/resources/log4j2-test.properties similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/resources/log4j2-test.properties rename to paimon-spark/paimon-spark-ut/src/test/resources/log4j2-test.properties diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCDCSourceTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonCommitTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCommitTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonCommitTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonCommitTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala similarity index 98% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala index 842147615d1a..6d2ffea04df5 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonHiveTestBase.scala @@ -22,7 +22,7 @@ import org.apache.paimon.hive.TestHiveMetastore import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf -import org.apache.spark.paimon.Utils +import org.apache.spark.sql.paimon.Utils import java.io.File diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSinkTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSourceTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSourceTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSourceTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala similarity index 79% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala index 9b4a344259a9..605b2e6ca5f2 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonSparkTestBase.scala @@ -25,11 +25,12 @@ import org.apache.paimon.spark.sql.{SparkVersionSupport, WithTableOptions} import org.apache.paimon.table.FileStoreTable import org.apache.spark.SparkConf -import org.apache.spark.paimon.Utils import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connector.catalog.{Identifier => SparkIdentifier} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.paimon.Utils import org.apache.spark.sql.test.SharedSparkSession import org.scalactic.source.Position import org.scalatest.Tag @@ -105,7 +106,7 @@ class PaimonSparkTestBase } protected def withTimeZone(timeZone: String)(f: => Unit): Unit = { - withSQLConf("spark.sql.session.timeZone" -> timeZone) { + withSparkSQLConf("spark.sql.session.timeZone" -> timeZone) { val originTimeZone = TimeZone.getDefault try { TimeZone.setDefault(TimeZone.getTimeZone(timeZone)) @@ -116,6 +117,40 @@ class PaimonSparkTestBase } } + // Since SPARK-46227 has changed the definition of withSQLConf that resulted in + // incompatibility between the Spark3.x and Spark4.x, So Paimon declare a separate method + // to provide the same function. + protected def withSparkSQLConf(pairs: (String, String)*)(f: => Unit): Unit = { + withSparkSQLConf0(pairs: _*)(f) + } + + private def withSparkSQLConf0(pairs: (String, String)*)(f: => Unit): Unit = { + val conf = SQLConf.get + val (keys, values) = pairs.unzip + val currentValues = keys.map { + key => + if (conf.contains(key)) { + Some(conf.getConfString(key)) + } else { + None + } + } + (keys, values).zipped.foreach { + (k, v) => + if (SQLConf.isStaticConfigKey(k)) { + throw new RuntimeException(s"Cannot modify the value of a static config: $k") + } + conf.setConfString(k, v) + } + try f + finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => conf.setConfString(key, value) + case (key, None) => conf.unsetConf(key) + } + } + } + override def test(testName: String, testTags: Tag*)(testFun: => Any)(implicit pos: Position): Unit = { println(testName) diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonTableTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/PaimonTableTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/PaimonTableTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala similarity index 97% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala index fc787246f9f1..a3223446f644 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/ScanHelperTest.scala @@ -26,7 +26,6 @@ import org.apache.paimon.table.source.{DataSplit, Split} import org.junit.jupiter.api.Assertions -import java.util import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ @@ -35,7 +34,7 @@ import scala.collection.mutable class ScanHelperTest extends PaimonSparkTestBase { test("Paimon: reshuffle splits") { - withSQLConf(("spark.sql.leafNodeDefaultParallelism", "20")) { + withSparkSQLConf(("spark.sql.leafNodeDefaultParallelism", "20")) { val splitNum = 5 val fileNum = 100 diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/AlterBranchProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/BranchProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CompactManifestProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactManifestProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CompactManifestProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactManifestProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CompactProcedureTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateAndDeleteTagProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/CreateTagFromTimestampProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpirePartitionsProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireSnapshotsProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpireTagsProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireTagsProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ExpireTagsProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ExpireTagsProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/FastForwardProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/FastForwardProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/FastForwardProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/FastForwardProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MarkPartitionDoneProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MarkPartitionDoneProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MarkPartitionDoneProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MarkPartitionDoneProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateDatabaseProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateDatabaseProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateDatabaseProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateDatabaseProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateFileProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateFileProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateFileProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateFileProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateTableProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateTableProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/MigrateTableProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/MigrateTableProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ProcedureTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/RemoveOrphanFilesProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RemoveOrphanFilesProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/RemoveOrphanFilesProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RemoveOrphanFilesProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ReplaceTagProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ReplaceTagProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/ReplaceTagProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/ReplaceTagProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/procedure/RollbackProcedureTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala similarity index 97% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala index 238dd039969a..4f8ccae22dd5 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/AnalyzeTableTestBase.scala @@ -86,7 +86,7 @@ abstract class AnalyzeTableTestBase extends PaimonSparkTestBase { spark.sql(s"ANALYZE TABLE T COMPUTE STATISTICS") - withSQLConf("spark.paimon.scan.timestamp-millis" -> System.currentTimeMillis.toString) { + withSparkSQLConf("spark.paimon.scan.timestamp-millis" -> System.currentTimeMillis.toString) { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(2, 0, 2, "{ }")) @@ -97,7 +97,7 @@ abstract class AnalyzeTableTestBase extends PaimonSparkTestBase { spark.sql(s"ANALYZE TABLE T COMPUTE STATISTICS") - withSQLConf("spark.paimon.scan.timestamp-millis" -> System.currentTimeMillis.toString) { + withSparkSQLConf("spark.paimon.scan.timestamp-millis" -> System.currentTimeMillis.toString) { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(5, 0, 4, "{ }")) @@ -111,31 +111,31 @@ abstract class AnalyzeTableTestBase extends PaimonSparkTestBase { spark.sql("CALL paimon.sys.create_tag(table => 'test.T', tag => 'test_tag6', snapshot => 6)"), Row(true) :: Nil) - withSQLConf("spark.paimon.scan.tag-name" -> "test_tag5") { + withSparkSQLConf("spark.paimon.scan.tag-name" -> "test_tag5") { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(2, 0, 2, "{ }")) } - withSQLConf("spark.paimon.scan.tag-name" -> "test_tag6") { + withSparkSQLConf("spark.paimon.scan.tag-name" -> "test_tag6") { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(5, 0, 4, "{ }")) } - withSQLConf("spark.paimon.scan.snapshot-id" -> "3") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "3") { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(2, 0, 2, "{ }")) } - withSQLConf("spark.paimon.scan.snapshot-id" -> "4") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "4") { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(2, 0, 2, "{ }")) } - withSQLConf("spark.paimon.scan.snapshot-id" -> "6") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "6") { checkAnswer( sql("SELECT snapshot_id, schema_id, mergedRecordCount, colstat FROM `T$statistics`"), Row(5, 0, 4, "{ }")) diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala similarity index 97% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala index afc70bf9105d..35931924c487 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/BucketedTableQueryTest.scala @@ -29,12 +29,12 @@ class BucketedTableQueryTest extends PaimonSparkTestBase with AdaptiveSparkPlanH private def checkAnswerAndShuffleSorts(query: String, numShuffles: Int, numSorts: Int): Unit = { var expectedResult: Array[Row] = null // avoid config default value change in future, so specify it manually - withSQLConf( + withSparkSQLConf( "spark.sql.sources.v2.bucketing.enabled" -> "false", "spark.sql.autoBroadcastJoinThreshold" -> "-1") { expectedResult = spark.sql(query).collect() } - withSQLConf( + withSparkSQLConf( "spark.sql.sources.v2.bucketing.enabled" -> "true", "spark.sql.autoBroadcastJoinThreshold" -> "-1") { val df = spark.sql(query) @@ -162,10 +162,10 @@ class BucketedTableQueryTest extends PaimonSparkTestBase with AdaptiveSparkPlanH checkAnswerAndShuffleSorts("select max(c) OVER (PARTITION BY id ORDER BY id) from t1", 0, 1) checkAnswerAndShuffleSorts("select sum(id) OVER (PARTITION BY c ORDER BY id) from t1", 1, 1) - withSQLConf("spark.sql.requireAllClusterKeysForDistribution" -> "false") { + withSparkSQLConf("spark.sql.requireAllClusterKeysForDistribution" -> "false") { checkAnswerAndShuffleSorts("SELECT id, c, count(*) FROM t1 GROUP BY id, c", 0, 0) } - withSQLConf("spark.sql.requireAllClusterKeysForDistribution" -> "true") { + withSparkSQLConf("spark.sql.requireAllClusterKeysForDistribution" -> "true") { checkAnswerAndShuffleSorts("SELECT id, c, count(*) FROM t1 GROUP BY id, c", 1, 0) } } diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala similarity index 97% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala index b09a2be98dc8..6ad5274496a9 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLTestBase.scala @@ -238,21 +238,21 @@ abstract class DDLTestBase extends PaimonSparkTestBase { |USING PAIMON |""".stripMargin) - withSQLConf("spark.sql.legacy.charVarcharAsString" -> "true") { + withSparkSQLConf("spark.sql.legacy.charVarcharAsString" -> "true") { sql("INSERT INTO paimon_tbl VALUES (1, 'ab')") } - withSQLConf("spark.sql.legacy.charVarcharAsString" -> "false") { + withSparkSQLConf("spark.sql.legacy.charVarcharAsString" -> "false") { sql("INSERT INTO paimon_tbl VALUES (2, 'ab')") } if (gteqSpark3_4) { - withSQLConf("spark.sql.readSideCharPadding" -> "true") { + withSparkSQLConf("spark.sql.readSideCharPadding" -> "true") { checkAnswer( spark.sql("SELECT c FROM paimon_tbl ORDER BY id"), Row("ab ") :: Row("ab ") :: Nil) } - withSQLConf("spark.sql.readSideCharPadding" -> "false") { + withSparkSQLConf("spark.sql.readSideCharPadding" -> "false") { checkAnswer( spark.sql("SELECT c FROM paimon_tbl ORDER BY id"), Row("ab") :: Row("ab ") :: Nil) @@ -270,7 +270,8 @@ abstract class DDLTestBase extends PaimonSparkTestBase { format => Seq(true, false).foreach { datetimeJava8APIEnabled => - withSQLConf("spark.sql.datetime.java8API.enabled" -> datetimeJava8APIEnabled.toString) { + withSparkSQLConf( + "spark.sql.datetime.java8API.enabled" -> datetimeJava8APIEnabled.toString) { withTimeZone("Asia/Shanghai") { withTable("paimon_tbl") { // Spark support create table with timestamp_ntz since 3.4 @@ -430,7 +431,8 @@ abstract class DDLTestBase extends PaimonSparkTestBase { test("Paimon DDL: select table with timestamp and timestamp_ntz with filter") { Seq(true, false).foreach { datetimeJava8APIEnabled => - withSQLConf("spark.sql.datetime.java8API.enabled" -> datetimeJava8APIEnabled.toString) { + withSparkSQLConf( + "spark.sql.datetime.java8API.enabled" -> datetimeJava8APIEnabled.toString) { withTable("paimon_tbl") { // Spark support create table with timestamp_ntz since 3.4 if (gteqSpark3_4) { diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala similarity index 99% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala index 9be8e21a8df2..e99e4434ef7f 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DDLWithHiveCatalogTestBase.scala @@ -304,7 +304,7 @@ abstract class DDLWithHiveCatalogTestBase extends PaimonHiveTestBase { withTempDir { tbLocation => withDatabase("paimon_db") { - spark.sql(s"CREATE DATABASE paimon_db") + spark.sql(s"CREATE DATABASE IF NOT EXISTS paimon_db") spark.sql(s"USE paimon_db") withTable("external_tbl", "managed_tbl") { val expertTbLocation = tbLocation.getCanonicalPath diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DataFrameWriteTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DeleteFromTableTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala similarity index 99% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala index e944429e4218..ea8309e14ffe 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DeletionVectorTest.scala @@ -25,13 +25,13 @@ import org.apache.paimon.spark.{PaimonSparkTestBase, PaimonSplitScan} import org.apache.paimon.spark.schema.PaimonMetadataColumn import org.apache.paimon.table.FileStoreTable -import org.apache.spark.paimon.Utils import org.apache.spark.sql.Row import org.apache.spark.sql.execution.{QueryExecution, SparkPlan} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation} import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.paimon.Utils import org.apache.spark.sql.util.QueryExecutionListener import org.junit.jupiter.api.Assertions diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DescribeTableTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala similarity index 97% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala index 70339bd7cac3..f47d40285aa9 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DisableUnnecessaryPaimonBucketedScanSuite.scala @@ -48,13 +48,13 @@ class DisableUnnecessaryPaimonBucketedScanSuite assert(bucketedScan.length == expectedNumBucketedScan, query) } - withSQLConf("spark.sql.sources.v2.bucketing.enabled" -> "true") { - withSQLConf("spark.sql.sources.bucketing.autoBucketedScan.enabled" -> "true") { + withSparkSQLConf("spark.sql.sources.v2.bucketing.enabled" -> "true") { + withSparkSQLConf("spark.sql.sources.bucketing.autoBucketedScan.enabled" -> "true") { val df = sql(query) val result = df.collect() checkNumBucketedScan(df, expectedNumScanWithAutoScanEnabled) - withSQLConf("spark.sql.sources.bucketing.autoBucketedScan.enabled" -> "false") { + withSparkSQLConf("spark.sql.sources.bucketing.autoBucketedScan.enabled" -> "false") { val expected = sql(query) checkAnswer(expected, result) checkNumBucketedScan(expected, expectedNumScanWithAutoScanDisabled) diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DynamicBucketTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DynamicBucketTableTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/DynamicBucketTableTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/DynamicBucketTableTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala similarity index 99% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala index 674b45fda68b..03026e857429 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/InsertOverwriteTableTestBase.scala @@ -346,7 +346,7 @@ abstract class InsertOverwriteTableTestBase extends PaimonSparkTestBase { spark.sql("SELECT * FROM T ORDER BY a, b"), Row(1, 3, "3") :: Row(2, 4, "4") :: Nil) - withSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { + withSparkSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { // dynamic overwrite the a=1 partition spark.sql("INSERT OVERWRITE T VALUES (1, 5, '5'), (1, 7, '7')") checkAnswer( @@ -387,7 +387,7 @@ abstract class InsertOverwriteTableTestBase extends PaimonSparkTestBase { "ptv2", 22) :: Nil) - withSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { + withSparkSQLConf("spark.sql.sources.partitionOverwriteMode" -> "dynamic") { // dynamic overwrite the pt2=22 partition spark.sql( "INSERT OVERWRITE T PARTITION (pt2 = 22) VALUES (3, 'c2', 'ptv1'), (4, 'd2', 'ptv3')") diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/LookupCompactionTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LookupCompactionTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/LookupCompactionTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/LookupCompactionTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/MergeIntoNotMatchedBySourceTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/MergeIntoNotMatchedBySourceTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/MergeIntoNotMatchedBySourceTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/MergeIntoNotMatchedBySourceTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/MergeIntoTableTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/ObjectTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/ObjectTableTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/ObjectTableTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/ObjectTableTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonCompositePartitionKeyTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonFunctionTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonFunctionTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonFunctionTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonFunctionTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonMetricTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonMetricTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonMetricTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonMetricTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala similarity index 90% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala index 78e8905fa969..87f4c9448619 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptimizationTestBase.scala @@ -20,6 +20,7 @@ package org.apache.paimon.spark.sql import org.apache.paimon.Snapshot.CommitKind import org.apache.paimon.spark.PaimonSparkTestBase +import org.apache.paimon.spark.catalyst.analysis.expressions.ExpressionHelper import org.apache.paimon.spark.catalyst.optimizer.MergePaimonScalarSubqueries import org.apache.spark.sql.Row @@ -27,11 +28,12 @@ import org.apache.spark.sql.catalyst.expressions.{Attribute, CreateNamedStruct, import org.apache.spark.sql.catalyst.plans.logical.{CTERelationDef, LogicalPlan, OneRowRelation, WithCTE} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.functions._ +import org.apache.spark.sql.paimon.Utils import org.junit.jupiter.api.Assertions import scala.collection.immutable -abstract class PaimonOptimizationTestBase extends PaimonSparkTestBase { +abstract class PaimonOptimizationTestBase extends PaimonSparkTestBase with ExpressionHelper { import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -59,24 +61,25 @@ abstract class PaimonOptimizationTestBase extends PaimonSparkTestBase { |""".stripMargin) val optimizedPlan = Optimize.execute(query.queryExecution.analyzed) - val relation = createRelationV2("T") - val mergedSubquery = relation + val df = Utils.createDataFrame(spark, createRelationV2("T")) + val mergedSubquery = df .select( - count(Literal(1)).as("cnt"), - sum(col("a").expr).as("sum_a"), - avg(col("b").expr).as("avg_b") + toColumn(count(Literal(1))).as("cnt"), + toColumn(sum(toExpression(spark, col("a")))).as("sum_a"), + toColumn(avg(toExpression(spark, col("b"))).as("avg_b")) ) .select( - CreateNamedStruct( - Seq( - Literal("cnt"), - 'cnt, - Literal("sum_a"), - 'sum_a, - Literal("avg_b"), - 'avg_b - )).as("mergedValue")) - val analyzedMergedSubquery = mergedSubquery.analyze + toColumn( + CreateNamedStruct( + Seq( + Literal("cnt"), + 'cnt, + Literal("sum_a"), + 'sum_a, + Literal("avg_b"), + 'avg_b + )).as("mergedValue"))) + val analyzedMergedSubquery = mergedSubquery.queryExecution.analyzed val correctAnswer = WithCTE( OneRowRelation() .select( diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala similarity index 91% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala index f74d6959b9f1..44df3e54ca72 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonOptionTest.scala @@ -29,7 +29,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { import testImplicits._ test("Paimon Option: create table with sql conf") { - withSQLConf("spark.paimon.scan.snapshot-id" -> "2") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "2") { sql("CREATE TABLE T (id INT)") val table = loadTable("T") // check options in schema file directly @@ -39,7 +39,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { } test("Paimon Option: create table by dataframe with sql conf") { - withSQLConf("spark.paimon.scan.snapshot-id" -> "2") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "2") { Seq((1L, "x1"), (2L, "x2")) .toDF("a", "b") .write @@ -61,13 +61,13 @@ class PaimonOptionTest extends PaimonSparkTestBase { val table = loadTable("T") // query with mutable option - withSQLConf("spark.paimon.scan.snapshot-id" -> "1") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T ORDER BY id"), Row(1)) checkAnswer(spark.read.format("paimon").load(table.location().toString), Row(1)) } // query with immutable option - withSQLConf("spark.paimon.bucket" -> "1") { + withSparkSQLConf("spark.paimon.bucket" -> "1") { assertThrows[UnsupportedOperationException] { sql("SELECT * FROM T ORDER BY id") } @@ -85,19 +85,19 @@ class PaimonOptionTest extends PaimonSparkTestBase { val table = loadTable("T") // query with global options - withSQLConf("spark.paimon.scan.snapshot-id" -> "1") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T ORDER BY id"), Row(1)) checkAnswer(spark.read.format("paimon").load(table.location().toString), Row(1)) } // query with table options - withSQLConf("spark.paimon.*.*.T.scan.snapshot-id" -> "1") { + withSparkSQLConf("spark.paimon.*.*.T.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T ORDER BY id"), Row(1)) checkAnswer(spark.read.format("paimon").load(table.location().toString), Row(1)) } // query with both global and table options - withSQLConf( + withSparkSQLConf( "spark.paimon.scan.snapshot-id" -> "1", "spark.paimon.*.*.T.scan.snapshot-id" -> "2") { checkAnswer(sql("SELECT * FROM T ORDER BY id"), Row(1) :: Row(2) :: Nil) @@ -122,7 +122,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { val table2 = loadTable("T1") // query with global options - withSQLConf("spark.paimon.scan.snapshot-id" -> "1") { + withSparkSQLConf("spark.paimon.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T1 join T2 on T1.id = T2.id ORDER BY T1.id"), Row(1, 1)) checkAnswer( spark.read @@ -134,7 +134,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { } // query with table options - withSQLConf("spark.paimon.*.*.*.scan.snapshot-id" -> "1") { + withSparkSQLConf("spark.paimon.*.*.*.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T1 join T2 on T1.id = T2.id ORDER BY T1.id"), Row(1, 1)) checkAnswer( spark.read @@ -146,7 +146,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { } // query with both global and table options - withSQLConf( + withSparkSQLConf( "spark.paimon.scan.snapshot-id" -> "1", "spark.paimon.*.*.*.scan.snapshot-id" -> "2") { checkAnswer( @@ -161,7 +161,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { ) } - withSQLConf( + withSparkSQLConf( "spark.paimon.*.*.T1.scan.snapshot-id" -> "1", "spark.paimon.*.*.T2.scan.snapshot-id" -> "1") { checkAnswer(sql("SELECT * FROM T1 join T2 on T1.id = T2.id ORDER BY T1.id"), Row(1, 1)) @@ -174,7 +174,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { ) } - withSQLConf( + withSparkSQLConf( "spark.paimon.*.*.T1.scan.snapshot-id" -> "1", "spark.paimon.*.*.T2.scan.snapshot-id" -> "2") { checkAnswer(sql("SELECT * FROM T1 join T2 on T1.id = T2.id ORDER BY T1.id"), Row(1, 1)) @@ -187,7 +187,7 @@ class PaimonOptionTest extends PaimonSparkTestBase { ) } - withSQLConf( + withSparkSQLConf( "spark.paimon.*.*.T1.scan.snapshot-id" -> "2", "spark.paimon.*.*.T2.scan.snapshot-id" -> "2") { checkAnswer( diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonPartitionManagementTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonPartitionManagementTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonPartitionManagementTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonPartitionManagementTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonPushDownTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala similarity index 99% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala index beea19c35e92..08f5275f01b5 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonQueryTest.scala @@ -281,7 +281,7 @@ class PaimonQueryTest extends PaimonSparkTestBase { // Since Spark 4.0, when `spark.sql.ansi.enabled` is `true` and `array[i]` does not exist, an exception // will be thrown instead of returning null. Here, just disabled it and return null for test. - withSQLConf("spark.sql.ansi.enabled" -> "false") { + withSparkSQLConf("spark.sql.ansi.enabled" -> "false") { checkAnswer( sql(s""" |SELECT diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonShowColumnsTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonShowColumnsTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonShowColumnsTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonShowColumnsTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonSystemTableTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonTagDdlTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonTagDdlTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonTagDdlTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonTagDdlTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PaimonViewTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PushDownAggregatesTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PushDownAggregatesTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/PushDownAggregatesTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/PushDownAggregatesTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/SparkVersionSupport.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/SparkVersionSupport.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/SparkVersionSupport.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/SparkVersionSupport.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/TableValuedFunctionsTest.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTestBase.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTestBase.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTestBase.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/UpdateTableTestBase.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/WithTableOptions.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/WithTableOptions.scala similarity index 100% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/paimon/spark/sql/WithTableOptions.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/paimon/spark/sql/WithTableOptions.scala diff --git a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/spark/paimon/Utils.scala b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/spark/sql/paimon/Utils.scala similarity index 74% rename from paimon-spark/paimon-spark-common/src/test/scala/org/apache/spark/paimon/Utils.scala rename to paimon-spark/paimon-spark-ut/src/test/scala/org/apache/spark/sql/paimon/Utils.scala index 5ea2dd861e19..03f1c7706efb 100644 --- a/paimon-spark/paimon-spark-common/src/test/scala/org/apache/spark/paimon/Utils.scala +++ b/paimon-spark/paimon-spark-ut/src/test/scala/org/apache/spark/sql/paimon/Utils.scala @@ -16,9 +16,10 @@ * limitations under the License. */ -package org.apache.spark.paimon +package org.apache.spark.sql.paimon -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.util.{Utils => SparkUtils} import java.io.File @@ -28,9 +29,14 @@ import java.io.File */ object Utils { - def createTempDir: File = SparkUtils.createTempDir() + def createTempDir: File = SparkUtils.createTempDir(System.getProperty("java.io.tmpdir"), "spark") def waitUntilEventEmpty(spark: SparkSession): Unit = { spark.sparkContext.listenerBus.waitUntilEmpty() } + + def createDataFrame(sparkSession: SparkSession, plan: LogicalPlan): DataFrame = { + Dataset.ofRows(sparkSession, plan) + } + } diff --git a/paimon-spark/paimon-spark3-common/pom.xml b/paimon-spark/paimon-spark3-common/pom.xml index 03d29ea05b3a..5fd869f1b393 100644 --- a/paimon-spark/paimon-spark3-common/pom.xml +++ b/paimon-spark/paimon-spark3-common/pom.xml @@ -39,9 +39,35 @@ under the License. - org.apache.spark - spark-sql_${scala.binary.version} - ${spark.version} + org.apache.paimon + paimon-spark-common_${scala.binary.version} + ${project.version} + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-paimon + package + + shade + + + + + org.apache.paimon:paimon-bundle + org.apache.paimon:paimon-spark-common_${scala.binary.version} + + + + + + + + \ No newline at end of file diff --git a/paimon-spark/paimon-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim b/paimon-spark/paimon-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim new file mode 100644 index 000000000000..b79ef54f6e30 --- /dev/null +++ b/paimon-spark/paimon-spark3-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.spark.sql.paimon.shims.Spark3Shim \ No newline at end of file diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark3SqlExtensionsParser.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark3SqlExtensionsParser.scala new file mode 100644 index 000000000000..07481b6f639f --- /dev/null +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark3SqlExtensionsParser.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.catalyst.parser.extensions + +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.parser.extensions.AbstractPaimonSparkSqlExtensionsParser + +class PaimonSpark3SqlExtensionsParser(override val delegate: ParserInterface) + extends AbstractPaimonSparkSqlExtensionsParser(delegate) {} diff --git a/paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/spark/paimon/Utils.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3ArrayData.scala similarity index 73% rename from paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/spark/paimon/Utils.scala rename to paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3ArrayData.scala index 1a899f500153..cb393d928dcb 100644 --- a/paimon-spark/paimon-spark-3.2/src/test/scala/org/apache/spark/paimon/Utils.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3ArrayData.scala @@ -16,17 +16,8 @@ * limitations under the License. */ -package org.apache.spark.paimon +package org.apache.paimon.spark.data -import org.apache.spark.util.{Utils => SparkUtils} +import org.apache.paimon.types.DataType -import java.io.File - -/** - * A wrapper that some Objects or Classes is limited to access beyond [[org.apache.spark]] package. - */ -object Utils { - - def createTempDir: File = SparkUtils.createTempDir() - -} +class Spark3ArrayData(override val elementType: DataType) extends AbstractSparkArrayData {} diff --git a/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/spark/paimon/Utils.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3InternalRow.scala similarity index 73% rename from paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/spark/paimon/Utils.scala rename to paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3InternalRow.scala index 1a899f500153..9c9a1c6bac95 100644 --- a/paimon-spark/paimon-spark-3.3/src/test/scala/org/apache/spark/paimon/Utils.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/paimon/spark/data/Spark3InternalRow.scala @@ -16,17 +16,9 @@ * limitations under the License. */ -package org.apache.spark.paimon +package org.apache.paimon.spark.data -import org.apache.spark.util.{Utils => SparkUtils} +import org.apache.paimon.spark.AbstractSparkInternalRow +import org.apache.paimon.types.RowType -import java.io.File - -/** - * A wrapper that some Objects or Classes is limited to access beyond [[org.apache.spark]] package. - */ -object Utils { - - def createTempDir: File = SparkUtils.createTempDir() - -} +class Spark3InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowType) {} diff --git a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala similarity index 51% rename from paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala rename to paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala index 13ade3f3c5ac..57d79d6474e9 100644 --- a/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala +++ b/paimon-spark/paimon-spark3-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark3Shim.scala @@ -16,45 +16,44 @@ * limitations under the License. */ -package org.apache.spark.sql.paimon +package org.apache.spark.sql.paimon.shims + +import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark3SqlExtensionsParser +import org.apache.paimon.spark.data.{Spark3ArrayData, Spark3InternalRow, SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} import org.apache.spark.sql.{Column, SparkSession} -import org.apache.spark.sql.catalyst.{InternalRow => SparkInternalRow} import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.parser.{ParserInterface => SparkParserInterface} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate => SparkAggregate} -import org.apache.spark.sql.catalyst.util.{ArrayData => SparkArrayData} -import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog => SparkTableCatalog} +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.connector.catalog.{Identifier, Table, TableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.types.StructType import java.util.{Map => JMap} -/** Shims for Spark 3.x in [[org.apache.spark.sql]]. */ -object shims { - - /** In [[org.apache.spark.sql.catalyst]]. */ +class Spark3Shim extends SparkShim { - abstract class ParserInterface extends SparkParserInterface { - val delegate: SparkParserInterface + override def createSparkParser(delegate: ParserInterface): ParserInterface = { + new PaimonSpark3SqlExtensionsParser(delegate) } - abstract class ArrayData extends SparkArrayData {} - - abstract class InternalRow extends SparkInternalRow {} + override def createSparkInternalRow(rowType: RowType): SparkInternalRow = { + new Spark3InternalRow(rowType) + } - object Aggregate { - def supportsHashAggregate( - aggregateBufferAttributes: Seq[Attribute], - groupingExpression: Seq[Expression]): Boolean = { - SparkAggregate.supportsHashAggregate(aggregateBufferAttributes) - } + override def createSparkArrayData(elementType: DataType): SparkArrayData = { + new Spark3ArrayData(elementType) } - /** In [[org.apache.spark.sql.connector]]. */ + override def supportsHashAggregate( + aggregateBufferAttributes: Seq[Attribute], + groupingExpression: Seq[Expression]): Boolean = { + Aggregate.supportsHashAggregate(aggregateBufferAttributes) + } - def createTable( - tableCatalog: SparkTableCatalog, + override def createTable( + tableCatalog: TableCatalog, ident: Identifier, schema: StructType, partitions: Array[Transform], @@ -62,11 +61,8 @@ object shims { tableCatalog.createTable(ident, schema, partitions, properties) } - /** In [[org.apache.spark.sql.internal]]. */ + override def column(expr: Expression): Column = new Column(expr) - object ExpressionUtils { - def column(expr: Expression): Column = new Column(expr) + override def convertToExpression(spark: SparkSession, column: Column): Expression = column.expr - def convertToExpression(spark: SparkSession, column: Column): Expression = column.expr - } } diff --git a/paimon-spark/paimon-spark4-common/pom.xml b/paimon-spark/paimon-spark4-common/pom.xml index dcc5b370d59a..d160b984fa05 100644 --- a/paimon-spark/paimon-spark4-common/pom.xml +++ b/paimon-spark/paimon-spark4-common/pom.xml @@ -38,10 +38,57 @@ under the License. + + org.apache.paimon + paimon-spark-common_${scala.binary.version} + ${project.version} + + org.apache.spark - spark-sql_${scala.binary.version} + spark-sql-api_2.13 ${spark.version} + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + - \ No newline at end of file + + + + + org.apache.maven.plugins + maven-shade-plugin + + + shade-paimon + package + + shade + + + + + org.apache.paimon:paimon-bundle + org.apache.paimon:paimon-spark-common_${scala.binary.version} + + + + + + + + + diff --git a/paimon-spark/paimon-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim b/paimon-spark/paimon-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim new file mode 100644 index 000000000000..b0df8c67cf9a --- /dev/null +++ b/paimon-spark/paimon-spark4-common/src/main/resources/META-INF/services/org.apache.spark.sql.paimon.shims.SparkShim @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.spark.sql.paimon.shims.Spark4Shim \ No newline at end of file diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark4SqlExtensionsParser.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark4SqlExtensionsParser.scala new file mode 100644 index 000000000000..ef1f5763d27b --- /dev/null +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/catalyst/parser/extensions/PaimonSpark4SqlExtensionsParser.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.catalyst.parser.extensions + +import org.apache.spark.sql.catalyst.parser.{CompoundBody, ParserInterface} +import org.apache.spark.sql.catalyst.parser.extensions.AbstractPaimonSparkSqlExtensionsParser + +class PaimonSpark4SqlExtensionsParser(override val delegate: ParserInterface) + extends AbstractPaimonSparkSqlExtensionsParser(delegate) { + + def parseScript(sqlScriptText: String): CompoundBody = delegate.parseScript(sqlScriptText) +} diff --git a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/spark/paimon/Utils.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala similarity index 72% rename from paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/spark/paimon/Utils.scala rename to paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala index 1a899f500153..be319c0a9c23 100644 --- a/paimon-spark/paimon-spark-3.4/src/test/scala/org/apache/spark/paimon/Utils.scala +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4ArrayData.scala @@ -16,17 +16,14 @@ * limitations under the License. */ -package org.apache.spark.paimon +package org.apache.paimon.spark.data -import org.apache.spark.util.{Utils => SparkUtils} +import org.apache.paimon.types.DataType -import java.io.File +import org.apache.spark.unsafe.types.VariantVal -/** - * A wrapper that some Objects or Classes is limited to access beyond [[org.apache.spark]] package. - */ -object Utils { +class Spark4ArrayData(override val elementType: DataType) extends AbstractSparkArrayData { - def createTempDir: File = SparkUtils.createTempDir() + override def getVariant(ordinal: Int): VariantVal = throw new UnsupportedOperationException } diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala new file mode 100644 index 000000000000..54b0f420ea93 --- /dev/null +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/paimon/spark/data/Spark4InternalRow.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.paimon.spark.data + +import org.apache.paimon.spark.AbstractSparkInternalRow +import org.apache.paimon.types.RowType + +import org.apache.spark.unsafe.types.VariantVal + +class Spark4InternalRow(rowType: RowType) extends AbstractSparkInternalRow(rowType) { + override def getVariant(i: Int): VariantVal = throw new UnsupportedOperationException +} diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala deleted file mode 100644 index ee6c9ad35857..000000000000 --- a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.paimon - -import org.apache.spark.sql.{Column, SparkSession} -import org.apache.spark.sql.catalyst.{InternalRow => SparkInternalRow} -import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.parser.{CompoundBody, ParserInterface => SparkParserInterface} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate => SparkAggregate} -import org.apache.spark.sql.catalyst.util.{ArrayData => SparkArrayData} -import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, Table, TableCatalog => SparkTableCatalog} -import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.internal.{ExpressionUtils => SparkExpressionUtils} -import org.apache.spark.sql.types.StructType -import org.apache.spark.unsafe.types.VariantVal - -import java.util.{Map => JMap} - -/** Shims for Spark 4.x in [[org.apache.spark.sql]]. */ -object shims { - - /** In [[org.apache.spark.sql.catalyst]]. */ - - abstract class ParserInterface extends SparkParserInterface { - val delegate: SparkParserInterface - - def parseScript(sqlScriptText: String): CompoundBody = delegate.parseScript(sqlScriptText) - } - - abstract class ArrayData extends SparkArrayData { - def getVariant(ordinal: Int): VariantVal = throw new UnsupportedOperationException - } - - abstract class InternalRow extends SparkInternalRow { - override def getVariant(i: Int): VariantVal = throw new UnsupportedOperationException - } - - object Aggregate { - def supportsHashAggregate( - aggregateBufferAttributes: Seq[Attribute], - groupingExpression: Seq[Expression]): Boolean = { - SparkAggregate.supportsHashAggregate(aggregateBufferAttributes, groupingExpression) - } - } - - /** In [[org.apache.spark.sql.connector]]. */ - - def createTable( - tableCatalog: SparkTableCatalog, - ident: Identifier, - schema: StructType, - partitions: Array[Transform], - properties: JMap[String, String]): Table = { - tableCatalog.createTable( - ident, - CatalogV2Util.structTypeToV2Columns(schema), - partitions, - properties) - } - - /** In [[org.apache.spark.sql.internal]]. */ - - object ExpressionUtils { - def column(expr: Expression): Column = SparkExpressionUtils.column(expr) - - def convertToExpression(spark: SparkSession, column: Column): Expression = { - spark.expression(column) - } - } -} diff --git a/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala new file mode 100644 index 000000000000..dfec4eb71f4f --- /dev/null +++ b/paimon-spark/paimon-spark4-common/src/main/scala/org/apache/spark/sql/paimon/shims/Spark4Shim.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.paimon.shims + +import org.apache.paimon.spark.catalyst.parser.extensions.PaimonSpark4SqlExtensionsParser +import org.apache.paimon.spark.data.{Spark4ArrayData, Spark4InternalRow, SparkArrayData, SparkInternalRow} +import org.apache.paimon.types.{DataType, RowType} + +import org.apache.spark.sql.{Column, SparkSession} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.plans.logical.Aggregate +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, Identifier, Table, TableCatalog} +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.internal.ExpressionUtils +import org.apache.spark.sql.types.StructType + +import java.util.{Map => JMap} + +class Spark4Shim extends SparkShim { + + override def createSparkParser(delegate: ParserInterface): ParserInterface = { + new PaimonSpark4SqlExtensionsParser(delegate) + } + override def createSparkInternalRow(rowType: RowType): SparkInternalRow = { + new Spark4InternalRow(rowType) + } + + override def createSparkArrayData(elementType: DataType): SparkArrayData = { + new Spark4ArrayData(elementType) + } + + def supportsHashAggregate( + aggregateBufferAttributes: Seq[Attribute], + groupingExpression: Seq[Expression]): Boolean = { + Aggregate.supportsHashAggregate(aggregateBufferAttributes, groupingExpression) + } + + def createTable( + tableCatalog: TableCatalog, + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: JMap[String, String]): Table = { + val columns = CatalogV2Util.structTypeToV2Columns(schema) + tableCatalog.createTable(ident, columns, partitions, properties) + } + + def column(expr: Expression): Column = ExpressionUtils.column(expr) + + def convertToExpression(spark: SparkSession, column: Column): Expression = + spark.expression(column) +} diff --git a/paimon-spark/pom.xml b/paimon-spark/pom.xml index aac73baa5fec..61ecd20a0500 100644 --- a/paimon-spark/pom.xml +++ b/paimon-spark/pom.xml @@ -39,6 +39,7 @@ under the License. paimon-spark-common + paimon-spark-ut @@ -114,6 +115,21 @@ under the License. + + org.apache.spark + spark-hive_${scala.binary.version} + + + log4j + log4j + + + org.slf4j + slf4j-log4j12 + + + + org.apache.paimon paimon-bundle @@ -180,22 +196,6 @@ under the License. - - - org.apache.spark - spark-hive_${scala.binary.version} - test - - - log4j - log4j - - - org.slf4j - slf4j-log4j12 - - - @@ -278,6 +278,12 @@ under the License. aws-java-sdk-core ${aws.version} test + + + com.fasterxml.jackson.core + * + +