[spark] check column nullability when write #3842

Merged · 3 commits · Jul 30, 2024
Changes from 1 commit
@@ -26,6 +26,24 @@
</tr>
</thead>
<tbody>
<tr>
<td><h5>write.merge-schema</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>If true, merge the data schema and the table schema automatically before writing data.</td>
</tr>
<tr>
<td><h5>write.merge-schema.explicit-cast</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>If true, allow merging data types if the two types meet the rules for explicit casting.</td>
</tr>
<tr>
<td><h5>write.check.ignoreNullability</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>If true, skip checking whether the nullability of the incoming data is compatible with the table's.</td>
</tr>
<tr>
<td><h5>read.changelog</h5></td>
<td style="word-wrap: break-word;">false</td>
@@ -62,17 +80,5 @@
<td>Long</td>
<td>The minimum number of rows returned in a single batch, which is used to create MinRowsReadLimit together with read.stream.maxTriggerDelayMs.</td>
</tr>
<tr>
<td><h5>write.merge-schema</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>If true, merge the data schema and the table schema automatically before writing data.</td>
</tr>
<tr>
<td><h5>write.merge-schema.explicit-cast</h5></td>
<td style="word-wrap: break-word;">false</td>
<td>Boolean</td>
<td>If true, allow merging data types if the two types meet the rules for explicit casting.</td>
</tr>
</tbody>
</table>
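
As a quick usage sketch (mirroring the test added later in this PR; it assumes a Paimon catalog is already configured as the session catalog), the option is a table property, so it can be set at table-creation time and then applies to subsequent writes:

```scala
// Sketch: enable the new option as a table property at creation time.
spark.sql("""
  CREATE TABLE T (id INT NOT NULL, ts TIMESTAMP NOT NULL)
  TBLPROPERTIES ('write.check.ignoreNullability' = 'true')
""")

// Subsequent writes to T skip the nullability compatibility check.
spark.sql("INSERT INTO T SELECT 1, TO_TIMESTAMP('2024-07-01 16:00:00')")
```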
@@ -39,6 +39,13 @@ public class SparkConnectorOptions {
.withDescription(
"If true, allow to merge data types if the two types meet the rules for explicit casting.");

public static final ConfigOption<Boolean> IGNORE_NULLABLE_CHECK =
key("write.check.ignoreNullability")
.booleanType()
.defaultValue(false)
Contributor:

Why not just make it true by default?

Contributor (Author):

I want to have this check on by default to guarantee nullability consistency.

Also, Delta has a constraint feature (e.g. NOT NULL) that can check incoming data at write time. Once Paimon has a similar feature, we can remove this config and stop checking nullability here.

Contributor:

Does this option affect compilation? I'm thinking about checking for nulls directly at runtime; checking at compile time is too difficult to use.

.withDescription(
"If true, skip the check whether the nullability of incoming data is compatible with the table's.");

public static final ConfigOption<Integer> MAX_FILES_PER_TRIGGER =
key("read.stream.maxFilesPerTrigger")
.intType()
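
To make the default behavior concrete, here is a hedged sketch (the table names are illustrative, not from the PR, and a Paimon catalog is assumed; the error message is the one thrown by PaimonAnalysis below). The check runs when the write plan is analyzed, not at runtime:

```scala
// Sketch: S has nullable columns, T declares NOT NULL columns.
spark.sql("CREATE TABLE S (id INT, ts TIMESTAMP)")
spark.sql("CREATE TABLE T (id INT NOT NULL, ts TIMESTAMP NOT NULL)")

// With the default (write.check.ignoreNullability = false) this is rejected during
// analysis with "Cannot write nullable values to non-null column".
spark.sql("INSERT INTO T SELECT * FROM S")

// Opting out per table lets the same write pass the analysis-time check.
spark.sql("ALTER TABLE T SET TBLPROPERTIES ('write.check.ignoreNullability' = 'true')")
spark.sql("INSERT INTO T SELECT * FROM S")
```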
@@ -18,15 +18,15 @@

package org.apache.paimon.spark.catalyst.analysis

import org.apache.paimon.spark.SparkTable
import org.apache.paimon.spark.{SparkConnectorOptions, SparkTable}
import org.apache.paimon.spark.catalyst.Compatibility
import org.apache.paimon.spark.catalyst.analysis.PaimonRelation.isPaimonTable
import org.apache.paimon.spark.commands.{PaimonAnalyzeTableColumnCommand, PaimonDynamicPartitionOverwriteCommand, PaimonTruncateTableCommand}
import org.apache.paimon.table.FileStoreTable

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.ResolvedTable
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
@@ -42,7 +42,8 @@ class PaimonAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
if !schemaCompatible(
a.query.output.toStructType,
table.output.toStructType,
paimonTable.partitionKeys().asScala) =>
paimonTable.partitionKeys().asScala,
ignoreNullabilityCheck(paimonTable)) =>
val newQuery = resolveQueryColumns(a.query, table.output)
if (newQuery != a.query) {
Compatibility.withNewQuery(a, newQuery)
@@ -58,9 +59,10 @@ class PaimonAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
}

private def schemaCompatible(
tableSchema: StructType,
dataSchema: StructType,
tableSchema: StructType,
partitionCols: Seq[String],
ignoreNullabilityCheck: Boolean = false,
parent: Array[String] = Array.empty): Boolean = {

if (tableSchema.size != dataSchema.size) {
@@ -70,7 +72,7 @@ class PaimonAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
def dataTypeCompatible(column: String, dt1: DataType, dt2: DataType): Boolean = {
(dt1, dt2) match {
case (s1: StructType, s2: StructType) =>
schemaCompatible(s1, s2, partitionCols, Array(column))
schemaCompatible(s1, s2, partitionCols, ignoreNullabilityCheck, Array(column))
case (a1: ArrayType, a2: ArrayType) =>
dataTypeCompatible(column, a1.elementType, a2.elementType)
case (m1: MapType, m2: MapType) =>
@@ -82,9 +84,11 @@ class PaimonAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
}
}

tableSchema.zip(dataSchema).forall {
dataSchema.zip(tableSchema).forall {
case (f1, f2) =>
checkNullability(f1, f2, partitionCols, parent)
if (!ignoreNullabilityCheck) {
checkNullability(f1, f2, partitionCols, parent)
}
f1.name == f2.name && dataTypeCompatible(f1.name, f1.dataType, f2.dataType)
}
}
@@ -126,6 +130,15 @@ class PaimonAnalysis(session: SparkSession) extends Rule[LogicalPlan] {
throw new RuntimeException("Cannot write nullable values to non-null column")
}
}

private def ignoreNullabilityCheck(paimonTable: FileStoreTable): Boolean = {
paimonTable
.options()
.asScala
.get(SparkConnectorOptions.IGNORE_NULLABLE_CHECK.key)
.map(_.toBoolean)
.getOrElse(SparkConnectorOptions.IGNORE_NULLABLE_CHECK.defaultValue)
}
}

case class PaimonPostHocResolutionRules(session: SparkSession) extends Rule[LogicalPlan] {
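
For reference, a minimal self-contained sketch of the rule this diff enforces, written against plain Spark StructTypes rather than the actual PaimonAnalysis helpers (exempting partition columns is an assumption here; in the real code that decision lives in the existing checkNullability method, which throws instead of returning false):

```scala
import org.apache.spark.sql.types._

// Simplified sketch: a data field must not be nullable when the corresponding table
// field is NOT NULL, unless the column is a partition key (assumption) or the
// ignoreNullabilityCheck flag is set.
def nullabilityCompatible(
    dataSchema: StructType,
    tableSchema: StructType,
    partitionCols: Seq[String],
    ignoreNullabilityCheck: Boolean): Boolean = {
  dataSchema.zip(tableSchema).forall { case (dataField, tableField) =>
    ignoreNullabilityCheck ||
      partitionCols.contains(tableField.name) ||
      !(dataField.nullable && !tableField.nullable)
  }
}

val tableSchema = StructType(Seq(
  StructField("id", IntegerType, nullable = false),
  StructField("ts", TimestampType, nullable = false)))
val dataSchema = StructType(Seq(
  StructField("id", IntegerType, nullable = true),
  StructField("ts", TimestampType, nullable = true)))

assert(!nullabilityCompatible(dataSchema, tableSchema, Seq.empty, ignoreNullabilityCheck = false))
assert(nullabilityCompatible(dataSchema, tableSchema, Seq.empty, ignoreNullabilityCheck = true))
```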
@@ -63,6 +63,22 @@ abstract class DDLTestBase extends PaimonSparkTestBase {
}
}

test("Paimon DDL: write.check.ignoreNullability") {
withTable("T") {
sql("""
|CREATE TABLE T (id INT NOT NULL, ts TIMESTAMP NOT NULL)
|TBLPROPERTIES ("write.check.ignoreNullability" = "true")
|""".stripMargin)

sql("INSERT INTO T SELECT 1, TO_TIMESTAMP('2024-07-01 16:00:00')")

checkAnswer(
sql("SELECT * FROM T ORDER BY id"),
Row(1, Timestamp.valueOf("2024-07-01 16:00:00")) :: Nil
)
}
}

test("Paimon DDL: Create Table As Select") {
withTable("source", "t1", "t2") {
Seq((1L, "x1", "2023"), (2L, "x2", "2023"))
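
A companion test for the default (check enabled) path could look roughly like the sketch below. This is not part of the PR: the test name, source table, and the exact exception type caught by intercept are assumptions; only the error message matches the one thrown in PaimonAnalysis.

```scala
// Sketch of a negative test for the default behavior (check not skipped).
test("Paimon DDL: nullability check enabled by default") {
  withTable("S", "T") {
    sql("CREATE TABLE S (id INT, ts TIMESTAMP)")
    sql("CREATE TABLE T (id INT NOT NULL, ts TIMESTAMP NOT NULL)")

    // Inserting from a nullable source into NOT NULL columns should be rejected.
    val e = intercept[RuntimeException] {
      sql("INSERT INTO T SELECT * FROM S")
    }
    assert(e.getMessage.contains("Cannot write nullable values to non-null column"))
  }
}
```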