[SPARK-50027][SQL] Move Identity Column SQL parsing code to DataTypeAstBuilder

### What changes were proposed in this pull request?

It moves the code that parses Identity Column DDL from `AstBuilder` to `DataTypeAstBuilder`.

### Why are the changes needed?

`DataTypeAstBuilder` is intended to contain the parsing code for column definitions, so the Identity Column parsing logic belongs there.
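
For context, a minimal sketch of the Identity Column DDL that this code parses. The table and column names are hypothetical, and the `START WITH` / `INCREMENT BY` option syntax is assumed from the sequence generator options in the grammar; an actual run also assumes a catalog that supports identity columns:

```scala
// Flavor (1): GENERATED ALWAYS AS IDENTITY forbids explicit inserts into the column.
spark.sql("""
  CREATE TABLE events (
    id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1),
    payload STRING
  )
""")

// Flavor (2): GENERATED BY DEFAULT AS IDENTITY allows explicit inserts.
spark.sql("""
  CREATE TABLE users (
    id BIGINT GENERATED BY DEFAULT AS IDENTITY,
    name STRING
  )
""")
```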

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests cover this change.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#48543 from zhipengmao-db/zhipengmao-db/id-column-refactor.

Authored-by: zhipeng.mao <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
zhipengmao-db authored and MaxGekk committed Oct 20, 2024
1 parent f2f3099 commit 4508911
Showing 2 changed files with 58 additions and 53 deletions.
DataTypeAstBuilder.scala
@@ -23,9 +23,11 @@ import scala.jdk.CollectionConverters._
 import org.antlr.v4.runtime.Token
 import org.antlr.v4.runtime.tree.ParseTree
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.util.CollationFactory
 import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
+import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
 import org.apache.spark.sql.errors.QueryParsingErrors
 import org.apache.spark.sql.internal.SqlApiConf
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, VarcharType, VariantType, YearMonthIntervalType}
@@ -220,4 +222,59 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
   override def visitCollateClause(ctx: CollateClauseContext): String = withOrigin(ctx) {
     ctx.identifier.getText
   }
+
+  /**
+   * Parse and verify IDENTITY column definition.
+   *
+   * @param ctx
+   *   The parser context.
+   * @param dataType
+   *   The data type of column defined as IDENTITY column. Used for verification.
+   * @return
+   *   The IdentityColumnSpec containing start, step and allowExplicitInsert.
+   */
+  protected def visitIdentityColumn(
+      ctx: IdentityColumnContext,
+      dataType: DataType): IdentityColumnSpec = {
+    if (dataType != LongType && dataType != IntegerType) {
+      throw QueryParsingErrors.identityColumnUnsupportedDataType(ctx, dataType.toString)
+    }
+    // We support two flavors of syntax:
+    // (1) GENERATED ALWAYS AS IDENTITY (...)
+    // (2) GENERATED BY DEFAULT AS IDENTITY (...)
+    // (1) forbids explicit inserts, while (2) allows.
+    val allowExplicitInsert = ctx.BY() != null && ctx.DEFAULT() != null
+    val (start, step) = visitIdentityColSpec(ctx.identityColSpec())
+
+    new IdentityColumnSpec(start, step, allowExplicitInsert)
+  }
+
+  override def visitIdentityColSpec(ctx: IdentityColSpecContext): (Long, Long) = {
+    val defaultStart = 1
+    val defaultStep = 1
+    if (ctx == null) {
+      return (defaultStart, defaultStep)
+    }
+    var (start, step): (Option[Long], Option[Long]) = (None, None)
+    ctx.sequenceGeneratorOption().asScala.foreach { option =>
+      if (option.start != null) {
+        if (start.isDefined) {
+          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "START")
+        }
+        start = Some(option.start.getText.toLong)
+      } else if (option.step != null) {
+        if (step.isDefined) {
+          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "STEP")
+        }
+        step = Some(option.step.getText.toLong)
+        if (step.get == 0L) {
+          throw QueryParsingErrors.identityColumnIllegalStep(ctx)
+        }
+      } else {
+        throw SparkException
+          .internalError(s"Invalid identity column sequence generator option: ${option.getText}")
+      }
+    }
+    (start.getOrElse(defaultStart), step.getOrElse(defaultStep))
+  }
 }
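
As a rough illustration (not part of the diff), these are the specs the moved code produces for the two syntax flavors; the constructor arguments mirror the `new IdentityColumnSpec(start, step, allowExplicitInsert)` call in `visitIdentityColumn` above:

```scala
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec

// GENERATED ALWAYS AS IDENTITY
// -> defaults start = 1, step = 1; explicit inserts forbidden.
val alwaysSpec = new IdentityColumnSpec(1L, 1L, false)

// GENERATED BY DEFAULT AS IDENTITY (START WITH 100 INCREMENT BY 5)
// -> start = 100, step = 5; explicit inserts allowed.
val byDefaultSpec = new IdentityColumnSpec(100L, 5L, true)
```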
AstBuilder.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, IdentityColumnSpec, SupportsNamespaces, TableCatalog, TableWritePrivilege}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog, TableWritePrivilege}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
@@ -3734,58 +3734,6 @@ class AstBuilder extends DataTypeAstBuilder
     getDefaultExpression(ctx.expression(), "GENERATED").originalSQL
   }
 
-  /**
-   * Parse and verify IDENTITY column definition.
-   *
-   * @param ctx The parser context.
-   * @param dataType The data type of column defined as IDENTITY column. Used for verification.
-   * @return Tuple containing start, step and allowExplicitInsert.
-   */
-  protected def visitIdentityColumn(
-      ctx: IdentityColumnContext,
-      dataType: DataType): IdentityColumnSpec = {
-    if (dataType != LongType && dataType != IntegerType) {
-      throw QueryParsingErrors.identityColumnUnsupportedDataType(ctx, dataType.toString)
-    }
-    // We support two flavors of syntax:
-    // (1) GENERATED ALWAYS AS IDENTITY (...)
-    // (2) GENERATED BY DEFAULT AS IDENTITY (...)
-    // (1) forbids explicit inserts, while (2) allows.
-    val allowExplicitInsert = ctx.BY() != null && ctx.DEFAULT() != null
-    val (start, step) = visitIdentityColSpec(ctx.identityColSpec())
-
-    new IdentityColumnSpec(start, step, allowExplicitInsert)
-  }
-
-  override def visitIdentityColSpec(ctx: IdentityColSpecContext): (Long, Long) = {
-    val defaultStart = 1
-    val defaultStep = 1
-    if (ctx == null) {
-      return (defaultStart, defaultStep)
-    }
-    var (start, step): (Option[Long], Option[Long]) = (None, None)
-    ctx.sequenceGeneratorOption().asScala.foreach { option =>
-      if (option.start != null) {
-        if (start.isDefined) {
-          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "START")
-        }
-        start = Some(option.start.getText.toLong)
-      } else if (option.step != null) {
-        if (step.isDefined) {
-          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "STEP")
-        }
-        step = Some(option.step.getText.toLong)
-        if (step.get == 0L) {
-          throw QueryParsingErrors.identityColumnIllegalStep(ctx)
-        }
-      } else {
-        throw SparkException
-          .internalError(s"Invalid identity column sequence generator option: ${option.getText}")
-      }
-    }
-    (start.getOrElse(defaultStart), step.getOrElse(defaultStep))
-  }
-
   /**
    * Create an optional comment string.
    */
