[SPARK-50027][SQL] Move Identity Column SQL parsing code to DataTypeAstBuilder

### What changes were proposed in this pull request?

It moves the code that parses Identity Column DDL from `AstBuilder` to `DataTypeAstBuilder`.

### Why are the changes needed?

`DataTypeAstBuilder` is intended to contain the parsing code for column definitions, so the Identity Column parsing logic belongs there.
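
For context, a minimal sketch of the Identity Column DDL that this code parses. The table and column names are hypothetical, and the `START WITH` / `INCREMENT BY` option syntax is assumed from the sequence generator options in the grammar; an actual run also assumes a catalog that supports identity columns:

```scala
// Flavor (1): GENERATED ALWAYS AS IDENTITY forbids explicit inserts into the column.
spark.sql("""
  CREATE TABLE events (
    id BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 1 INCREMENT BY 1),
    payload STRING
  )
""")

// Flavor (2): GENERATED BY DEFAULT AS IDENTITY allows explicit inserts.
spark.sql("""
  CREATE TABLE users (
    id BIGINT GENERATED BY DEFAULT AS IDENTITY,
    name STRING
  )
""")
```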

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Existing tests cover this change.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#48543 from zhipengmao-db/zhipengmao-db/id-column-refactor.

Authored-by: zhipeng.mao <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
zhipengmao-db authored and MaxGekk committed Oct 20, 2024
1 parent f2f3099 commit 4508911
Showing 2 changed files with 58 additions and 53 deletions.
DataTypeAstBuilder.scala
@@ -23,9 +23,11 @@ import scala.jdk.CollectionConverters._
 import org.antlr.v4.runtime.Token
 import org.antlr.v4.runtime.tree.ParseTree
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.catalyst.parser.SqlBaseParser._
 import org.apache.spark.sql.catalyst.util.CollationFactory
 import org.apache.spark.sql.catalyst.util.SparkParserUtils.{string, withOrigin}
+import org.apache.spark.sql.connector.catalog.IdentityColumnSpec
 import org.apache.spark.sql.errors.QueryParsingErrors
 import org.apache.spark.sql.internal.SqlApiConf
 import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, CalendarIntervalType, CharType, DataType, DateType, DayTimeIntervalType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, MetadataBuilder, NullType, ShortType, StringType, StructField, StructType, TimestampNTZType, TimestampType, VarcharType, VariantType, YearMonthIntervalType}
@@ -220,4 +222,59 @@ class DataTypeAstBuilder extends SqlBaseParserBaseVisitor[AnyRef] {
   override def visitCollateClause(ctx: CollateClauseContext): String = withOrigin(ctx) {
     ctx.identifier.getText
   }
+
+  /**
+   * Parse and verify IDENTITY column definition.
+   *
+   * @param ctx
+   *   The parser context.
+   * @param dataType
+   *   The data type of column defined as IDENTITY column. Used for verification.
+   * @return
+   *   The IdentityColumnSpec containing start, step and allowExplicitInsert.
+   */
+  protected def visitIdentityColumn(
+      ctx: IdentityColumnContext,
+      dataType: DataType): IdentityColumnSpec = {
+    if (dataType != LongType && dataType != IntegerType) {
+      throw QueryParsingErrors.identityColumnUnsupportedDataType(ctx, dataType.toString)
+    }
+    // We support two flavors of syntax:
+    // (1) GENERATED ALWAYS AS IDENTITY (...)
+    // (2) GENERATED BY DEFAULT AS IDENTITY (...)
+    // (1) forbids explicit inserts, while (2) allows.
+    val allowExplicitInsert = ctx.BY() != null && ctx.DEFAULT() != null
+    val (start, step) = visitIdentityColSpec(ctx.identityColSpec())
+
+    new IdentityColumnSpec(start, step, allowExplicitInsert)
+  }
+
+  override def visitIdentityColSpec(ctx: IdentityColSpecContext): (Long, Long) = {
+    val defaultStart = 1
+    val defaultStep = 1
+    if (ctx == null) {
+      return (defaultStart, defaultStep)
+    }
+    var (start, step): (Option[Long], Option[Long]) = (None, None)
+    ctx.sequenceGeneratorOption().asScala.foreach { option =>
+      if (option.start != null) {
+        if (start.isDefined) {
+          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "START")
+        }
+        start = Some(option.start.getText.toLong)
+      } else if (option.step != null) {
+        if (step.isDefined) {
+          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "STEP")
+        }
+        step = Some(option.step.getText.toLong)
+        if (step.get == 0L) {
+          throw QueryParsingErrors.identityColumnIllegalStep(ctx)
+        }
+      } else {
+        throw SparkException
+          .internalError(s"Invalid identity column sequence generator option: ${option.getText}")
+      }
+    }
+    (start.getOrElse(defaultStart), step.getOrElse(defaultStep))
+  }
 }
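
As a rough illustration (not part of the diff), these are the specs the moved code produces for the two syntax flavors; the constructor arguments mirror the `new IdentityColumnSpec(start, step, allowExplicitInsert)` call in `visitIdentityColumn` above:

```scala
import org.apache.spark.sql.connector.catalog.IdentityColumnSpec

// GENERATED ALWAYS AS IDENTITY
// -> defaults start = 1, step = 1; explicit inserts forbidden.
val alwaysSpec = new IdentityColumnSpec(1L, 1L, false)

// GENERATED BY DEFAULT AS IDENTITY (START WITH 100 INCREMENT BY 5)
// -> start = 100, step = 5; explicit inserts allowed.
val byDefaultSpec = new IdentityColumnSpec(100L, 5L, true)
```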
AstBuilder.scala
@@ -45,7 +45,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.PARAMETER
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, IdentityColumnSpec, SupportsNamespaces, TableCatalog, TableWritePrivilege}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog, TableWritePrivilege}
 import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
 import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
 import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
@@ -3734,58 +3734,6 @@ class AstBuilder extends DataTypeAstBuilder
     getDefaultExpression(ctx.expression(), "GENERATED").originalSQL
   }
 
-  /**
-   * Parse and verify IDENTITY column definition.
-   *
-   * @param ctx The parser context.
-   * @param dataType The data type of column defined as IDENTITY column. Used for verification.
-   * @return Tuple containing start, step and allowExplicitInsert.
-   */
-  protected def visitIdentityColumn(
-      ctx: IdentityColumnContext,
-      dataType: DataType): IdentityColumnSpec = {
-    if (dataType != LongType && dataType != IntegerType) {
-      throw QueryParsingErrors.identityColumnUnsupportedDataType(ctx, dataType.toString)
-    }
-    // We support two flavors of syntax:
-    // (1) GENERATED ALWAYS AS IDENTITY (...)
-    // (2) GENERATED BY DEFAULT AS IDENTITY (...)
-    // (1) forbids explicit inserts, while (2) allows.
-    val allowExplicitInsert = ctx.BY() != null && ctx.DEFAULT() != null
-    val (start, step) = visitIdentityColSpec(ctx.identityColSpec())
-
-    new IdentityColumnSpec(start, step, allowExplicitInsert)
-  }
-
-  override def visitIdentityColSpec(ctx: IdentityColSpecContext): (Long, Long) = {
-    val defaultStart = 1
-    val defaultStep = 1
-    if (ctx == null) {
-      return (defaultStart, defaultStep)
-    }
-    var (start, step): (Option[Long], Option[Long]) = (None, None)
-    ctx.sequenceGeneratorOption().asScala.foreach { option =>
-      if (option.start != null) {
-        if (start.isDefined) {
-          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "START")
-        }
-        start = Some(option.start.getText.toLong)
-      } else if (option.step != null) {
-        if (step.isDefined) {
-          throw QueryParsingErrors.identityColumnDuplicatedSequenceGeneratorOption(ctx, "STEP")
-        }
-        step = Some(option.step.getText.toLong)
-        if (step.get == 0L) {
-          throw QueryParsingErrors.identityColumnIllegalStep(ctx)
-        }
-      } else {
-        throw SparkException
-          .internalError(s"Invalid identity column sequence generator option: ${option.getText}")
-      }
-    }
-    (start.getOrElse(defaultStart), step.getOrElse(defaultStep))
-  }
-
   /**
    * Create an optional comment string.
    */
