[spark] Support UPDATE command (apache#2268)
Showing 12 changed files with 619 additions and 105 deletions.
paimon-spark/paimon-spark-common/src/main/scala/org/apache/spark/sql/RowLevelOp.scala (37 additions & 0 deletions)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql

import org.apache.paimon.CoreOptions.MergeEngine

sealed trait RowLevelOp {
  val supportedMergeEngine: Seq[MergeEngine]
}

case object Delete extends RowLevelOp {
  override def toString: String = "delete"

  override val supportedMergeEngine: Seq[MergeEngine] = Seq(MergeEngine.DEDUPLICATE)
}

case object Update extends RowLevelOp {
  override def toString: String = "update"

  override val supportedMergeEngine: Seq[MergeEngine] =
    Seq(MergeEngine.DEDUPLICATE, MergeEngine.PARTIAL_UPDATE)
}
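
Taken together, the sealed trait and the two case objects give the rewrite rule one question to ask: does this table's merge engine support the requested row-level operation? A minimal sketch of such a guard, mirroring the validation done later in RewriteRowLevelCommands (the requireSupported helper is illustrative, not part of the commit):

import org.apache.paimon.CoreOptions.MergeEngine
import org.apache.spark.sql.{Delete, RowLevelOp, Update}

// Hypothetical guard: reject a row-level operation up front when the
// table's merge engine cannot honor it.
def requireSupported(op: RowLevelOp, engine: MergeEngine): Unit = {
  if (!op.supportedMergeEngine.contains(engine)) {
    throw new UnsupportedOperationException(s"merge engine $engine cannot support $op")
  }
}

requireSupported(Update, MergeEngine.PARTIAL_UPDATE) // fine: update supports partial-update
requireSupported(Delete, MergeEngine.PARTIAL_UPDATE) // throws: delete requires deduplicate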
...mon/src/main/scala/org/apache/spark/sql/catalyst/analysis/AssignmentAlignmentHelper.scala (119 additions & 0 deletions)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.SQLConfHelper
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, CreateNamedStruct, Expression, GetStructField, Literal, NamedExpression}
import org.apache.spark.sql.catalyst.plans.logical.Assignment
import org.apache.spark.sql.types.StructType

trait AssignmentAlignmentHelper extends SQLConfHelper {

  private lazy val resolver = conf.resolver

  /**
   * @param ref
   *   attribute reference seq, e.g. a => Seq["a"], s.c1 => Seq["s", "c1"]
   * @param expr
   *   update expression
   */
  private case class AttrUpdate(ref: Seq[String], expr: Expression)

  /**
   * Align update assignments to the given attrs; only PrimitiveType and StructType are supported.
   * For example, if attrs are [a int, b int, s struct(c1 int, c2 int)] and update assignments are
   * [a = 1, s.c1 = 2], this returns [1, b, struct(2, c2)].
   * @param attrs
   *   target attrs
   * @param assignments
   *   update assignments
   * @return
   *   aligned update expressions
   */
  protected def alignUpdateAssignments(
      attrs: Seq[Attribute],
      assignments: Seq[Assignment]): Seq[Expression] = {
    val attrUpdates = assignments.map(a => AttrUpdate(toRefSeq(a.key), a.value))
    recursiveAlignUpdates(attrs, attrUpdates)
  }

  def toRefSeq(expr: Expression): Seq[String] = expr match {
    case attr: Attribute =>
      Seq(attr.name)
    case GetStructField(child, _, Some(name)) =>
      toRefSeq(child) :+ name
    case other =>
      throw new UnsupportedOperationException(
        s"Unsupported update expression: $other, only updates with PrimitiveType and StructType are supported.")
  }

  private def recursiveAlignUpdates(
      targetAttrs: Seq[NamedExpression],
      updates: Seq[AttrUpdate],
      namePrefix: Seq[String] = Nil): Seq[Expression] = {

    // build an aligned update expression for each target attr
    targetAttrs.map {
      targetAttr =>
        val headMatchedUpdates = updates.filter(u => resolver(u.ref.head, targetAttr.name))
        if (headMatchedUpdates.isEmpty) {
          // when there is no matching update, return the attr as is
          targetAttr
        } else {
          val exactMatchedUpdate = headMatchedUpdates.find(_.ref.size == 1)
          if (exactMatchedUpdate.isDefined) {
            if (headMatchedUpdates.size == 1) {
              // when an exact match (no nested fields) occurs, it must be the only match; return its expr
              exactMatchedUpdate.get.expr
            } else {
              // otherwise, there must be conflicting updates, for example:
              // - updating the same attr multiple times
              // - updating a struct attr and its fields at the same time (e.g. s and s.c1)
              val conflictingAttrNames =
                headMatchedUpdates.map(u => (namePrefix ++ u.ref).mkString(".")).distinct
              throw new UnsupportedOperationException(
                s"Conflicting updates on attrs: ${conflictingAttrNames.mkString(", ")}"
              )
            }
          } else {
            targetAttr.dataType match {
              case StructType(fields) =>
                val fieldExprs = fields.zipWithIndex.map {
                  case (field, ordinal) =>
                    Alias(GetStructField(targetAttr, ordinal, Some(field.name)), field.name)()
                }
                // drop the matched head so nested refs align with the struct's own fields
                val newUpdates = headMatchedUpdates.map(u => u.copy(ref = u.ref.tail))
                // process StructType's nested fields recursively
                val updatedFieldExprs =
                  recursiveAlignUpdates(fieldExprs, newUpdates, namePrefix :+ targetAttr.name)

                // build the updated struct expression
                CreateNamedStruct(fields.zip(updatedFieldExprs).flatMap {
                  case (field, expr) =>
                    Seq(Literal(field.name), expr)
                })
              case _ =>
                // unreachable: toRefSeq only produces nested refs for struct fields
                throw new UnsupportedOperationException(
                  s"Unexpected nested update on type: ${targetAttr.dataType}")
            }
          }
        }
    }
  }

}
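
The Scaladoc example is worth tracing: with attrs [a int, b int, s struct(c1 int, c2 int)] and assignments [a = 1, s.c1 = 2], the recursion takes 1 for a (exact match), keeps b untouched, and descends into s, where c1 becomes 2 and c2 is read back from the original struct, yielding [1, b, struct(2, s.c2)]. A self-contained toy model of the same algorithm, useful for experimenting outside Catalyst (all names and types below are illustrative, not from the commit; update expressions are plain strings):

sealed trait Field { def name: String }
case class Leaf(name: String) extends Field
case class Struct(name: String, fields: Seq[Field]) extends Field

case class ToyUpdate(ref: Seq[String], expr: String)

def align(attrs: Seq[Field], updates: Seq[ToyUpdate], prefix: Seq[String] = Nil): Seq[String] =
  attrs.map { attr =>
    val matched = updates.filter(_.ref.head == attr.name)
    if (matched.isEmpty) {
      (prefix :+ attr.name).mkString(".") // no update: keep the column as is
    } else if (matched.exists(_.ref.size == 1)) {
      if (matched.size == 1) matched.head.expr // exact match: take the update's value
      else sys.error(s"Conflicting updates on ${(prefix :+ attr.name).mkString(".")}")
    } else {
      attr match {
        case Struct(name, fields) => // nested update: recurse with the head dropped
          align(fields, matched.map(u => u.copy(ref = u.ref.tail)), prefix :+ name)
            .mkString("struct(", ", ", ")")
        case Leaf(_) => sys.error("nested update on a primitive field")
      }
    }
  }

val attrs = Seq(Leaf("a"), Leaf("b"), Struct("s", Seq(Leaf("c1"), Leaf("c2"))))
align(attrs, Seq(ToyUpdate(Seq("a"), "1"), ToyUpdate(Seq("s", "c1"), "2")))
// => Seq("1", "b", "struct(2, s.c2)")

The conflict branch fires just as in the helper: passing both ToyUpdate(Seq("s"), ...) and ToyUpdate(Seq("s", "c1"), ...) raises "Conflicting updates on s".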
...ommon/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommands.scala (117 additions & 0 deletions)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.spark.sql.catalyst.analysis

import org.apache.paimon.CoreOptions.MERGE_ENGINE
import org.apache.paimon.options.Options
import org.apache.paimon.spark.SparkTable
import org.apache.paimon.table.Table

import org.apache.spark.sql.{AnalysisException, Delete, RowLevelOp, Update}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, PredicateHelper, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.SupportsDelete
import org.apache.spark.sql.execution.datasources.DataSourceStrategy
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation

import java.util

object RewriteRowLevelCommands extends Rule[LogicalPlan] with PredicateHelper {

  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperators {
    case d @ DeleteFromTable(ResolvesToPaimonTable(table), condition) =>
      validateRowLevelOp(Delete, table.getTable, Option.empty)
      if (canDeleteWhere(d, table, condition)) {
        d
      } else {
        DeleteFromPaimonTableCommand(d)
      }

    case u @ UpdateTable(ResolvesToPaimonTable(table), assignments, _) =>
      validateRowLevelOp(Update, table.getTable, Option.apply(assignments))
      UpdatePaimonTableCommand(u)
  }

  private object ResolvesToPaimonTable {
    def unapply(plan: LogicalPlan): Option[SparkTable] =
      EliminateSubqueryAliases(plan) match {
        case DataSourceV2Relation(table: SparkTable, _, _, _, _) => Some(table)
        case _ => None
      }
  }

  private def validateRowLevelOp(
      op: RowLevelOp,
      table: Table,
      assignments: Option[Seq[Assignment]]): Unit = {
    val options = Options.fromMap(table.options)
    val primaryKeys = table.primaryKeys()
    if (primaryKeys.isEmpty) {
      throw new UnsupportedOperationException(
        s"table ${table.getClass.getName} cannot support $op, because there is no primary key.")
    }

    if (op.equals(Update) && isPrimaryKeyUpdate(primaryKeys, assignments.get)) {
      throw new UnsupportedOperationException(s"$op to primary keys is not supported.")
    }

    val mergeEngine = options.get(MERGE_ENGINE)
    if (!op.supportedMergeEngine.contains(mergeEngine)) {
      throw new UnsupportedOperationException(
        s"merge engine $mergeEngine cannot support $op, currently only ${op.supportedMergeEngine
            .mkString(", ")} can support $op.")
    }
  }

  private def canDeleteWhere(
      d: DeleteFromTable,
      table: SparkTable,
      condition: Expression): Boolean = {
    table match {
      case t: SupportsDelete if !SubqueryExpression.hasSubquery(condition) =>
        // fail if any filter cannot be converted;
        // correctness depends on removing all matching data.
        val filters = DataSourceStrategy
          .normalizeExprs(Seq(condition), d.output)
          .flatMap(splitConjunctivePredicates(_).map {
            f =>
              DataSourceStrategy
                .translateFilter(f, supportNestedPredicatePushdown = true)
                .getOrElse(throw new AnalysisException(
                  s"Exec update failed: cannot translate expression to source filter: $f"))
          })
          .toArray
        t.canDeleteWhere(filters)
      case _ => false
    }
  }

  private def isPrimaryKeyUpdate(
      primaryKeys: util.List[String],
      assignments: Seq[Assignment]): Boolean = {
    assignments.exists {
      a =>
        a.key match {
          case attr: Attribute => primaryKeys.contains(attr.name)
          case _ => false
        }
    }
  }

}
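
With the rule in place, a SQL UPDATE on a Paimon primary-key table is rewritten into UpdatePaimonTableCommand, and nested assignments flow through AssignmentAlignmentHelper. A hedged usage sketch (table name, schema, and session setup are assumed, not taken from the commit):

// Assumes a SparkSession with the Paimon catalog configured.
spark.sql("""
  CREATE TABLE t (id INT, name STRING, s STRUCT<c1: INT, c2: INT>)
  TBLPROPERTIES ('primary-key' = 'id')
""")
spark.sql("INSERT INTO t VALUES (1, 'a', struct(10, 20))")

// Rewritten by RewriteRowLevelCommands into UpdatePaimonTableCommand;
// the nested assignment s.c1 is aligned against the full struct.
spark.sql("UPDATE t SET name = 'b', s.c1 = 11 WHERE id = 1")

// Rejected by validateRowLevelOp: updating a primary key is not supported.
// spark.sql("UPDATE t SET id = 2 WHERE id = 1")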