Sync back the change of #603 and #604 to branch-1.2 to make 1.2 work with Spark 4.0 (#605)
HeartSaVioR authored Nov 12, 2024
1 parent da7fea1 commit 93f5faf
Showing 1 changed file with 58 additions and 6 deletions.
@@ -16,11 +16,18 @@
 
 package io.delta.sharing.spark.perf
 
+import scala.reflect.runtime.universe.termNames
+import scala.reflect.runtime.universe.typeOf
+import scala.reflect.runtime.universe.typeTag
+
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.CatalogTable
+import org.apache.spark.sql.catalyst.expressions.AttributeReference
 import org.apache.spark.sql.catalyst.expressions.IntegerLiteral
 import org.apache.spark.sql.catalyst.plans.logical.{LocalLimit, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
+import org.apache.spark.sql.sources.BaseRelation
 
 import io.delta.sharing.client.util.ConfUtils
 import io.delta.sharing.spark.RemoteDeltaSnapshotFileIndex
@@ -38,17 +38,17 @@ object DeltaSharingLimitPushDown extends Rule[LogicalPlan] {
       p transform {
         case localLimit @ LocalLimit(
         literalExpr @ IntegerLiteral(limit),
-        l @ LogicalRelation(
+        l @ LogicalRelationWithTable(
           r @ HadoopFsRelation(remoteIndex: RemoteDeltaSnapshotFileIndex, _, _, _, _, _),
-          _, _, _)
+          _)
         ) =>
           if (remoteIndex.limitHint.isEmpty) {
            val spark = SparkSession.active
            LocalLimit(literalExpr,
-              l.copy(
-                relation = r.copy(
-                  location = remoteIndex.copy(limitHint = Some(limit)))(spark)
-              )
+              LogicalRelationShim.copyWithNewRelation(
+                l,
+                r.copy(
+                  location = remoteIndex.copy(limitHint = Some(limit)))(spark))
            )
          } else {
            localLimit
@@ -59,3 +66,48 @@ object DeltaSharingLimitPushDown extends Rule[LogicalPlan] {
     }
   }
 }
+
+/**
+ * Extracts the [[BaseRelation]] and [[CatalogTable]] from a [[LogicalRelation]]. You can also
+ * retrieve the LogicalRelation instance itself, like the following:
+ *
+ *   case l @ LogicalRelationWithTable(relation, catalogTable) => ...
+ *
+ * NOTE: This is copied from the Spark 4.0 codebase - license: Apache-2.0.
+ */
+object LogicalRelationWithTable {
+  def unapply(plan: LogicalRelation): Option[(BaseRelation, Option[CatalogTable])] = {
+    Some((plan.relation, plan.catalogTable))
+  }
+}
+
+/**
+ * This object helps the codebase address the differences among multiple Spark versions.
+ */
+object LogicalRelationShim {
+  /**
+   * This method copies a LogicalRelation instance across Spark versions, for callers that
+   * only want to replace the relation in the LogicalRelation.
+   */
+  def copyWithNewRelation(src: LogicalRelation, newRelation: BaseRelation): LogicalRelation = {
+    // We assume Spark will not change the order of the existing constructor parameters;
+    // even if it did, this is safe as long as the first parameter remains the `relation`.
+    val paramsForPrimaryConstructor = src.productIterator.toArray
+    paramsForPrimaryConstructor(0) = newRelation
+
+    val constructor = typeOf[LogicalRelation]
+      .decl(termNames.CONSTRUCTOR)
+      // Getting all the constructors
+      .alternatives
+      .map(_.asMethod)
+      // Picking the primary constructor
+      .find(_.isPrimaryConstructor)
+      // A class must always have a primary constructor, so this is safe
+      .get
+    val constructorMirror = typeTag[LogicalRelation].mirror
+      .reflectClass(typeOf[LogicalRelation].typeSymbol.asClass)
+      .reflectConstructor(constructor)
+
+    constructorMirror.apply(paramsForPrimaryConstructor: _*).asInstanceOf[LogicalRelation]
+  }
+}
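
For context, a minimal usage sketch of the LogicalRelationWithTable extractor (the describeRelation helper below is illustrative, not part of the commit). Because the pattern binds only the relation and the optional catalog table, the same match compiles regardless of how many constructor parameters the running Spark version's LogicalRelation carries:

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Hypothetical helper: the extractor's unapply takes a LogicalRelation, so the
// compiler inserts the type test and any other plan node falls through.
def describeRelation(plan: LogicalPlan): Option[String] = plan match {
  case LogicalRelationWithTable(relation, catalogTable) =>
    val tableName = catalogTable.map(_.identifier.toString).getOrElse("<no catalog table>")
    Some(s"relation=${relation.getClass.getSimpleName}, table=$tableName")
  case _ => None
}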
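Similarly, a sketch of the shim in use, assuming a HadoopFsRelation-backed plan; newFileIndex is a hypothetical replacement FileIndex standing in for the remoteIndex.copy(limitHint = ...) call in the rule above. The caller never names the version-specific LogicalRelation.copy signature, since the reflective shim re-invokes whichever primary constructor the running Spark version exposes:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, LogicalRelation}

// Hypothetical helper: swap the file index under a LogicalRelation while
// preserving its remaining constructor parameters (output, catalogTable, ...).
def withNewLocation(l: LogicalRelation, newFileIndex: FileIndex): LogicalRelation = l match {
  case LogicalRelationWithTable(r: HadoopFsRelation, _) =>
    val spark = SparkSession.active
    // HadoopFsRelation.copy takes the SparkSession in a second parameter list.
    LogicalRelationShim.copyWithNewRelation(l, r.copy(location = newFileIndex)(spark))
  case _ => l
}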
