Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[spark] Add paimon prefix to objects under spark sql package #3170

Merged
merged 2 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.{BinaryType, BooleanType, DataType, DateType,

import java.net.URI

object StatsUtils {
object PaimonStatsUtils {

def calculateTotalSize(
sessionState: SessionState,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.spark.sql.types.{BinaryType, BooleanType, DataType, DateType,

import java.net.URI

object StatsUtils {
object PaimonStatsUtils {
def calculateTotalSize(
sessionState: SessionState,
tableName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
import org.apache.spark.sql.catalyst.catalog.SessionCatalog;
import org.apache.spark.sql.connector.catalog.CatalogExtension;
import org.apache.spark.sql.connector.catalog.CatalogPlugin;
import org.apache.spark.sql.connector.catalog.CatalogUtils;
import org.apache.spark.sql.connector.catalog.FunctionCatalog;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.NamespaceChange;
import org.apache.spark.sql.connector.catalog.PaimonCatalogUtils;
import org.apache.spark.sql.connector.catalog.SupportsNamespaces;
import org.apache.spark.sql.connector.catalog.Table;
import org.apache.spark.sql.connector.catalog.TableCatalog;
Expand Down Expand Up @@ -260,7 +260,7 @@ public final void initialize(String name, CaseInsensitiveStringMap options) {
hadoopConf.set(entry.getKey(), entry.getValue());
}
ExternalCatalog externalCatalog =
CatalogUtils.buildExternalCatalog(sparkConf, hadoopConf);
PaimonCatalogUtils.buildExternalCatalog(sparkConf, hadoopConf);
this.sessionCatalog = new V2SessionCatalog(new SessionCatalog(externalCatalog));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.PaimonUtils;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.Utils;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.Expression;
import org.apache.spark.sql.catalyst.plans.logical.Filter;
Expand Down Expand Up @@ -363,7 +363,7 @@ private void sortCompactUnAwareBucketTable(
LogicalPlan relation,
@Nullable Expression condition) {
Dataset<Row> row =
Utils.createDataset(
PaimonUtils.createDataset(
spark(), condition == null ? relation : new Filter(condition, relation));
new WriteIntoPaimonTable(
table,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

package org.apache.paimon.spark

import org.apache.spark.sql.Utils
import org.apache.spark.sql.PaimonUtils
import org.apache.spark.sql.connector.metric.{CustomAvgMetric, CustomSumMetric, CustomTaskMetric}

import java.text.DecimalFormat
Expand Down Expand Up @@ -85,7 +85,7 @@ case class PaimonSplitSizeMetric() extends PaimonSumMetric {
override def description(): String = "size of splits read"

override def aggregateTaskMetrics(taskMetrics: Array[Long]): String = {
Utils.bytesToString(aggregateTaskMetrics0(taskMetrics))
PaimonUtils.bytesToString(aggregateTaskMetrics0(taskMetrics))
}
}

Expand Down Expand Up @@ -119,7 +119,7 @@ case class PaimonAvgSplitSizeMetric() extends PaimonAvgMetric {

override def aggregateTaskMetrics(taskMetrics: Array[Long]): String = {
val average = aggregateTaskMetrics0(taskMetrics).round
Utils.bytesToString(average)
PaimonUtils.bytesToString(average)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.paimon.fs.Path
import org.apache.paimon.reader.{FileRecordIterator, RecordReader}
import org.apache.paimon.utils.CloseableIterator

import org.apache.spark.sql.Utils
import org.apache.spark.sql.PaimonUtils

import java.io.IOException

Expand Down Expand Up @@ -61,7 +61,7 @@ case class PaimonRecordReaderIterator(reader: RecordReader[PaimonInternalRow])
}
} finally {
reader.close()
Utils.unsetInputFileName()
PaimonUtils.unsetInputFileName()
}
}

Expand All @@ -70,7 +70,7 @@ case class PaimonRecordReaderIterator(reader: RecordReader[PaimonInternalRow])
iter match {
case fileRecordIterator: FileRecordIterator[_] =>
if (lastFilePath != fileRecordIterator.filePath()) {
Utils.setInputFileName(fileRecordIterator.filePath().toUri.toString)
PaimonUtils.setInputFileName(fileRecordIterator.filePath().toUri.toString)
lastFilePath = fileRecordIterator.filePath()
}
case _ =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ package org.apache.paimon.spark
import org.apache.paimon.predicate.Predicate
import org.apache.paimon.table.Table

import org.apache.spark.sql.Utils.fieldReference
import org.apache.spark.sql.PaimonUtils.fieldReference
import org.apache.spark.sql.connector.expressions.NamedReference
import org.apache.spark.sql.connector.read.SupportsRuntimeFiltering
import org.apache.spark.sql.sources.{Filter, In}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import org.apache.paimon.stats
import org.apache.paimon.stats.ColStats
import org.apache.paimon.types.DataType

import org.apache.spark.sql.Utils
import org.apache.spark.sql.PaimonUtils
import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
import org.apache.spark.sql.connector.expressions.NamedReference
import org.apache.spark.sql.connector.read.Statistics
Expand Down Expand Up @@ -59,7 +59,7 @@ case class PaimonStatistics[T <: PaimonBaseScan](scan: T) extends Statistics {
.forEach(
f =>
resultMap.put(
Utils.fieldReference(f.name()),
PaimonUtils.fieldReference(f.name()),
PaimonColumnStats(f.`type`(), paimonColStats.get(f.name()))))
}
resultMap
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ import org.apache.paimon.predicate.{Predicate, PredicateBuilder}
import org.apache.paimon.spark.SparkFilterConverter
import org.apache.paimon.types.RowType

import org.apache.spark.sql.PaimonUtils.{normalizeExprs, translateFilter}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.Utils.{normalizeExprs, translateFilter}
import org.apache.spark.sql.catalyst.analysis.Resolver
import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, Cast, Expression, GetStructField, Literal, PredicateHelper, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import org.apache.paimon.table.sink.{BatchWriteBuilder, CommitMessage}
import org.apache.paimon.types.RowKind

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.Utils.createDataset
import org.apache.spark.sql.PaimonUtils.createDataset
import org.apache.spark.sql.catalyst.expressions.{And, Expression, Not}
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.plans.logical.{Filter, SupportsSubquery}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.paimon.table.FileStoreTable
import org.apache.paimon.types.RowKind

import org.apache.spark.sql.{Column, Dataset, Row, SparkSession}
import org.apache.spark.sql.Utils._
import org.apache.spark.sql.PaimonUtils._
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, BasePredicate, Expression, Literal, UnsafeProjection}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import org.apache.paimon.table.FileStoreTable
import org.apache.paimon.table.sink.BatchWriteBuilder

import org.apache.parquet.Preconditions
import org.apache.spark.sql.{Row, SparkSession, StatsUtils}
import org.apache.spark.sql.{PaimonStatsUtils, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
import org.apache.spark.sql.catalyst.util.DateTimeUtils
Expand Down Expand Up @@ -57,12 +57,12 @@ case class PaimonAnalyzeTableColumnCommand(

// compute stats
val attributes = getColumnsToAnalyze(relation, columnNames, allColumns)
val totalSize = StatsUtils.calculateTotalSize(
val totalSize = PaimonStatsUtils.calculateTotalSize(
sparkSession.sessionState,
table.name(),
Some(table.location().toUri))
val (mergedRecordCount, colStats) =
StatsUtils.computeColumnStats(sparkSession, relation, attributes)
PaimonStatsUtils.computeColumnStats(sparkSession, relation, attributes)

val totalRecordCount = currentSnapshot.totalRecordCount()
Preconditions.checkState(
Expand Down Expand Up @@ -113,7 +113,7 @@ case class PaimonAnalyzeTableColumnCommand(
}
columnsToAnalyze.foreach {
attr =>
if (!StatsUtils.analyzeSupportsType(attr.dataType)) {
if (!PaimonStatsUtils.analyzeSupportsType(attr.dataType)) {
throw new UnsupportedOperationException(
s"Analyzing on col: ${attr.name}, data type: ${attr.dataType} is not supported.")
}
Expand Down Expand Up @@ -148,12 +148,12 @@ case class PaimonAnalyzeTableColumnCommand(
}

/**
* Convert data from spark type to paimon, only cover datatype meet [[StatsUtils.hasMinMax]]
* Convert data from Spark type to Paimon; only covers data types that meet [[PaimonStatsUtils.hasMinMax]]
* currently.
*/
private def toPaimonData(o: Any, dataType: DataType): Any = {
dataType match {
case d if !StatsUtils.hasMinMax(d) =>
case d if !PaimonStatsUtils.hasMinMax(d) =>
// should not reach here
throw new UnsupportedOperationException(s"Unsupported data type $d, value is $o.")
case _: DecimalType =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import org.apache.paimon.table.sink.{CommitMessage, CommitMessageImpl}
import org.apache.paimon.table.source.DataSplit
import org.apache.paimon.types.RowType

import org.apache.spark.sql.PaimonUtils.createDataset
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.Utils.createDataset
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.plans.logical.{Filter => FilterLogicalNode}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.paimon.spark.DynamicOverWrite
import org.apache.paimon.table.FileStoreTable

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.Utils.createDataset
import org.apache.spark.sql.PaimonUtils.createDataset
import org.apache.spark.sql.catalyst.analysis.NamedRelation
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, V2WriteCommand}
import org.apache.spark.sql.execution.command.RunnableCommand
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import org.apache.paimon.table.sink.CommitMessage
import org.apache.paimon.types.RowKind

import org.apache.spark.sql.{Column, Row, SparkSession}
import org.apache.spark.sql.Utils.createDataset
import org.apache.spark.sql.PaimonUtils.createDataset
import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, If}
import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral
import org.apache.spark.sql.catalyst.plans.logical.{Assignment, Filter, Project, SupportsSubquery}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.paimon.spark.{InsertInto, Overwrite}
import org.apache.paimon.spark.commands.{PaimonCommand, SchemaHelper, WriteIntoPaimonTable}
import org.apache.paimon.table.FileStoreTable

import org.apache.spark.sql.{DataFrame, SQLContext, Utils}
import org.apache.spark.sql.{DataFrame, PaimonUtils, SQLContext}
import org.apache.spark.sql.execution.streaming.Sink
import org.apache.spark.sql.sources.AlwaysTrue
import org.apache.spark.sql.streaming.OutputMode
Expand All @@ -44,7 +44,7 @@ class PaimonSink(
InsertInto
}
partitionColumns.foreach(println)
val newData = Utils.createNewDataFrame(data)
val newData = PaimonUtils.createNewDataFrame(data)
WriteIntoPaimonTable(originTable, saveMode, newData, options).run(sqlContext.sparkSession)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ package org.apache.paimon.spark.statistics

import org.apache.paimon.spark.PaimonColumnStats

import org.apache.spark.sql.Utils
import org.apache.spark.sql.PaimonUtils
import org.apache.spark.sql.catalyst.{SQLConfHelper, StructFilters}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BoundReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical
Expand Down Expand Up @@ -86,7 +86,7 @@ trait StatisticsHelperBase extends SQLConfHelper {
v1Stats.attributeStats.foreach {
case (attr, v1ColStats) =>
columnStatsMap.put(
Utils.fieldReference(attr.name),
PaimonUtils.fieldReference(attr.name),
PaimonColumnStats(v1ColStats)
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import java.net.URI
* [[org.apache.spark.sql]] package; hence, use this class to adapt them so that we can use them
* indirectly.
*/
object StatsUtils {
object PaimonStatsUtils {

def calculateTotalSize(
sessionState: SessionState,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import org.apache.spark.util.{Utils => SparkUtils}
* [[org.apache.spark.sql]] package; hence, use this class to adapt them so that we can use them
* indirectly.
*/
object Utils {
object PaimonUtils {

/**
* In the streaming write case, an "Queries with streaming sources must be executed with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import org.apache.spark.util.Utils
import scala.reflect.ClassTag
import scala.util.control.NonFatal

object CatalogUtils {
object PaimonCatalogUtils {

def buildExternalCatalog(conf: SparkConf, hadoopConf: Configuration): ExternalCatalog = {
val externalCatalogClassName =
Expand Down
Loading