From 9d2a63a0cf31e66083c0374d7a38c7b3f4777cd7 Mon Sep 17 00:00:00 2001 From: Jingsong Date: Wed, 4 Dec 2024 08:53:52 +0700 Subject: [PATCH] [core] Drop stats for deleted data files to reduce memory --- .../paimon/append/UnawareAppendTableCompactionCoordinator.java | 2 ++ .../org/apache/paimon/operation/AbstractFileStoreWrite.java | 3 ++- .../scala/org/apache/paimon/spark/commands/PaimonCommand.scala | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java index 577f28d0f5cf..5e43568aac3f 100644 --- a/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java +++ b/paimon-core/src/main/java/org/apache/paimon/append/UnawareAppendTableCompactionCoordinator.java @@ -380,6 +380,8 @@ public FilesIterator( if (filter != null) { snapshotReader.withFilter(filter); } + // drop stats to reduce memory + snapshotReader.dropStats(); this.streamingMode = isStreaming; } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java index d63887030090..79ee3e1f4706 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/AbstractFileStoreWrite.java @@ -101,7 +101,8 @@ protected AbstractFileStoreWrite( int writerNumberMax, boolean legacyPartitionName) { this.snapshotManager = snapshotManager; - this.scan = scan; + // Statistic is useless in writer + this.scan = scan.dropStats(); this.indexFactory = indexFactory; this.dvMaintainerFactory = dvMaintainerFactory; this.totalBuckets = totalBuckets; diff --git a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala index 191d7a766b71..466643b15709 100644 --- a/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala +++ b/paimon-spark/paimon-spark-common/src/main/scala/org/apache/paimon/spark/commands/PaimonCommand.scala @@ -94,7 +94,8 @@ trait PaimonCommand extends WithFileStoreTable with ExpressionHelper with SQLCon condition: Expression, output: Seq[Attribute]): Seq[DataSplit] = { // low level snapshot reader, it can not be affected by 'scan.mode' - val snapshotReader = table.newSnapshotReader() + // dropStats after filter push down + val snapshotReader = table.newSnapshotReader().dropStats() if (condition != TrueLiteral) { val filter = convertConditionToPaimonPredicate(condition, output, rowType, ignoreFailure = true)