diff --git a/docs/content/spark/sql-write.md b/docs/content/spark/sql-write.md
index d2777110914f..5f4fa2dabc9f 100644
--- a/docs/content/spark/sql-write.md
+++ b/docs/content/spark/sql-write.md
@@ -26,17 +26,30 @@ under the License.
# SQL Write
-## Syntax
+## Insert Table
+
+The `INSERT` statement inserts new rows into a table or overwrites existing data in the table. The inserted rows can be specified by value expressions or by the result of a query.
+
+**Syntax**
```sql
INSERT { INTO | OVERWRITE } table_identifier [ part_spec ] [ column_list ] { value_expr | query };
```
+**Parameters**
+
+- **table_identifier**: Specifies a table name, which may be optionally qualified with a database name.
+
+- **part_spec**: An optional parameter that specifies a comma-separated list of key and value pairs for partitions.
-For more information, please check the syntax document:
+- **column_list**: An optional parameter that specifies a comma-separated list of columns from the table referenced by table_identifier. Spark reorders the columns of the input query to match the table schema according to this list.
-[Spark INSERT Statement](https://spark.apache.org/docs/latest/sql-ref-syntax-dml-insert-table.html)
+  Note: Since Spark 3.4, an INSERT INTO command with an explicit column list comprising fewer columns than the target table automatically adds the corresponding default values for the remaining columns (or NULL for any column lacking an explicitly assigned default value). In Spark 3.3 or earlier, the column list's size must equal the target table's column count, otherwise the command fails.
-## INSERT INTO
+- **value_expr** ( { value | NULL } [ , … ] ) [ , ( … ) ]: Specifies the values to be inserted. Either an explicitly specified value or a NULL can be inserted. A comma must be used to separate each value in the clause. More than one set of values can be specified to insert multiple rows.
+
+For more information, please check the syntax document: [Spark INSERT Statement](https://spark.apache.org/docs/latest/sql-ref-syntax-dml-insert-table.html)
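+
+For example, the following minimal sketch combines `part_spec`, `column_list`, and `value_expr` (the table `my_table`, its partition column `dt`, and the columns `id` and `name` are hypothetical):
+
+```sql
+INSERT INTO my_table PARTITION (dt = '2024-01-01') (id, name)
+VALUES (1, 'apple'), (2, NULL);
+```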
+
+### Insert Into
Use `INSERT INTO` to apply records and changes to tables.
@@ -44,15 +57,15 @@ Use `INSERT INTO` to apply records and changes to tables.
INSERT INTO my_table SELECT ...
```
-## Overwriting the Whole Table
+### Insert Overwrite
-Use `INSERT OVERWRITE` to overwrite the whole unpartitioned table.
+Use `INSERT OVERWRITE` to overwrite the whole table.
```sql
INSERT OVERWRITE my_table SELECT ...
```
-### Overwriting a Partition
+#### Insert Overwrite Partition
Use `INSERT OVERWRITE` to overwrite a partition.
@@ -60,7 +73,7 @@ Use `INSERT OVERWRITE` to overwrite a partition.
INSERT OVERWRITE my_table PARTITION (key1 = value1, key2 = value2, ...) SELECT ...
```
-### Dynamic Overwrite
+#### Dynamic Overwrite Partition
Spark's default overwrite mode is `static` partition overwrite. To enable dynamic overwrite, set the Spark session configuration `spark.sql.sources.partitionOverwriteMode` to `dynamic`.
@@ -97,13 +110,15 @@ SELECT * FROM my_table;
*/
```
-## Truncate tables
+## Truncate Table
+
+The `TRUNCATE TABLE` statement removes all the rows from a table or partition(s).
```sql
TRUNCATE TABLE my_table;
```
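+
+Since the statement also accepts a partition spec, a single partition can be truncated as well (a sketch; the `dt` partition column is hypothetical):
+
+```sql
+TRUNCATE TABLE my_table PARTITION (dt = '2024-01-01');
+```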
-## Updating tables
+## Update Table
Spark supports updating columns of primitive types and struct types, for example:
@@ -125,13 +140,13 @@ UPDATE t SET name = 'a_new' WHERE id = 1;
UPDATE t SET s.c2 = 'a_new' WHERE s.c1 = 1;
```
-## Deleting from table
+## Delete From Table
```sql
DELETE FROM my_table WHERE currency = 'UNKNOWN';
```
-## Merging into table
+## Merge Into Table
Paimon currently supports the Merge Into syntax in Spark 3+, which allows a set of updates, insertions, and deletions based on a source table in a single commit.
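+
+A minimal sketch of the statement shape (the `target` and `source` tables, the `id` join key, and the `op` flag column are hypothetical):
+
+```sql
+MERGE INTO target
+USING source
+ON target.id = source.id
+WHEN MATCHED AND source.op = 'delete' THEN DELETE
+WHEN MATCHED THEN UPDATE SET *
+WHEN NOT MATCHED THEN INSERT *;
+```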
diff --git a/docs/layouts/shortcodes/generated/core_configuration.html b/docs/layouts/shortcodes/generated/core_configuration.html
index 6fb2c72650fe..7d6bacccb026 100644
--- a/docs/layouts/shortcodes/generated/core_configuration.html
+++ b/docs/layouts/shortcodes/generated/core_configuration.html
@@ -593,6 +593,12 @@
<td>Duration</td>
<td>The check interval of partition expiration.</td>
</tr>
+<tr>
+<td><h5>partition.expiration-max-num</h5></td>
+<td style="word-wrap: break-word;">100</td>
+<td>Integer</td>
+<td>The maximum number of partitions that can be deleted in one partition expiration run.</td>
+</tr>
<tr>
<td><h5>partition.expiration-strategy</h5></td>
<td style="word-wrap: break-word;">values-time</td>
diff --git a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
index 765d5a1e32d6..8aebf2f289a0 100644
--- a/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
+++ b/paimon-common/src/main/java/org/apache/paimon/CoreOptions.java
@@ -809,6 +809,12 @@ public class CoreOptions implements Serializable {
.defaultValue(Duration.ofHours(1))
.withDescription("The check interval of partition expiration.");
+ public static final ConfigOption<Integer> PARTITION_EXPIRATION_MAX_NUM =
+ key("partition.expiration-max-num")
+ .intType()
+ .defaultValue(100)
+ .withDescription("The maximum number of partitions that can be deleted in one partition expiration run.");
+
public static final ConfigOption<String> PARTITION_TIMESTAMP_FORMATTER =
key("partition.timestamp-formatter")
.stringType()
@@ -2126,6 +2132,10 @@ public Duration partitionExpireCheckInterval() {
return options.get(PARTITION_EXPIRATION_CHECK_INTERVAL);
}
+ public int partitionExpireMaxNum() {
+ return options.get(PARTITION_EXPIRATION_MAX_NUM);
+ }
+
public PartitionExpireStrategy partitionExpireStrategy() {
return options.get(PARTITION_EXPIRATION_STRATEGY);
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
index 1a538ad89e47..54f554aa46d3 100644
--- a/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
+++ b/paimon-core/src/main/java/org/apache/paimon/AbstractFileStore.java
@@ -309,7 +309,8 @@ public PartitionExpire newPartitionExpire(String commitUser) {
newScan(),
newCommit(commitUser),
metastoreClient,
- options.endInputCheckPartitionExpire());
+ options.endInputCheckPartitionExpire(),
+ options.partitionExpireMaxNum());
}
@Override
diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/PartitionExpire.java b/paimon-core/src/main/java/org/apache/paimon/operation/PartitionExpire.java
index 62a9b796476a..d432a37dfd9c 100644
--- a/paimon-core/src/main/java/org/apache/paimon/operation/PartitionExpire.java
+++ b/paimon-core/src/main/java/org/apache/paimon/operation/PartitionExpire.java
@@ -54,7 +54,7 @@ public class PartitionExpire {
private LocalDateTime lastCheck;
private final PartitionExpireStrategy strategy;
private final boolean endInputCheckPartitionExpire;
- private int maxExpires;
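+ /** Upper bound on how many partitions a single expire run may delete. */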
+ private int maxExpireNum;
public PartitionExpire(
Duration expirationTime,
@@ -63,7 +63,8 @@ public PartitionExpire(
FileStoreScan scan,
FileStoreCommit commit,
@Nullable MetastoreClient metastoreClient,
- boolean endInputCheckPartitionExpire) {
+ boolean endInputCheckPartitionExpire,
+ int maxExpireNum) {
this.expirationTime = expirationTime;
this.checkInterval = checkInterval;
this.strategy = strategy;
@@ -72,7 +73,7 @@ public PartitionExpire(
this.metastoreClient = metastoreClient;
this.lastCheck = LocalDateTime.now();
this.endInputCheckPartitionExpire = endInputCheckPartitionExpire;
- this.maxExpires = Integer.MAX_VALUE;
+ this.maxExpireNum = maxExpireNum;
}
public PartitionExpire(
@@ -81,8 +82,17 @@ public PartitionExpire(
PartitionExpireStrategy strategy,
FileStoreScan scan,
FileStoreCommit commit,
- @Nullable MetastoreClient metastoreClient) {
- this(expirationTime, checkInterval, strategy, scan, commit, metastoreClient, false);
+ @Nullable MetastoreClient metastoreClient,
+ int maxExpireNum) {
+ this(
+ expirationTime,
+ checkInterval,
+ strategy,
+ scan,
+ commit,
+ metastoreClient,
+ false,
+ maxExpireNum);
}
public PartitionExpire withLock(Lock lock) {
@@ -90,8 +100,8 @@ public PartitionExpire withLock(Lock lock) {
return this;
}
- public PartitionExpire withMaxExpires(int maxExpires) {
- this.maxExpires = maxExpires;
+ public PartitionExpire withMaxExpireNum(int maxExpireNum) {
+ this.maxExpireNum = maxExpireNum;
return this;
}
@@ -145,6 +155,7 @@ private List