Skip to content

Commit

Permalink
[core] Sort the expired partitions and add max_expires param (apache#…
Browse files Browse the repository at this point in the history
  • Loading branch information
askwang authored Oct 8, 2024
1 parent 4f0a475 commit 69e8e23
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,15 @@
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/** Expire partitions. */
public class PartitionExpire {

private static final Logger LOG = LoggerFactory.getLogger(PartitionExpire.class);

private static final String DELIMITER = ",";

private final Duration expirationTime;
private final Duration checkInterval;
private final FileStoreScan scan;
Expand All @@ -51,6 +54,7 @@ public class PartitionExpire {
private LocalDateTime lastCheck;
private final PartitionExpireStrategy strategy;
private final boolean endInputCheckPartitionExpire;
private int maxExpires;

public PartitionExpire(
Duration expirationTime,
Expand All @@ -68,6 +72,7 @@ public PartitionExpire(
this.metastoreClient = metastoreClient;
this.lastCheck = LocalDateTime.now();
this.endInputCheckPartitionExpire = endInputCheckPartitionExpire;
this.maxExpires = Integer.MAX_VALUE;
}

public PartitionExpire(
Expand All @@ -85,6 +90,11 @@ public PartitionExpire withLock(Lock lock) {
return this;
}

/**
 * Sets the upper bound on how many expired partitions are kept when the expired partition list
 * is built, and returns this instance so calls can be chained.
 *
 * <p>The constructor initialises the bound to {@link Integer#MAX_VALUE}. The value should be
 * non-negative, because it is handed to a stream {@code limit(...)} step during expiration.
 *
 * @param maxExpires maximum number of partitions to expire
 * @return this {@code PartitionExpire}
 */
public PartitionExpire withMaxExpires(int maxExpires) {
this.maxExpires = maxExpires;
return this;
}

/**
 * Runs partition expiration using the current local date-time as "now".
 *
 * @param commitIdentifier identifier forwarded unchanged to the two-argument overload
 * @return whatever the two-argument {@code expire} overload returns
 */
public List<Map<String, String>> expire(long commitIdentifier) {
    LocalDateTime currentTime = LocalDateTime.now();
    return expire(currentTime, commitIdentifier);
}
Expand Down Expand Up @@ -125,14 +135,18 @@ List<Map<String, String>> expire(LocalDateTime now, long commitIdentifier) {

private List<Map<String, String>> doExpire(
LocalDateTime expireDateTime, long commitIdentifier) {
List<Map<String, String>> expired = new ArrayList<>();
for (PartitionEntry partition : strategy.selectExpiredPartitions(scan, expireDateTime)) {
List<PartitionEntry> partitionEntries =
strategy.selectExpiredPartitions(scan, expireDateTime);
List<List<String>> expiredPartValues = new ArrayList<>(partitionEntries.size());
for (PartitionEntry partition : partitionEntries) {
Object[] array = strategy.convertPartition(partition.partition());
Map<String, String> partString = strategy.toPartitionString(array);
expired.add(partString);
LOG.info("Expire Partition: {}", partString);
expiredPartValues.add(strategy.toPartitionValue(array));
}
if (!expired.isEmpty()) {

List<Map<String, String>> expired = new ArrayList<>();
if (!expiredPartValues.isEmpty()) {
expired = convertToPartitionString(expiredPartValues);
LOG.info("Expire Partitions: {}", expired);
if (metastoreClient != null) {
deleteMetastorePartitions(expired);
}
Expand All @@ -153,4 +167,15 @@ private void deleteMetastorePartitions(List<Map<String, String>> partitions) {
});
}
}

/**
 * Sorts the expired partition values, keeps at most {@code maxExpires} of them and converts each
 * one to a partition-key to value map.
 *
 * <p>Partitions are ordered field by field (first partition field first), each field compared
 * with {@link String#compareTo(String)}. The value lists are passed to the strategy directly
 * rather than being joined into one delimited string and split again. That round trip broke
 * values containing the delimiter and dropped trailing empty values, because
 * {@code String.split} removes trailing empty strings.
 *
 * @param expiredPartValues partition values, one list per expired partition
 * @return the sorted and limited partitions as key to value maps
 */
private List<Map<String, String>> convertToPartitionString(
        List<List<String>> expiredPartValues) {
    return expiredPartValues.stream()
            .sorted(PartitionExpire::comparePartitionValues)
            // limit() already stops at the end of the stream, so no need to clamp to size().
            .limit(maxExpires)
            .map(values -> strategy.toPartitionString(values.toArray()))
            .collect(Collectors.toList());
}

/** Compares two partition value lists element by element; a shorter prefix sorts first. */
private static int comparePartitionValues(List<String> left, List<String> right) {
    int common = Math.min(left.size(), right.size());
    for (int i = 0; i < common; i++) {
        int cmp = left.get(i).compareTo(right.get(i));
        if (cmp != 0) {
            return cmp;
        }
    }
    return Integer.compare(left.size(), right.size());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.paimon.utils.RowDataToObjectArrayConverter;

import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -42,13 +43,21 @@ public PartitionExpireStrategy(RowType partitionType) {
}

/**
 * Builds a partition spec that maps each partition key to the string form of its value.
 *
 * <p>Entries are inserted in the order of {@code partitionKeys}, and the returned
 * {@link LinkedHashMap} preserves that order. Each value is rendered with {@code toString()},
 * so {@code array} must hold a non-null element for every partition key.
 *
 * @param array partition field values, positionally aligned with {@code partitionKeys}
 * @return ordered map of partition key to value string
 */
public Map<String, String> toPartitionString(Object[] array) {
    // Declared once, pre-sized to the number of partition keys.
    Map<String, String> map = new LinkedHashMap<>(partitionKeys.size());
    for (int i = 0; i < partitionKeys.size(); i++) {
        map.put(partitionKeys.get(i), array[i].toString());
    }
    return map;
}

/**
 * Renders the partition field values as strings, one entry per partition key and in the same
 * positional order as {@code partitionKeys}.
 *
 * @param array partition field values; each element is rendered with {@code toString()}
 * @return list of value strings
 */
public List<String> toPartitionValue(Object[] array) {
    final int fieldCount = partitionKeys.size();
    List<String> values = new ArrayList<>(fieldCount);
    int index = 0;
    while (index < fieldCount) {
        Object field = array[index];
        values.add(field.toString());
        index++;
    }
    return values;
}

/**
 * Converts a binary partition row into an object array by delegating to
 * {@code toObjectArrayConverter}.
 *
 * @param partition the partition row to convert
 * @return the converter's result for {@code partition}
 */
public Object[] convertPartition(BinaryRow partition) {
    Object[] fields = toObjectArrayConverter.convert(partition);
    return fields;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.flink.procedure;

import org.apache.paimon.CoreOptions;
import org.apache.paimon.FileStore;
import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.metastore.MetastoreClient;
import org.apache.paimon.operation.PartitionExpire;
import org.apache.paimon.table.FileStoreTable;
import org.apache.paimon.utils.TimeUtils;

import org.apache.flink.table.procedure.ProcedureContext;

import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;

import static org.apache.paimon.partition.PartitionExpireStrategy.createPartitionExpireStrategy;

/**
 * Procedure registered under {@code expire_partitions} that expires partitions of a table and
 * reports which partitions were expired.
 */
public class ExpirePartitionsProcedure extends ProcedureBase {

    public static final String IDENTIFIER = "expire_partitions";

    @Override
    public String identifier() {
        return IDENTIFIER;
    }

    /** Variant without {@code maxExpires}; delegates to the full overload with {@code null}. */
    public String[] call(
            ProcedureContext procedureContext,
            String tableId,
            String expirationTime,
            String timestampFormatter,
            String timestampPattern,
            String expireStrategy)
            throws Catalog.TableNotExistException {
        final Integer unlimited = null;
        return call(
                procedureContext,
                tableId,
                expirationTime,
                timestampFormatter,
                timestampPattern,
                expireStrategy,
                unlimited);
    }

    /**
     * Expires partitions of {@code tableId} and returns one string per expired partition.
     *
     * @param procedureContext procedure context (not read by this method)
     * @param tableId identifier of the table to expire partitions on
     * @param expirationTime duration string parsed with {@code TimeUtils.parseDuration}
     * @param timestampFormatter value stored under the partition timestamp formatter option
     * @param timestampPattern value stored under the partition timestamp pattern option
     * @param expireStrategy value stored under the partition expiration strategy option
     * @param maxExpires optional cap on expired partitions; {@code null} keeps the default
     * @return the expired partitions rendered as strings, or a single
     *     {@code "No expired partitions."} entry when nothing expired
     */
    public String[] call(
            ProcedureContext procedureContext,
            String tableId,
            String expirationTime,
            String timestampFormatter,
            String timestampPattern,
            String expireStrategy,
            Integer maxExpires)
            throws Catalog.TableNotExistException {
        FileStoreTable targetTable = (FileStoreTable) table(tableId);
        FileStore store = targetTable.store();

        Map<String, String> expireOptions = new HashMap<>();
        expireOptions.put(CoreOptions.PARTITION_EXPIRATION_STRATEGY.key(), expireStrategy);
        expireOptions.put(CoreOptions.PARTITION_TIMESTAMP_FORMATTER.key(), timestampFormatter);
        expireOptions.put(CoreOptions.PARTITION_TIMESTAMP_PATTERN.key(), timestampPattern);

        // Constructor arguments are kept inline so they are evaluated in the original order.
        PartitionExpire expirer =
                new PartitionExpire(
                        TimeUtils.parseDuration(expirationTime),
                        Duration.ofMillis(0L),
                        createPartitionExpireStrategy(
                                CoreOptions.fromMap(expireOptions), store.partitionType()),
                        store.newScan(),
                        store.newCommit(""),
                        Optional.ofNullable(
                                        targetTable
                                                .catalogEnvironment()
                                                .metastoreClientFactory())
                                .map(MetastoreClient.Factory::create)
                                .orElse(null));
        if (maxExpires != null) {
            expirer.withMaxExpires(maxExpires);
        }

        List<Map<String, String>> expired = expirer.expire(Long.MAX_VALUE);
        if (expired == null || expired.isEmpty()) {
            return new String[] {"No expired partitions."};
        }

        String[] rendered = new String[expired.size()];
        int position = 0;
        for (Map<String, String> partition : expired) {
            rendered[position++] = withoutEnclosingChars(partition.toString());
        }
        return rendered;
    }

    /** Drops the first and last character of {@code text} (the braces of a map's toString). */
    private static String withoutEnclosingChars(String text) {
        return text.substring(1, text.length() - 1);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,10 @@ public void testExpirePartitionsProcedure() throws Exception {
sql("INSERT INTO T VALUES ('1', '2024-06-01')");
sql("INSERT INTO T VALUES ('2', '9024-06-01')");
assertThat(read(table)).containsExactlyInAnyOrder("1:2024-06-01", "2:9024-06-01");
sql("CALL sys.expire_partitions('default.T', '1 d', 'yyyy-MM-dd', '$dt', 'values-time')");
assertThat(
sql(
"CALL sys.expire_partitions('default.T', '1 d', 'yyyy-MM-dd', '$dt', 'values-time')"))
.containsExactly(Row.of("dt=2024-06-01"));
assertThat(read(table)).containsExactlyInAnyOrder("2:9024-06-01");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,19 @@ public String identifier() {
name = "expire_strategy",
type = @DataTypeHint("STRING"),
isOptional = true),
@ArgumentHint(
name = "max_expires",
type = @DataTypeHint("INTEGER"),
isOptional = true)
})
public @DataTypeHint("ROW< expired_partitions STRING>") Row[] call(
ProcedureContext procedureContext,
String tableId,
String expirationTime,
String timestampFormatter,
String timestampPattern,
String expireStrategy)
String expireStrategy,
Integer maxExpires)
throws Catalog.TableNotExistException {
FileStoreTable fileStoreTable = (FileStoreTable) table(tableId);
FileStore fileStore = fileStoreTable.store();
Expand All @@ -93,6 +98,9 @@ public String identifier() {
.metastoreClientFactory())
.map(MetastoreClient.Factory::create)
.orElse(null));
if (maxExpires != null) {
partitionExpire.withMaxExpires(maxExpires);
}
List<Map<String, String>> expired = partitionExpire.expire(Long.MAX_VALUE);
return expired == null || expired.isEmpty()
? new Row[] {Row.of("No expired partitions.")}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,61 @@ public void testPartitionExpireWithTimePartition() throws Exception {
.containsExactlyInAnyOrder("Never-expire:9999-09-09:99:99");
}

/**
 * Checks that expire_partitions reports expired partitions in sorted order and expires no more
 * than {@code max_expires} of them.
 */
@Test
public void testSortAndLimitExpirePartition() throws Exception {
    String createTable =
            "CREATE TABLE T ("
                    + " k STRING,"
                    + " dt STRING,"
                    + " hm STRING,"
                    + " PRIMARY KEY (k, dt, hm) NOT ENFORCED"
                    + ") PARTITIONED BY (dt, hm) WITH ("
                    + " 'bucket' = '1'"
                    + ")";
    sql(createTable);
    FileStoreTable table = paimonTable("T");

    // On the empty table nothing can expire.
    String expireWithoutLimit =
            "CALL sys.expire_partitions("
                    + "`table` => 'default.T'"
                    + ", expiration_time => '1 d'"
                    + ", timestamp_formatter => 'yyyy-MM-dd')";
    assertThat(callExpirePartitions(expireWithoutLimit))
            .containsExactlyInAnyOrder("No expired partitions.");

    // Rows are inserted out of partition order on purpose; the last partition never expires.
    String[] inserts = {
        "INSERT INTO T VALUES ('3', '2024-06-02', '02:00')",
        "INSERT INTO T VALUES ('2', '2024-06-02', '01:00')",
        "INSERT INTO T VALUES ('4', '2024-06-03', '01:00')",
        "INSERT INTO T VALUES ('1', '2024-06-01', '01:00')",
        "INSERT INTO T VALUES ('Never-expire', '9999-09-09', '99:99')"
    };
    for (String insert : inserts) {
        sql(insert);
    }

    Function<InternalRow, String> rowToString =
            row -> row.getString(0) + ":" + row.getString(1) + ":" + row.getString(2);
    assertThat(read(table, rowToString))
            .containsExactlyInAnyOrder(
                    "1:2024-06-01:01:00",
                    "2:2024-06-02:01:00",
                    "3:2024-06-02:02:00",
                    "4:2024-06-03:01:00",
                    "Never-expire:9999-09-09:99:99");

    // With max_expires => 3 only the three smallest expired partitions are reported, in order.
    String expireWithLimit =
            "CALL sys.expire_partitions("
                    + "`table` => 'default.T'"
                    + ", expiration_time => '1 d'"
                    + ", timestamp_formatter => 'yyyy-MM-dd', max_expires => 3)";
    assertThat(callExpirePartitions(expireWithLimit))
            .containsExactly(
                    "dt=2024-06-01, hm=01:00",
                    "dt=2024-06-02, hm=01:00",
                    "dt=2024-06-02, hm=02:00");

    // The fourth expired partition and the never-expiring one are still readable.
    assertThat(read(table, rowToString))
            .containsExactlyInAnyOrder("4:2024-06-03:01:00", "Never-expire:9999-09-09:99:99");
}

/** Return a list of expired partitions. */
public List<String> callExpirePartitions(String callSql) {
return sql(callSql).stream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import java.util.Optional;

import static org.apache.paimon.partition.PartitionExpireStrategy.createPartitionExpireStrategy;
import static org.apache.spark.sql.types.DataTypes.IntegerType;
import static org.apache.spark.sql.types.DataTypes.StringType;

/** A procedure to expire partitions. */
Expand All @@ -51,7 +52,8 @@ public class ExpirePartitionsProcedure extends BaseProcedure {
ProcedureParameter.required("expiration_time", StringType),
ProcedureParameter.optional("timestamp_formatter", StringType),
ProcedureParameter.optional("timestamp_pattern", StringType),
ProcedureParameter.optional("expire_strategy", StringType)
ProcedureParameter.optional("expire_strategy", StringType),
ProcedureParameter.optional("max_expires", IntegerType)
};

private static final StructType OUTPUT_TYPE =
Expand Down Expand Up @@ -81,6 +83,7 @@ public InternalRow[] call(InternalRow args) {
String timestampFormatter = args.isNullAt(2) ? null : args.getString(2);
String timestampPattern = args.isNullAt(3) ? null : args.getString(3);
String expireStrategy = args.isNullAt(4) ? null : args.getString(4);
Integer maxExpires = args.isNullAt(5) ? null : args.getInt(5);
return modifyPaimonTable(
tableIdent,
table -> {
Expand All @@ -105,6 +108,9 @@ public InternalRow[] call(InternalRow args) {
.metastoreClientFactory())
.map(MetastoreClient.Factory::create)
.orElse(null));
if (maxExpires != null) {
partitionExpire.withMaxExpires(maxExpires);
}
List<Map<String, String>> expired = partitionExpire.expire(Long.MAX_VALUE);
return expired == null || expired.isEmpty()
? new InternalRow[] {
Expand Down
Loading

0 comments on commit 69e8e23

Please sign in to comment.