From 26afa3e743f36b1d81da0206331c11af9da140fc Mon Sep 17 00:00:00 2001 From: Jingsong Date: Wed, 31 Jul 2024 16:44:10 +0800 Subject: [PATCH] [core] Refactor LookupFile.localFilePrefix to cut partition string to 20 --- .../utils/InternalRowPartitionComputer.java | 7 ++-- .../InternalRowPartitionComputerTest.java | 9 +++-- .../apache/paimon/mergetree/LookupFile.java | 15 +++++---- .../operation/KeyValueFileStoreWrite.java | 9 ++--- .../paimon/table/query/LocalTableQuery.java | 13 ++------ .../paimon/mergetree/LookupFileTest.java | 33 ++++++++++++------- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/InternalRowPartitionComputer.java b/paimon-common/src/main/java/org/apache/paimon/utils/InternalRowPartitionComputer.java index 07746d58dd2d..3211a2e32cd2 100644 --- a/paimon-common/src/main/java/org/apache/paimon/utils/InternalRowPartitionComputer.java +++ b/paimon-common/src/main/java/org/apache/paimon/utils/InternalRowPartitionComputer.java @@ -79,8 +79,8 @@ public static Map convertSpecToInternal( return partValues; } - public static String paritionToString( - RowType partitionType, BinaryRow partition, String delimiter) { + public static String toSimpleString( + RowType partitionType, BinaryRow partition, String delimiter, int maxLength) { InternalRow.FieldGetter[] getters = partitionType.fieldGetters(); StringBuilder builder = new StringBuilder(); for (int i = 0; i < getters.length; i++) { @@ -94,6 +94,7 @@ public static String paritionToString( builder.append(delimiter); } } - return builder.toString(); + String result = builder.toString(); + return result.substring(0, Math.min(result.length(), maxLength)); } } diff --git a/paimon-common/src/test/java/org/apache/paimon/utils/InternalRowPartitionComputerTest.java b/paimon-common/src/test/java/org/apache/paimon/utils/InternalRowPartitionComputerTest.java index fcd79fa4c084..5f57dd6cf72d 100644 --- a/paimon-common/src/test/java/org/apache/paimon/utils/InternalRowPartitionComputerTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/utils/InternalRowPartitionComputerTest.java @@ -35,16 +35,15 @@ public class InternalRowPartitionComputerTest { public void testPartitionToString() { RowType rowType = RowType.of(); BinaryRow binaryRow = new BinaryRow(0); - BinaryRowWriter writer = new BinaryRowWriter(binaryRow); - assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-")) + assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30)) .isEqualTo(""); rowType = RowType.of(DataTypes.STRING(), DataTypes.INT()); binaryRow = new BinaryRow(2); - writer = new BinaryRowWriter(binaryRow); + BinaryRowWriter writer = new BinaryRowWriter(binaryRow); writer.writeString(0, BinaryString.fromString("20240731")); writer.writeInt(1, 10); - assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-")) + assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30)) .isEqualTo("20240731-10"); rowType = RowType.of(DataTypes.STRING(), DataTypes.INT()); @@ -52,7 +51,7 @@ public void testPartitionToString() { writer = new BinaryRowWriter(binaryRow); writer.setNullAt(0); writer.writeInt(1, 10); - assertThat(InternalRowPartitionComputer.paritionToString(rowType, binaryRow, "-")) + assertThat(InternalRowPartitionComputer.toSimpleString(rowType, binaryRow, "-", 30)) .isEqualTo("null-10"); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/LookupFile.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/LookupFile.java index 4a1b04f52a0f..b3b6fabc49b2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/mergetree/LookupFile.java +++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/LookupFile.java @@ -18,9 +18,11 @@ package org.apache.paimon.mergetree; +import org.apache.paimon.data.BinaryRow; import org.apache.paimon.io.DataFileMeta; import org.apache.paimon.lookup.LookupStoreReader; import org.apache.paimon.options.MemorySize; +import org.apache.paimon.types.RowType; import org.apache.paimon.utils.FileIOUtils; import org.apache.paimon.shade.caffeine2.com.github.benmanes.caffeine.cache.Cache; @@ -37,6 +39,7 @@ import java.time.Duration; import static org.apache.paimon.mergetree.LookupUtils.fileKibiBytes; +import static org.apache.paimon.utils.InternalRowPartitionComputer.toSimpleString; import static org.apache.paimon.utils.Preconditions.checkArgument; /** Lookup file for cache remote file to local. */ @@ -107,14 +110,12 @@ private static void removalCallback(String file, LookupFile lookupFile, RemovalC } public static String localFilePrefix( - String partition, int bucket, String remoteFileName, int length) { - String identifier; - if (partition.isEmpty()) { - identifier = String.format("%s-%s", bucket, remoteFileName); + RowType partitionType, BinaryRow partition, int bucket, String remoteFileName) { + if (partition.getFieldCount() == 0) { + return String.format("%s-%s", bucket, remoteFileName); } else { - identifier = String.format("%s-%s-%s", partition, bucket, remoteFileName); + String partitionString = toSimpleString(partitionType, partition, "-", 20); + return String.format("%s-%s-%s", partitionString, bucket, remoteFileName); } - - return identifier.substring(0, Math.min(identifier.length(), length)); } } diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java b/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java index 545f20f096db..5f21d3c1aa95 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/KeyValueFileStoreWrite.java @@ -69,7 +69,6 @@ import org.apache.paimon.utils.CommitIncrement; import org.apache.paimon.utils.FieldsComparator; import org.apache.paimon.utils.FileStorePathFactory; -import org.apache.paimon.utils.InternalRowPartitionComputer; import org.apache.paimon.utils.SnapshotManager; import org.apache.paimon.utils.UserDefinedSeqComparator; @@ -89,6 +88,7 @@ import static org.apache.paimon.CoreOptions.ChangelogProducer.FULL_COMPACTION; import static org.apache.paimon.CoreOptions.MergeEngine.DEDUPLICATE; import static org.apache.paimon.lookup.LookupStoreFactory.bfGenerator; +import static org.apache.paimon.mergetree.LookupFile.localFilePrefix; /** {@link FileStoreWrite} for {@link KeyValueFileStore}. */ public class KeyValueFileStoreWrite extends MemoryFileStoreWrite { @@ -381,12 +381,7 @@ private LookupLevels createLookupLevels( file -> ioManager .createChannel( - LookupFile.localFilePrefix( - InternalRowPartitionComputer.paritionToString( - partitionType, partition, "-"), - bucket, - file, - 100)) + localFilePrefix(partitionType, partition, bucket, file)) .getPathFile(), lookupStoreFactory, bfGenerator(options), diff --git a/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java b/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java index 5f5463a70cd6..d1315a2aa236 100644 --- a/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java +++ b/paimon-core/src/main/java/org/apache/paimon/table/query/LocalTableQuery.java @@ -39,7 +39,6 @@ import org.apache.paimon.options.Options; import org.apache.paimon.table.FileStoreTable; import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.InternalRowPartitionComputer; import org.apache.paimon.utils.KeyComparatorSupplier; import org.apache.paimon.utils.Preconditions; @@ -56,6 +55,7 @@ import static org.apache.paimon.CoreOptions.MergeEngine.DEDUPLICATE; import static org.apache.paimon.lookup.LookupStoreFactory.bfGenerator; +import static org.apache.paimon.mergetree.LookupFile.localFilePrefix; /** Implementation for {@link TableQuery} for caching data and file in local. */ public class LocalTableQuery implements TableQuery { @@ -162,15 +162,8 @@ private void newLookupLevels(BinaryRow partition, int bucket, List file -> Preconditions.checkNotNull(ioManager, "IOManager is required.") .createChannel( - LookupFile.localFilePrefix( - InternalRowPartitionComputer - .paritionToString( - partitionType, - partition, - "-"), - bucket, - file, - 100)) + localFilePrefix( + partitionType, partition, bucket, file)) .getPathFile(), lookupStoreFactory, bfGenerator(options), diff --git a/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupFileTest.java b/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupFileTest.java index 1fcebb33887d..0855afeb7c6b 100644 --- a/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupFileTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/mergetree/LookupFileTest.java @@ -18,8 +18,14 @@ package org.apache.paimon.mergetree; +import org.apache.paimon.types.DataTypes; +import org.apache.paimon.types.RowType; + import org.junit.jupiter.api.Test; +import static org.apache.paimon.data.BinaryRow.EMPTY_ROW; +import static org.apache.paimon.data.BinaryRow.singleColumn; +import static org.apache.paimon.mergetree.LookupFile.localFilePrefix; import static org.assertj.core.api.Assertions.assertThat; /** Test for {@link LookupFile}. */ @@ -27,23 +33,28 @@ public class LookupFileTest { @Test public void testLocalFilePrefix() { + RowType partType = RowType.of(DataTypes.STRING()); assertThat( - LookupFile.localFilePrefix( - "2024073105", + localFilePrefix( + partType, + singleColumn("2024073105"), 10, - "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc", - 100)) + "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc")) .isEqualTo("2024073105-10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"); assertThat( - LookupFile.localFilePrefix( - "", 10, "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc", 100)) + localFilePrefix( + RowType.of(), + EMPTY_ROW, + 10, + "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc")) .isEqualTo("10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"); assertThat( - LookupFile.localFilePrefix( - "2024073105", + localFilePrefix( + partType, + singleColumn("2024073105-05-2123232313123123123213123"), 10, - "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc", - 20)) - .isEqualTo("2024073105-10-data-c"); + "data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc")) + .isEqualTo( + "2024073105-05-212323-10-data-ccbb95e7-8b8c-4549-8ca9-f553843d67ad-3.orc"); } }