From 841539755bc1ab16556c3899737d5b17075e3ee9 Mon Sep 17 00:00:00 2001 From: Jingsong Date: Thu, 12 Dec 2024 13:39:56 +0800 Subject: [PATCH] [core] Rename BulkFormatMapping to FormatReaderMapping --- .../paimon/io/KeyValueFileReaderFactory.java | 18 ++++++------- .../paimon/operation/RawFileSplitRead.java | 26 +++++++++---------- ...tMapping.java => FormatReaderMapping.java} | 20 +++++++------- ...Test.java => FormatReaderMappingTest.java} | 15 ++++++----- 4 files changed, 40 insertions(+), 39 deletions(-) rename paimon-core/src/main/java/org/apache/paimon/utils/{BulkFormatMapping.java => FormatReaderMapping.java} (96%) rename paimon-core/src/test/java/org/apache/paimon/utils/{BulkFormatMappingTest.java => FormatReaderMappingTest.java} (92%) diff --git a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileReaderFactory.java b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileReaderFactory.java index 7d3acd729c55..e248351b22e2 100644 --- a/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileReaderFactory.java +++ b/paimon-core/src/main/java/org/apache/paimon/io/KeyValueFileReaderFactory.java @@ -40,9 +40,9 @@ import org.apache.paimon.types.DataField; import org.apache.paimon.types.RowType; import org.apache.paimon.utils.AsyncRecordReader; -import org.apache.paimon.utils.BulkFormatMapping; -import org.apache.paimon.utils.BulkFormatMapping.BulkFormatMappingBuilder; import org.apache.paimon.utils.FileStorePathFactory; +import org.apache.paimon.utils.FormatReaderMapping; +import org.apache.paimon.utils.FormatReaderMapping.BulkFormatMappingBuilder; import javax.annotation.Nullable; @@ -68,7 +68,7 @@ public class KeyValueFileReaderFactory implements FileReaderFactory { private final DataFilePathFactory pathFactory; private final long asyncThreshold; - private final Map bulkFormatMappings; + private final Map bulkFormatMappings; private final BinaryRow partition; private final DeletionVector.Factory dvFactory; @@ -120,14 +120,14 @@ private FileRecordReader createRecordReader( throws IOException { String formatIdentifier = DataFilePathFactory.formatIdentifier(fileName); - Supplier formatSupplier = + Supplier formatSupplier = () -> bulkFormatMappingBuilder.build( formatIdentifier, schema, schemaId == schema.id() ? schema : schemaManager.schema(schemaId)); - BulkFormatMapping bulkFormatMapping = + FormatReaderMapping formatReaderMapping = reuseFormat ? bulkFormatMappings.computeIfAbsent( new FormatKey(schemaId, formatIdentifier), @@ -137,14 +137,14 @@ private FileRecordReader createRecordReader( FileRecordReader fileRecordReader = new DataFileRecordReader( - bulkFormatMapping.getReaderFactory(), + formatReaderMapping.getReaderFactory(), orcPoolSize == null ? new FormatReaderContext(fileIO, filePath, fileSize) : new OrcFormatReaderContext( fileIO, filePath, fileSize, orcPoolSize), - bulkFormatMapping.getIndexMapping(), - bulkFormatMapping.getCastMapping(), - PartitionUtils.create(bulkFormatMapping.getPartitionPair(), partition)); + formatReaderMapping.getIndexMapping(), + formatReaderMapping.getCastMapping(), + PartitionUtils.create(formatReaderMapping.getPartitionPair(), partition)); Optional deletionVector = dvFactory.create(fileName); if (deletionVector.isPresent() && !deletionVector.get().isEmpty()) { diff --git a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java index 46977457c4be..af92e4463662 100644 --- a/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java +++ b/paimon-core/src/main/java/org/apache/paimon/operation/RawFileSplitRead.java @@ -46,9 +46,9 @@ import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.types.DataField; import org.apache.paimon.types.RowType; -import org.apache.paimon.utils.BulkFormatMapping; -import org.apache.paimon.utils.BulkFormatMapping.BulkFormatMappingBuilder; import org.apache.paimon.utils.FileStorePathFactory; +import org.apache.paimon.utils.FormatReaderMapping; +import org.apache.paimon.utils.FormatReaderMapping.BulkFormatMappingBuilder; import org.apache.paimon.utils.IOExceptionSupplier; import org.slf4j.Logger; @@ -75,7 +75,7 @@ public class RawFileSplitRead implements SplitRead { private final TableSchema schema; private final FileFormatDiscover formatDiscover; private final FileStorePathFactory pathFactory; - private final Map bulkFormatMappings; + private final Map bulkFormatMappings; private final boolean fileIndexReadEnabled; private RowType readRowType; @@ -159,7 +159,7 @@ public RecordReader createReader( String formatIdentifier = DataFilePathFactory.formatIdentifier(file.fileName()); long schemaId = file.schemaId(); - Supplier formatSupplier = + Supplier formatSupplier = () -> bulkFormatMappingBuilder.build( formatIdentifier, @@ -168,7 +168,7 @@ public RecordReader createReader( ? schema : schemaManager.schema(schemaId)); - BulkFormatMapping bulkFormatMapping = + FormatReaderMapping formatReaderMapping = bulkFormatMappings.computeIfAbsent( new FormatKey(file.schemaId(), formatIdentifier), key -> formatSupplier.get()); @@ -181,7 +181,7 @@ public RecordReader createReader( partition, file, dataFilePathFactory, - bulkFormatMapping, + formatReaderMapping, dvFactory)); } @@ -192,7 +192,7 @@ private FileRecordReader createFileReader( BinaryRow partition, DataFileMeta file, DataFilePathFactory dataFilePathFactory, - BulkFormatMapping bulkFormatMapping, + FormatReaderMapping formatReaderMapping, IOExceptionSupplier dvFactory) throws IOException { FileIndexResult fileIndexResult = null; @@ -200,8 +200,8 @@ private FileRecordReader createFileReader( fileIndexResult = FileIndexEvaluator.evaluate( fileIO, - bulkFormatMapping.getDataSchema(), - bulkFormatMapping.getDataFilters(), + formatReaderMapping.getDataSchema(), + formatReaderMapping.getDataFilters(), dataFilePathFactory, file); if (!fileIndexResult.remain()) { @@ -217,11 +217,11 @@ private FileRecordReader createFileReader( fileIndexResult); FileRecordReader fileRecordReader = new DataFileRecordReader( - bulkFormatMapping.getReaderFactory(), + formatReaderMapping.getReaderFactory(), formatReaderContext, - bulkFormatMapping.getIndexMapping(), - bulkFormatMapping.getCastMapping(), - PartitionUtils.create(bulkFormatMapping.getPartitionPair(), partition)); + formatReaderMapping.getIndexMapping(), + formatReaderMapping.getCastMapping(), + PartitionUtils.create(formatReaderMapping.getPartitionPair(), partition)); if (fileIndexResult instanceof BitmapIndexResult) { fileRecordReader = diff --git a/paimon-core/src/main/java/org/apache/paimon/utils/BulkFormatMapping.java b/paimon-core/src/main/java/org/apache/paimon/utils/FormatReaderMapping.java similarity index 96% rename from paimon-core/src/main/java/org/apache/paimon/utils/BulkFormatMapping.java rename to paimon-core/src/main/java/org/apache/paimon/utils/FormatReaderMapping.java index 58ef924df178..60fc51806877 100644 --- a/paimon-core/src/main/java/org/apache/paimon/utils/BulkFormatMapping.java +++ b/paimon-core/src/main/java/org/apache/paimon/utils/FormatReaderMapping.java @@ -45,8 +45,8 @@ import static org.apache.paimon.predicate.PredicateBuilder.excludePredicateWithFields; import static org.apache.paimon.table.SpecialFields.KEY_FIELD_ID_START; -/** Class with index mapping and bulk format. */ -public class BulkFormatMapping { +/** Class with index mapping and format reader. */ +public class FormatReaderMapping { // Index mapping from data schema fields to table schema fields, this is used to realize paimon // schema evolution. And it combines trimeedKeyMapping, which maps key fields to the value @@ -56,21 +56,21 @@ public class BulkFormatMapping { @Nullable private final CastFieldGetter[] castMapping; // partition fields mapping, add partition fields to the read fields @Nullable private final Pair partitionPair; - private final FormatReaderFactory bulkFormat; + private final FormatReaderFactory readerFactory; private final TableSchema dataSchema; private final List dataFilters; - public BulkFormatMapping( + public FormatReaderMapping( @Nullable int[] indexMapping, @Nullable CastFieldGetter[] castMapping, @Nullable int[] trimmedKeyMapping, @Nullable Pair partitionPair, - FormatReaderFactory bulkFormat, + FormatReaderFactory readerFactory, TableSchema dataSchema, List dataFilters) { this.indexMapping = combine(indexMapping, trimmedKeyMapping); this.castMapping = castMapping; - this.bulkFormat = bulkFormat; + this.readerFactory = readerFactory; this.partitionPair = partitionPair; this.dataSchema = dataSchema; this.dataFilters = dataFilters; @@ -112,7 +112,7 @@ public Pair getPartitionPair() { } public FormatReaderFactory getReaderFactory() { - return bulkFormat; + return readerFactory; } public TableSchema getDataSchema() { @@ -123,7 +123,7 @@ public List getDataFilters() { return dataFilters; } - /** Builder for {@link BulkFormatMapping}. */ + /** Builder for {@link FormatReaderMapping}. */ public static class BulkFormatMappingBuilder { private final FileFormatDiscover formatDiscover; @@ -160,7 +160,7 @@ public BulkFormatMappingBuilder( * fields. We generate the partitionMappingAndFieldsWithoutPartitionPair which helps reduce * the real read fields and tell us how to map it back. */ - public BulkFormatMapping build( + public FormatReaderMapping build( String formatIdentifier, TableSchema tableSchema, TableSchema dataSchema) { // extract the whole data fields in logic. @@ -187,7 +187,7 @@ public BulkFormatMapping build( // build read filters List readFilters = readFilters(filters, tableSchema, dataSchema); - return new BulkFormatMapping( + return new FormatReaderMapping( indexCastMapping.getIndexMapping(), indexCastMapping.getCastMapping(), trimmedKeyPair.getLeft(), diff --git a/paimon-core/src/test/java/org/apache/paimon/utils/BulkFormatMappingTest.java b/paimon-core/src/test/java/org/apache/paimon/utils/FormatReaderMappingTest.java similarity index 92% rename from paimon-core/src/test/java/org/apache/paimon/utils/BulkFormatMappingTest.java rename to paimon-core/src/test/java/org/apache/paimon/utils/FormatReaderMappingTest.java index 4d5d6e32e85d..f2fab510a10a 100644 --- a/paimon-core/src/test/java/org/apache/paimon/utils/BulkFormatMappingTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/utils/FormatReaderMappingTest.java @@ -31,8 +31,8 @@ import java.util.ArrayList; import java.util.List; -/** Test for {@link BulkFormatMapping.BulkFormatMappingBuilder}. */ -public class BulkFormatMappingTest { +/** Test for {@link FormatReaderMapping.BulkFormatMappingBuilder}. */ +public class FormatReaderMappingTest { @Test public void testTrimKeyFields() { @@ -80,7 +80,7 @@ public void testTrimKeyFields() { testFields.add(new DataField(6, String.valueOf(6), DataTypes.STRING())); Pair res = - BulkFormatMapping.BulkFormatMappingBuilder.trimKeyFields(testFields, allFields); + FormatReaderMapping.BulkFormatMappingBuilder.trimKeyFields(testFields, allFields); Assertions.assertThat(res.getKey()).containsExactly(0, 1, 2, 3, 1, 4, 2, 0, 5); @@ -124,11 +124,11 @@ public void testTrimKeyWithIndexMapping() { // map from key fields reading to value fields reading Pair trimmedKeyPair = - BulkFormatMapping.BulkFormatMappingBuilder.trimKeyFields( + FormatReaderMapping.BulkFormatMappingBuilder.trimKeyFields( readDataFields, readDataFields); - BulkFormatMapping bulkFormatMapping = - new BulkFormatMapping( + FormatReaderMapping formatReaderMapping = + new FormatReaderMapping( indexCastMapping.getIndexMapping(), indexCastMapping.getCastMapping(), trimmedKeyPair.getLeft(), @@ -137,7 +137,8 @@ public void testTrimKeyWithIndexMapping() { null, null); - Assertions.assertThat(bulkFormatMapping.getIndexMapping()).containsExactly(0, 1, 0, -1, 2); + Assertions.assertThat(formatReaderMapping.getIndexMapping()) + .containsExactly(0, 1, 0, -1, 2); List trimmed = trimmedKeyPair.getRight().getFields(); Assertions.assertThat(trimmed.get(0).id()).isEqualTo(1); Assertions.assertThat(trimmed.get(1).id()).isEqualTo(0);