From 7400979e0a3988fc72298c1bbcf762921c307e78 Mon Sep 17 00:00:00 2001 From: Jingsong Date: Tue, 10 Dec 2024 16:00:42 +0800 Subject: [PATCH] [orc] Row group filter push down cannot work with bitmap index --- .../apache/paimon/format/FormatReaderContext.java | 1 + .../apache/paimon/format/FormatReaderFactory.java | 3 +++ .../java/org/apache/orc/impl/RecordReaderImpl.java | 4 +++- .../apache/paimon/format/orc/OrcReaderFactory.java | 13 ++++++++----- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderContext.java b/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderContext.java index 0d3dd7c79ff3..cae6a977e615 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderContext.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderContext.java @@ -60,6 +60,7 @@ public long fileSize() { return fileSize; } + @Nullable @Override public FileIndexResult fileIndex() { return fileIndexResult; diff --git a/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderFactory.java b/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderFactory.java index d8af3e2fe37c..5ef084ec4d34 100644 --- a/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderFactory.java +++ b/paimon-common/src/main/java/org/apache/paimon/format/FormatReaderFactory.java @@ -25,6 +25,8 @@ import org.apache.paimon.reader.FileRecordReader; import org.apache.paimon.reader.RecordReader; +import javax.annotation.Nullable; + import java.io.IOException; /** A factory to create {@link RecordReader} for file. 
*/ @@ -41,6 +43,7 @@ interface Context { long fileSize(); + @Nullable FileIndexResult fileIndex(); } } diff --git a/paimon-format/src/main/java/org/apache/orc/impl/RecordReaderImpl.java b/paimon-format/src/main/java/org/apache/orc/impl/RecordReaderImpl.java index 6c3af4e50043..93aa0719caea 100644 --- a/paimon-format/src/main/java/org/apache/orc/impl/RecordReaderImpl.java +++ b/paimon-format/src/main/java/org/apache/orc/impl/RecordReaderImpl.java @@ -62,6 +62,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.annotation.Nullable; + import java.io.IOException; import java.math.BigDecimal; import java.sql.Timestamp; @@ -127,7 +129,7 @@ public class RecordReaderImpl implements RecordReader { private final boolean noSelectedVector; // identifies whether the file has bad bloom filters that we should not use. private final boolean skipBloomFilters; - private final FileIndexResult fileIndexResult; + @Nullable private final FileIndexResult fileIndexResult; static final String[] BAD_CPP_BLOOM_FILTER_VERSIONS = { "1.6.0", "1.6.1", "1.6.2", "1.6.3", "1.6.4", "1.6.5", "1.6.6", "1.6.7", "1.6.8", "1.6.9", "1.6.10", "1.6.11", "1.7.0" diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java index ee0f8a55c034..db17357bfd70 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcReaderFactory.java @@ -24,6 +24,7 @@ import org.apache.paimon.data.columnar.ColumnarRowIterator; import org.apache.paimon.data.columnar.VectorizedColumnBatch; import org.apache.paimon.fileindex.FileIndexResult; +import org.apache.paimon.fileindex.bitmap.BitmapIndexResult; import org.apache.paimon.format.FormatReaderFactory; import org.apache.paimon.format.OrcFormatReaderContext; import org.apache.paimon.format.fs.HadoopReadOnlyFileSystem; @@ -258,7 +259,7 @@ private 
static RecordReader createRecordReader( org.apache.paimon.fs.Path path, long splitStart, long splitLength, - FileIndexResult fileIndexResult, + @Nullable FileIndexResult fileIndexResult, boolean deletionVectorsEnabled) throws IOException { org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, fileIndexResult); @@ -276,9 +277,11 @@ private static RecordReader createRecordReader( .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf)) .tolerateMissingSchema( OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf)); - if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) { - // deletion vectors can not enable this feature, cased by getRowNumber would be - // changed. + if (!conjunctPredicates.isEmpty() + && !deletionVectorsEnabled + && !(fileIndexResult instanceof BitmapIndexResult)) { + // row group filter push down makes row numbers incorrect, + // so it cannot be used together with deletion vectors or a bitmap index options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf)); options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf)); } @@ -342,7 +345,7 @@ public static org.apache.orc.Reader createReader( org.apache.hadoop.conf.Configuration conf, FileIO fileIO, org.apache.paimon.fs.Path path, - FileIndexResult fileIndexResult) + @Nullable FileIndexResult fileIndexResult) throws IOException { // open ORC file and create reader org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri());