[orc] Row group filter push down cannot work with bitmap index
JingsongLi committed Dec 10, 2024
1 parent f8c33c5 commit 7400979
Showing 4 changed files with 15 additions and 6 deletions.
@@ -60,6 +60,7 @@ public long fileSize() {
return fileSize;
}

@Nullable
@Override
public FileIndexResult fileIndex() {
return fileIndexResult;
@@ -25,6 +25,8 @@
import org.apache.paimon.reader.FileRecordReader;
import org.apache.paimon.reader.RecordReader;

import javax.annotation.Nullable;

import java.io.IOException;

/** A factory to create {@link RecordReader} for file. */
@@ -41,6 +43,7 @@ interface Context {

long fileSize();

@Nullable
FileIndexResult fileIndex();
}
}
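
Marking fileIndex() as @Nullable documents that a file may carry no index result at all. Callers can still branch on the concrete index type without an extra null check, because instanceof evaluates to false for null. A minimal sketch of that calling pattern (the helper class below is hypothetical and assumes the interface shown is FormatReaderFactory.Context; it is not part of this commit):

import org.apache.paimon.fileindex.FileIndexResult;
import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
import org.apache.paimon.format.FormatReaderFactory;

// Hypothetical helper, not part of the commit.
class FileIndexChecks {
    // fileIndex() may now return null; `null instanceof BitmapIndexResult`
    // is false, so no separate null check is needed before branching.
    static boolean isBitmapIndex(FormatReaderFactory.Context context) {
        FileIndexResult fileIndexResult = context.fileIndex();
        return fileIndexResult instanceof BitmapIndexResult;
    }
}
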
@@ -62,6 +62,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.IOException;
import java.math.BigDecimal;
import java.sql.Timestamp;
@@ -127,7 +129,7 @@ public class RecordReaderImpl implements RecordReader {
private final boolean noSelectedVector;
// identifies whether the file has bad bloom filters that we should not use.
private final boolean skipBloomFilters;
private final FileIndexResult fileIndexResult;
@Nullable private final FileIndexResult fileIndexResult;
static final String[] BAD_CPP_BLOOM_FILTER_VERSIONS = {
"1.6.0", "1.6.1", "1.6.2", "1.6.3", "1.6.4", "1.6.5", "1.6.6", "1.6.7", "1.6.8", "1.6.9",
"1.6.10", "1.6.11", "1.7.0"
@@ -24,6 +24,7 @@
import org.apache.paimon.data.columnar.ColumnarRowIterator;
import org.apache.paimon.data.columnar.VectorizedColumnBatch;
import org.apache.paimon.fileindex.FileIndexResult;
import org.apache.paimon.fileindex.bitmap.BitmapIndexResult;
import org.apache.paimon.format.FormatReaderFactory;
import org.apache.paimon.format.OrcFormatReaderContext;
import org.apache.paimon.format.fs.HadoopReadOnlyFileSystem;
@@ -258,7 +259,7 @@ private static RecordReader createRecordReader(
org.apache.paimon.fs.Path path,
long splitStart,
long splitLength,
FileIndexResult fileIndexResult,
@Nullable FileIndexResult fileIndexResult,
boolean deletionVectorsEnabled)
throws IOException {
org.apache.orc.Reader orcReader = createReader(conf, fileIO, path, fileIndexResult);
@@ -276,9 +277,11 @@ private static RecordReader createRecordReader(
.skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
.tolerateMissingSchema(
OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
if (!conjunctPredicates.isEmpty() && !deletionVectorsEnabled) {
// deletion vectors can not enable this feature, cased by getRowNumber would be
// changed.
if (!conjunctPredicates.isEmpty()
&& !deletionVectorsEnabled
&& !(fileIndexResult instanceof BitmapIndexResult)) {
// row group filter push down changes the row numbers returned by the reader,
// so deletion vectors mode and bitmap index cannot work with row group push down
options.useSelected(OrcConf.READER_USE_SELECTED.getBoolean(conf));
options.allowSARGToFilter(OrcConf.ALLOW_SARG_TO_FILTER.getBoolean(conf));
}
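
The extra condition above matters because SARG-based row group filtering changes which rows the ORC reader emits, while both deletion vectors and a BitmapIndexResult describe absolute row positions in the file. A rough illustration of the mismatch, using hypothetical numbers and no real ORC or Paimon APIs:

// Illustration only: why row positions drift once push down skips rows.
public class RowNumberMismatchDemo {
    public static void main(String[] args) {
        // A bitmap index stores absolute row positions within the file.
        long bitmapPosition = 10_000L;

        // Suppose SARG push down skipped the first row group (rows 0..9_999).
        long rowsSkippedByPushDown = 10_000L;

        // The position of that row in the reader's output is shifted.
        long emittedIndex = bitmapPosition - rowsSkippedByPushDown; // 0, not 10_000

        // Applying the bitmap positions to the reader's output would therefore
        // select different physical rows, which is why push down stays disabled
        // when a BitmapIndexResult (or deletion vectors mode) is present.
        System.out.println("file row " + bitmapPosition
                + " becomes emitted row " + emittedIndex + " after push down");
    }
}
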
@@ -342,7 +345,7 @@ public static org.apache.orc.Reader createReader(
org.apache.hadoop.conf.Configuration conf,
FileIO fileIO,
org.apache.paimon.fs.Path path,
FileIndexResult fileIndexResult)
@Nullable FileIndexResult fileIndexResult)
throws IOException {
// open ORC file and create reader
org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri());
