Skip to content

Commit

Permalink
table level row count
Browse files Browse the repository at this point in the history
  • Loading branch information
morningman committed Dec 8, 2024
1 parent b955b47 commit 1adc1f7
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ public class IcebergScanNode extends FileQueryScanNode {
private IcebergSource source;
private Table icebergTable;
private List<String> pushdownIcebergPredicates = Lists.newArrayList();
private boolean pushDownCount = false;
// If tableLevelPushDownCount is true, the count push-down optimization can be applied at table level.
// This means that no split has position/equality delete files,
// so for a query like "select count(*) from ice_tbl", we can get the count
// directly from the snapshot's row count info.
// If tableLevelPushDownCount is false, the count push-down optimization cannot be applied at table level,
// but it can still be applied to those splits that have no position/equality delete files.
// For the split-level count push-down optimization, the flag is set on each split.
private boolean tableLevelPushDownCount = false;
private static final long COUNT_WITH_PARALLEL_SPLITS = 10000;

/**
Expand Down Expand Up @@ -140,8 +146,8 @@ private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSpli
int formatVersion = icebergSplit.getFormatVersion();
fileDesc.setFormatVersion(formatVersion);
fileDesc.setOriginalFilePath(icebergSplit.getOriginalPath());
if (pushDownCount) {
fileDesc.setRowCount(icebergSplit.getRowCount());
if (tableLevelPushDownCount) {
fileDesc.setRowCount(icebergSplit.getTableLevelRowCount());
}
if (formatVersion < MIN_DELETE_FILE_SUPPORT_VERSION) {
fileDesc.setContent(FileContent.DATA.id());
Expand Down Expand Up @@ -271,7 +277,7 @@ private List<Split> doGetSplits(int numBackends) throws UserException {
}
long countFromSnapshot = getCountFromSnapshot();
if (countFromSnapshot >= 0) {
pushDownCount = true;
tableLevelPushDownCount = true;
List<Split> pushDownCountSplits;
if (countFromSnapshot > COUNT_WITH_PARALLEL_SPLITS) {
int minSplits = sessionVariable.getParallelExecInstanceNum() * numBackends;
Expand Down Expand Up @@ -424,8 +430,8 @@ private void assignCountToSplits(List<Split> splits, long totalCount) {
int size = splits.size();
long countPerSplit = totalCount / size;
for (int i = 0; i < size - 1; i++) {
((IcebergSplit) splits.get(i)).setRowCount(countPerSplit);
((IcebergSplit) splits.get(i)).setTableLevelRowCount(countPerSplit);
}
((IcebergSplit) splits.get(size - 1)).setRowCount(countPerSplit + totalCount % size);
((IcebergSplit) splits.get(size - 1)).setTableLevelRowCount(countPerSplit + totalCount % size);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ public class IcebergSplit extends FileSplit {
private Integer formatVersion;
private List<IcebergDeleteFileFilter> deleteFileFilters;
private Map<String, String> config;
private long rowCount = -1;
// tableLevelRowCount is set only when the table-level count push-down optimization is available.
private long tableLevelRowCount = -1;

// File path will be changed if the file is modified, so there's no need to get modification time.
public IcebergSplit(LocationPath file, long start, long length, long fileLength, String[] hosts,
Expand All @@ -50,14 +51,6 @@ public IcebergSplit(LocationPath file, long start, long length, long fileLength,
this.selfSplitWeight = length;
}

public long getRowCount() {
return rowCount;
}

public void setRowCount(long rowCount) {
this.rowCount = rowCount;
}

public void setDeleteFileFilters(List<IcebergDeleteFileFilter> deleteFileFilters) {
this.deleteFileFilters = deleteFileFilters;
this.selfSplitWeight += deleteFileFilters.stream().mapToLong(IcebergDeleteFileFilter::getFilesize).sum();
Expand Down

0 comments on commit 1adc1f7

Please sign in to comment.