Skip to content

Commit

Permalink
join order good for rf
Browse files Browse the repository at this point in the history
  • Loading branch information
englefly committed Jul 30, 2024
1 parent 1b17f99 commit 88268d6
Showing 1 changed file with 33 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.nereids.PlanContext;
import org.apache.doris.nereids.processor.post.RuntimeFilterGenerator;
import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.DistributionSpecGather;
import org.apache.doris.nereids.properties.DistributionSpecHash;
Expand Down Expand Up @@ -55,6 +56,7 @@
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.SessionVariable;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.Statistics;

import com.google.common.base.Preconditions;
Expand Down Expand Up @@ -371,6 +373,20 @@ public Cost visitPhysicalHashJoin(
leftRowCount += 1e-3;
}
}
if (!physicalHashJoin.getGroupExpression().get().getOwnerGroup().isStatsReliable()
&& !(RuntimeFilterGenerator.DENIED_JOIN_TYPES.contains(physicalHashJoin.getJoinType()))
&& !physicalHashJoin.isMarkJoin()
&& buildStats.getWidthInJoinCluster() == 1) {
// we prefer join order: A-B-filter(C) to A-filter(C)-B,
// since filter(C) may generate effective RF to B, and RF(C->B) makes RF(B->A) more effective.
double filterFactor = computeFilterFactor(buildStats);
if (filterFactor > 1.0) {
double bonus = filterFactor * probeStats.getWidthInJoinCluster();
if (leftRowCount > bonus) {
leftRowCount -= bonus;
}
}
}

/*
pattern1: L join1 (Agg1() join2 Agg2())
Expand Down Expand Up @@ -428,6 +444,23 @@ public Cost visitPhysicalHashJoin(
);
}

/**
 * Estimates how strongly the given statistics have been filtered relative to
 * the column counts recorded for columns whose statistics are unknown.
 *
 * <p>Columns are scanned in the iteration order of
 * {@code stats.columnStatistics().keySet()}; the scan stops at the first
 * column whose statistics are not marked unknown. Among the columns seen
 * before that point, the largest {@code count} is divided by
 * {@code stats.getRowCount()} and the ratio is clamped with
 * {@code Math.max(1, Math.min(2, ratio))}.
 *
 * @param stats statistics to inspect
 * @return 1.0 when no non-zero count was collected from unknown-stat columns,
 *         otherwise the clamped ratio described above
 */
private double computeFilterFactor(Statistics stats) {
    double largestUnknownCount = 0.0;
    for (Expression columnExpr : stats.columnStatistics().keySet()) {
        ColumnStatistic columnStatistic = stats.findColumnStatistics(columnExpr);
        if (!columnStatistic.isUnKnown) {
            // Stop scanning as soon as a column with known statistics is met.
            break;
        }
        largestUnknownCount = Math.max(largestUnknownCount, columnStatistic.count);
    }

    if (largestUnknownCount == 0) {
        // Nothing usable was collected: neutral factor.
        return 1.0;
    }

    double ratio = largestUnknownCount / stats.getRowCount();
    double cappedRatio = Math.min(2, ratio);
    return Math.max(1, cappedRatio);
}

/*
in a join cluster graph, if a node has higher connectivity, it is more likely to be reduced
by runtime filters, and it is also more likely to produce effective runtime filters.
Expand Down

0 comments on commit 88268d6

Please sign in to comment.