Skip to content

Commit

Permalink
[Fix](statistics) Fix partition name NPE and sample for all table dur…
Browse files Browse the repository at this point in the history
…ing auto analyze (apache#28916)

Fix the partition name NPE and sample all tables during auto analyze.
All tables are sampled because getData may have latency, which may cause a full analyze of a huge table and use too many resources. Sampling all tables avoids this. The strategy will be improved later.
  • Loading branch information
Jibing-Li authored Dec 23, 2023
1 parent 13a3550 commit 5505fa3
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1422,7 +1422,7 @@ public void setEnableLeftZigZag(boolean enableLeftZigZag) {
+ "When enable_auto_sample is enabled, tables"
+ "larger than this value will automatically collect "
+ "statistics through sampling"})
public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;
public long hugeTableLowerBoundSizeInBytes = 0;

@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
description = {"控制对大表的自动ANALYZE的最小时间间隔,"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ public OlapAnalysisTask(AnalysisInfo info) {

public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
if (partitionNames.isEmpty()) {
if (partitionNames == null || partitionNames.isEmpty()) {
if (partitionNames == null) {
LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. ColToPartitions:[{}]",
info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions);
}
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public class StatisticConstants {
public static final int INSERT_MERGE_ITEM_COUNT = 200;

public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 * 1024 * 1024;
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;

public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ protected boolean skip(TableIf table) {

protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
List<AnalysisInfo> analysisInfos, TableIf table) {
AnalysisMethod analysisMethod = table.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public void testAutoSampleSmallTable(@Mocked HMSExternalTable tableIf)
new MockUp<HMSExternalTable>() {
@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};
HMSAnalysisTask task = new HMSAnalysisTask();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public void testSample3(@Mocked OlapTable tbl) {

@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ public List<Column> getBaseSchema() {

@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}

@Mock
Expand Down

0 comments on commit 5505fa3

Please sign in to comment.