Skip to content

Commit

Permalink
[improvement](statistics)Support show column partition update rows in…
Browse files Browse the repository at this point in the history
…fo. (apache#37124)

Support showing the updated rows of each column for each partition. This may help when
investigating online issues.
```
mysql> show table stats part2 partition(p1, p2) (id, colint);
+------------+-------------+----------------+--------------+
| index_name | column_name | partition_name | updated_rows |
+------------+-------------+----------------+--------------+
| part2      | id          | p1             | 6            |
| part2      | id          | p2             | 6            |
| part2      | colint      | p1             | 6            |
| part2      | colint      | p2             | 6            |
+------------+-------------+----------------+--------------+
4 rows in set (0.01 sec)
```
  • Loading branch information
Jibing-Li authored Jul 2, 2024
1 parent 2469f1f commit 80f6583
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 7 deletions.
4 changes: 2 additions & 2 deletions fe/fe-core/src/main/cup/sql_parser.cup
Original file line number Diff line number Diff line change
Expand Up @@ -4597,9 +4597,9 @@ show_param ::=
RESULT = new ShowSyncJobStmt(dbName);
:}
/* show table stats */
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames
| KW_TABLE opt_cached:cached KW_STATS table_name:tbl opt_partition_names:partitionNames opt_col_list:cols
{:
RESULT = new ShowTableStatsStmt(tbl, partitionNames, cached);
RESULT = new ShowTableStatsStmt(tbl, cols, partitionNames, cached);
:}
/* show column stats */
| KW_COLUMN opt_cached:cached KW_STATS table_name:tbl opt_col_list:cols opt_partition_names:partitionNames
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.UserException;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.qe.ShowResultSet;
import org.apache.doris.qe.ShowResultSetMetaData;
import org.apache.doris.statistics.ColStatsMeta;
import org.apache.doris.statistics.TableStatsMeta;

import com.google.common.collect.ImmutableList;
Expand All @@ -42,7 +44,10 @@
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class ShowTableStatsStmt extends ShowStmt {

Expand All @@ -65,15 +70,25 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("row_count")
.build();

private final TableName tableName;
// Result-set column titles used when both partitions and columns are specified:
// one output row per (index, column, partition) with its updated row count.
private static final ImmutableList<String> COLUMN_PARTITION_TITLE_NAMES =
        ImmutableList.of("index_name", "column_name", "partition_name", "updated_rows");

private final TableName tableName;
private final List<String> columnNames;
private final PartitionNames partitionNames;
private final boolean cached;

private TableIf table;

public ShowTableStatsStmt(TableName tableName, PartitionNames partitionNames, boolean cached) {
public ShowTableStatsStmt(TableName tableName, List<String> columnNames,
PartitionNames partitionNames, boolean cached) {
this.tableName = tableName;
this.columnNames = columnNames;
this.partitionNames = partitionNames;
this.cached = cached;
}
Expand All @@ -89,6 +104,9 @@ public void analyze(Analyzer analyzer) throws UserException {
if (partitionNames != null) {
partitionNames.analyze(analyzer);
}
if (columnNames != null && partitionNames == null) {
ErrorReport.reportAnalysisException(String.format("Must specify partitions when columns are specified."));
}
CatalogIf<DatabaseIf> catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(tableName.getCtl());
if (catalog == null) {
ErrorReport.reportAnalysisException(String.format("Catalog: %s not exists", tableName.getCtl()));
Expand Down Expand Up @@ -122,7 +140,15 @@ public void analyze(Analyzer analyzer) throws UserException {
public ShowResultSetMetaData getMetaData() {
ShowResultSetMetaData.Builder builder = ShowResultSetMetaData.builder();

ImmutableList<String> titles = partitionNames == null ? TABLE_TITLE_NAMES : PARTITION_TITLE_NAMES;
ImmutableList<String> titles;
// If columnNames != null, partitionNames is also not null. Guaranteed in analyze()
if (columnNames != null) {
titles = COLUMN_PARTITION_TITLE_NAMES;
} else if (partitionNames != null) {
titles = PARTITION_TITLE_NAMES;
} else {
titles = TABLE_TITLE_NAMES;
}
for (String title : titles) {
builder.addColumn(new Column(title, ScalarType.createVarchar(30)));
}
Expand All @@ -136,8 +162,11 @@ public TableIf getTable() {
/**
 * Builds the result set in the layout that matches what the statement specified.
 *
 * <ul>
 *   <li>no partitions: table-level result;</li>
 *   <li>partitions but no columns: per-partition result;</li>
 *   <li>partitions and columns: per-column, per-partition result.</li>
 * </ul>
 *
 * @param tableStatistic statistics metadata handed through to the chosen builder
 * @return the result set produced by the selected builder
 */
public ShowResultSet constructResultSet(TableStatsMeta tableStatistic) {
    if (partitionNames == null) {
        return constructTableResultSet(tableStatistic);
    }
    return columnNames == null
            ? constructPartitionResultSet(tableStatistic)
            : constructColumnPartitionResultSet(tableStatistic);
}

Expand Down Expand Up @@ -209,6 +238,43 @@ public ShowResultSet constructPartitionResultSet(TableStatsMeta tableStatistic)
return new ShowResultSet(getMetaData(), result);
}

/**
 * Builds one row per (index, column, partition) holding that partition's updated row count
 * as recorded in the column statistics metadata.
 *
 * <p>Only {@code OlapTable} is handled; any other table type yields an empty result.
 * Entries whose partition id no longer resolves to a partition are still reported, with a
 * "Partition &lt;id&gt; Not Exist" placeholder in the partition-name position.
 *
 * @param tableStatistic table statistics metadata used to look up each column's stats
 * @return result set whose rows are: pair.first, pair.second (index name and column name,
 *         going by the result titles), partition name, updated rows
 * @throws RuntimeException if more than 100 partitions are requested
 */
public ShowResultSet constructColumnPartitionResultSet(TableStatsMeta tableStatistic) {
    List<List<String>> rows = Lists.newArrayList();
    if (table instanceof OlapTable) {
        OlapTable olap = (OlapTable) table;

        // "*" means every partition of the table; otherwise use the names given in the statement.
        Collection<String> wantedPartitions;
        if (partitionNames.isStar()) {
            wantedPartitions = table.getPartitionNames();
        } else {
            wantedPartitions = partitionNames.getPartitionNames();
        }
        if (wantedPartitions.size() > 100) {
            throw new RuntimeException("Too many partitions, show at most 100 partitions each time.");
        }

        for (Pair<String, String> indexAndColumn : olap.getColumnIndexPairs(new HashSet<>(columnNames))) {
            ColStatsMeta meta = tableStatistic.findColumnStatsMeta(indexAndColumn.first, indexAndColumn.second);
            if (meta == null || meta.partitionUpdateRows == null) {
                // Nothing recorded for this column.
                continue;
            }
            for (Map.Entry<Long, Long> updated : meta.partitionUpdateRows.entrySet()) {
                Long partitionId = updated.getKey();
                Partition partition = olap.getPartition(partitionId);
                String partitionLabel;
                if (partition == null) {
                    // The id no longer resolves to a partition; keep the row with a placeholder.
                    partitionLabel = "Partition " + partitionId + " Not Exist";
                } else {
                    partitionLabel = partition.getName();
                    if (!wantedPartitions.contains(partitionLabel)) {
                        // Existing partition that was not requested.
                        continue;
                    }
                }
                rows.add(Lists.newArrayList(
                        indexAndColumn.first,
                        indexAndColumn.second,
                        partitionLabel,
                        String.valueOf(updated.getValue())));
            }
        }
    }
    return new ShowResultSet(getMetaData(), rows);
}

public boolean isCached() {
return cached;
}
Expand Down
45 changes: 44 additions & 1 deletion regression-test/suites/statistics/test_partition_stats.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ suite("test_partition_stats") {
result = sql """select * from internal.__internal_schema.partition_statistics where tbl_id = ${tblIdPart1}"""
assertEquals(0, result.size())

// Test analyze table after drop partition
// Test analyze table after drop partition, test show table column stats
sql """drop database if exists test_partition_stats"""
sql """create database test_partition_stats"""
sql """use test_partition_stats"""
Expand Down Expand Up @@ -234,9 +234,52 @@ suite("test_partition_stats") {
"replication_allocation" = "tag.location.default: 1"
)
"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(0, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(0, result.size())
sql """analyze table part2 with sync;"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("0", result[0][3])

sql """Insert into part2 values (1, 1, 1, 1, 1, 1, 1.1, 1.1, 1.1), (2, 2, 2, 2, 2, 2, 2.2, 2.2, 2.2), (3, 3, 3, 3, 3, 3, 3.3, 3.3, 3.3),(4, 4, 4, 4, 4, 4, 4.4, 4.4, 4.4),(5, 5, 5, 5, 5, 5, 5.5, 5.5, 5.5),(6, 6, 6, 6, 6, 6, 6.6, 6.6, 6.6),(10001, 10001, 10001, 10001, 10001, 10001, 10001.10001, 10001.10001, 10001.10001),(10002, 10002, 10002, 10002, 10002, 10002, 10002.10002, 10002.10002, 10002.10002),(10003, 10003, 10003, 10003, 10003, 10003, 10003.10003, 10003.10003, 10003.10003),(10004, 10004, 10004, 10004, 10004, 10004, 10004.10004, 10004.10004, 10004.10004),(10005, 10005, 10005, 10005, 10005, 10005, 10005.10005, 10005.10005, 10005.10005),(10006, 10006, 10006, 10006, 10006, 10006, 10006.10006, 10006.10006, 10006.10006),(20001, 20001, 20001, 20001, 20001, 20001, 20001.20001, 20001.20001, 20001.20001),(20002, 20002, 20002, 20002, 20002, 20002, 20002.20002, 20002.20002, 20002.20002),(20003, 20003, 20003, 20003, 20003, 20003, 20003.20003, 20003.20003, 20003.20003),(20004, 20004, 20004, 20004, 20004, 20004, 20004.20004, 20004.20004, 20004.20004),(20005, 20005, 20005, 20005, 20005, 20005, 20005.20005, 20005.20005, 20005.20005),(20006, 20006, 20006, 20006, 20006, 20006, 20006.20006, 20006.20006, 20006.20006)"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("0", result[0][3])

sql """analyze table part2 with sync;"""
result = sql """show table stats part2 partition(*) (id)"""
assertEquals(3, result.size())
result = sql """show table stats part2 partition(*) (colint, coltinyint, colsmallint, colbigint, collargeint, colfloat, coldouble, coldecimal)"""
assertEquals(24, result.size())
result = sql """show table stats part2 partition(p1, p2) (id, colint)"""
assertEquals(4, result.size())
result = sql """show table stats part2 partition(p1) (id)"""
assertEquals(1, result.size())
assertEquals("part2", result[0][0])
assertEquals("id", result[0][1])
assertEquals("p1", result[0][2])
assertEquals("6", result[0][3])

result = sql """show column stats part2"""
assertEquals(9, result.size())
assertEquals("18.0", result[0][2])
Expand Down

0 comments on commit 80f6583

Please sign in to comment.