Skip to content

Commit

Permalink
[core] Make the default value of metadata.stats-dense-store true (#4617)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingsongLi authored Dec 2, 2024
1 parent 77b7d8d commit 6f967b4
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 11 deletions.
4 changes: 2 additions & 2 deletions docs/content/flink/sql-ddl.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ Paimon will automatically collect the statistics of the data file for speeding u
The statistics collector mode can be configured by `'metadata.stats-mode'`, by default is `'truncate(16)'`.
You can configure the field level by setting `'fields.{field_name}.stats-mode'`.

For the stats mode of `none`, we suggest that you configure `metadata.stats-dense-store` = `true`, which will
significantly reduce the storage size of the manifest.
For the `none` stats mode, `metadata.stats-dense-store` defaults to `true`, which significantly reduces the
storage size of the manifest. Note that this requires the Paimon SDK in the reading engine to be at least version 0.9.1, or 1.0.0 or higher.

### Field Default Value

Expand Down
4 changes: 2 additions & 2 deletions docs/layouts/shortcodes/generated/core_configuration.html
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,9 @@
</tr>
<tr>
<td><h5>metadata.stats-dense-store</h5></td>
<td style="word-wrap: break-word;">false</td>
<td style="word-wrap: break-word;">true</td>
<td>Boolean</td>
<td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled, the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
<td>Whether to store statistic densely in metadata (manifest files), which will significantly reduce the storage size of metadata when the none statistic mode is set.<br />Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in reading engine requires at least version 0.9.1 or 1.0.0 or higher.</td>
</tr>
<tr>
<td><h5>metadata.stats-mode</h5></td>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1101,7 +1101,7 @@ public class CoreOptions implements Serializable {
public static final ConfigOption<Boolean> METADATA_STATS_DENSE_STORE =
key("metadata.stats-dense-store")
.booleanType()
.defaultValue(false)
.defaultValue(true)
.withDescription(
Description.builder()
.text(
Expand All @@ -1110,8 +1110,8 @@ public class CoreOptions implements Serializable {
+ " none statistic mode is set.")
.linebreak()
.text(
"Note, when this mode is enabled, the Paimon sdk in reading engine requires"
+ " at least version 0.9.1 or 1.0.0 or higher.")
"Note, when this mode is enabled with 'metadata.stats-mode:none', the Paimon sdk in"
+ " reading engine requires at least version 0.9.1 or 1.0.0 or higher.")
.build());

public static final ConfigOption<String> COMMIT_CALLBACKS =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,19 @@

import org.junit.jupiter.api.Test;

import static org.apache.paimon.CoreOptions.METADATA_STATS_DENSE_STORE;
import static org.apache.paimon.CoreOptions.METADATA_STATS_MODE;
import static org.assertj.core.api.Assertions.assertThat;

/** Test for table stats mode. */
public class StatsTableTest extends TableTestBase {

@Test
public void testPartitionStats() throws Exception {
public void testPartitionStatsNotDense() throws Exception {
Identifier identifier = identifier("T");
Options options = new Options();
options.set(METADATA_STATS_MODE, "NONE");
options.set(METADATA_STATS_DENSE_STORE, false);
options.set(CoreOptions.BUCKET, 1);
Schema schema =
Schema.newBuilder()
Expand Down Expand Up @@ -90,4 +92,52 @@ public void testPartitionStats() throws Exception {
assertThat(recordStats.maxValues().isNullAt(1)).isTrue();
assertThat(recordStats.maxValues().isNullAt(2)).isTrue();
}

/**
 * Checks manifest statistics for a partitioned primary-key table created with
 * {@code metadata.stats-mode = NONE} while {@code metadata.stats-dense-store} is left unset.
 *
 * <p>Partition stats must still be recorded in the manifest list entry, whereas the value stats
 * of the written data file are expected to be completely empty.
 */
@Test
public void testPartitionStatsDenseMode() throws Exception {
    Identifier tableId = identifier("T");

    // Only the stats mode and the bucket count are set; the dense-store option keeps its default.
    Options tableOptions = new Options();
    tableOptions.set(METADATA_STATS_MODE, "NONE");
    tableOptions.set(CoreOptions.BUCKET, 1);

    Schema tableSchema =
            Schema.newBuilder()
                    .column("pt", DataTypes.INT())
                    .column("pk", DataTypes.INT())
                    .column("col1", DataTypes.INT())
                    .partitionKeys("pt")
                    .primaryKey("pk", "pt")
                    .options(tableOptions.toMap())
                    .build();
    catalog.createTable(tableId, tableSchema, true);
    Table createdTable = catalog.getTable(tableId);

    // Rows are (pt, pk, col1): three rows with pt = 1 and one row with pt = 2.
    write(
            createdTable,
            GenericRow.of(1, 1, 1),
            GenericRow.of(1, 2, 1),
            GenericRow.of(1, 3, 1),
            GenericRow.of(2, 1, 1));

    FileStoreTable fileStoreTable = (FileStoreTable) createdTable;
    FileStore<?> fileStore = fileStoreTable.store();
    String deltaManifestListName =
            fileStoreTable.snapshotManager().latestSnapshot().deltaManifestList();

    // First manifest referenced by the latest snapshot's delta manifest list.
    ManifestFileMeta firstManifest =
            fileStore.manifestListFactory().create().read(deltaManifestListName).get(0);

    // Partition stats are expected regardless of the NONE stats mode: pt ranges over [1, 2].
    SimpleStats ptStats = firstManifest.partitionStats();
    assertThat(ptStats.minValues().getInt(0)).isEqualTo(1);
    assertThat(ptStats.maxValues().getInt(0)).isEqualTo(2);

    // Value stats of the first data file must be empty because of the NONE stats mode.
    DataFileMeta firstDataFile =
            fileStore
                    .manifestFileFactory()
                    .create()
                    .read(firstManifest.fileName(), firstManifest.fileSize())
                    .get(0)
                    .file();
    SimpleStats valueStats = firstDataFile.valueStats();
    assertThat(firstDataFile.valueStatsCols()).isEmpty();
    assertThat(valueStats.minValues().getFieldCount()).isEqualTo(0);
    assertThat(valueStats.maxValues().getFieldCount()).isEqualTo(0);
    assertThat(valueStats.nullCounts().size()).isEqualTo(0);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,6 @@ public void testBatchProjection() throws Exception {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,6 @@ public void testBatchProjection() throws Exception {
public void testBatchFilter(boolean statsDenseStore) throws Exception {
Consumer<Options> optionsSetter =
options -> {
options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
// pk table doesn't need value stats
options.set(CoreOptions.METADATA_STATS_MODE, "none");
Expand Down Expand Up @@ -1664,7 +1663,6 @@ public void testReadDeletionVectorTable(boolean statsDenseStore) throws Exceptio
options.set(TARGET_FILE_SIZE, new MemorySize(1));
options.set(DELETION_VECTORS_ENABLED, true);

options.set(CoreOptions.METADATA_STATS_DENSE_STORE, statsDenseStore);
if (statsDenseStore) {
options.set(CoreOptions.METADATA_STATS_MODE, "none");
options.set("fields.b.stats-mode", "full");
Expand Down

0 comments on commit 6f967b4

Please sign in to comment.