diff --git a/paimon-common/src/main/java/org/apache/paimon/types/DataField.java b/paimon-common/src/main/java/org/apache/paimon/types/DataField.java
index 4e51372c25f23..9cacd9f5d8200 100644
--- a/paimon-common/src/main/java/org/apache/paimon/types/DataField.java
+++ b/paimon-common/src/main/java/org/apache/paimon/types/DataField.java
@@ -39,7 +39,7 @@
@Public
public final class DataField implements Serializable {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 2L;
public static final String FIELD_FORMAT_WITH_DESCRIPTION = "%s %s '%s'";
@@ -53,15 +53,27 @@ public final class DataField implements Serializable {
private final @Nullable String description;
+ private final @Nullable DataFieldStats stats;
+
public DataField(int id, String name, DataType dataType) {
this(id, name, dataType, null);
}
public DataField(int id, String name, DataType type, @Nullable String description) {
+ this(id, name, type, description, null);
+ }
+
+ public DataField(
+ int id,
+ String name,
+ DataType type,
+ @Nullable String description,
+ @Nullable DataFieldStats stats) {
this.id = id;
this.name = name;
this.type = type;
this.description = description;
+ this.stats = stats;
}
public int id() {
@@ -76,25 +88,39 @@ public DataType type() {
return type;
}
- public DataField newId(int newid) {
- return new DataField(newid, name, type, description);
+ @Nullable
+ public String description() {
+ return description;
+ }
+
+ @Nullable
+ public DataFieldStats stats() {
+ return stats;
+ }
+
+ public DataField newId(int newId) {
+ return new DataField(newId, name, type, description, stats);
}
public DataField newName(String newName) {
- return new DataField(id, newName, type, description);
+ return new DataField(id, newName, type, description, stats);
+ }
+
+ public DataField newRowType(DataType newType) {
+ return new DataField(id, name, newType, description, stats);
}
public DataField newDescription(String newDescription) {
- return new DataField(id, name, type, newDescription);
+ return new DataField(id, name, type, newDescription, stats);
}
- @Nullable
- public String description() {
- return description;
+ public DataField newStats(DataFieldStats newStats) {
+ return new DataField(id, name, type, description, newStats);
}
public DataField copy() {
- return new DataField(id, name, type.copy(), description);
+ return new DataField(
+ id, name, type.copy(), description, stats == null ? null : stats.copy());
}
public String asSQLString() {
@@ -122,6 +148,10 @@ public void serializeJson(JsonGenerator generator) throws IOException {
if (description() != null) {
generator.writeStringField("description", description());
}
+ if (stats() != null) {
+ generator.writeFieldName("stats");
+ stats().serializeJson(generator);
+ }
generator.writeEndObject();
}
@@ -137,12 +167,13 @@ public boolean equals(Object o) {
return Objects.equals(id, field.id)
&& Objects.equals(name, field.name)
&& Objects.equals(type, field.type)
- && Objects.equals(description, field.description);
+ && Objects.equals(description, field.description)
+ && Objects.equals(stats, field.stats);
}
@Override
public int hashCode() {
- return Objects.hash(id, name, type, description);
+ return Objects.hash(id, name, type, description, stats);
}
@Override
diff --git a/paimon-common/src/main/java/org/apache/paimon/types/DataFieldStats.java b/paimon-common/src/main/java/org/apache/paimon/types/DataFieldStats.java
new file mode 100644
index 0000000000000..3648a3e8fa865
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/types/DataFieldStats.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.types;
+
+import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
+import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.JsonNode;
+
+import javax.annotation.Nullable;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Objects;
+
+/**
+ * Table level col stats, supports the following stats.
+ *
+ *
+ * - distinctCount: distinct count
+ *
- nullCount: null count
+ *
- avgLen: average length
+ *
- maxLen: max length
+ *
+ *
+ * Todo: Support min, max
+ */
+public class DataFieldStats implements Serializable {
+ private static final long serialVersionUID = 2L;
+ private final @Nullable Long distinctCount;
+ private final @Nullable Long nullCount;
+ private final @Nullable Long avgLen;
+ private final @Nullable Long maxLen;
+
+ public DataFieldStats(
+ @Nullable Long distinctCount,
+ @Nullable Long nullCount,
+ @Nullable Long avgLen,
+ @Nullable Long maxLen) {
+ this.distinctCount = distinctCount;
+ this.nullCount = nullCount;
+ this.avgLen = avgLen;
+ this.maxLen = maxLen;
+ }
+
+ public @Nullable Long avgLen() {
+ return avgLen;
+ }
+
+ public @Nullable Long distinctCount() {
+ return distinctCount;
+ }
+
+ public @Nullable Long maxLen() {
+ return maxLen;
+ }
+
+ public @Nullable Long nullCount() {
+ return nullCount;
+ }
+
+ public void serializeJson(JsonGenerator generator) throws IOException {
+ generator.writeStartObject();
+ if (distinctCount != null) {
+ generator.writeNumberField("distinctCount", distinctCount);
+ }
+ if (nullCount != null) {
+ generator.writeNumberField("nullCount", nullCount);
+ }
+ if (avgLen != null) {
+ generator.writeNumberField("avgLen", avgLen);
+ }
+ if (maxLen != null) {
+ generator.writeNumberField("maxLen", maxLen);
+ }
+ generator.writeEndObject();
+ }
+
+ public static DataFieldStats deserializeJson(JsonNode jsonNode) {
+ return new DataFieldStats(
+ jsonNode.get("distinctCount") != null
+ ? jsonNode.get("distinctCount").asLong()
+ : null,
+ jsonNode.get("nullCount") != null ? jsonNode.get("nullCount").asLong() : null,
+ jsonNode.get("avgLen") != null ? jsonNode.get("avgLen").asLong() : null,
+ jsonNode.get("maxLen") != null ? jsonNode.get("maxLen").asLong() : null);
+ }
+
+ public DataFieldStats copy() {
+ return new DataFieldStats(distinctCount, nullCount, avgLen, maxLen);
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (this == object) {
+ return true;
+ }
+ if (object == null || getClass() != object.getClass()) {
+ return false;
+ }
+ DataFieldStats that = (DataFieldStats) object;
+ return Objects.equals(distinctCount, that.distinctCount)
+ && Objects.equals(nullCount, that.nullCount)
+ && Objects.equals(avgLen, that.avgLen)
+ && Objects.equals(maxLen, that.maxLen);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(distinctCount, nullCount, avgLen, maxLen);
+ }
+
+ @Override
+ public String toString() {
+ return "DataFieldStats{"
+ + "distinctCount="
+ + distinctCount
+ + ", nullCount="
+ + nullCount
+ + ", avgLen="
+ + avgLen
+ + ", maxLen="
+ + maxLen
+ + '}';
+ }
+}
diff --git a/paimon-common/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java b/paimon-common/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java
index 0af68df9de2be..fdb3c0eae104c 100644
--- a/paimon-common/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java
+++ b/paimon-common/src/main/java/org/apache/paimon/types/DataTypeJsonParser.java
@@ -40,11 +40,16 @@ public static DataField parseDataField(JsonNode json) {
String name = json.get("name").asText();
DataType type = parseDataType(json.get("type"));
JsonNode descriptionNode = json.get("description");
+ JsonNode statsNode = json.get("stats");
String description = null;
if (descriptionNode != null) {
description = descriptionNode.asText();
}
- return new DataField(id, name, type, description);
+ DataFieldStats fieldStats = null;
+ if (statsNode != null) {
+ fieldStats = DataFieldStats.deserializeJson(statsNode);
+ }
+ return new DataField(id, name, type, description, fieldStats);
}
public static DataType parseDataType(JsonNode json) {
diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/OptionalUtils.java b/paimon-common/src/main/java/org/apache/paimon/utils/OptionalUtils.java
new file mode 100644
index 0000000000000..ccd4167a8db79
--- /dev/null
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/OptionalUtils.java
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.utils;
+
+import java.util.OptionalLong;
+
+/** Utils for Optional. * */
+public class OptionalUtils {
+ public static OptionalLong of(Long value) {
+ return value == null ? OptionalLong.empty() : OptionalLong.of(value);
+ }
+}
diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaChange.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaChange.java
index 28c49af7cd6b8..4beeff2b6f447 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaChange.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaChange.java
@@ -19,6 +19,7 @@
package org.apache.paimon.schema;
import org.apache.paimon.annotation.Public;
+import org.apache.paimon.types.DataFieldStats;
import org.apache.paimon.types.DataType;
import javax.annotation.Nullable;
@@ -87,6 +88,10 @@ static SchemaChange updateColumnPosition(Move move) {
return new UpdateColumnPosition(move);
}
+ static SchemaChange updateColumnStats(String fieldName, DataFieldStats newStats) {
+ return new UpdateColumnStats(fieldName, newStats);
+ }
+
/** A SchemaChange to set a table option. */
final class SetOption implements SchemaChange {
@@ -511,4 +516,44 @@ public int hashCode() {
return result;
}
}
+
+ /** A SchemaChange to update field stats. */
+ final class UpdateColumnStats implements SchemaChange {
+
+ private static final long serialVersionUID = 1L;
+
+ private final String fieldName;
+ private final DataFieldStats newStats;
+
+ public UpdateColumnStats(String fieldName, DataFieldStats newStats) {
+ this.fieldName = fieldName;
+ this.newStats = newStats;
+ }
+
+ public String fieldName() {
+ return fieldName;
+ }
+
+ public DataFieldStats newStats() {
+ return newStats;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+ UpdateColumnStats that = (UpdateColumnStats) o;
+ return Objects.equals(fieldName, that.fieldName)
+ && Objects.equals(newStats, that.newStats);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(fieldName, newStats);
+ }
+ }
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaManager.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaManager.java
index 01b395a844866..a1882ba1dfbb7 100644
--- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaManager.java
+++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaManager.java
@@ -34,6 +34,7 @@
import org.apache.paimon.schema.SchemaChange.UpdateColumnComment;
import org.apache.paimon.schema.SchemaChange.UpdateColumnNullability;
import org.apache.paimon.schema.SchemaChange.UpdateColumnPosition;
+import org.apache.paimon.schema.SchemaChange.UpdateColumnStats;
import org.apache.paimon.schema.SchemaChange.UpdateColumnType;
import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataType;
@@ -221,12 +222,7 @@ public TableSchema commitChanges(List changes)
newFields,
new String[] {rename.fieldName()},
0,
- (field) ->
- new DataField(
- field.id(),
- rename.newName(),
- field.type(),
- field.description()));
+ (field) -> field.newName(rename.newName()));
} else if (change instanceof DropColumn) {
DropColumn drop = (DropColumn) change;
validateNotPrimaryAndPartitionKey(schema, drop.fieldName());
@@ -266,11 +262,7 @@ public TableSchema commitChanges(List changes)
"Update column to nested row type '%s' is not supported.",
update.newDataType()));
}
- return new DataField(
- field.id(),
- field.name(),
- update.newDataType(),
- field.description());
+ return field.newRowType(update.newDataType());
});
} else if (change instanceof UpdateColumnNullability) {
UpdateColumnNullability update = (UpdateColumnNullability) change;
@@ -285,23 +277,14 @@ public TableSchema commitChanges(List changes)
update.fieldNames(),
0,
(field) ->
- new DataField(
- field.id(),
- field.name(),
- field.type().copy(update.newNullability()),
- field.description()));
+ field.newRowType(field.type().copy(update.newNullability())));
} else if (change instanceof UpdateColumnComment) {
UpdateColumnComment update = (UpdateColumnComment) change;
updateNestedColumn(
newFields,
update.fieldNames(),
0,
- (field) ->
- new DataField(
- field.id(),
- field.name(),
- field.type(),
- update.newDescription()));
+ (field) -> field.newDescription(update.newDescription()));
} else if (change instanceof UpdateColumnPosition) {
UpdateColumnPosition update = (UpdateColumnPosition) change;
SchemaChange.Move move = update.move();
@@ -327,6 +310,12 @@ public TableSchema commitChanges(List changes)
}
}
+ } else if (change instanceof UpdateColumnStats) {
+ UpdateColumnStats update = (UpdateColumnStats) change;
+ updateColumn(
+ newFields,
+ update.fieldName(),
+ (field) -> field.newStats(update.newStats()));
} else {
throw new UnsupportedOperationException(
"Unsupported change: " + change.getClass());
@@ -413,12 +402,9 @@ private void updateNestedColumn(
updateNestedColumn(nestedFields, updateFieldNames, index + 1, updateFunc);
newFields.set(
i,
- new DataField(
- field.id(),
- field.name(),
+ field.newRowType(
new org.apache.paimon.types.RowType(
- field.type().isNullable(), nestedFields),
- field.description()));
+ field.type().isNullable(), nestedFields)));
}
}
}
diff --git a/paimon-core/src/main/java/org/apache/paimon/stats/FieldStatsArraySerializer.java b/paimon-core/src/main/java/org/apache/paimon/stats/FieldStatsArraySerializer.java
index 17db3a53eeb16..8a0293db97fa2 100644
--- a/paimon-core/src/main/java/org/apache/paimon/stats/FieldStatsArraySerializer.java
+++ b/paimon-core/src/main/java/org/apache/paimon/stats/FieldStatsArraySerializer.java
@@ -53,7 +53,9 @@ public FieldStatsArraySerializer(RowType type) {
}
public FieldStatsArraySerializer(
- RowType type, int[] indexMapping, CastExecutor