Skip to content

Commit

Permalink
first version
Browse files Browse the repository at this point in the history
  • Loading branch information
Zouxxyy committed Dec 27, 2023
1 parent 5178dc7 commit 3cdafc3
Show file tree
Hide file tree
Showing 16 changed files with 667 additions and 47 deletions.
53 changes: 42 additions & 11 deletions paimon-common/src/main/java/org/apache/paimon/types/DataField.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
@Public
public final class DataField implements Serializable {

private static final long serialVersionUID = 1L;
private static final long serialVersionUID = 2L;

public static final String FIELD_FORMAT_WITH_DESCRIPTION = "%s %s '%s'";

Expand All @@ -53,15 +53,27 @@ public final class DataField implements Serializable {

private final @Nullable String description;

private final @Nullable DataFieldStats stats;

/**
 * Creates a field with the given id, name and type.
 *
 * <p>Delegates to the four-argument constructor, passing {@code null} as the description.
 */
public DataField(int id, String name, DataType dataType) {
this(id, name, dataType, null);
}

/**
 * Creates a field with the given id, name, type and optional description.
 *
 * <p>Delegates to the five-argument constructor, passing {@code null} as the stats.
 */
public DataField(int id, String name, DataType type, @Nullable String description) {
this(id, name, type, description, null);
}

/**
 * Creates a field from all of its components; the other constructors delegate here.
 *
 * @param id value stored as the field id
 * @param name value stored as the field name
 * @param type value stored as the field type
 * @param description optional description, may be {@code null}
 * @param stats optional stats, may be {@code null}
 */
public DataField(
int id,
String name,
DataType type,
@Nullable String description,
@Nullable DataFieldStats stats) {
// Arguments are stored as given; no validation or copying happens here.
this.id = id;
this.name = name;
this.type = type;
this.description = description;
this.stats = stats;
}

public int id() {
Expand All @@ -76,25 +88,39 @@ public DataType type() {
return type;
}

public DataField newId(int newid) {
return new DataField(newid, name, type, description);
/** Returns the description of this field, or {@code null} if none was set. */
@Nullable
public String description() {
return description;
}

/** Returns the stats attached to this field, or {@code null} if none were set. */
@Nullable
public DataFieldStats stats() {
return stats;
}

/**
 * Returns a new field that uses {@code newId} as its id and carries over this field's name,
 * type, description and stats.
 */
public DataField newId(int newId) {
return new DataField(newId, name, type, description, stats);
}

public DataField newName(String newName) {
return new DataField(id, newName, type, description);
return new DataField(id, newName, type, description, stats);
}

/**
 * Returns a new field that uses {@code newType} as its type and carries over this field's id,
 * name, description and stats.
 */
public DataField newRowType(DataType newType) {
return new DataField(id, name, newType, description, stats);
}

public DataField newDescription(String newDescription) {
return new DataField(id, name, type, newDescription);
return new DataField(id, name, type, newDescription, stats);
}

@Nullable
public String description() {
return description;
/**
 * Returns a new field that uses {@code newStats} as its stats and carries over this field's
 * id, name, type and description.
 */
public DataField newStats(DataFieldStats newStats) {
return new DataField(id, name, type, description, newStats);
}

public DataField copy() {
return new DataField(id, name, type.copy(), description);
return new DataField(
id, name, type.copy(), description, stats == null ? null : stats.copy());
}

public String asSQLString() {
Expand Down Expand Up @@ -122,6 +148,10 @@ public void serializeJson(JsonGenerator generator) throws IOException {
if (description() != null) {
generator.writeStringField("description", description());
}
if (stats() != null) {
generator.writeFieldName("stats");
stats().serializeJson(generator);
}
generator.writeEndObject();
}

Expand All @@ -137,12 +167,13 @@ public boolean equals(Object o) {
return Objects.equals(id, field.id)
&& Objects.equals(name, field.name)
&& Objects.equals(type, field.type)
&& Objects.equals(description, field.description);
&& Objects.equals(description, field.description)
&& Objects.equals(stats, field.stats);
}

@Override
public int hashCode() {
return Objects.hash(id, name, type, description);
return Objects.hash(id, name, type, description, stats);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.types;

import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.paimon.shade.jackson2.com.fasterxml.jackson.databind.JsonNode;

import javax.annotation.Nullable;

import java.io.IOException;
import java.io.Serializable;
import java.util.Objects;

/**
 * Table level col stats, supports the following stats.
 *
 * <ul>
 *   <li>distinctCount: distinct count
 *   <li>nullCount: null count
 *   <li>avgLen: average length
 *   <li>maxLen: max length
 * </ul>
 *
 * <p>Every stat is optional; a {@code null} value means the stat is absent. Absent stats are
 * omitted from the JSON form and restored as {@code null} when reading it back.
 *
 * <p>TODO: Support min, max
 */
public class DataFieldStats implements Serializable {

    private static final long serialVersionUID = 1L;

    private static final String FIELD_DISTINCT_COUNT = "distinctCount";
    private static final String FIELD_NULL_COUNT = "nullCount";
    private static final String FIELD_AVG_LEN = "avgLen";
    private static final String FIELD_MAX_LEN = "maxLen";

    private final @Nullable Long distinctCount;
    private final @Nullable Long nullCount;
    private final @Nullable Long avgLen;
    private final @Nullable Long maxLen;

    /**
     * Creates a stats holder; each argument may be {@code null} when the stat is unknown.
     *
     * @param distinctCount distinct count, or {@code null}
     * @param nullCount null count, or {@code null}
     * @param avgLen average length, or {@code null}
     * @param maxLen max length, or {@code null}
     */
    public DataFieldStats(
            @Nullable Long distinctCount,
            @Nullable Long nullCount,
            @Nullable Long avgLen,
            @Nullable Long maxLen) {
        this.distinctCount = distinctCount;
        this.nullCount = nullCount;
        this.avgLen = avgLen;
        this.maxLen = maxLen;
    }

    /** Returns the average length, or {@code null} if absent. */
    public @Nullable Long avgLen() {
        return avgLen;
    }

    /** Returns the distinct count, or {@code null} if absent. */
    public @Nullable Long distinctCount() {
        return distinctCount;
    }

    /** Returns the max length, or {@code null} if absent. */
    public @Nullable Long maxLen() {
        return maxLen;
    }

    /** Returns the null count, or {@code null} if absent. */
    public @Nullable Long nullCount() {
        return nullCount;
    }

    /**
     * Writes these stats as a JSON object. Only the stats that are present are written; absent
     * stats produce no field at all.
     *
     * @param generator the generator to write to
     * @throws IOException if the generator fails to write
     */
    public void serializeJson(JsonGenerator generator) throws IOException {
        generator.writeStartObject();
        if (distinctCount != null) {
            generator.writeNumberField(FIELD_DISTINCT_COUNT, distinctCount);
        }
        if (nullCount != null) {
            generator.writeNumberField(FIELD_NULL_COUNT, nullCount);
        }
        if (avgLen != null) {
            generator.writeNumberField(FIELD_AVG_LEN, avgLen);
        }
        if (maxLen != null) {
            generator.writeNumberField(FIELD_MAX_LEN, maxLen);
        }
        generator.writeEndObject();
    }

    /**
     * Reads stats from a JSON object as written by {@link #serializeJson(JsonGenerator)}.
     *
     * <p>A stat is restored as {@code null} both when its field is missing and when the field
     * holds an explicit JSON {@code null}.
     *
     * @param jsonNode the JSON object holding the stats fields
     * @return the parsed stats
     */
    public static DataFieldStats deserializeJson(JsonNode jsonNode) {
        return new DataFieldStats(
                readNullableLong(jsonNode, FIELD_DISTINCT_COUNT),
                readNullableLong(jsonNode, FIELD_NULL_COUNT),
                readNullableLong(jsonNode, FIELD_AVG_LEN),
                readNullableLong(jsonNode, FIELD_MAX_LEN));
    }

    /** Reads one optional long field, mapping a missing or JSON-null field to {@code null}. */
    private static @Nullable Long readNullableLong(JsonNode parent, String fieldName) {
        // get(...) returns a non-null NullNode for an explicit JSON null, and asLong() on it
        // yields 0; hasNonNull rules out both the missing and the explicit-null case.
        return parent.hasNonNull(fieldName) ? parent.get(fieldName).asLong() : null;
    }

    /** Returns a new instance holding the same four stat values. */
    public DataFieldStats copy() {
        return new DataFieldStats(distinctCount, nullCount, avgLen, maxLen);
    }

    @Override
    public boolean equals(Object object) {
        if (this == object) {
            return true;
        }
        if (object == null || getClass() != object.getClass()) {
            return false;
        }
        DataFieldStats that = (DataFieldStats) object;
        return Objects.equals(distinctCount, that.distinctCount)
                && Objects.equals(nullCount, that.nullCount)
                && Objects.equals(avgLen, that.avgLen)
                && Objects.equals(maxLen, that.maxLen);
    }

    @Override
    public int hashCode() {
        return Objects.hash(distinctCount, nullCount, avgLen, maxLen);
    }

    @Override
    public String toString() {
        return "DataFieldStats{"
                + "distinctCount="
                + distinctCount
                + ", nullCount="
                + nullCount
                + ", avgLen="
                + avgLen
                + ", maxLen="
                + maxLen
                + '}';
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,16 @@ public static DataField parseDataField(JsonNode json) {
String name = json.get("name").asText();
DataType type = parseDataType(json.get("type"));
JsonNode descriptionNode = json.get("description");
JsonNode statsNode = json.get("stats");
String description = null;
if (descriptionNode != null) {
description = descriptionNode.asText();
}
return new DataField(id, name, type, description);
DataFieldStats fieldStats = null;
if (statsNode != null) {
fieldStats = DataFieldStats.deserializeJson(statsNode);
}
return new DataField(id, name, type, description, fieldStats);
}

public static DataType parseDataType(JsonNode json) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.paimon.utils;

import java.util.OptionalLong;

/** Utils for Optional. */
public final class OptionalUtils {

    private OptionalUtils() {
        // Static helpers only; instances would carry no state.
    }

    /**
     * Converts a nullable boxed {@link Long} into an {@link OptionalLong}.
     *
     * @param value the value to wrap, may be {@code null}
     * @return {@link OptionalLong#empty()} when {@code value} is {@code null}, otherwise an
     *     {@link OptionalLong} holding {@code value}
     */
    public static OptionalLong of(Long value) {
        return value == null ? OptionalLong.empty() : OptionalLong.of(value);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.paimon.schema;

import org.apache.paimon.annotation.Public;
import org.apache.paimon.types.DataFieldStats;
import org.apache.paimon.types.DataType;

import javax.annotation.Nullable;
Expand Down Expand Up @@ -87,6 +88,10 @@ static SchemaChange updateColumnPosition(Move move) {
return new UpdateColumnPosition(move);
}

/** Creates a {@link SchemaChange} that carries {@code newStats} for the field {@code fieldName}. */
static SchemaChange updateColumnStats(String fieldName, DataFieldStats newStats) {
    final UpdateColumnStats statsChange = new UpdateColumnStats(fieldName, newStats);
    return statsChange;
}

/** A SchemaChange to set a table option. */
final class SetOption implements SchemaChange {

Expand Down Expand Up @@ -511,4 +516,44 @@ public int hashCode() {
return result;
}
}

/** A SchemaChange that pairs a field name with the new stats for that field. */
final class UpdateColumnStats implements SchemaChange {

    private static final long serialVersionUID = 1L;

    private final String fieldName;
    private final DataFieldStats newStats;

    /**
     * @param fieldName name of the field this change refers to
     * @param newStats stats carried by this change
     */
    public UpdateColumnStats(String fieldName, DataFieldStats newStats) {
        this.newStats = newStats;
        this.fieldName = fieldName;
    }

    /** Returns the name of the field this change refers to. */
    public String fieldName() {
        return fieldName;
    }

    /** Returns the stats carried by this change. */
    public DataFieldStats newStats() {
        return newStats;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        // The class is final, so an instanceof test accepts exactly the same objects as an
        // exact getClass() comparison, and it also rejects null.
        if (!(o instanceof UpdateColumnStats)) {
            return false;
        }
        UpdateColumnStats other = (UpdateColumnStats) o;
        boolean sameField = Objects.equals(fieldName, other.fieldName);
        return sameField && Objects.equals(newStats, other.newStats);
    }

    @Override
    public int hashCode() {
        return Objects.hash(fieldName, newStats);
    }
}
}
Loading

0 comments on commit 3cdafc3

Please sign in to comment.