diff --git a/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java b/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java
index 34c5464f7106..609862dafc20 100644
--- a/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java
+++ b/paimon-common/src/main/java/org/apache/paimon/utils/HllSketchUtil.java
@@ -21,10 +21,26 @@
 import org.apache.paimon.annotation.VisibleForTesting;
 
 import org.apache.datasketches.hll.HllSketch;
+import org.apache.datasketches.hll.TgtHllType;
+import org.apache.datasketches.hll.Union;
 
-/** A compressed bitmap for 32-bit integer. */
+/** Utility methods for working with serialized {@link HllSketch} data. */
 public class HllSketchUtil {
 
+    /**
+     * Merges two serialized HLL sketches into a single serialized sketch.
+     *
+     * @param sketchBytes1 byte image of the first sketch
+     * @param sketchBytes2 byte image of the second sketch
+     * @return the union of both sketches as a compact byte array in {@code HLL_4} format
+     */
+    public static byte[] union(byte[] sketchBytes1, byte[] sketchBytes2) {
+        HllSketch sketch = HllSketch.heapify(sketchBytes1);
+        Union union = Union.heapify(sketchBytes2);
+        union.update(sketch);
+        return union.getResult(TgtHllType.HLL_4).toCompactByteArray();
+    }
+
     @VisibleForTesting
     public static byte[] sketchOf(int... values) {
         HllSketch hllSketch = new HllSketch();
diff --git a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java
index 93901753645a..0ccf4af6497c 100644
--- a/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java
+++ b/paimon-core/src/main/java/org/apache/paimon/mergetree/compact/aggregate/FieldHllSketchAgg.java
@@ -19,10 +19,7 @@
 package org.apache.paimon.mergetree.compact.aggregate;
 
 import org.apache.paimon.types.VarBinaryType;
-
-import org.apache.datasketches.hll.HllSketch;
-import org.apache.datasketches.hll.TgtHllType;
-import org.apache.datasketches.hll.Union;
+import org.apache.paimon.utils.HllSketchUtil;
 
 /** HllSketch aggregate a field of a row. */
 public class FieldHllSketchAgg extends FieldAggregator {
@@ -50,10 +47,6 @@ public Object agg(Object accumulator, Object inputField) {
             return accumulator == null ? inputField : accumulator;
         }
 
-        HllSketch heapify = HllSketch.heapify((byte[]) accumulator);
-        Union union = Union.heapify((byte[]) inputField);
-        union.update(heapify);
-        HllSketch result = union.getResult(TgtHllType.HLL_4);
-        return result.toCompactByteArray();
+        return HllSketchUtil.union((byte[]) accumulator, (byte[]) inputField);
     }
 }