diff --git a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java index e0e55795fda3..b7522730930f 100644 --- a/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java +++ b/paimon-common/src/main/java/org/apache/paimon/sort/hilbert/HilbertIndexer.java @@ -295,7 +295,7 @@ public Long hilbertValue(InternalRow o) { } } - private byte[] hilbertCurvePosBytes(Long[] points) { + public static byte[] hilbertCurvePosBytes(Long[] points) { long[] data = Arrays.stream(points).mapToLong(Long::longValue).toArray(); HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.length); BigInteger index = hilbertCurve.index(data); diff --git a/paimon-spark/paimon-spark-common/pom.xml b/paimon-spark/paimon-spark-common/pom.xml index a3c92482156a..7a6f11a21023 100644 --- a/paimon-spark/paimon-spark-common/pom.xml +++ b/paimon-spark/paimon-spark-common/pom.xml @@ -53,11 +53,6 @@ under the License. scala-compiler ${scala.version} - - com.github.davidmoten - hilbert-curve - 0.2.2 - org.apache.spark diff --git a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java index ed2a1bab04bb..f9334c0972e7 100644 --- a/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java +++ b/paimon-spark/paimon-spark-common/src/main/java/org/apache/paimon/spark/sort/SparkHilbertUDF.java @@ -18,6 +18,7 @@ package org.apache.paimon.spark.sort; +import org.apache.paimon.sort.hilbert.HilbertIndexer; import org.apache.paimon.utils.ConvertBinaryUtil; import org.apache.spark.sql.Column; @@ -37,14 +38,11 @@ import org.apache.spark.sql.types.ShortType; import org.apache.spark.sql.types.StringType; import org.apache.spark.sql.types.TimestampType; -import org.davidmoten.hilbert.HilbertCurve; import java.io.IOException; import java.io.ObjectInputStream; import java.io.Serializable; import java.math.BigDecimal; -import java.math.BigInteger; -import java.util.List; import scala.collection.JavaConverters; import scala.collection.Seq; @@ -53,8 +51,6 @@ public class SparkHilbertUDF implements Serializable { private static final long PRIMITIVE_EMPTY = Long.MAX_VALUE; - private static final int BITS_NUM = 63; - SparkHilbertUDF() {} private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { @@ -62,11 +58,9 @@ private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundE } byte[] hilbertCurvePosBytes(Seq points) { - List longs = JavaConverters.seqAsJavaList(points); - long[] data = longs.stream().mapToLong(Long::longValue).toArray(); - HilbertCurve hilbertCurve = HilbertCurve.bits(BITS_NUM).dimensions(points.size()); - BigInteger index = hilbertCurve.index(data); - return ConvertBinaryUtil.paddingToNByte(index.toByteArray(), BITS_NUM); + Long[] longs = JavaConverters.seqAsJavaList(points).stream().toArray(Long[]::new); + byte[] bytes = HilbertIndexer.hilbertCurvePosBytes(longs); + return bytes; } private UserDefinedFunction tinyToOrderedLongUDF() {