
Commit

Merge branch 'apache:master' into rest-catalog
jerry-024 authored Dec 24, 2024
2 parents f5c0286 + 4ac05e4 commit bc16c03
Showing 254 changed files with 3,963 additions and 1,083 deletions.
11 changes: 9 additions & 2 deletions docs/content/flink/sql-alter.md
@@ -96,8 +96,7 @@ ALTER TABLE my_table RENAME c0 TO c1;

## Dropping Columns

The following SQL drops two columns `c1` and `c2` from table `my_table`. In hive catalog, you need to ensure disable `hive.metastore.disallow.incompatible.col.type.changes` in your hive server,
otherwise this operation may fail, throws an exception like `The following columns have types incompatible with the existing columns in their respective positions`.
The following SQL drops two columns `c1` and `c2` from table `my_table`.

```sql
ALTER TABLE my_table DROP (c1, c2);
@@ -107,6 +106,14 @@ ALTER TABLE my_table DROP (c1, c2);
To drop a column in a row type, see [Changing Column Type](#changing-column-type).
{{< /hint >}}

In Hive catalog, you need to ensure one of the following:

1. `hive.metastore.disallow.incompatible.col.type.changes` is disabled in your Hive server, or
2. `hadoop.hive.metastore.disallow.incompatible.col.type.changes=false` is set in your Paimon catalog (see the sketch below).

Otherwise this operation may fail and throw an exception like `The following columns have types incompatible with the existing columns in their respective positions`.
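
For the catalog-level option, here is a minimal sketch of a Flink SQL catalog definition; the catalog name, metastore URI, and warehouse path are illustrative placeholders, and only the `hadoop.`-prefixed entry is the setting in question:

```sql
CREATE CATALOG my_hive_catalog WITH (
    'type' = 'paimon',
    'metastore' = 'hive',
    -- placeholder URI and warehouse; adjust to your environment
    'uri' = 'thrift://<hive-metastore-host>:9083',
    'warehouse' = 'hdfs:///path/to/warehouse',
    -- forwarded to the Hive client so the metastore accepts the schema change
    'hadoop.hive.metastore.disallow.incompatible.col.type.changes' = 'false'
);

USE CATALOG my_hive_catalog;
ALTER TABLE my_table DROP (c1, c2);
```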

## Dropping Partitions

The following SQL drops the partitions of the paimon table.
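
For illustration, a minimal sketch of such a statement, assuming a hypothetical table partitioned by a `dt` column:

```sql
-- sketch only: `dt` is an assumed partition column of `my_table`
ALTER TABLE my_table DROP PARTITION (`dt` = '2024-12-24');
```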
8 changes: 8 additions & 0 deletions docs/content/spark/sql-alter.md
@@ -174,6 +174,14 @@ The following SQL drops a nested column `f2` from a struct type, which is the va
ALTER TABLE my_table DROP COLUMN v.value.f2;
```

In Hive catalog, you need to ensure one of the following:

1. `hive.metastore.disallow.incompatible.col.type.changes` is disabled in your Hive server, or
2. Spark is launched with `spark-sql --conf spark.hadoop.hive.metastore.disallow.incompatible.col.type.changes=false` (see the sketch below).

Otherwise this operation may fail and throw an exception like `The following columns have types incompatible with the existing columns in their respective positions`.
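
As a usage sketch, assuming the session was started with the `--conf` flag above (or the check was disabled on the Hive server), the nested drop from the earlier example then succeeds:

```sql
-- requires one of the two settings above, otherwise the Hive metastore rejects it
ALTER TABLE my_table DROP COLUMN v.value.f2;
```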

## Dropping Partitions

The following SQL drops the partitions of the paimon table. For spark sql, you need to specify all the partition columns.
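
For illustration, a minimal sketch assuming a hypothetical table partitioned by `dt` and `hr`; every partition column must appear in the specification:

```sql
-- sketch only: `dt` and `hr` are assumed partition columns of `my_table`
ALTER TABLE my_table DROP PARTITION (dt = '2024-12-24', hr = '10');
```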
2 changes: 1 addition & 1 deletion paimon-arrow/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-arrow</artifactId>
@@ -40,6 +40,7 @@
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;

import org.apache.arrow.vector.types.TimeUnit;
import org.apache.arrow.vector.types.Types;
@@ -150,6 +151,11 @@ public FieldType visit(LocalZonedTimestampType localZonedTimestampType) {
return new FieldType(localZonedTimestampType.isNullable(), arrowType, null);
}

@Override
public FieldType visit(VariantType variantType) {
throw new UnsupportedOperationException();
}

private TimeUnit getTimeUnit(int precision) {
if (precision == 0) {
return TimeUnit.SECOND;
@@ -63,6 +63,7 @@
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;

import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVector;
@@ -423,6 +424,11 @@ public Timestamp getTimestamp(int i, int precision) {
};
}

@Override
public Arrow2PaimonVectorConverter visit(VariantType variantType) {
throw new UnsupportedOperationException();
}

@Override
public Arrow2PaimonVectorConverter visit(ArrayType arrayType) {
final Arrow2PaimonVectorConverter arrowVectorConvertor =
@@ -39,6 +39,7 @@
import org.apache.paimon.types.TinyIntType;
import org.apache.paimon.types.VarBinaryType;
import org.apache.paimon.types.VarCharType;
import org.apache.paimon.types.VariantType;

import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.complex.ListVector;
@@ -138,6 +139,11 @@ public ArrowFieldWriterFactory visit(LocalZonedTimestampType localZonedTimestamp
fieldVector, localZonedTimestampType.getPrecision(), null);
}

@Override
public ArrowFieldWriterFactory visit(VariantType variantType) {
throw new UnsupportedOperationException("Doesn't support VariantType.");
}

@Override
public ArrowFieldWriterFactory visit(ArrayType arrayType) {
ArrowFieldWriterFactory elementWriterFactory = arrayType.getElementType().accept(this);
@@ -167,7 +167,7 @@ private RecordReader.RecordIterator<InternalRow> createPrimitiveIterator(
rows.add(GenericRow.of(randomRowValues));
}

return getRecordIterator(PRIMITIVE_TYPE, rows, projection);
return getRecordIterator(PRIMITIVE_TYPE, rows, projection, true);
}

@TestTemplate
@@ -244,7 +244,7 @@ public void testArrayType() throws Exception {
}

RecordReader.RecordIterator<InternalRow> iterator =
getRecordIterator(nestedArrayType, rows);
getRecordIterator(nestedArrayType, rows, null, testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedArrayType, allocator);
ArrowBatchConverter arrowWriter = createArrowWriter(iterator, nestedArrayType, vsr);
@@ -308,7 +308,8 @@ public void testMapType() throws Exception {
expectedMaps.add(map1);
}

RecordReader.RecordIterator<InternalRow> iterator = getRecordIterator(nestedMapType, rows);
RecordReader.RecordIterator<InternalRow> iterator =
getRecordIterator(nestedMapType, rows, null, testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedMapType, allocator);
ArrowBatchConverter arrowWriter = createArrowWriter(iterator, nestedMapType, vsr);
@@ -365,7 +366,11 @@ public void testMapRowType() throws Exception {
InternalRow row3 = GenericRow.of(new GenericMap(map3));

RecordReader.RecordIterator<InternalRow> iterator =
getRecordIterator(nestedMapRowType, Arrays.asList(row1, row2, row3));
getRecordIterator(
nestedMapRowType,
Arrays.asList(row1, row2, row3),
null,
testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedMapRowType, allocator);
ArrowBatchConverter arrowWriter = createArrowWriter(iterator, nestedMapRowType, vsr);
@@ -423,7 +428,8 @@ private void testRowTypeImpl(boolean allNull) throws Exception {
rows.add(GenericRow.of(GenericRow.of(randomRowValues)));
}

RecordReader.RecordIterator<InternalRow> iterator = getRecordIterator(nestedRowType, rows);
RecordReader.RecordIterator<InternalRow> iterator =
getRecordIterator(nestedRowType, rows, null, testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedRowType, allocator);
ArrowBatchConverter arrowWriter = createArrowWriter(iterator, nestedRowType, vsr);
@@ -464,7 +470,8 @@ public void testSliceIntType() throws Exception {
rows.add(GenericRow.of(i));
}

RecordReader.RecordIterator<InternalRow> iterator = getRecordIterator(rowType, rows);
RecordReader.RecordIterator<InternalRow> iterator =
getRecordIterator(rowType, rows, null, true);
try (RootAllocator allocator = new RootAllocator()) {
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(rowType, allocator);
ArrowBatchConverter arrowWriter = createArrowWriter(iterator, rowType, vsr);
@@ -515,7 +522,7 @@ public void testDvWithSimpleRowType() throws Exception {
int[] projection = readEmpty ? new int[0] : null;
RecordReader.RecordIterator<InternalRow> iterator =
getApplyDeletionFileRecordIterator(
rowType, rows, deleted, Collections.singletonList("pk"), projection);
rowType, rows, deleted, Collections.singletonList("pk"), projection, true);
if (readEmpty) {
testReadEmpty(iterator, numRows - deleted.size());
} else {
@@ -588,7 +595,12 @@ public void testDvWithArrayType() throws Exception {
Set<Integer> deleted = getDeletedPks(numRows);
RecordReader.RecordIterator<InternalRow> iterator =
getApplyDeletionFileRecordIterator(
nestedArrayType, rows, deleted, Collections.singletonList("pk"), null);
nestedArrayType,
rows,
deleted,
Collections.singletonList("pk"),
null,
testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
Set<Integer> expectedPks = getExpectedPks(numRows, deleted);
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedArrayType, allocator);
@@ -666,7 +678,12 @@ public void testDvWithMapType() throws Exception {
Set<Integer> deleted = getDeletedPks(numRows);
RecordReader.RecordIterator<InternalRow> iterator =
getApplyDeletionFileRecordIterator(
nestedMapType, rows, deleted, Collections.singletonList("pk"), null);
nestedMapType,
rows,
deleted,
Collections.singletonList("pk"),
null,
testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
Set<Integer> expectedPks = getExpectedPks(numRows, deleted);
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedMapType, allocator);
@@ -735,7 +752,12 @@ public void testDvWithRowType() throws Exception {
Set<Integer> deleted = getDeletedPks(numRows);
RecordReader.RecordIterator<InternalRow> iterator =
getApplyDeletionFileRecordIterator(
nestedRowType, rows, deleted, Collections.singletonList("pk"), null);
nestedRowType,
rows,
deleted,
Collections.singletonList("pk"),
null,
testMode.equals("per_row"));
try (RootAllocator allocator = new RootAllocator()) {
Set<Integer> expectedPks = getExpectedPks(numRows, deleted);
VectorSchemaRoot vsr = ArrowUtils.createVectorSchemaRoot(nestedRowType, allocator);
@@ -803,14 +825,15 @@ private void testReadEmpty(
}

private RecordReader.RecordIterator<InternalRow> getRecordIterator(
RowType rowType, List<InternalRow> rows) throws Exception {
return getRecordIterator(rowType, rows, null);
}

private RecordReader.RecordIterator<InternalRow> getRecordIterator(
RowType rowType, List<InternalRow> rows, @Nullable int[] projection) throws Exception {
RowType rowType,
List<InternalRow> rows,
@Nullable int[] projection,
boolean canTestParquet)
throws Exception {
Map<String, String> options = new HashMap<>();
options.put(CoreOptions.FILE_FORMAT.key(), RND.nextBoolean() ? "orc" : "parquet");
options.put(
CoreOptions.FILE_FORMAT.key(),
canTestParquet && RND.nextBoolean() ? "parquet" : "orc");
FileStoreTable table = createFileStoreTable(rowType, Collections.emptyList(), options);

StreamTableWrite write = table.newStreamWriteBuilder().newWrite();
@@ -832,12 +855,15 @@ private RecordReader.RecordIterator<InternalRow> getApplyDeletionFileRecordItera
List<GenericRow> rows,
Set<Integer> deletedPks,
List<String> primaryKeys,
@Nullable int[] projection)
@Nullable int[] projection,
boolean canTestParquet)
throws Exception {
Map<String, String> options = new HashMap<>();
options.put(CoreOptions.DELETION_VECTORS_ENABLED.key(), "true");
options.put(CoreOptions.BUCKET.key(), "1");
options.put(CoreOptions.FILE_FORMAT.key(), RND.nextBoolean() ? "orc" : "parquet");
options.put(
CoreOptions.FILE_FORMAT.key(),
canTestParquet && RND.nextBoolean() ? "parquet" : "orc");
FileStoreTable table = createFileStoreTable(rowType, primaryKeys, options);

StreamTableWrite write = table.newStreamWriteBuilder().newWrite();
2 changes: 1 addition & 1 deletion paimon-benchmark/paimon-cluster-benchmark/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-benchmark</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-cluster-benchmark</artifactId>
2 changes: 1 addition & 1 deletion paimon-benchmark/paimon-micro-benchmarks/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-benchmark</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-micro-benchmarks</artifactId>
2 changes: 1 addition & 1 deletion paimon-benchmark/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-benchmark</artifactId>
2 changes: 1 addition & 1 deletion paimon-bundle/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-bundle</artifactId>
2 changes: 1 addition & 1 deletion paimon-codegen-loader/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-codegen-loader</artifactId>
2 changes: 1 addition & 1 deletion paimon-codegen/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-codegen</artifactId>
@@ -19,6 +19,7 @@
package org.apache.paimon.codegen

import org.apache.paimon.data._
import org.apache.paimon.data.variant.Variant
import org.apache.paimon.memory.MemorySegment
import org.apache.paimon.types._
import org.apache.paimon.types.DataTypeChecks.{getFieldCount, getFieldTypes, getPrecision, getScale}
@@ -380,6 +381,7 @@ object GenerateUtils {
case ARRAY => className[InternalArray]
case MULTISET | MAP => className[InternalMap]
case ROW => className[InternalRow]
case VARIANT => className[Variant]
case _ =>
throw new IllegalArgumentException("Illegal type: " + t)
}
@@ -418,6 +420,8 @@ object GenerateUtils {
s"$rowTerm.getMap($indexTerm)"
case ROW =>
s"$rowTerm.getRow($indexTerm, ${getFieldCount(t)})"
case VARIANT =>
s"$rowTerm.getVariant($indexTerm)"
case _ =>
throw new IllegalArgumentException("Illegal type: " + t)
}
@@ -30,6 +30,7 @@
import org.apache.paimon.data.serializer.InternalMapSerializer;
import org.apache.paimon.data.serializer.InternalRowSerializer;
import org.apache.paimon.data.serializer.Serializer;
import org.apache.paimon.data.variant.GenericVariant;
import org.apache.paimon.types.DataType;
import org.apache.paimon.types.DataTypeRoot;
import org.apache.paimon.types.DataTypes;
@@ -179,6 +180,13 @@ public class EqualiserCodeGeneratorTest {
GenericRow.of(31, BinaryString.fromString("32")),
GenericRow.of(31, BinaryString.fromString("33"))),
new InternalRowSerializer(DataTypes.INT(), DataTypes.VARCHAR(2))));
TEST_DATA.put(
DataTypeRoot.VARIANT,
new GeneratedData(
DataTypes.VARIANT(),
Pair.of(
GenericVariant.fromJson("{\"age\":27,\"city\":\"Beijing\"}"),
GenericVariant.fromJson("{\"age\":27,\"city\":\"Hangzhou\"}"))));
}

@ParameterizedTest
2 changes: 1 addition & 1 deletion paimon-common/pom.xml
@@ -25,7 +25,7 @@ under the License.
<parent>
<artifactId>paimon-parent</artifactId>
<groupId>org.apache.paimon</groupId>
<version>1.0-SNAPSHOT</version>
<version>1.1-SNAPSHOT</version>
</parent>

<artifactId>paimon-common</artifactId>
@@ -26,6 +26,7 @@
import org.apache.paimon.data.InternalRow;
import org.apache.paimon.data.PartitionInfo;
import org.apache.paimon.data.Timestamp;
import org.apache.paimon.data.variant.Variant;
import org.apache.paimon.types.RowKind;

/** An implementation of {@link InternalRow} which provides a row the fixed partition value. */
@@ -153,6 +154,13 @@ public byte[] getBinary(int pos) {
: row.getBinary(partitionInfo.getRealIndex(pos));
}

@Override
public Variant getVariant(int pos) {
return partitionInfo.inPartitionRow(pos)
? partition.getVariant(partitionInfo.getRealIndex(pos))
: row.getVariant(partitionInfo.getRealIndex(pos));
}

@Override
public InternalArray getArray(int pos) {
return partitionInfo.inPartitionRow(pos)