From 4faccfbe01756f66e917ba20091c0fb1c2a51aaa Mon Sep 17 00:00:00 2001 From: daidai Date: Wed, 18 Dec 2024 07:11:52 +0800 Subject: [PATCH] [enhancement](iceberg) support reading Iceberg partition evolution tables. (#45367) ### What problem does this PR solve? Problem Summary: Supports reading Iceberg tables after `ADD`/`DROP`/`REPLACE` partition field operations. Before this, reading such tables would crash the BE (core dump). https://iceberg.apache.org/docs/1.7.0/spark-ddl/#alter-table-add-partition-field ### Release note [fix](iceberg) Supports reading Iceberg tables after partition evolution. --- .../writer/iceberg/partition_transformers.cpp | 2 +- .../iceberg/run07.sql | 83 ++++++++ .../iceberg/source/IcebergScanNode.java | 82 +++++--- .../test_iceberg_partition_evolution.out | 190 ++++++++++++++++++ .../write/test_iceberg_write_partitions.out | 24 +-- .../test_iceberg_partition_evolution.groovy | 179 +++++++++++++++++ 6 files changed, 517 insertions(+), 43 deletions(-) create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run07.sql create mode 100644 regression-test/data/external_table_p0/iceberg/test_iceberg_partition_evolution.out create mode 100644 regression-test/suites/external_table_p0/iceberg/test_iceberg_partition_evolution.groovy diff --git a/be/src/vec/sink/writer/iceberg/partition_transformers.cpp b/be/src/vec/sink/writer/iceberg/partition_transformers.cpp index ee8268d30f73b5..06b22a4bbbce9a 100644 --- a/be/src/vec/sink/writer/iceberg/partition_transformers.cpp +++ b/be/src/vec/sink/writer/iceberg/partition_transformers.cpp @@ -211,7 +211,7 @@ std::string PartitionColumnTransform::get_partition_value(const TypeDescriptor& if (value.has_value()) { switch (type.type) { case TYPE_BOOLEAN: { - return std::to_string(std::any_cast(value)); + return std::any_cast(value) ? 
"true" : "false"; } case TYPE_TINYINT: { return std::to_string(std::any_cast(value)); diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run07.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run07.sql new file mode 100644 index 00000000000000..3c4131ba619f84 --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run07.sql @@ -0,0 +1,83 @@ + +use demo.test_db; +CREATE TABLE iceberg_add_partition ( + id INT, + name STRING, + age INT +) USING iceberg; +INSERT INTO iceberg_add_partition VALUES(1, 'Alice', 30),(2, 'Bob', 25); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD age; +ALTER TABLE iceberg_add_partition ADD COLUMNS address STRING; +INSERT INTO iceberg_add_partition VALUES (4, 'Charlie', 45, '123 Street Name'); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD bucket(10, id); +INSERT INTO iceberg_add_partition VALUES (5, 'Eve', 29, '789 Third St'); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD truncate(5, address); +INSERT INTO iceberg_add_partition VALUES (6, 'Frank', 33,"xx"),(7, 'Grace', 28,"yyyyyyyyy"); + + + +CREATE TABLE iceberg_drop_partition ( + id INT, + name STRING, + amount DOUBLE, + created_date DATE +) +USING iceberg +PARTITIONED BY (year(created_date),bucket(10,created_date)); +INSERT INTO iceberg_drop_partition VALUES + (1, 'Alice', 100.0, DATE '2023-12-01'), + (2, 'Bob', 200.0, DATE '2023-12-02'), + (3, 'Charlie', 300.0, DATE '2024-12-03'); +ALTER TABLE iceberg_drop_partition DROP PARTITION FIELD year(created_date); +INSERT INTO iceberg_drop_partition VALUES + (4, 'David', 400.0, DATE '2023-12-02'), + (5, 'Eve', 500.0, DATE '2024-12-03'); +ALTER TABLE iceberg_drop_partition DROP PARTITION FIELD bucket(10,created_date); +INSERT INTO iceberg_drop_partition VALUES + (6, 'David', 400.0, DATE '2025-12-12'), + (7, 'Eve', 500.0, DATE '2025-12-23'); + + +CREATE TABLE 
iceberg_replace_partition ( + id INT, + name STRING, + amount DOUBLE, + created_date DATE +) +USING iceberg +PARTITIONED BY (year(created_date),bucket(10,created_date)); +INSERT INTO iceberg_replace_partition VALUES + (1, 'Alice', 100.0, DATE '2023-01-01'), + (2, 'Bob', 200.0, DATE '2023-12-02'), + (3, 'Charlie', 300.0, DATE '2024-12-03'); +ALTER TABLE iceberg_replace_partition REPLACE PARTITION FIELD year(created_date) WITH month(created_date); +INSERT INTO iceberg_replace_partition VALUES + (4, 'David', 400.0, DATE '2023-12-02'), + (5, 'Eve', 500.0, DATE '2024-07-03'); +ALTER TABLE iceberg_replace_partition REPLACE PARTITION FIELD bucket(10,created_date) WITH bucket(10,id); +INSERT INTO iceberg_replace_partition VALUES + (6, 'David', 400.0, DATE '2025-10-12'), + (7, 'Eve', 500.0, DATE '2025-09-23'); + + + + +CREATE TABLE iceberg_evolution_partition ( + id INT, + name STRING, + age INT +) USING iceberg; +INSERT INTO iceberg_evolution_partition VALUES(1, 'Alice', 30),(2, 'Bob', 25); +ALTER TABLE iceberg_evolution_partition ADD PARTITION FIELD age; +ALTER TABLE iceberg_evolution_partition ADD COLUMNS address STRING; +INSERT INTO iceberg_evolution_partition VALUES (4, 'Charlie', 45, '123 Street Name'); +ALTER TABLE iceberg_evolution_partition ADD PARTITION FIELD bucket(10, id); +INSERT INTO iceberg_evolution_partition VALUES (5, 'Eve', 29, '789 Third St'); +ALTER TABLE iceberg_evolution_partition REPLACE PARTITION FIELD bucket(10, id) WITH truncate(5, address); +INSERT INTO iceberg_evolution_partition VALUES (6, 'Frank', 33,"xx"),(7, 'Grace', 28,"yyyyyyyyy"); +ALTER TABLE iceberg_evolution_partition DROP PARTITION FIELD truncate(5, address); +INSERT INTO iceberg_evolution_partition VALUES (8, 'Hank', 40, "zz"), (9, 'Ivy', 22, "aaaaaa"); +ALTER TABLE iceberg_evolution_partition DROP COLUMNS address; +-- INSERT INTO iceberg_evolution_partition VALUES (10, 'Jack', 35), (11, 'Kara', 30); +-- spark error. 
+ diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index c78140b9d3cd99..e43e8893332a67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -35,6 +35,7 @@ import org.apache.doris.datasource.iceberg.IcebergExternalCatalog; import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.datasource.iceberg.IcebergUtils; +import org.apache.doris.nereids.exceptions.NotSupportedException; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.SessionVariable; import org.apache.doris.spi.Split; @@ -56,7 +57,6 @@ import org.apache.iceberg.FileContent; import org.apache.iceberg.FileScanTask; import org.apache.iceberg.MetadataColumns; -import org.apache.iceberg.PartitionField; import org.apache.iceberg.Snapshot; import org.apache.iceberg.StructLike; import org.apache.iceberg.Table; @@ -64,11 +64,10 @@ import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.types.Conversions; -import org.apache.iceberg.types.Type; -import org.apache.iceberg.types.Types; -import org.apache.iceberg.util.DateTimeUtil; import org.apache.iceberg.util.SnapshotUtil; import org.apache.iceberg.util.TableScanUtil; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.io.IOException; import java.util.ArrayList; @@ -78,11 +77,11 @@ import java.util.Map; import java.util.Optional; import java.util.OptionalLong; -import java.util.stream.Collectors; public class IcebergScanNode extends FileQueryScanNode { public static final int MIN_DELETE_FILE_SUPPORT_VERSION = 2; + private static final Logger LOG = LogManager.getLogger(IcebergScanNode.class); private IcebergSource 
source; private Table icebergTable; @@ -217,32 +216,44 @@ private List doGetSplits(int numBackends) throws UserException { boolean isPartitionedTable = icebergTable.spec().isPartitioned(); long realFileSplitSize = getRealFileSplitSize(0); - CloseableIterable fileScanTasks = TableScanUtil.splitFiles(scan.planFiles(), realFileSplitSize); + CloseableIterable fileScanTasks = null; + try { + fileScanTasks = TableScanUtil.splitFiles(scan.planFiles(), realFileSplitSize); + } catch (NullPointerException e) { + /* + Caused by: java.lang.NullPointerException: Type cannot be null + at org.apache.iceberg.relocated.com.google.common.base.Preconditions.checkNotNull + (Preconditions.java:921) ~[iceberg-bundled-guava-1.4.3.jar:?] + at org.apache.iceberg.types.Types$NestedField.(Types.java:447) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.types.Types$NestedField.optional(Types.java:416) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.PartitionSpec.partitionType(PartitionSpec.java:132) ~[iceberg-api-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.lambda$new$0(DeleteFileIndex.java:97) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.relocated.com.google.common.collect.RegularImmutableMap.forEach + (RegularImmutableMap.java:297) ~[iceberg-bundled-guava-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.(DeleteFileIndex.java:97) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex.(DeleteFileIndex.java:71) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DeleteFileIndex$Builder.build(DeleteFileIndex.java:578) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.ManifestGroup.plan(ManifestGroup.java:183) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.ManifestGroup.planFiles(ManifestGroup.java:170) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.DataTableScan.doPlanFiles(DataTableScan.java:89) ~[iceberg-core-1.4.3.jar:?] + at org.apache.iceberg.SnapshotScan.planFiles(SnapshotScan.java:139) ~[iceberg-core-1.4.3.jar:?] 
+ at org.apache.doris.datasource.iceberg.source.IcebergScanNode.doGetSplits + (IcebergScanNode.java:209) ~[doris-fe.jar:1.2-SNAPSHOT] + EXAMPLE: + CREATE TABLE iceberg_tb(col1 INT,col2 STRING) USING ICEBERG PARTITIONED BY (bucket(10,col2)); + INSERT INTO iceberg_tb VALUES( ... ); + ALTER TABLE iceberg_tb DROP PARTITION FIELD bucket(10,col2); + ALTER TABLE iceberg_tb DROP COLUMNS col2 STRING; + Link: https://github.com/apache/iceberg/pull/10755 + */ + LOG.warn("Iceberg TableScanUtil.splitFiles throw NullPointerException. Cause : ", e); + throw new NotSupportedException("Unable to read Iceberg table with dropped old partition column."); + } try (CloseableIterable combinedScanTasks = TableScanUtil.planTasks(fileScanTasks, realFileSplitSize, 1, 0)) { combinedScanTasks.forEach(taskGrp -> taskGrp.files().forEach(splitTask -> { - List partitionValues = new ArrayList<>(); if (isPartitionedTable) { StructLike structLike = splitTask.file().partition(); - List fields = splitTask.spec().fields(); - Types.StructType structType = icebergTable.schema().asStruct(); - - // set partitionValue for this IcebergSplit - for (int i = 0; i < structLike.size(); i++) { - Object obj = structLike.get(i, Object.class); - String value = String.valueOf(obj); - PartitionField partitionField = fields.get(i); - if (partitionField.transform().isIdentity()) { - Type type = structType.fieldType(partitionField.name()); - if (type != null && type.typeId().equals(Type.TypeID.DATE)) { - // iceberg use integer to store date, - // we need transform it to string - value = DateTimeUtil.daysToIsoDate((Integer) obj); - } - } - partitionValues.add(value); - } - // Counts the number of partitions read partitionPathSet.add(structLike.toString()); } @@ -256,7 +267,7 @@ private List doGetSplits(int numBackends) throws UserException { new String[0], formatVersion, source.getCatalog().getProperties(), - partitionValues, + new ArrayList<>(), originalPath); split.setTargetSplitSize(realFileSplitSize); if (formatVersion 
>= MIN_DELETE_FILE_SUPPORT_VERSION) { @@ -289,7 +300,6 @@ private List doGetSplits(int numBackends) throws UserException { return pushDownCountSplits; } } - selectedPartitionNum = partitionPathSet.size(); return splits; } @@ -351,8 +361,20 @@ public TFileFormatType getFileFormatType() throws UserException { @Override public List getPathPartitionKeys() throws UserException { - return icebergTable.spec().fields().stream().map(PartitionField::name).map(String::toLowerCase) - .collect(Collectors.toList()); + // return icebergTable.spec().fields().stream().map(PartitionField::name).map(String::toLowerCase) + // .collect(Collectors.toList()); + /**First, iceberg partition columns are based on existing fields, which will be stored in the actual data file. + * Second, iceberg partition columns support Partition transforms. In this case, the path partition key is not + * equal to the column name of the partition column, so remove this code and get all the columns you want to + * read from the file. + * Related code: + * be/src/vec/exec/scan/vfile_scanner.cpp: + * VFileScanner::_init_expr_ctxes() + * if (slot_info.is_file_slot) { + * xxxx + * } + */ + return new ArrayList<>(); } @Override diff --git a/regression-test/data/external_table_p0/iceberg/test_iceberg_partition_evolution.out b/regression-test/data/external_table_p0/iceberg/test_iceberg_partition_evolution.out new file mode 100644 index 00000000000000..c9eb9a8db62d39 --- /dev/null +++ b/regression-test/data/external_table_p0/iceberg/test_iceberg_partition_evolution.out @@ -0,0 +1,190 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !add_partition_1 -- +id int Yes true \N +name text Yes true \N +age int Yes true \N +address text Yes true \N + +-- !add_partition_2 -- +1 Alice 30 \N +2 Bob 25 \N +4 Charlie 45 123 Street Name +5 Eve 29 789 Third St +6 Frank 33 xx +7 Grace 28 yyyyyyyyy + +-- !add_partition_3 -- +1 +2 +4 +5 +6 +7 + +-- !add_partition_4 -- +1 Alice +2 Bob +4 Charlie +5 Eve +6 Frank +7 Grace + +-- !add_partition_5 -- +30 +25 +45 +29 +33 +28 + +-- !add_partition_6 -- +123 Street Name +789 Third St +xx +yyyyyyyyy + +-- !add_partition_7 -- +4 Charlie 123 Street Name +5 Eve 789 Third St +7 Grace yyyyyyyyy + +-- !add_partition_8 -- +6 Frank xx + +-- !add_partition_9 -- +4 Charlie 45 123 Street Name +6 Frank 33 xx + +-- !add_partition_10 -- +1 Alice 30 \N +2 Bob 25 \N +4 Charlie 45 123 Street Name + +-- !drop_partition_1 -- +id int Yes true \N +name text Yes true \N +amount double Yes true \N +created_date date Yes true \N + +-- !drop_partition_2 -- +1 Alice 100.0 2023-12-01 +2 Bob 200.0 2023-12-02 +3 Charlie 300.0 2024-12-03 +4 David 400.0 2023-12-02 +5 Eve 500.0 2024-12-03 +6 David 400.0 2025-12-12 +7 Eve 500.0 2025-12-23 + +-- !drop_partition_3 -- +1 Alice 2023-12-01 +2 Bob 2023-12-02 +3 Charlie 2024-12-03 +4 David 2023-12-02 +5 Eve 2024-12-03 +6 David 2025-12-12 +7 Eve 2025-12-23 + +-- !drop_partition_4 -- +6 David 400.0 2025-12-12 +7 Eve 500.0 2025-12-23 + +-- !drop_partition_5 -- +1 Alice 100.0 2023-12-01 +2 Bob 200.0 2023-12-02 + +-- !drop_partition_6 -- +3 Charlie 300.0 2024-12-03 +5 Eve 500.0 2024-12-03 +6 David 400.0 2025-12-12 +7 Eve 500.0 2025-12-23 + +-- !drop_partition_7 -- +3 Charlie 300.0 2024-12-03 +5 Eve 500.0 2024-12-03 +6 David 400.0 2025-12-12 +7 Eve 500.0 2025-12-23 + +-- !drop_partition_8 -- +1 Alice 100.0 2023-12-01 +2 Bob 200.0 2023-12-02 +4 David 400.0 2023-12-02 + +-- !drop_partition_9 -- +1 Alice 100.0 2023-12-01 +2 Bob 200.0 2023-12-02 +3 Charlie 300.0 2024-12-03 +4 David 400.0 2023-12-02 +5 Eve 500.0 
2024-12-03 + +-- !drop_partition_10 -- +3 Charlie 300.0 2024-12-03 +5 Eve 500.0 2024-12-03 +6 David 400.0 2025-12-12 +7 Eve 500.0 2025-12-23 + +-- !replace_partition_1 -- +id int Yes true \N +name text Yes true \N +amount double Yes true \N +created_date date Yes true \N + +-- !replace_partition_2 -- +1 Alice 100.0 2023-01-01 +2 Bob 200.0 2023-12-02 +3 Charlie 300.0 2024-12-03 +4 David 400.0 2023-12-02 +5 Eve 500.0 2024-07-03 +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + +-- !replace_partition_3 -- +1 Alice 2023-01-01 +2 Bob 2023-12-02 +3 Charlie 2024-12-03 +4 David 2023-12-02 +5 Eve 2024-07-03 +6 David 2025-10-12 +7 Eve 2025-09-23 + +-- !replace_partition_4 -- +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + +-- !replace_partition_5 -- +1 Alice 100.0 2023-01-01 +2 Bob 200.0 2023-12-02 + +-- !replace_partition_6 -- +3 Charlie 300.0 2024-12-03 +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + +-- !replace_partition_7 -- +3 Charlie 300.0 2024-12-03 +5 Eve 500.0 2024-07-03 +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + +-- !replace_partition_8 -- +1 Alice 100.0 2023-01-01 +2 Bob 200.0 2023-12-02 +4 David 400.0 2023-12-02 +5 Eve 500.0 2024-07-03 + +-- !replace_partition_9 -- +1 Alice 100.0 2023-01-01 +2 Bob 200.0 2023-12-02 +3 Charlie 300.0 2024-12-03 +4 David 400.0 2023-12-02 +5 Eve 500.0 2024-07-03 + +-- !replace_partition_10 -- +3 Charlie 300.0 2024-12-03 +5 Eve 500.0 2024-07-03 +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + +-- !replace_partition_11 -- +6 David 400.0 2025-10-12 +7 Eve 500.0 2025-09-23 + diff --git a/regression-test/data/external_table_p0/iceberg/write/test_iceberg_write_partitions.out b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_write_partitions.out index 1ec82236951bb9..c101079be333ed 100644 --- a/regression-test/data/external_table_p0/iceberg/write/test_iceberg_write_partitions.out +++ b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_write_partitions.out @@ -9,13 +9,13 @@ false 2 
-2147483648 -9223372036854775808 -123.45 -123456.789 -- !q03 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q04 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 \N 123.45 \N -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 \N 123.45 \N +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -123456.789012 1 string_value 2024-03-22 @@ -51,13 +51,13 @@ false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -- !q03 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q04 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 \N 123.45 \N -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 \N 123.45 \N +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -123456.789012 1 string_value 2024-03-22 @@ -93,13 +93,13 @@ false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -- !q03 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q04 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 \N 123.45 \N -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 \N 123.45 \N +true 3 2147483647 9223372036854775807 123.45 
123456.789 -- !q01 -- -123456.789012 1 string_value 2024-03-22 @@ -135,13 +135,13 @@ false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -- !q03 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q04 -- false 1 -2147483648 -9223372036854775808 -123.45 -123456.789 false 2 -2147483648 -9223372036854775808 -123.45 -123456.789 -false 3 2147483647 \N 123.45 \N -false 3 2147483647 9223372036854775807 123.45 123456.789 +true 3 2147483647 \N 123.45 \N +true 3 2147483647 9223372036854775807 123.45 123456.789 -- !q01 -- -123456.789012 1 string_value 2024-03-22 diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_partition_evolution.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_partition_evolution.groovy new file mode 100644 index 00000000000000..985f9e035cb192 --- /dev/null +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_partition_evolution.groovy @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_iceberg_partition_evolution", "p0,external,doris,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test.") + return + } + + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_iceberg_partition_evolution" + + sql """drop catalog if exists ${catalog_name}""" + sql """ + CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='iceberg', + 'iceberg.catalog.type'='rest', + 'uri' = 'http://${externalEnvIp}:${rest_port}', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """ use test_db;""" + + def tbs = sql """ show tables """ + logger.info("tables = " +tbs ) + + + qt_add_partition_1 """desc iceberg_add_partition;""" + qt_add_partition_2 """select * from iceberg_add_partition order by id;""" + qt_add_partition_3 """select id from iceberg_add_partition order by id;""" + qt_add_partition_4 """select id,name from iceberg_add_partition order by id;""" + qt_add_partition_5 """select age from iceberg_add_partition order by id;""" + qt_add_partition_6 """select address from iceberg_add_partition where address is not null order by id;""" + qt_add_partition_7 """select id,name,address from iceberg_add_partition where length(address) > 5 order by id;""" + qt_add_partition_8 """select id,name,address from iceberg_add_partition where length(address) < 5 order by id;""" + qt_add_partition_9 """select * from iceberg_add_partition where age > 30 order by id;""" + 
qt_add_partition_10 """select * from iceberg_add_partition where id < 5 order by id;""" + + qt_drop_partition_1 """desc iceberg_drop_partition;""" + qt_drop_partition_2 """select * from iceberg_drop_partition order by id;""" + qt_drop_partition_3 """select id,name,created_date from iceberg_drop_partition order by id;""" + qt_drop_partition_4 """select * from iceberg_drop_partition where id > 5 order by id;""" + qt_drop_partition_5 """select * from iceberg_drop_partition where id < 3 order by id;""" + qt_drop_partition_6 """select * from iceberg_drop_partition where created_date > "2024-12-01" order by id;""" + qt_drop_partition_7 """select * from iceberg_drop_partition where created_date > "2023-12-02" order by id;""" + qt_drop_partition_8 """select * from iceberg_drop_partition where created_date < "2024-12-02" order by id;""" + qt_drop_partition_9 """select * from iceberg_drop_partition where created_date <= "2024-12-03" order by id;""" + qt_drop_partition_10 """select * from iceberg_drop_partition where created_date >= "2024-07-03" order by id;""" + + qt_replace_partition_1 """desc iceberg_replace_partition;""" + qt_replace_partition_2 """select * from iceberg_replace_partition order by id;""" + qt_replace_partition_3 """select id,name,created_date from iceberg_replace_partition order by id;""" + qt_replace_partition_4 """select * from iceberg_replace_partition where id > 5 order by id;""" + qt_replace_partition_5 """select * from iceberg_replace_partition where id < 3 order by id;""" + qt_replace_partition_6 """select * from iceberg_replace_partition where created_date > "2024-12-01" order by id;""" + qt_replace_partition_7 """select * from iceberg_replace_partition where created_date > "2023-12-02" order by id;""" + qt_replace_partition_8 """select * from iceberg_replace_partition where created_date < "2024-12-02" order by id;""" + qt_replace_partition_9 """select * from iceberg_replace_partition where created_date <= "2024-12-03" order by id;""" + 
qt_replace_partition_10 """select * from iceberg_replace_partition where created_date >= "2024-07-03" order by id;""" + qt_replace_partition_11 """select * from iceberg_replace_partition where created_date >= "2025-09-23" order by id;""" + + + + + try { + sql """ select * from iceberg_evolution_partition """ + }catch (Exception e) { + assertTrue(e.getMessage().contains("Unable to read Iceberg table with dropped old partition column."), e.getMessage()) + } + +} +/* + + +CREATE TABLE iceberg_add_partition ( + id INT, + name STRING, + age INT +) USING iceberg; +INSERT INTO iceberg_add_partition VALUES(1, 'Alice', 30),(2, 'Bob', 25); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD age; +INSERT INTO iceberg_add_partition VALUES (3, 'Diana', 30, '456'); +ALTER TABLE iceberg_add_partition ADD COLUMNS address STRING; +INSERT INTO iceberg_add_partition VALUES (4, 'Charlie', 45, '123 Street Name'); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD bucket(10, id); +INSERT INTO iceberg_add_partition VALUES (5, 'Eve', 29, '789 Third St'); +ALTER TABLE iceberg_add_partition ADD PARTITION FIELD truncate(5, address); +INSERT INTO iceberg_add_partition VALUES (6, 'Frank', 33,"xx"),(7, 'Grace', 28,"yyyyyyyyy"); + + +CREATE TABLE iceberg_drop_partition ( + id INT, + name STRING, + amount DOUBLE, + created_date DATE +) +USING iceberg +PARTITIONED BY (year(created_date),bucket(10,created_date)); +INSERT INTO iceberg_drop_partition VALUES + (1, 'Alice', 100.0, DATE '2023-12-01'), + (2, 'Bob', 200.0, DATE '2023-12-02'), + (3, 'Charlie', 300.0, DATE '2024-12-03'); +ALTER TABLE iceberg_drop_partition DROP PARTITION FIELD year(created_date); +INSERT INTO iceberg_drop_partition VALUES + (4, 'David', 400.0, DATE '2023-12-02'), + (5, 'Eve', 500.0, DATE '2024-12-03'); +ALTER TABLE iceberg_drop_partition DROP PARTITION FIELD bucket(10,created_date); +INSERT INTO iceberg_drop_partition VALUES + (6, 'David', 400.0, DATE '2025-12-12'), + (7, 'Eve', 500.0, DATE '2025-12-23'); + + +CREATE 
TABLE iceberg_replace_partition ( + id INT, + name STRING, + amount DOUBLE, + created_date DATE +) +USING iceberg +PARTITIONED BY (year(created_date),bucket(10,created_date)); +INSERT INTO iceberg_replace_partition VALUES + (1, 'Alice', 100.0, DATE '2023-01-01'), + (2, 'Bob', 200.0, DATE '2023-12-02'), + (3, 'Charlie', 300.0, DATE '2024-12-03'); +ALTER TABLE iceberg_replace_partition REPLACE PARTITION FIELD year(created_date) WITH month(created_date); +INSERT INTO iceberg_replace_partition VALUES + (4, 'David', 400.0, DATE '2023-12-02'), + (5, 'Eve', 500.0, DATE '2024-07-03'); +ALTER TABLE iceberg_replace_partition REPLACE PARTITION FIELD bucket(10,created_date) WITH bucket(10,id); +INSERT INTO iceberg_replace_partition VALUES + (6, 'David', 400.0, DATE '2025-10-12'), + (7, 'Eve', 500.0, DATE '2025-09-23'); + +CREATE TABLE iceberg_evolution_partition ( + id INT, + name STRING, + age INT +) USING iceberg; +INSERT INTO iceberg_evolution_partition VALUES(1, 'Alice', 30),(2, 'Bob', 25); +ALTER TABLE iceberg_evolution_partition ADD PARTITION FIELD age; +INSERT INTO iceberg_evolution_partition VALUES (3, 'Diana', 30, '456'); +ALTER TABLE iceberg_evolution_partition ADD COLUMNS address STRING; +INSERT INTO iceberg_evolution_partition VALUES (4, 'Charlie', 45, '123 Street Name'); +ALTER TABLE iceberg_evolution_partition ADD PARTITION FIELD bucket(10, id); +INSERT INTO iceberg_evolution_partition VALUES (5, 'Eve', 29, '789 Third St'); +ALTER TABLE iceberg_evolution_partition REPLACE PARTITION FIELD bucket(10, id) WITH truncate(5, address); +INSERT INTO iceberg_evolution_partition VALUES (6, 'Frank', 33,"xx"),(7, 'Grace', 28,"yyyyyyyyy"); +ALTER TABLE iceberg_evolution_partition DROP PARTITION FIELD truncate(5, address); +INSERT INTO iceberg_evolution_partition VALUES (8, 'Hank', 40, "zz"), (9, 'Ivy', 22, "aaaaaa"); +ALTER TABLE iceberg_evolution_partition DROP COLUMNS address; +-- INSERT INTO iceberg_evolution_partition VALUES (10, 'Jack', 35), (11, 'Kara', 30); + + +*/ + +