From 3ece2d3d7ff75897ff19a2c8519ece44c98a5d61 Mon Sep 17 00:00:00 2001 From: yuzelin Date: Thu, 19 Dec 2024 11:24:04 +0800 Subject: [PATCH 1/2] [hotfix] Remove unused SchemaEvolutionUtil methods --- .../paimon/schema/SchemaEvolutionUtil.java | 51 ------------------- .../schema/SchemaEvolutionUtilTest.java | 26 ---------- 2 files changed, 77 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java index cab5dcaeb8ad..dfed60392bac 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java @@ -43,7 +43,6 @@ import javax.annotation.Nullable; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -225,56 +224,6 @@ public static IndexCastMapping createIndexCastMapping( return createIndexCastMapping(tableProjection, tableFields, dataProjection, dataFields); } - /** - * Create data projection from table projection. For example, the table and data fields are as - * follows - * - * - * - *

When we project 1->c, 6->b, 3->a from table fields, the table projection is [[0], [4], - * [1]], in which 0 is the index of field 1->c, 4 is the index of field 6->b, 1 is the index of - * field 3->a in table fields. We need to create data projection from [[0], [4], [1]] as - * follows: - * - *

- * - *

The we can create table projection as follows: [[0], [-1], [2]], in which 0, -1 and 2 are - * the index of fields [1->c, 6->b, 3->a] in data fields. When we project column from underlying - * data, we need to specify the field index and name. It is difficult to assign a proper field - * id and name for 6->b in data projection and add it to data fields, and we can't use 6->b - * directly because the field index of b in underlying is 2. We can remove the -1 field index in - * data projection, then the result data projection is: [[0], [2]]. - * - *

We create {@link InternalRow} for 1->a, 3->c after projecting them from underlying data, - * then create {@link ProjectedRow} with a index mapping and return null for 6->b in table - * fields. - * - * @param tableFields the fields of table - * @param dataFields the fields of underlying data - * @param tableProjection the projection of table - * @return the projection of data - */ - public static int[][] createDataProjection( - List tableFields, List dataFields, int[][] tableProjection) { - List dataFieldIdList = - dataFields.stream().map(DataField::id).collect(Collectors.toList()); - return Arrays.stream(tableProjection) - .map(p -> Arrays.copyOf(p, p.length)) - .peek( - p -> { - int fieldId = tableFields.get(p[0]).id(); - p[0] = dataFieldIdList.indexOf(fieldId); - }) - .filter(p -> p[0] >= 0) - .toArray(int[][]::new); - } - /** * When pushing down filters after schema evolution, we should devolve the literals from new * types (in dataFields) to original types (in tableFields). We will visit all predicate in diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java index 30d844e6c606..a67285c135d6 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java @@ -32,7 +32,6 @@ import org.apache.paimon.types.FloatType; import org.apache.paimon.types.IntType; import org.apache.paimon.utils.ProjectedRow; -import org.apache.paimon.utils.Projection; import org.junit.jupiter.api.Test; @@ -236,31 +235,6 @@ public void testCreateIndexMappingWithKeyValueFields() { assertThat(table2Field7Value).isEqualTo(6.0D); } - @Test - public void testCreateDataProjection() { - int[][] table1Projection = - new int[][] {new int[] {2}, new int[] {0}}; // project 5->d and 1->c in tableField1 - int[][] table2Projection = - new int[][] { - new int[] {4}, new int[] {2}, new int[] {0} - }; // project 8->b, 5->f and 1->c in tableField2 - - int[][] table1DataProjection = - SchemaEvolutionUtil.createDataProjection( - tableFields1, dataFields, table1Projection); - assertThat(Projection.of(table1DataProjection).toTopLevelIndexes()).containsExactly(1); - - int[][] table2DataProjection = - SchemaEvolutionUtil.createDataProjection( - tableFields2, dataFields, table2Projection); - assertThat(Projection.of(table2DataProjection).toTopLevelIndexes()).containsExactly(1); - - int[][] table2Table1Projection = - SchemaEvolutionUtil.createDataProjection( - tableFields2, tableFields1, table2Projection); - assertThat(Projection.of(table2Table1Projection).toTopLevelIndexes()).containsExactly(2, 0); - } - @Test public void testDevolveDataFilters() { List predicates = new ArrayList<>(); From 4409190201bd529dc5eab3739829932fa8f0d018 Mon Sep 17 00:00:00 2001 From: yuzelin Date: Thu, 19 Dec 2024 11:50:47 +0800 Subject: [PATCH 2/2] fix --- .../paimon/schema/SchemaEvolutionUtil.java | 104 ----------- .../schema/SchemaEvolutionUtilTest.java | 162 +----------------- 2 files changed, 1 insertion(+), 265 deletions(-) diff --git a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java index dfed60392bac..d30fe19abbac 100644 --- a/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java +++ b/paimon-core/src/main/java/org/apache/paimon/schema/SchemaEvolutionUtil.java @@ -18,7 +18,6 @@ package org.apache.paimon.schema; -import org.apache.paimon.KeyValue; import org.apache.paimon.casting.CastElementGetter; import org.apache.paimon.casting.CastExecutor; import org.apache.paimon.casting.CastExecutors; @@ -102,50 +101,6 @@ public static int[] createIndexMapping( return null; } - /** - * Create index mapping from table projection to underlying data projection. For example, the - * table and data fields are as follows - * - *

- * - *

The table and data top projections are as follows - * - *

- * - *

We can first get fields list for table and data projections from their fields as follows - * - *

- * - *

Then create index mapping based on the fields list and create cast mapping based on index - * mapping. - * - *

/// TODO should support nest index mapping when nest schema evolution is supported. - * - * @param tableProjection the table projection - * @param tableFields the fields in table - * @param dataProjection the underlying data projection - * @param dataFields the fields in underlying data - * @return the index mapping - */ - public static IndexCastMapping createIndexCastMapping( - int[] tableProjection, - List tableFields, - int[] dataProjection, - List dataFields) { - return createIndexCastMapping( - projectDataFields(tableProjection, tableFields), - projectDataFields(dataProjection, dataFields)); - } - /** Create index mapping from table fields to underlying data fields. */ public static IndexCastMapping createIndexCastMapping( List tableFields, List dataFields) { @@ -167,63 +122,6 @@ public CastFieldGetter[] getCastMapping() { }; } - private static List projectDataFields(int[] projection, List dataFields) { - List projectFields = new ArrayList<>(projection.length); - for (int index : projection) { - projectFields.add(dataFields.get(index)); - } - - return projectFields; - } - - /** - * Create index mapping from table projection to data with key and value fields. We should first - * create table and data fields with their key/value fields, then create index mapping with - * their projections and fields. For example, the table and data projections and fields are as - * follows - * - *

- * - *

First we will get max key id from table and data fields which is 6, then create table and - * data fields on it - * - *

- * - *

Finally we can create index mapping with table/data projections and fields, and create - * cast mapping based on index mapping. - * - *

/// TODO should support nest index mapping when nest schema evolution is supported. - * - * @param tableProjection the table projection - * @param tableKeyFields the table key fields - * @param tableValueFields the table value fields - * @param dataProjection the data projection - * @param dataKeyFields the data key fields - * @param dataValueFields the data value fields - * @return the result index and cast mapping - */ - public static IndexCastMapping createIndexCastMapping( - int[] tableProjection, - List tableKeyFields, - List tableValueFields, - int[] dataProjection, - List dataKeyFields, - List dataValueFields) { - List tableFields = - KeyValue.createKeyValueFields(tableKeyFields, tableValueFields); - List dataFields = KeyValue.createKeyValueFields(dataKeyFields, dataValueFields); - return createIndexCastMapping(tableProjection, tableFields, dataProjection, dataFields); - } - /** * When pushing down filters after schema evolution, we should devolve the literals from new * types (in dataFields) to original types (in tableFields). We will visit all predicate in @@ -303,8 +201,6 @@ private static int indexOf(DataField dataField, LinkedHashMapa BIGINT) in table fields through index mapping [0, -1, 1], then compare the data * type and create getter and casting mapping. * - *

/// TODO should support nest index mapping when nest schema evolution is supported. - * * @param tableFields the fields of table * @param dataFields the fields of underlying data * @param indexMapping the index mapping from table fields to data fields diff --git a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java index a67285c135d6..291ad0ef4fc0 100644 --- a/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java +++ b/paimon-core/src/test/java/org/apache/paimon/schema/SchemaEvolutionUtilTest.java @@ -18,8 +18,6 @@ package org.apache.paimon.schema; -import org.apache.paimon.data.GenericRow; -import org.apache.paimon.data.InternalRow; import org.apache.paimon.predicate.IsNotNull; import org.apache.paimon.predicate.IsNull; import org.apache.paimon.predicate.LeafPredicate; @@ -31,7 +29,6 @@ import org.apache.paimon.types.DoubleType; import org.apache.paimon.types.FloatType; import org.apache.paimon.types.IntType; -import org.apache.paimon.utils.ProjectedRow; import org.junit.jupiter.api.Test; @@ -44,11 +41,7 @@ /** Tests for {@link SchemaEvolutionUtil}. */ public class SchemaEvolutionUtilTest { - private final List keyFields = - Arrays.asList( - new DataField(0, "key_1", new IntType()), - new DataField(1, "key_2", new IntType()), - new DataField(2, "key_3", new IntType())); + private final List dataFields = Arrays.asList( new DataField(0, "a", new IntType()), @@ -82,159 +75,6 @@ public void testCreateIndexMapping() { assertThat(indexMapping[3]).isLessThan(0); } - @Test - public void testCreateIndexMappingWithFields() { - int[] dataProjection = new int[] {1}; // project (1, b, int) - int[] table1Projection = new int[] {2, 0}; // project (5->d, int), (1, c, bigint) - int[] table2Projection = - new int[] {4, 2, 0}; // project (8, b, int), (5, f, bigint), (1, c, double) - - InternalRow dataValue = GenericRow.of(1234); - IndexCastMapping table1DataIndexMapping = - SchemaEvolutionUtil.createIndexCastMapping( - table1Projection, tableFields1, dataProjection, dataFields); - assertThat(table1DataIndexMapping.getIndexMapping()).containsExactly(-1, 0); - - // Get (null, 1234L) from data value - assertThat(table1DataIndexMapping.getCastMapping().length).isEqualTo(2); - ProjectedRow projectedDataRow1 = - ProjectedRow.from(table1DataIndexMapping.getIndexMapping()); - projectedDataRow1.replaceRow(dataValue); - Object table1Field1Value = - table1DataIndexMapping.getCastMapping()[0].getFieldOrNull(projectedDataRow1); - long table1Field2Value = - table1DataIndexMapping.getCastMapping()[1].getFieldOrNull(projectedDataRow1); - assertThat(table1Field1Value).isNull(); - assertThat(table1Field2Value).isEqualTo(1234L); - - IndexCastMapping table2DataIndexMapping = - SchemaEvolutionUtil.createIndexCastMapping( - table2Projection, tableFields2, dataProjection, dataFields); - assertThat(table2DataIndexMapping.getIndexMapping()).containsExactly(-1, -1, 0); - - // Get (null, null, 1234.0D) from data value - assertThat(table2DataIndexMapping.getCastMapping().length).isEqualTo(3); - ProjectedRow projectedDataRow2 = - ProjectedRow.from(table2DataIndexMapping.getIndexMapping()); - projectedDataRow2.replaceRow(dataValue); - Object table2Field1Value = - table2DataIndexMapping.getCastMapping()[0].getFieldOrNull(projectedDataRow2); - Object table2Field2Value = - table2DataIndexMapping.getCastMapping()[1].getFieldOrNull(projectedDataRow2); - Object table2Field3Value = - table2DataIndexMapping.getCastMapping()[2].getFieldOrNull(projectedDataRow2); - assertThat(table2Field1Value).isNull(); - assertThat(table2Field2Value).isNull(); - assertThat(table2Field3Value).isEqualTo(1234D); - - IndexCastMapping table2Table1IndexMapping = - SchemaEvolutionUtil.createIndexCastMapping( - table2Projection, tableFields2, table1Projection, tableFields1); - assertThat(table2Table1IndexMapping.getIndexMapping()).containsExactly(-1, 0, 1); - - InternalRow table1Data = GenericRow.of(123, 321L); - ProjectedRow projectedDataRow3 = - ProjectedRow.from(table2Table1IndexMapping.getIndexMapping()); - projectedDataRow3.replaceRow(table1Data); - // Get (null, 123L, 321.0D) from table1 data - assertThat(table2Table1IndexMapping.getCastMapping().length).isEqualTo(3); - Object table2Table1Field1Value = - table2Table1IndexMapping.getCastMapping()[0].getFieldOrNull(projectedDataRow3); - long table2Table1Field2Value = - table2Table1IndexMapping.getCastMapping()[1].getFieldOrNull(projectedDataRow3); - double table2Table1Field3Value = - table2Table1IndexMapping.getCastMapping()[2].getFieldOrNull(projectedDataRow3); - assertThat(table2Table1Field1Value).isNull(); - assertThat(table2Table1Field2Value).isEqualTo(123L); - assertThat(table2Table1Field3Value).isEqualTo(321.0D); - } - - @Test - public void testCreateIndexMappingWithKeyValueFields() { - int[] dataProjection = - new int[] {0, 2, 3, 4, 6}; // project "key_1", "key3", "seq", "kind", "b" - int[] table1Projection = - new int[] {0, 2, 3, 4, 7, 5}; // project "key_1", "key3", "seq", "kind", "d", "c" - int[] table2Projection = - new int[] { - 0, 2, 3, 4, 9, 7, 5 - }; // project "key_1", "key3", "seq", "kind", "b", "f", "c" - - IndexCastMapping table1DataIndexMapping = - SchemaEvolutionUtil.createIndexCastMapping( - table1Projection, - keyFields, - tableFields1, - dataProjection, - keyFields, - dataFields); - assertThat(table1DataIndexMapping.getIndexMapping()).containsExactly(0, 1, 2, 3, -1, 4); - - // Get (1, 2, 3, (byte) 4, null, 5L) from data value - InternalRow dataValue = GenericRow.of(1, 2, 3L, (byte) 4, 5); - ProjectedRow projectedDataValue = - ProjectedRow.from(table1DataIndexMapping.getIndexMapping()); - projectedDataValue.replaceRow(dataValue); - assertThat(table1DataIndexMapping.getCastMapping().length).isEqualTo(6); - int table1Field1Value = - table1DataIndexMapping.getCastMapping()[0].getFieldOrNull(projectedDataValue); - int table1Field2Value = - table1DataIndexMapping.getCastMapping()[1].getFieldOrNull(projectedDataValue); - long table1Field3Value = - table1DataIndexMapping.getCastMapping()[2].getFieldOrNull(projectedDataValue); - byte table1Field4Value = - table1DataIndexMapping.getCastMapping()[3].getFieldOrNull(projectedDataValue); - Object table1Field5Value = - table1DataIndexMapping.getCastMapping()[4].getFieldOrNull(projectedDataValue); - long table1Field6Value = - table1DataIndexMapping.getCastMapping()[5].getFieldOrNull(projectedDataValue); - assertThat(table1Field1Value).isEqualTo(1); - assertThat(table1Field2Value).isEqualTo(2); - assertThat(table1Field3Value).isEqualTo(3L); - assertThat(table1Field4Value).isEqualTo((byte) 4); - assertThat(table1Field5Value).isNull(); - assertThat(table1Field6Value).isEqualTo(5L); - - IndexCastMapping table2Table1IndexMapping = - SchemaEvolutionUtil.createIndexCastMapping( - table2Projection, - keyFields, - tableFields2, - table1Projection, - keyFields, - tableFields1); - assertThat(table2Table1IndexMapping.getIndexMapping()) - .containsExactly(0, 1, 2, 3, -1, 4, 5); - - // Get (1, 2, 3, (byte) 4, null, 5L, 6.0D) from data value - InternalRow table1Value = GenericRow.of(1, 2, 3L, (byte) 4, 5, 6L); - ProjectedRow projectedTableValue = - ProjectedRow.from(table2Table1IndexMapping.getIndexMapping()); - projectedTableValue.replaceRow(table1Value); - assertThat(table2Table1IndexMapping.getCastMapping().length).isEqualTo(7); - int table2Field1Value = - table2Table1IndexMapping.getCastMapping()[0].getFieldOrNull(projectedTableValue); - int table2Field2Value = - table2Table1IndexMapping.getCastMapping()[1].getFieldOrNull(projectedTableValue); - long table2Field3Value = - table2Table1IndexMapping.getCastMapping()[2].getFieldOrNull(projectedTableValue); - byte table2Field4Value = - table2Table1IndexMapping.getCastMapping()[3].getFieldOrNull(projectedTableValue); - Object table2Field5Value = - table2Table1IndexMapping.getCastMapping()[4].getFieldOrNull(projectedTableValue); - long table2Field6Value = - table2Table1IndexMapping.getCastMapping()[5].getFieldOrNull(projectedTableValue); - double table2Field7Value = - table2Table1IndexMapping.getCastMapping()[6].getFieldOrNull(projectedTableValue); - assertThat(table2Field1Value).isEqualTo(1); - assertThat(table2Field2Value).isEqualTo(2); - assertThat(table2Field3Value).isEqualTo(3L); - assertThat(table2Field4Value).isEqualTo((byte) 4); - assertThat(table2Field5Value).isNull(); - assertThat(table2Field6Value).isEqualTo(5L); - assertThat(table2Field7Value).isEqualTo(6.0D); - } - @Test public void testDevolveDataFilters() { List predicates = new ArrayList<>();