Skip to content

Commit

Permalink
Spark 3.5: Mandate identifier fields when create_changelog_view for table contain unsortable columns (apache#11045)
Browse files Browse the repository at this point in the history
  • Loading branch information
dramaticlly authored Sep 5, 2024
1 parent f508a7e commit f7c6d57
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -436,4 +436,18 @@ public void testNetChangesWithComputeUpdates() {
.isInstanceOf(IllegalArgumentException.class)
.hasMessageContaining("Not support net changes with update images");
}

@TestTemplate
public void testUpdateWithInComparableType() {
  // The table declares a MAP column ("data") and no identifier fields; the expected error
  // message below reports that column as unorderable.
  String createTableDdl =
      "CREATE TABLE %s (id INT NOT NULL, data MAP<STRING,STRING>, age INT) USING iceberg";
  sql(createTableDdl, tableName);

  // The procedure is invoked with only the table argument, i.e. without identifier columns.
  String createViewCall = "CALL %s.system.create_changelog_view(table => '%s')";
  String expectedMessage =
      "Identifier field is required as table contains unorderable columns: [data]";

  // The call must be rejected with an IllegalArgumentException naming the offending column.
  assertThatThrownBy(() -> sql(createViewCall, catalogName, tableName))
      .isInstanceOf(IllegalArgumentException.class)
      .hasMessageContaining(expectedMessage);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.Table;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
Expand All @@ -37,6 +38,7 @@
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.OrderUtils;
import org.apache.spark.sql.connector.catalog.Identifier;
import org.apache.spark.sql.connector.catalog.TableCatalog;
import org.apache.spark.sql.connector.iceberg.catalog.ProcedureParameter;
Expand Down Expand Up @@ -146,10 +148,21 @@ public InternalRow[] call(InternalRow args) {
Dataset<Row> df = loadRows(changelogTableIdent, options(input));

boolean netChanges = input.asBoolean(NET_CHANGES, false);
String[] identifierColumns = identifierColumns(input, tableIdent);
Set<String> unorderableColumnNames =
Arrays.stream(df.schema().fields())
.filter(field -> !OrderUtils.isOrderable(field.dataType()))
.map(StructField::name)
.collect(Collectors.toSet());

Preconditions.checkArgument(
identifierColumns.length > 0 || unorderableColumnNames.isEmpty(),
"Identifier field is required as table contains unorderable columns: %s",
unorderableColumnNames);

if (shouldComputeUpdateImages(input)) {
Preconditions.checkArgument(!netChanges, "Not support net changes with update images");
df = computeUpdateImages(identifierColumns(input, tableIdent), df);
df = computeUpdateImages(identifierColumns, df);
} else {
df = removeCarryoverRows(df, netChanges);
}
Expand Down

0 comments on commit f7c6d57

Please sign in to comment.