Skip to content

Commit

Permalink
[spark] Fix the build of read type in binlog table (#4689)
Browse files Browse the repository at this point in the history
  • Loading branch information
Zouxxyy authored Dec 12, 2024
1 parent c0f61e2 commit 0a07ebc
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,8 @@ public RowType rowType() {
List<DataField> fields = new ArrayList<>();
fields.add(SpecialFields.ROW_KIND);
for (DataField field : wrapped.rowType().getFields()) {
DataField newField =
new DataField(
field.id(),
field.name(),
new ArrayType(field.type().nullable()), // convert to nullable
field.description());
fields.add(newField);
// convert to nullable
fields.add(field.newType(new ArrayType(field.type().nullable())));
}
return new RowType(fields);
}
Expand All @@ -99,6 +94,19 @@ private BinlogRead(InnerTableRead dataRead) {
super(dataRead);
}

@Override
public InnerTableRead withReadType(RowType readType) {
List<DataField> fields = new ArrayList<>();
for (DataField field : readType.getFields()) {
if (field.name().equals(SpecialFields.ROW_KIND.name())) {
fields.add(field);
} else {
fields.add(field.newType(((ArrayType) field.type()).getElementType()));
}
}
return super.withReadType(readType.copy(fields));
}

@Override
public RecordReader<InternalRow> createReader(Split split) throws IOException {
DataSplit dataSplit = (DataSplit) split;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,20 @@ class PaimonSystemTableTest extends PaimonSparkTestBase {
spark.sql("select partition,bucket from `T$buckets`"),
Row("[2024-10-10, 01]", 0) :: Row("[2024-10-10, 01]", 1) :: Row("[2024-10-10, 01]", 2) :: Nil)
}

test("system table: binlog table") {
sql("""
|CREATE TABLE T (a INT, b INT)
|TBLPROPERTIES ('primary-key'='a', 'changelog-producer' = 'lookup', 'bucket' = '2')
|""".stripMargin)

sql("INSERT INTO T VALUES (1, 2)")
sql("INSERT INTO T VALUES (1, 3)")
sql("INSERT INTO T VALUES (2, 2)")

checkAnswer(
sql("SELECT * FROM `T$binlog`"),
Seq(Row("+I", Array(1), Array(3)), Row("+I", Array(2), Array(2)))
)
}
}

0 comments on commit 0a07ebc

Please sign in to comment.