Skip to content

Commit

Permalink
[core] Fix Parquet reader being unable to read a row whose last column is an array. (#4755)
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen0421 authored Dec 23, 2024
1 parent bbcece8 commit 4302002
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,11 @@ public static RowPosition calculateRowOffsets(
int nullValuesCount = 0;
BooleanArrayList nullRowFlags = new BooleanArrayList(0);
for (int i = 0; i < fieldDefinitionLevels.length; i++) {
// TODO: this is not correct ?
// if (fieldRepetitionLevels[i] > rowRepetitionLevel) {
// throw new IllegalStateException(
// format(
// "In parquet's row type field repetition level should
// not larger than row's repetition level. "
// + "Row repetition level is %s, row field
// repetition level is %s.",
// rowRepetitionLevel, fieldRepetitionLevels[i]));
// }
// If a row's last field is an array, the repetition levels for the array's items will
// be larger than the parent row's repetition level, so we need to skip those values.
if (fieldRepetitionLevels[i] > rowRepetitionLevel) {
continue;
}

if (fieldDefinitionLevels[i] >= rowDefinitionLevel) {
// current row is defined and not empty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ public class ParquetReadWriteTest {
new VarCharType(VarCharType.MAX_LENGTH))),
new ArrayType(true, RowType.builder().field("a", new IntType()).build()),
RowType.of(
new IntType(),
new ArrayType(
true,
RowType.builder()
Expand All @@ -174,8 +175,7 @@ public class ParquetReadWriteTest {
true,
new ArrayType(true, new IntType())))
.field("c", new IntType())
.build()),
new IntType()),
.build())),
RowType.of(
new ArrayType(RowType.of(new VarCharType(255))),
RowType.of(new IntType()),
Expand Down Expand Up @@ -808,6 +808,7 @@ null, new GenericMap(mp1), new GenericMap(mp2)
new GenericArray(
new GenericRow[] {GenericRow.of(i), GenericRow.of(i + 1)}),
GenericRow.of(
i,
new GenericArray(
new GenericRow[] {
GenericRow.of(
Expand All @@ -826,8 +827,7 @@ null, new GenericMap(mp1), new GenericMap(mp2)
null
}),
i)
}),
i),
})),
null));
}
return rows;
Expand Down Expand Up @@ -881,15 +881,15 @@ private Path createNestedDataByOriginWriter(int rowNum, File tmpDir, int rowGrou
row2.add(0, i + 1);
f4.addGroup(0);

// add ROW<`f0` ARRAY<ROW<`b` ARRAY<ARRAY<INT>>, `c` INT>>, `f1` INT>>
// add ROW<`f0` INT, `f1` ARRAY<ROW<`b` ARRAY<ARRAY<INT>>, `c` INT>>>
Group f5 = row.addGroup("f5");
Group arrayRow = f5.addGroup(0);
f5.add(0, i);
Group arrayRow = f5.addGroup(1);
Group insideRow = arrayRow.addGroup(0).addGroup(0);
Group insideArray = insideRow.addGroup(0);
createParquetDoubleNestedArray(insideArray, i);
insideRow.add(1, i);
arrayRow.addGroup(0);
f5.add(1, i);
writer.write(row);
}
} catch (Exception e) {
Expand Down Expand Up @@ -982,43 +982,43 @@ private void compareNestedRow(List<InternalRow> rows, List<InternalRow> results)
origin.getArray(4).getRow(1, 1).getInt(0),
result.getArray(4).getRow(1, 1).getInt(0));

Assertions.assertEquals(origin.getRow(5, 2).getInt(0), result.getRow(5, 2).getInt(0));
Assertions.assertEquals(
origin.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(0).getInt(0),
result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(0).getInt(0));
origin.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(0).getInt(0),
result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(0).getInt(0));
Assertions.assertEquals(
origin.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(0).getInt(1),
result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(0).getInt(1));
origin.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(0).getInt(1),
result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(0).getInt(1));
Assertions.assertTrue(
result.getRow(5, 2)
.getArray(0)
.getArray(1)
.getRow(0, 2)
.getArray(0)
.getArray(0)
.isNullAt(2));

Assertions.assertEquals(
origin.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(1).getInt(0),
result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(1).getInt(0));
origin.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(1).getInt(0),
result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(1).getInt(0));
Assertions.assertEquals(
origin.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(1).getInt(1),
result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(1).getInt(1));
origin.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(1).getInt(1),
result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(1).getInt(1));
Assertions.assertTrue(
result.getRow(5, 2)
.getArray(0)
.getArray(1)
.getRow(0, 2)
.getArray(0)
.getArray(1)
.isNullAt(2));

Assertions.assertEquals(
0, result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).getArray(2).size());
0, result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).getArray(2).size());
Assertions.assertTrue(
result.getRow(5, 2).getArray(0).getRow(0, 2).getArray(0).isNullAt(3));
result.getRow(5, 2).getArray(1).getRow(0, 2).getArray(0).isNullAt(3));

Assertions.assertEquals(
origin.getRow(5, 2).getArray(0).getRow(0, 2).getInt(1),
result.getRow(5, 2).getArray(0).getRow(0, 2).getInt(1));
Assertions.assertEquals(origin.getRow(5, 2).getInt(1), result.getRow(5, 2).getInt(1));
origin.getRow(5, 2).getArray(1).getRow(0, 2).getInt(1),
result.getRow(5, 2).getArray(1).getRow(0, 2).getInt(1));
Assertions.assertTrue(result.isNullAt(6));
Assertions.assertTrue(result.getRow(6, 2).isNullAt(0));
Assertions.assertTrue(result.getRow(6, 2).isNullAt(1));
Expand Down

0 comments on commit 4302002

Please sign in to comment.