Skip to content

Commit

Permalink
[AMORO-2864] Filter the null field when estimate size of records (apache#2865)
Browse files Browse the repository at this point in the history

* Throw NPE if the number of last fields is less than the current number of fields

* checkstyle
  • Loading branch information
XBaith authored May 27, 2024
1 parent 6285291 commit a71710f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@
import org.apache.iceberg.util.StructLikeWrapper;
import org.apache.lucene.util.RamUsageEstimator;

import java.util.Arrays;
import java.util.Objects;

/** Size Estimator for StructLikeWrapper record payload. */
public class StructLikeWrapperSizeEstimator implements SizeEstimator<StructLikeWrapper> {
@Override
Expand All @@ -34,15 +37,21 @@ public long sizeEstimate(StructLikeWrapper structLikeWrapper) {
}

private long sizeOf(Object[] objects) {
long size = 0;
for (Object object : objects) {
if (object.getClass().isArray()) {
size += sizeOf((Object[]) object);
} else {
size += RamUsageEstimator.sizeOfObject(object, 0);
}
if (objects == null) {
return 0;
}
return size;

return Arrays.stream(objects)
.filter(Objects::nonNull)
.mapToLong(
object -> {
if (object.getClass().isArray()) {
return sizeOf((Object[]) object);
} else {
return RamUsageEstimator.sizeOfObject(object, 0);
}
})
.sum();
}

private Object[] structLikeObjects(StructLike structLike) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@
import org.apache.amoro.BasicTableTestHelper;
import org.apache.amoro.data.ChangedLsn;
import org.apache.amoro.io.MixedDataTestHelpers;
import org.apache.iceberg.Schema;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.StructLikeWrapper;
import org.apache.lucene.util.RamUsageEstimator;
import org.junit.Assert;
Expand Down Expand Up @@ -53,4 +55,29 @@ public void testSizeEstimator() {
long estimateSize = new StructLikeWrapperSizeEstimator().sizeEstimate(wrapper);
Assert.assertEquals(1, record2Size / estimateSize);
}

/**
 * Checks that estimating the size of a record whose optional field is null completes and yields
 * a value comparable to the growth of a map when that record is added to it.
 */
@Test
public void testSizeEstimatorWithNullField() {
// Two required fields plus one optional field ("ts") that is allowed to be null.
final Schema schema =
new Schema(
Types.NestedField.required(1, "id", Types.IntegerType.get()),
Types.NestedField.required(2, "name", Types.StringType.get()),
Types.NestedField.optional(3, "ts", Types.LongType.get()));

Record record1 = MixedDataTestHelpers.createRecord(schema, 1, "name1", 0);
// Set the ts field to null
Record record2 = MixedDataTestHelpers.createRecord(schema, 1, "name1", null);
Map<StructLike, ChangedLsn> map = Maps.newHashMap();
ChangedLsn changedLsn = ChangedLsn.of(1, 2);
// Measure the map before and after inserting record2; the difference is used as the reference
// size for record2. The order of the puts and measurements must not change.
map.put(record1, changedLsn);
long oldSize = RamUsageEstimator.sizeOfObject(map, 0);
map.put(record2, changedLsn);
long newSize = RamUsageEstimator.sizeOfObject(map, 0);
long record2Size = newSize - oldSize;

StructLikeWrapper wrapper = StructLikeWrapper.forType(schema.asStruct()).set(record2);

long estimateSize = new StructLikeWrapperSizeEstimator().sizeEstimate(wrapper);
// Both operands are long, so this is integer division: the assertion holds when the reference
// size is at least the estimate but less than twice the estimate (for positive sizes).
Assert.assertEquals(1, record2Size / estimateSize);
}
}

0 comments on commit a71710f

Please sign in to comment.