Skip to content

Commit

Permalink
[format] remove the code that uses reflection to obtain the estimated memory size of the orc writer.
Browse files Browse the repository at this point in the history
  • Loading branch information
liming30 committed Nov 7, 2023
1 parent 4db2e9a commit efeb8f5
Showing 1 changed file with 1 addition and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,8 @@

import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.Writer;
import org.apache.orc.impl.writer.TreeWriter;

import java.io.IOException;
import java.lang.reflect.Field;
import java.security.AccessController;
import java.security.PrivilegedAction;

import static org.apache.paimon.utils.Preconditions.checkNotNull;

Expand All @@ -41,7 +37,6 @@ public class OrcBulkWriter implements FormatWriter {
private final Vectorizer<InternalRow> vectorizer;
private final VectorizedRowBatch rowBatch;
private final PositionOutputStream underlyingStream;
private final TreeWriter treeWriter;

public OrcBulkWriter(
Vectorizer<InternalRow> vectorizer,
Expand All @@ -56,8 +51,6 @@ public OrcBulkWriter(
// metadata on the fly through the Vectorizer#vectorize(...) method.
this.vectorizer.setWriter(this.writer);
this.underlyingStream = underlyingStream;
// TODO: Turn to access these hidden field directly after upgrade to ORC 1.7.4
this.treeWriter = getHiddenFieldInORC("treeWriter");
}

@Override
Expand Down Expand Up @@ -89,30 +82,13 @@ public boolean reachTargetSize(boolean suggestedCheck, long targetSize) throws I
}

/**
 * Estimates the current length of the output as the stream position plus a fraction of the
 * memory estimate.
 *
 * <p>NOTE(review): this block is rendered from a commit diff, so it shows both the line the
 * commit deleted and the line that replaced it. Only one of the two {@code estimateMemory}
 * declarations exists in any real revision of the file.
 *
 * @return {@code underlyingStream.getPos()} plus 20% of the memory estimate, rounded up
 * @throws IOException declared by the signature; presumably raised by the stream or writer
 *     calls (confirm against their declarations)
 */
private long length() throws IOException {
// Deleted by the commit: estimate read from the TreeWriter obtained through reflection.
long estimateMemory = treeWriter.estimateMemory();
// Added by the commit: estimate requested from the writer field directly.
long estimateMemory = writer.estimateMemory();
long fileLength = underlyingStream.getPos();

// This value is estimated, not actual.
// Only 20% of the memory estimate is counted towards the length.
return (long) Math.ceil(fileLength + estimateMemory * 0.2);
}

/**
 * Reads a field of the {@code writer} object by name using reflection.
 *
 * <p>NOTE(review): in this commit view the method is on the deleted side of the diff; the
 * commit title describes it as the reflection code being removed.
 *
 * @param fieldName name of a field declared directly on the runtime class of {@code writer}
 * @param <T> type the caller expects; the cast to it is unchecked
 * @return the current value of the named field on {@code writer}
 * @throws RuntimeException wrapping any exception raised while looking up or reading the field
 */
@SuppressWarnings("unchecked")
private <T> T getHiddenFieldInORC(String fieldName) {
try {
// getDeclaredField only searches the class itself, not its superclasses.
Field treeWriterField = writer.getClass().getDeclaredField(fieldName);
// Make the field accessible inside a privileged action.
AccessController.doPrivileged(
(PrivilegedAction<Void>)
() -> {
treeWriterField.setAccessible(true);
return null;
});
return (T) treeWriterField.get(writer);
} catch (Exception e) {
// Any failure is rethrown unchecked with the field and class names; the cause is preserved.
throw new RuntimeException(
"Cannot get " + fieldName + " from " + writer.getClass().getName(), e);
}
}

@VisibleForTesting
VectorizedRowBatch getRowBatch() {
return rowBatch;
Expand Down

0 comments on commit efeb8f5

Please sign in to comment.