Skip to content

Commit

Permalink
[orc] Optimize configuration creating in orc file format (apache#4716)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangyazhe authored Dec 16, 2024
1 parent c9eafb6 commit 683fa19
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,9 @@ public class OrcFileFormat extends FileFormat {
public OrcFileFormat(FormatContext formatContext) {
super(IDENTIFIER);
this.orcProperties = getOrcProperties(formatContext.options(), formatContext);
this.readerConf = new org.apache.hadoop.conf.Configuration();
this.readerConf = new org.apache.hadoop.conf.Configuration(false);
this.orcProperties.forEach((k, v) -> readerConf.set(k.toString(), v.toString()));
this.writerConf = new org.apache.hadoop.conf.Configuration();
this.writerConf = new org.apache.hadoop.conf.Configuration(false);
this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), v.toString()));
this.readBatchSize = formatContext.readBatchSize();
this.writeBatchSize = formatContext.writeBatchSize();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,68 @@ public void testSupportedDataTypes() {
dataFields.add(new DataField(index++, "decimal_type", DataTypes.DECIMAL(10, 3)));
orc.validateDataFields(new RowType(dataFields));
}

/**
 * Compares the average time of one {@code create()} call against the average time of one
 * {@code createConfig()} call (as reported by {@code createConfigCost()}).
 *
 * <p>Runs 1,000 untimed warm-up calls, then times 10,000 calls and converts the elapsed
 * nanoseconds to milliseconds per call. The test passes only when 500 times that value is
 * still below the baseline.
 *
 * <p>NOTE(review): this is a wall-clock micro-benchmark, so it may be sensitive to machine
 * load; confirm the 500x margin is stable on CI.
 */
@Test
public void testCreateCost() {
    double createConfCost = createConfigCost();
    // Untimed warm-up rounds before the measured loop.
    for (int i = 0; i < 1000; i++) {
        create();
    }
    int times = 10_000;
    long start = System.nanoTime();
    for (int i = 0; i < times; i++) {
        create();
    }
    // Nanoseconds -> milliseconds, averaged over the measured calls.
    double cost = ((double) (System.nanoTime() - start)) / 1_000_000 / times;
    // Numeric assertion so that a failure reports both measured values.
    assertThat(cost * 500).isLessThan(createConfCost);
}

/**
 * Compares the average time of one {@code createRandomConfig()} call against the average time
 * of one {@code createConfig()} call (as reported by {@code createConfigCost()}).
 *
 * <p>Runs 1,000 untimed warm-up calls, then times 10,000 calls and converts the elapsed
 * nanoseconds to milliseconds per call. The test passes only when 10 times that value is
 * still below the baseline.
 *
 * <p>NOTE(review): this is a wall-clock micro-benchmark, so it may be sensitive to machine
 * load; confirm the 10x margin is stable on CI.
 */
@Test
public void testCreateCostWithRandomConfig() {
    double createConfCost = createConfigCost();
    // Untimed warm-up rounds before the measured loop.
    for (int i = 0; i < 1000; i++) {
        createRandomConfig();
    }
    int times = 10_000;
    long start = System.nanoTime();
    for (int i = 0; i < times; i++) {
        createRandomConfig();
    }
    // Nanoseconds -> milliseconds, averaged over the measured calls.
    double cost = ((double) (System.nanoTime() - start)) / 1_000_000 / times;
    // Numeric assertion so that a failure reports both measured values.
    assertThat(cost * 10).isLessThan(createConfCost);
}

/**
 * Measures the baseline used by the cost tests: the average time of one
 * {@code createConfig()} call.
 *
 * @return elapsed milliseconds per call, averaged over 10,000 timed calls that follow 1,000
 *     untimed warm-up calls
 */
private double createConfigCost() {
    final int warmUpRounds = 1000;
    final int measuredRounds = 10_000;

    // Untimed warm-up.
    int round = 0;
    while (round < warmUpRounds) {
        createConfig();
        round++;
    }

    final long begin = System.nanoTime();
    for (round = 0; round < measuredRounds; round++) {
        createConfig();
    }
    final long elapsedNanos = System.nanoTime() - begin;

    // Nanoseconds -> milliseconds, then average per call.
    return ((double) elapsedNanos) / 1_000_000 / measuredRounds;
}

/**
 * Baseline workload: instantiates a Hadoop {@code Configuration} with its no-arg constructor
 * and sets the single entry {@code "a" -> "a"} on it. The instance is discarded.
 */
private void createConfig() {
    new org.apache.hadoop.conf.Configuration().set("a", "a");
}

/**
 * Workload under test: creates a format through {@code OrcFileFormatFactory} from a fixed set
 * of options ({@code "haha" -> "1"}, {@code "compress" -> "zlib"}) and a {@code FormatContext}
 * built with {@code 1024} for both numeric arguments. The created format is discarded; only
 * the construction cost matters to the callers.
 */
private void create() {
    Options options = new Options();
    options.setString("haha", "1");
    options.setString("compress", "zlib");
    // Result intentionally not kept: the tests only time the construction.
    new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024));
}

/**
 * Workload under test with a varying option set: same as the fixed options
 * ({@code "haha" -> "1"}, {@code "compress" -> "zlib"}) plus an entry {@code "a"} whose value
 * is a fresh {@code Math.random()} result on every call. The created format is discarded.
 *
 * <p>NOTE(review): the random value presumably exists so that no two calls share identical
 * options; confirm against the intent of the optimization being tested.
 */
private void createRandomConfig() {
    Options options = new Options();
    options.setString("haha", "1");
    options.setString("compress", "zlib");
    options.setString("a", String.valueOf(Math.random()));
    // Result intentionally not kept: the tests only time the construction.
    new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024));
}
}

0 comments on commit 683fa19

Please sign in to comment.