From 683fa199e270459b435b58a2a22a4d8d5c6120ca Mon Sep 17 00:00:00 2001 From: zyz33 <35164637+zhangyazhe@users.noreply.github.com> Date: Mon, 16 Dec 2024 21:18:58 +0800 Subject: [PATCH] [orc] Optimize configuration creating in orc file format (#4716) --- .../paimon/format/orc/OrcFileFormat.java | 4 +- .../paimon/format/orc/OrcFileFormatTest.java | 64 +++++++++++++++++++ 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java index c3521c6f1a37..9acea56ab393 100644 --- a/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java +++ b/paimon-format/src/main/java/org/apache/paimon/format/orc/OrcFileFormat.java @@ -74,9 +74,9 @@ public class OrcFileFormat extends FileFormat { public OrcFileFormat(FormatContext formatContext) { super(IDENTIFIER); this.orcProperties = getOrcProperties(formatContext.options(), formatContext); - this.readerConf = new org.apache.hadoop.conf.Configuration(); + this.readerConf = new org.apache.hadoop.conf.Configuration(false); this.orcProperties.forEach((k, v) -> readerConf.set(k.toString(), v.toString())); - this.writerConf = new org.apache.hadoop.conf.Configuration(); + this.writerConf = new org.apache.hadoop.conf.Configuration(false); this.orcProperties.forEach((k, v) -> writerConf.set(k.toString(), v.toString())); this.readBatchSize = formatContext.readBatchSize(); this.writeBatchSize = formatContext.writeBatchSize(); diff --git a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java index 46bf6afe6613..9e5769595c32 100644 --- a/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java +++ b/paimon-format/src/test/java/org/apache/paimon/format/orc/OrcFileFormatTest.java @@ -83,4 +83,68 @@ public void testSupportedDataTypes() { 
dataFields.add(new DataField(index++, "decimal_type", DataTypes.DECIMAL(10, 3))); orc.validateDataFields(new RowType(dataFields)); }
+
+    /** Creating the format must be 500x cheaper than a default Hadoop Configuration. */
+    @Test
+    public void testCreateCost() {
+        double createConfCost = createConfigCost();
+        double cost = averageMillis(this::create);
+        assertThat(cost * 500).isLessThan(createConfCost);
+    }
+
+    /** Same comparison with a 10x margin and an option value that changes on every run. */
+    @Test
+    public void testCreateCostWithRandomConfig() {
+        double createConfCost = createConfigCost();
+        double cost = averageMillis(this::createRandomConfig);
+        assertThat(cost * 10).isLessThan(createConfCost);
+    }
+
+    /** Returns the average cost, in milliseconds, of one {@link #createConfig()} call. */
+    private double createConfigCost() {
+        return averageMillis(this::createConfig);
+    }
+
+    /**
+     * Runs {@code action} 1000 times untimed, then times 10,000 further runs.
+     *
+     * @param action the operation to measure
+     * @return the average wall-clock time of one timed run, in milliseconds
+     */
+    private double averageMillis(Runnable action) {
+        // Untimed warm-up runs (presumably to let the JIT settle before measuring).
+        for (int i = 0; i < 1000; i++) {
+            action.run();
+        }
+        int times = 10_000;
+        long start = System.nanoTime();
+        for (int i = 0; i < times; i++) {
+            action.run();
+        }
+        // Nanoseconds -> milliseconds, then averaged over the timed runs.
+        return ((double) (System.nanoTime() - start)) / 1_000_000 / times;
+    }
+
+    /** Baseline: builds a Hadoop configuration with the no-arg constructor and sets one key. */
+    private void createConfig() {
+        org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
+        conf.set("a", "a");
+    }
+
+    /** Creates an ORC file format from a fixed set of options; the result is discarded. */
+    private void create() {
+        Options options = new Options();
+        options.setString("haha", "1");
+        options.setString("compress", "zlib");
+        new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024));
+    }
+
+    /** Creates an ORC file format whose option {@code a} carries a fresh random value. */
+    private void createRandomConfig() {
+        Options options = new Options();
+        options.setString("haha", "1");
+        options.setString("compress", "zlib");
+        options.setString("a", String.valueOf(Math.random()));
+        new OrcFileFormatFactory().create(new FormatContext(options, 1024, 1024));
+    }
 }