diff --git a/LICENSE b/LICENSE
index dc4edaaf33421..1e2174731f161 100644
--- a/LICENSE
+++ b/LICENSE
@@ -206,6 +206,7 @@
This product includes code from Apache Hive.
* org.apache.hadoop.hive.ql.io.CombineHiveInputFormat copied to org.apache.hudi.hadoop.hive.HoodieCombineHiveInputFormat
+* org.apache.hadoop.hive.serde2.ColumnProjectionUtils copied and modified to org.apache.hudi.hadoop.HoodieColumnProjectionUtils
Copyright: 2011-2019 The Apache Software Foundation
Home page: http://hive.apache.org/
diff --git a/docker/demo/compaction.commands b/docker/demo/compaction.commands
index 9bb8eb82aa785..6abdad743e57c 100644
--- a/docker/demo/compaction.commands
+++ b/docker/demo/compaction.commands
@@ -19,4 +19,7 @@ connect --path /user/hive/warehouse/stock_ticks_mor
compactions show all
compaction schedule --hoodieConfigs hoodie.compact.inline.max.delta.commits=1
compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
-
+connect --path /user/hive/warehouse/stock_ticks_mor_bs
+compactions show all
+compaction schedule --hoodieConfigs hoodie.compact.inline.max.delta.commits=1
+compaction run --parallelism 2 --sparkMemory 1G --schemaFilePath /var/demo/config/schema.avsc --retry 1
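
This hudi-cli session mirrors the earlier stock_ticks_mor block: it connects to the bootstrapped MOR table, lowers hoodie.compact.inline.max.delta.commits to 1 so a single delta commit is enough for a compaction to be scheduled, then runs it. The same threshold can instead be applied on the write path; a minimal spark-shell sketch under that assumption (the input batch `df` is hypothetical; the option keys are standard Hudi configs also used later in this patch):

    import org.apache.spark.sql.SaveMode
    import org.apache.hudi.DataSourceWriteOptions
    import org.apache.hudi.config.HoodieWriteConfig

    // Sketch only: upsert a hypothetical batch `df` into the bootstrapped MOR
    // table, triggering inline compaction after every delta commit.
    df.write.format("org.apache.hudi").
      option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
      option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
      option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
      option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
      option("hoodie.compact.inline", "true").
      option("hoodie.compact.inline.max.delta.commits", "1").
      option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_mor_bs").
      mode(SaveMode.Append).
      save("/user/hive/warehouse/stock_ticks_mor_bs")
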
diff --git a/docker/demo/hive-batch1.commands b/docker/demo/hive-batch1.commands
index 93bf3b67930aa..021c6d55b800d 100644
--- a/docker/demo/hive-batch1.commands
+++ b/docker/demo/hive-batch1.commands
@@ -25,4 +25,12 @@ select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GO
select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG';
+
!quit
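
The added queries validate the three bootstrapped tables (stock_ticks_cow_bs, stock_ticks_mor_bs_ro, stock_ticks_mor_bs_rt) the same way the original tables are validated. The demo feeds these .commands files to beeline, but the same check can be scripted over plain Hive JDBC; a sketch, assuming the demo's HiveServer2 endpoint and hive/hive credentials used elsewhere in this patch, with the Hive JDBC driver on the classpath:

    import java.sql.DriverManager

    // Sketch: run one batch-1 validation query programmatically.
    val conn = DriverManager.getConnection("jdbc:hive2://hiveserver:10000", "hive", "hive")
    val rs = conn.createStatement().executeQuery(
      "select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'")
    while (rs.next()) {
      println(s"${rs.getString(1)} -> ${rs.getString(2)}")
    }
    conn.close()
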
diff --git a/docker/demo/hive-batch2-after-compaction.commands b/docker/demo/hive-batch2-after-compaction.commands
index 6b087019d5cca..06582a309ae00 100644
--- a/docker/demo/hive-batch2-after-compaction.commands
+++ b/docker/demo/hive-batch2-after-compaction.commands
@@ -23,4 +23,10 @@ select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = '
select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG';
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG';
+select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG';
+
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG';
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG';
+
!quit
diff --git a/docker/demo/hive-incremental-cow.commands b/docker/demo/hive-incremental-cow.commands
index 7f43548071863..702b2afa52733 100644
--- a/docker/demo/hive-incremental-cow.commands
+++ b/docker/demo/hive-incremental-cow.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_cow.consume.start.timestamp='${min.commit.time}';
select symbol, ts, volume, open, close from stock_ticks_cow where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
+set hoodie.stock_ticks_cow_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_cow_bs.consume.max.commits=3;
+set hoodie.stock_ticks_cow_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
!quit
diff --git a/docker/demo/hive-incremental-mor-ro.commands b/docker/demo/hive-incremental-mor-ro.commands
index 8b97c0aac9b5e..51683c010a496 100644
--- a/docker/demo/hive-incremental-mor-ro.commands
+++ b/docker/demo/hive-incremental-mor-ro.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';
select symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
+set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
+set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
!quit
diff --git a/docker/demo/hive-incremental-mor-rt.commands b/docker/demo/hive-incremental-mor-rt.commands
index a81fb77e077d8..c29fc7ce55730 100644
--- a/docker/demo/hive-incremental-mor-rt.commands
+++ b/docker/demo/hive-incremental-mor-rt.commands
@@ -23,5 +23,11 @@ set hoodie.stock_ticks_mor.consume.start.timestamp='${min.commit.time}';
select symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG' and `_hoodie_commit_time` > '${min.commit.time}';
+set hoodie.stock_ticks_mor_bs.consume.mode=INCREMENTAL;
+set hoodie.stock_ticks_mor_bs.consume.max.commits=3;
+set hoodie.stock_ticks_mor_bs.consume.start.timestamp='00000000000001';
+
+select symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG' and `_hoodie_commit_time` > '00000000000001';
+
!quit
diff --git a/docker/demo/sparksql-batch1.commands b/docker/demo/sparksql-batch1.commands
index 727aa1633154d..4de2486c6ce58 100644
--- a/docker/demo/sparksql-batch1.commands
+++ b/docker/demo/sparksql-batch1.commands
@@ -27,4 +27,14 @@ spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_ro where symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
+// Bootstrapped Copy-On-Write table
+spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG'").show(100, false)
+
+// Bootstrapped Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG'").show(100, false)
+
System.exit(0)
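
These statements query the bootstrapped tables through their Hive-synced names. The bootstrapped COW table can also be read straight off its base path with the Hudi datasource, exercising the same bootstrap-aware reader without going through the metastore; a sketch using the path written by this patch:

    // Sketch: snapshot-read the bootstrapped COW table directly by path.
    val bsDF = spark.read.format("org.apache.hudi").
      load("/user/hive/warehouse/stock_ticks_cow_bs/*/*/*")
    bsDF.filter("symbol = 'GOOG'").
      select("_hoodie_commit_time", "symbol", "ts", "volume", "open", "close").
      show(100, false)
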
diff --git a/docker/demo/sparksql-batch2.commands b/docker/demo/sparksql-batch2.commands
index 391e11b971a27..739d991dbbc1d 100644
--- a/docker/demo/sparksql-batch2.commands
+++ b/docker/demo/sparksql-batch2.commands
@@ -26,4 +26,14 @@ spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from s
spark.sql("select symbol, max(ts) from stock_ticks_mor_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_rt where symbol = 'GOOG'").show(100, false)
+// Bootstrapped Copy-On-Write table

+spark.sql("select symbol, max(ts) from stock_ticks_cow_bs group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs where symbol = 'GOOG'").show(100, false)
+
+// Bootstrapped Merge-On-Read table
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_ro group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_ro where symbol = 'GOOG'").show(100, false)
+spark.sql("select symbol, max(ts) from stock_ticks_mor_bs_rt group by symbol HAVING symbol = 'GOOG'").show(100, false)
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_mor_bs_rt where symbol = 'GOOG'").show(100, false)
+
System.exit(0)
diff --git a/docker/demo/sparksql-bootstrap-prep-source.commands b/docker/demo/sparksql-bootstrap-prep-source.commands
new file mode 100644
index 0000000000000..23db3e4d38c4b
--- /dev/null
+++ b/docker/demo/sparksql-bootstrap-prep-source.commands
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.spark.sql.functions.col
+
+val df = spark.read.format("org.apache.hudi").load("/user/hive/warehouse/stock_ticks_cow/*/*/*").drop("_hoodie_commit_time", "_hoodie_record_key", "_hoodie_file_name", "_hoodie_commit_seqno", "_hoodie_partition_path")
+df.write.format("parquet").save("/user/hive/warehouse/stock_ticks_cow_bs_src/2018/08/31/")
+System.exit(0)
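
This new script prepares the bootstrap source: it reads the existing stock_ticks_cow table, drops the five _hoodie meta columns, and lands a plain parquet copy under stock_ticks_cow_bs_src for the bootstrap step to pick up. A quick sanity check on the result, as a sketch:

    // Sketch: confirm the prepared source is plain parquet with no Hudi
    // metadata columns left over.
    val src = spark.read.parquet("/user/hive/warehouse/stock_ticks_cow_bs_src/2018/08/31")
    assert(!src.columns.exists(_.startsWith("_hoodie_")), "meta columns should have been dropped")
    src.printSchema()
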
diff --git a/docker/demo/sparksql-incremental.commands b/docker/demo/sparksql-incremental.commands
index 8e3e153e27e7e..febfcd28a1116 100644
--- a/docker/demo/sparksql-incremental.commands
+++ b/docker/demo/sparksql-incremental.commands
@@ -52,8 +52,38 @@ spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, cl
mode(SaveMode.Overwrite).
save("/user/hive/warehouse/stock_ticks_derived_mor");
-spark.sql("show tables").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_ro").show(20, false)
spark.sql("select count(*) from stock_ticks_derived_mor_rt").show(20, false)
-System.exit(0);
\ No newline at end of file
+val hoodieIncQueryBsDF = spark.read.format("org.apache.hudi").
+ option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
+ option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, "00000000000001").
+ load("/user/hive/warehouse/stock_ticks_cow_bs");
+hoodieIncQueryBsDF.registerTempTable("stock_ticks_cow_bs_incr")
+spark.sql("select `_hoodie_commit_time`, symbol, ts, volume, open, close from stock_ticks_cow_bs_incr where symbol = 'GOOG'").show(100, false);
+
+spark.sql("select key, `_hoodie_partition_path` as datestr, symbol, ts, open, close from stock_ticks_cow_bs_incr").
+ write.format("org.apache.hudi").
+ option("hoodie.insert.shuffle.parallelism", "2").
+ option("hoodie.upsert.shuffle.parallelism","2").
+ option(DataSourceWriteOptions.TABLE_TYPE_OPT_KEY, DataSourceWriteOptions.MOR_TABLE_TYPE_OPT_VAL).
+ option(DataSourceWriteOptions.OPERATION_OPT_KEY, DataSourceWriteOptions.UPSERT_OPERATION_OPT_VAL).
+ option(DataSourceWriteOptions.RECORDKEY_FIELD_OPT_KEY, "key").
+ option(DataSourceWriteOptions.PARTITIONPATH_FIELD_OPT_KEY, "datestr").
+ option(DataSourceWriteOptions.PRECOMBINE_FIELD_OPT_KEY, "ts").
+ option(HoodieWriteConfig.TABLE_NAME, "stock_ticks_derived_mor_bs").
+ option(DataSourceWriteOptions.HIVE_TABLE_OPT_KEY, "stock_ticks_derived_mor_bs").
+ option(DataSourceWriteOptions.HIVE_DATABASE_OPT_KEY, "default").
+ option(DataSourceWriteOptions.HIVE_URL_OPT_KEY, "jdbc:hive2://hiveserver:10000").
+ option(DataSourceWriteOptions.HIVE_USER_OPT_KEY, "hive").
+ option(DataSourceWriteOptions.HIVE_PASS_OPT_KEY, "hive").
+ option(DataSourceWriteOptions.HIVE_SYNC_ENABLED_OPT_KEY, "true").
+ option(DataSourceWriteOptions.HIVE_PARTITION_FIELDS_OPT_KEY, "datestr").
+ mode(SaveMode.Overwrite).
+ save("/user/hive/warehouse/stock_ticks_derived_mor_bs");
+
+spark.sql("show tables").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor_bs_ro").show(20, false)
+spark.sql("select count(*) from stock_ticks_derived_mor_bs_rt").show(20, false)
+
+System.exit(0);
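
The incremental read begins at '00000000000001', which predates any regular commit on the bootstrapped table, so the pull covers everything written after bootstrap. For a repeated pull, the natural checkpoint is the largest commit time in the batch just consumed, since BEGIN_INSTANTTIME is exclusive; a sketch continuing from the hoodieIncQueryBsDF defined in the script above (names in scope there):

    import org.apache.spark.sql.functions.max

    // Sketch: resume the incremental pull from the last consumed instant.
    val consumedUpTo = hoodieIncQueryBsDF.agg(max("_hoodie_commit_time")).first().getString(0)
    val nextBatchDF = spark.read.format("org.apache.hudi").
      option(DataSourceReadOptions.QUERY_TYPE_OPT_KEY, DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL).
      option(DataSourceReadOptions.BEGIN_INSTANTTIME_OPT_KEY, consumedUpTo).
      load("/user/hive/warehouse/stock_ticks_cow_bs")
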
diff --git a/hudi-cli/hudi-cli.sh b/hudi-cli/hudi-cli.sh
index b6e708c14436d..bbfba85a8010e 100755
--- a/hudi-cli/hudi-cli.sh
+++ b/hudi-cli/hudi-cli.sh
@@ -25,4 +25,7 @@ if [ -z "$CLIENT_JAR" ]; then
echo "Client jar location not set, please set it in conf/hudi-env.sh"
fi
-java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:$DIR/target/lib/*:$HOODIE_JAR:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
+OTHER_JARS=`ls ${DIR}/target/lib/* | grep -v 'hudi-[^/]*jar' | tr '\n' ':'`
+
+echo "Running : java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@"
+java -cp ${HADOOP_CONF_DIR}:${SPARK_CONF_DIR}:${HOODIE_JAR}:${OTHER_JARS}:${CLIENT_JAR} -DSPARK_CONF_DIR=${SPARK_CONF_DIR} -DHADOOP_CONF_DIR=${HADOOP_CONF_DIR} org.springframework.shell.Bootstrap $@
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index 388e4145a18e2..ec15cce76b3fa 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -139,26 +139,6 @@
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-client</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-common</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-hive-sync</artifactId>
-      <version>${project.version}</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.hudi</groupId>
-      <artifactId>hudi-utilities_${scala.binary.version}</artifactId>
-      <version>${project.version}</version>
-    </dependency>
    <dependency>
      <groupId>org.apache.hudi</groupId>
      <artifactId>hudi-common</artifactId>
@@ -198,6 +178,12 @@
+    <dependency>
+      <groupId>org.apache.hudi</groupId>
+      <artifactId>hudi-utilities-bundle_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+
    <dependency>
      <groupId>log4j</groupId>
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
index 9b55fe26eaeed..ffbf70e12a9bc 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/CompactionCommand.java
@@ -213,7 +213,7 @@ public String scheduleCompact(@CliOption(key = "sparkMemory", unspecifiedDefault
if (exitCode != 0) {
return "Failed to run compaction for " + compactionInstantTime;
}
- return "Compaction successfully completed for " + compactionInstantTime;
+ return "Attempted to schedule compaction for " + compactionInstantTime;
}
@CliCommand(value = "compaction run", help = "Run Compaction for given instant time")
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java
index 2e32515cffe09..ef76ee4e2f1ad 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/FileSystemViewCommand.java
@@ -22,6 +22,7 @@
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
+import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.FileSlice;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -239,7 +240,7 @@ private HoodieTableFileSystemView buildFileSystemView(String globRegex, String m
new HoodieTableMetaClient(client.getHadoopConf(), client.getBasePath(), true);
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", client.getBasePath(), globRegex);
- FileStatus[] statuses = fs.globStatus(new Path(globPath));
+ List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
Stream<HoodieInstant> instantsStream;
HoodieTimeline timeline;
@@ -269,6 +270,6 @@ private HoodieTableFileSystemView buildFileSystemView(String globRegex, String m
HoodieTimeline filteredTimeline = new HoodieDefaultTimeline(instantsStream,
(Function<HoodieInstant, Option<byte[]>> & Serializable) metaClient.getActiveTimeline()::getInstantDetails);
- return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses);
+ return new HoodieTableFileSystemView(metaClient, filteredTimeline, statuses.toArray(new FileStatus[0]));
}
}
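
This is one of several places in the patch where a raw fs.globStatus call is replaced by FSUtils.getGlobStatusExcludingMetaFolder, so that files under the .hoodie metafolder cannot leak into file-system views or stats. The helper's body is not shown in this diff; a minimal sketch of the filtering it implies, written against Hadoop's FileSystem API (the real FSUtils implementation may differ):

    import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

    // Sketch: glob, then drop anything under the .hoodie metadata folder.
    def globExcludingMetaFolder(fs: FileSystem, globPath: Path): Seq[FileStatus] = {
      val matched = Option(fs.globStatus(globPath)).getOrElse(Array.empty[FileStatus])
      matched.filterNot(_.getPath.toString.contains("/.hoodie")).toSeq
    }
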
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
index a5fe4fe53826f..f8e82ae618581 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/HoodieLogFileCommand.java
@@ -22,6 +22,7 @@
import org.apache.hudi.cli.HoodiePrintHelper;
import org.apache.hudi.cli.HoodieTableHeaderFields;
import org.apache.hudi.cli.TableHeader;
+import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieRecordPayload;
@@ -53,7 +54,6 @@
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
@@ -82,7 +82,7 @@ public String showLogFileCommits(
throws IOException {
FileSystem fs = HoodieCLI.getTableMetaClient().getFs();
- List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
+ List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream()
.map(status -> status.getPath().toString()).collect(Collectors.toList());
Map<String, List<Tuple3<HoodieLogBlockType, Tuple2<Map<HeaderMetadataType, String>, Map<HeaderMetadataType, String>>, Integer>>> commitCountAndMetadata =
new HashMap<>();
@@ -175,7 +175,7 @@ public String showLogFileRecords(
HoodieTableMetaClient client = HoodieCLI.getTableMetaClient();
FileSystem fs = client.getFs();
- List<String> logFilePaths = Arrays.stream(fs.globStatus(new Path(logFilePathPattern)))
+ List<String> logFilePaths = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(logFilePathPattern)).stream()
.map(status -> status.getPath().toString()).sorted(Comparator.reverseOrder())
.collect(Collectors.toList());
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
index 72cf6c02da3b4..66c5563102848 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/StatsCommand.java
@@ -118,7 +118,7 @@ public String fileSizeStats(
FileSystem fs = HoodieCLI.fs;
String globPath = String.format("%s/%s/*", HoodieCLI.getTableMetaClient().getBasePath(), globRegex);
- FileStatus[] statuses = fs.globStatus(new Path(globPath));
+ List<FileStatus> statuses = FSUtils.getGlobStatusExcludingMetaFolder(fs, new Path(globPath));
// max, min, #small files < 10MB, 50th, avg, 95th
Histogram globalHistogram = new Histogram(new UniformReservoir(MAX_FILES));
diff --git a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
index 78ae35e198b1c..9c947e4d407e3 100644
--- a/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
+++ b/hudi-cli/src/main/java/org/apache/hudi/cli/commands/TableCommand.java
@@ -88,8 +88,7 @@ public String createTable(
@CliOption(key = {"archiveLogFolder"}, help = "Folder Name for storing archived timeline") String archiveFolder,
@CliOption(key = {"layoutVersion"}, help = "Specific Layout Version to use") Integer layoutVersion,
@CliOption(key = {"payloadClass"}, unspecifiedDefaultValue = "org.apache.hudi.common.model.HoodieAvroPayload",
- help = "Payload Class") final String payloadClass)
- throws IOException {
+ help = "Payload Class") final String payloadClass) throws IOException {
boolean initialized = HoodieCLI.initConf();
HoodieCLI.initFS(initialized);
diff --git a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
index 4c7ce8819d534..7d5cee6939e86 100644
--- a/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
+++ b/hudi-cli/src/test/java/org/apache/hudi/cli/commands/TestArchivedCommitsCommand.java
@@ -62,6 +62,7 @@ public void init() throws IOException {
// Create table and connect
String tableName = "test_table";
tablePath = basePath + File.separator + tableName;
+
new TableCommand().createTable(
tablePath, tableName,
"COPY_ON_WRITE", "", 1, "org.apache.hudi.common.model.HoodieAvroPayload");
diff --git a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
index 9782b46b6ab12..2a1520a77fd8f 100644
--- a/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
+++ b/hudi-client/src/main/java/org/apache/hudi/client/HoodieWriteClient.java
@@ -149,6 +149,35 @@ public JavaRDD<HoodieRecord<T>> filterExists(JavaRDD<HoodieRecord<T>> hoodieReco
return recordsWithLocation.filter(v1 -> !v1.isCurrentLocationKnown());
}
+ /**
+ * Main API to run bootstrap to hudi.
+ */
+ public void bootstrap(Option