From c4afd179c1983a382b8a5197d800b0f5dba254de Mon Sep 17 00:00:00 2001 From: Shen Hong Date: Mon, 25 Jan 2021 03:17:19 +0800 Subject: [PATCH] [HUDI-1476] Introduce unit test infra for java client (#2478) --- .../org/apache/hudi/testutils/Assertions.java | 0 .../testutils/MetadataMergeWriteStatus.java | 0 hudi-client/hudi-java-client/pom.xml | 6 + .../testutils/HoodieJavaClientTestBase.java | 48 ++++ .../HoodieJavaClientTestHarness.java | 242 ++++++++++++++++++ 5 files changed, 296 insertions(+) rename hudi-client/{hudi-spark-client => hudi-client-common}/src/test/java/org/apache/hudi/testutils/Assertions.java (100%) rename hudi-client/{hudi-spark-client => hudi-client-common}/src/test/java/org/apache/hudi/testutils/MetadataMergeWriteStatus.java (100%) create mode 100644 hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestBase.java create mode 100644 hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/Assertions.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/Assertions.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/Assertions.java rename to hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/Assertions.java diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/MetadataMergeWriteStatus.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/MetadataMergeWriteStatus.java similarity index 100% rename from hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/testutils/MetadataMergeWriteStatus.java rename to hudi-client/hudi-client-common/src/test/java/org/apache/hudi/testutils/MetadataMergeWriteStatus.java diff --git a/hudi-client/hudi-java-client/pom.xml b/hudi-client/hudi-java-client/pom.xml index 06ed892e9dbaa..0ef741f924772 100644 --- a/hudi-client/hudi-java-client/pom.xml +++ b/hudi-client/hudi-java-client/pom.xml @@ -37,6 +37,12 @@ ${parent.version} + + + org.apache.parquet + parquet-avro + + org.apache.hudi diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestBase.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestBase.java new file mode 100644 index 0000000000000..171df5fe59356 --- /dev/null +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestBase.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.testutils; + +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.io.IOException; + +/** + * Base Class providing setup/cleanup and utility methods for testing Hoodie Client facing tests. + */ +public class HoodieJavaClientTestBase extends HoodieJavaClientTestHarness { + + @BeforeEach + public void setUp() throws Exception { + initResources(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanupResources(); + } + + // Functional Interfaces for passing lambda and Hoodie Write API contexts + + @FunctionalInterface + public interface Function2 { + + R apply(T1 v1, T2 v2) throws IOException; + } +} diff --git a/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java new file mode 100644 index 0000000000000..2b4c5d85c8f15 --- /dev/null +++ b/hudi-client/hudi-java-client/src/test/java/org/apache/hudi/testutils/HoodieJavaClientTestHarness.java @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.testutils; + +import org.apache.hudi.client.HoodieJavaWriteClient; +import org.apache.hudi.client.common.HoodieJavaEngineContext; +import org.apache.hudi.common.engine.EngineProperty; +import org.apache.hudi.common.engine.TaskContextSupplier; +import org.apache.hudi.common.fs.FSUtils; +import org.apache.hudi.common.model.HoodieTableType; +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.table.view.HoodieTableFileSystemView; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestUtils; +import org.apache.hudi.common.testutils.minicluster.HdfsTestService; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieWriteConfig; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.TestInfo; + +import java.io.IOException; +import java.io.Serializable; +import java.util.concurrent.ExecutorService; +import java.util.function.Supplier; + +/** + * The test harness for resource initialization and cleanup. + */ +public abstract class HoodieJavaClientTestHarness extends HoodieCommonTestHarness implements Serializable { + + private static final Logger LOG = LogManager.getLogger(HoodieJavaClientTestHarness.class); + + private String testMethodName; + protected transient Configuration hadoopConf = null; + protected transient HoodieJavaEngineContext context = null; + protected transient TestJavaTaskContextSupplier taskContextSupplier = null; + protected transient FileSystem fs; + protected transient ExecutorService executorService; + protected transient HoodieTableMetaClient metaClient; + protected transient HoodieTableFileSystemView tableView; + protected transient HoodieJavaWriteClient writeClient; + + // dfs + protected String dfsBasePath; + protected transient HdfsTestService hdfsTestService; + protected transient MiniDFSCluster dfsCluster; + protected transient DistributedFileSystem dfs; + + @BeforeEach + public void setTestMethodName(TestInfo testInfo) { + if (testInfo.getTestMethod().isPresent()) { + testMethodName = testInfo.getTestMethod().get().getName(); + } else { + testMethodName = "Unknown"; + } + } + + /** + * Initializes resource group for the subclasses of {@link HoodieJavaClientTestHarness}. + */ + public void initResources() throws IOException { + initPath(); + hadoopConf = new Configuration(); + taskContextSupplier = new TestJavaTaskContextSupplier(); + context = new HoodieJavaEngineContext(hadoopConf, taskContextSupplier); + initTestDataGenerator(); + initFileSystem(); + initMetaClient(); + } + + public class TestJavaTaskContextSupplier extends TaskContextSupplier { + int partitionId = 0; + int stageId = 0; + long attemptId = 0; + + public void reset() { + stageId += 1; + } + + @Override + public Supplier getPartitionIdSupplier() { + return () -> partitionId; + } + + @Override + public Supplier getStageIdSupplier() { + return () -> stageId; + } + + @Override + public Supplier getAttemptIdSupplier() { + return () -> attemptId; + } + + @Override + public Option getProperty(EngineProperty prop) { + return Option.empty(); + } + } + + /** + * Cleanups resource group for the subclasses of {@link HoodieJavaClientTestHarness}. + */ + public void cleanupResources() throws IOException { + cleanupClients(); + cleanupTestDataGenerator(); + cleanupFileSystem(); + cleanupDFS(); + cleanupExecutorService(); + System.gc(); + } + + /** + * Initializes a file system with the hadoop configuration of Spark context. + */ + protected void initFileSystem() { + initFileSystemWithConfiguration(hadoopConf); + } + + /** + * Cleanups file system. + * + * @throws IOException + */ + protected void cleanupFileSystem() throws IOException { + if (fs != null) { + LOG.warn("Closing file-system instance used in previous test-run"); + fs.close(); + fs = null; + } + } + + /** + * Initializes an instance of {@link HoodieTableMetaClient} with a special table type specified by + * {@code getTableType()}. + * + * @throws IOException + */ + protected void initMetaClient() throws IOException { + initMetaClient(getTableType()); + } + + protected void initMetaClient(HoodieTableType tableType) throws IOException { + if (basePath == null) { + throw new IllegalStateException("The base path has not been initialized."); + } + + metaClient = HoodieTestUtils.init(hadoopConf, basePath, tableType); + } + + /** + * Cleanups hoodie clients. + */ + protected void cleanupClients() { + if (metaClient != null) { + metaClient = null; + } + if (writeClient != null) { + writeClient.close(); + writeClient = null; + } + if (tableView != null) { + tableView.close(); + tableView = null; + } + } + + /** + * Cleanups the distributed file system. + * + * @throws IOException + */ + protected void cleanupDFS() throws IOException { + if (hdfsTestService != null) { + hdfsTestService.stop(); + dfsCluster.shutdown(); + hdfsTestService = null; + dfsCluster = null; + dfs = null; + } + // Need to closeAll to clear FileSystem.Cache, required because DFS and LocalFS used in the + // same JVM + FileSystem.closeAll(); + } + + /** + * Cleanups the executor service. + */ + protected void cleanupExecutorService() { + if (this.executorService != null) { + this.executorService.shutdownNow(); + this.executorService = null; + } + } + + private void initFileSystemWithConfiguration(Configuration configuration) { + if (basePath == null) { + throw new IllegalStateException("The base path has not been initialized."); + } + + fs = FSUtils.getFs(basePath, configuration); + if (fs instanceof LocalFileSystem) { + LocalFileSystem lfs = (LocalFileSystem) fs; + // With LocalFileSystem, with checksum disabled, fs.open() returns an inputStream which is FSInputStream + // This causes ClassCastExceptions in LogRecordScanner (and potentially other places) calling fs.open + // So, for the tests, we enforce checksum verification to circumvent the problem + lfs.setVerifyChecksum(true); + } + } + + public HoodieJavaWriteClient getHoodieWriteClient(HoodieWriteConfig cfg) { + if (null != writeClient) { + writeClient.close(); + writeClient = null; + } + writeClient = new HoodieJavaWriteClient(context, cfg); + return writeClient; + } +}