Skip to content

Commit

Permalink
Add Initial Java Support for GDS to KvikIO (#396)
Browse files Browse the repository at this point in the history
This PR is intended to add initial support for Java binding to GDS as part of the KvikIO library. In this PR are the minimal set of bindings required to support synchronous read and write IO operations via GDS as well as a single example to demonstrate how the bindings can be used alongside other CUDA libraries, such as JCuda. Full support for the GDS CuFile API, including batch and asynchronous IO, has not yet been implemented and more sophisticated error/exception handling is not yet in place. There is a README located within kvikio/java detailing how this new functionality can be compiled and built locally, along with detailed instructions on how to run the included usage example.

Authors:
  - Alex Sloboda (https://github.com/aslobodaNV)
  - https://github.com/jakirkham
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)

URL: #396
  • Loading branch information
aslobodaNV authored Nov 20, 2024
1 parent 8d19a29 commit 19e44c0
Show file tree
Hide file tree
Showing 15 changed files with 908 additions and 0 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
- checks
- conda-cpp-build
- conda-cpp-tests
- conda-java-tests
- conda-python-build
- conda-python-tests
- docs-build
Expand Down Expand Up @@ -72,6 +73,16 @@ jobs:
if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
with:
build_type: pull-request
conda-java-tests:
needs: conda-cpp-build
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-python-build:
needs: conda-cpp-build
secrets: inherit
Expand Down
12 changes: 12 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,15 @@ jobs:
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
conda-java-tests:
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: nightly
branch: ${{ inputs.branch }}
date: ${{ inputs.date }}
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
6 changes: 6 additions & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR}
# Get <major>.<minor> for next version
NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}')
NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}')
NEXT_PATCH=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[3]}')
NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR}

# Need to distutils-normalize the original version
NEXT_SHORT_TAG_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_SHORT_TAG}'))")
PATCH_PEP440=$(python -c "from packaging.version import Version; print(Version('${NEXT_PATCH}'))")

echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG"

Expand Down Expand Up @@ -52,6 +54,10 @@ for FILE in .github/workflows/*.yaml; do
sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
done

# Java files
NEXT_FULL_JAVA_TAG="${NEXT_SHORT_TAG}.${PATCH_PEP440}-SNAPSHOT"
sed_runner "s|<version>.*-SNAPSHOT</version>|<version>${NEXT_FULL_JAVA_TAG}</version>|g" java/pom.xml

# .devcontainer files
find .devcontainer/ -type f -name devcontainer.json -print0 | while IFS= read -r -d '' filename; do
sed_runner "s@rapidsai/devcontainers:[0-9.]*@rapidsai/devcontainers:${NEXT_SHORT_TAG}@g" "${filename}"
Expand Down
47 changes: 47 additions & 0 deletions ci/test_java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# CI entry point for the Java bindings: creates a conda test environment,
# installs the libkvikio package produced by the C++ build job, and builds the
# Java project with Maven (tests are skipped, see below).

set -euo pipefail

# Source conda's shell integration so that `conda activate` can be used below.
. /opt/conda/etc/profile.d/conda.sh

# Render the `test_java` dependency set into a conda environment file.
# `${RAPIDS_CUDA_VERSION%.*}` drops the last dot-separated component of the
# CUDA version; `tee` both prints the result and writes it to env.yaml.
rapids-logger "Generate java testing dependencies"
rapids-dependency-file-generator \
--output conda \
--file-key test_java \
--matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" | tee env.yaml

rapids-mamba-retry env create --yes -f env.yaml -n test

# Temporarily allow unbound variables for conda activation.
set +u
conda activate test
set -u

# CPP_CHANNEL holds the value printed by rapids-download-conda-from-s3; it is
# used as an extra conda channel for the libkvikio install below.
rapids-logger "Downloading artifacts from previous jobs"
CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

rapids-print-env

rapids-mamba-retry install \
--channel "${CPP_CHANNEL}" \
libkvikio

rapids-logger "Check GPU usage"
nvidia-smi

# From here on, failures are recorded instead of aborting the script: the ERR
# trap sets EXITCODE to 1 when a command fails, and `set +e` lets the remaining
# commands run so the final exit status reflects any failure.
EXITCODE=0
trap "EXITCODE=1" ERR
set +e

# CI/CD machines don't support running GDS, so the test will only make sure the library builds for now
rapids-logger "Run Java tests"
# Create a fresh, empty file at /mnt/nvme/java_test.
mkdir -p /mnt/nvme
rm -f /mnt/nvme/java_test
touch -f /mnt/nvme/java_test
# Build and install the Java project; -DskipTests compiles the tests but does
# not execute them.
pushd java
mvn clean install -DskipTests
popd

rapids-logger "Test script exiting with value: $EXITCODE"
exit ${EXITCODE}
15 changes: 15 additions & 0 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,14 @@ files:
key: test
includes:
- test_python
test_java:
output: none
includes:
- build-universal
- build-cpp
- cuda_version
- cuda
- test_java
channels:
- rapidsai
- rapidsai-nightly
Expand Down Expand Up @@ -388,3 +396,10 @@ dependencies:
- matrix: # All CUDA 11 versions
packages:
- cuda-python>=11.7.1,<12.0a0,<=11.8.3
test_java:
common:
- output_types: conda
packages:
- make
- maven
- openjdk=11.*
33 changes: 33 additions & 0 deletions java/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Java KvikIO Bindings

## Summary
These Java KvikIO bindings for GDS currently support only synchronous read and write IO operations using the underlying cuFile API. Batch IO and asynchronous operations are not yet supported.

## Dependencies
The Java KvikIO bindings have been developed to work on Linux based systems and require [CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) to be installed and for [GDS](https://docs.nvidia.com/gpudirect-storage/troubleshooting-guide/index.html) to be properly enabled. To compile the shared library it is also necessary to have a JDK installed. To run the included example, it is also necessary to install JCuda as it is used to handle memory allocations and the transfer of data between host and GPU memory. JCuda jar files supporting CUDA 12.x can be found here:
[jcuda-12.0.0.jar](https://repo1.maven.org/maven2/org/jcuda/jcuda/12.0.0/jcuda-12.0.0.jar),
[jcuda-natives-12.0.0.jar](https://repo1.maven.org/maven2/org/jcuda/jcuda-natives/12.0.0/jcuda-natives-12.0.0.jar)

For more information on JCuda and potentially more up to date installation instructions or jar files, see here:
[JCuda](http://javagl.de/jcuda.org/), [JCuda Usage](https://github.com/jcuda/jcuda-main/blob/master/USAGE.md), [JCuda Maven Repo](https://mvnrepository.com/artifact/org.jcuda)

## Compilation and examples
An example for how to use the Java KvikIO bindings can be found in `src/test/java/ai/rapids/kvikio/cufile/BasicReadWriteTest.java`

##### Note: This example has a dependency on JCuda so ensure that when running the example the JCuda shared library files are on the JVM library path along with the `libCuFileJNI.so` file.

### Set up a test file target
##### NOTE: the example as written will default to creating a temporary file in your `/tmp` directory. This directory may not be mounted in a compatible manner for use with GDS on your particular system, causing the example to run in compatibility mode. If this is the case, run the following command replacing `/mnt/nvme/` with your mount directory and update `cufile/BasicReadWriteTest.java` to point to the correct file path.

touch /mnt/nvme/java_test

### Compile the shared library and Java files with Maven
##### Note: This will also run the example code

cd kvikio/java/
mvn clean install

### Rerun example code with Maven

cd kvikio/java/
mvn test
146 changes: 146 additions & 0 deletions java/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>ai.rapids.kvikio</groupId>
<artifactId>cufile</artifactId>
<version>25.02.0-SNAPSHOT</version>

<name>cuFile</name>
<description>
This project provides Java bindings for the GPUDirect Storage cuFile library, enabling the GPU to load and
save large amounts of data to and from persistent storage. This is still a work in progress so some APIs may change.
</description>
<url>https://rapids.ai</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<junit.version>5.4.2</junit.version>
<jcuda.version>12.0.0</jcuda.version>
<cmake.version>3.23.2-b1</cmake.version>
</properties>

<dependencies>
<dependency>
<groupId>org.jcuda</groupId>
<artifactId>jcuda</artifactId>
<version>${jcuda.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jcuda</groupId>
<artifactId>jcuda-natives</artifactId>
<version>${jcuda.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<pluginManagement>
<plugins>
<plugin>
  <!--
    The Exec plugin is published as org.codehaus.mojo:exec-maven-plugin.
    Without an explicit groupId Maven assumes org.apache.maven.plugins, where
    no "maven-exec-plugin" artifact exists, so the version pin never applied.
  -->
  <groupId>org.codehaus.mojo</groupId>
  <artifactId>exec-maven-plugin</artifactId>
  <version>1.6.0</version>
</plugin>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<createDirs>true</createDirs>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
  <artifactId>maven-surefire-plugin</artifactId>
  <version>2.22.1</version>
  <configuration>
    <!--
      Put the build directory first on the native library path of the forked
      test JVM so the JNI shared library built by CMake can be found by
      System.loadLibrary.
    -->
    <argLine>-Djava.library.path=${project.build.directory}:${java.library.path}</argLine>
  </configuration>
  <dependencies>
    <!--
      NOTE(review): Surefire 2.22+ ships its own JUnit Platform provider;
      confirm whether this explicit provider dependency is still needed.
    -->
    <dependency>
      <groupId>org.junit.platform</groupId>
      <artifactId>junit-platform-surefire-provider</artifactId>
      <version>1.2.0</version>
    </dependency>
    <dependency>
      <groupId>org.junit.jupiter</groupId>
      <artifactId>junit-jupiter-engine</artifactId>
      <!-- Keep the engine in lock-step with junit-jupiter-api / -params. -->
      <version>${junit.version}</version>
    </dependency>
  </dependencies>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
<plugins>
<!--
Builds the native JNI library with CMake as part of the Maven build.
Two executions are declared: "cmake-generate" configures the CMake project
found in src/main/native into target/native-build, and "cmake-compile"
builds that generated project.
-->
<plugin>
<groupId>com.googlecode.cmake-maven-project</groupId>
<artifactId>cmake-maven-plugin</artifactId>
<version>${cmake.version}</version>
<executions>
<execution>
<id>cmake-generate</id>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<sourcePath>${project.basedir}/src/main/native</sourcePath>
<targetPath>${project.build.directory}/native-build</targetPath>
<options>
<!--
Emit the built shared library directly into the Maven build directory,
the same directory the surefire argLine adds to java.library.path.
-->
<option>-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${project.build.directory}</option>
</options>
</configuration>
</execution>
<execution>
<id>cmake-compile</id>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<!-- Must match the targetPath of the cmake-generate execution above. -->
<projectDirectory>${project.build.directory}/native-build</projectDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
66 changes: 66 additions & 0 deletions java/src/main/java/ai/rapids/kvikio/cufile/CuFile.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package ai.rapids.kvikio.cufile;

/**
 * Loads the cuFile JNI library and opens the associated {@code CuFileDriver}.
 * <p>
 * When this class is initialized it calls {@link #initialize()}, which loads
 * the {@code CuFileJNI} native library, creates a {@code CuFileDriver}, and
 * registers a JVM shutdown hook that closes that driver. Once initialization
 * has succeeded, further calls to {@code initialize()} do nothing.
 * </p>
 * <p>
 * A failed attempt does not throw: the failure is reported on standard error
 * and {@link #libraryLoaded()} keeps returning {@code false}. Because the
 * success flag stays unset, a later call to {@code initialize()} tries again.
 * </p>
 */
public class CuFile {
    // volatile: written while holding the class lock in initialize(), but read
    // without any lock by libraryLoaded().
    private static volatile boolean initialized = false;
    private static CuFileDriver driver;

    static {
        initialize();
    }

    /**
     * Loads the native library and opens the driver if that has not already
     * succeeded. Never throws; failures are reported on standard error.
     */
    static synchronized void initialize() {
        if (initialized) {
            return;
        }

        CuFileDriver opened = null;
        try {
            System.loadLibrary("CuFileJNI");
            opened = new CuFileDriver();

            // Bind the hook to this exact instance rather than to the static
            // field, so it always closes the driver it was registered for.
            final CuFileDriver toClose = opened;
            Runtime.getRuntime().addShutdownHook(new Thread(() -> {
                toClose.close();
            }));

            driver = opened;
            initialized = true;
        } catch (Throwable t) {
            // Throwable (not Exception) is deliberate: a missing native
            // library surfaces as UnsatisfiedLinkError, which is an Error.
            if (opened != null) {
                // The driver was created but no shutdown hook owns it; close
                // it here so it is not leaked.
                try {
                    opened.close();
                } catch (Throwable closeFailure) {
                    t.addSuppressed(closeFailure);
                }
            }
            // Print the throwable itself: getMessage() may be null and omits
            // the exception type.
            System.err.println("could not load cufile jni library: " + t);
        }
    }

    /**
     * Checks if the cuFile library has been successfully loaded and initialized.
     *
     * @return {@code true} if the library is loaded, {@code false} otherwise.
     */
    public static boolean libraryLoaded() {
        return initialized;
    }
}
Loading

0 comments on commit 19e44c0

Please sign in to comment.