From 20d7227a01bb408922cf7d460883cb72bdd1bf2a Mon Sep 17 00:00:00 2001 From: RichardScottOZ Date: Mon, 4 Nov 2024 07:06:26 +1030 Subject: [PATCH 01/10] to (#532) Authors: - https://github.com/RichardScottOZ Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/kvikio/pull/532 --- docs/source/zarr.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/zarr.rst b/docs/source/zarr.rst index 5b63ffd8b7..82e6186026 100644 --- a/docs/source/zarr.rst +++ b/docs/source/zarr.rst @@ -8,7 +8,7 @@ Zarr KvikIO provides a GPU backend to Zarr-Python that enables `GPUDirect Storage (GDS) `_ seamlessly. The following is an example of how to use the convenience function :py:meth:`kvikio.zarr.open_cupy_array` -to create a new Zarr array and how open an existing Zarr array. +to create a new Zarr array and how to open an existing Zarr array. .. literalinclude:: ../../python/kvikio/examples/zarr_cupy_nvcomp.py From e2b11918ba770cd5af6b30e2af2cf04e04038fcc Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 4 Nov 2024 23:13:50 -0800 Subject: [PATCH 02/10] Fix case of find_package call (#534) See https://github.com/rapidsai/devcontainers/pull/414 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Mads R. B. Kristensen (https://github.com/madsbk) URL: https://github.com/rapidsai/kvikio/pull/534 --- python/kvikio/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/kvikio/CMakeLists.txt b/python/kvikio/CMakeLists.txt index 4e40e86fa6..4db09123d4 100644 --- a/python/kvikio/CMakeLists.txt +++ b/python/kvikio/CMakeLists.txt @@ -31,7 +31,7 @@ option(USE_NVCOMP_RUNTIME_WHEEL "Use the nvcomp wheel at runtime instead of the # TODO: Should we symlink FindcuFile.cmake into python/cmake? 
find cuFile include(../../cpp/cmake/Modules/FindcuFile.cmake) -find_package(KvikIO REQUIRED "${RAPIDS_VERSION}") +find_package(kvikio REQUIRED "${RAPIDS_VERSION}") find_package(CUDAToolkit REQUIRED) From 1c9984189828d501e74e1d9a9cca199c9377bcd2 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 Nov 2024 08:00:26 +0100 Subject: [PATCH 03/10] Build KvikIO as a shared library (#527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moving libcurl-dependent functions to `remote_handle.cpp` (let's move the rest in a follow-up PR) and set up building `libkvikio.so`. ### Background Now that KvikIO has evolved into a standalone IO library that works without cuFile and CUDA, I think it makes sense to move to a shared library. Originally, KvikIO was a very thin wrapper around cuFile, which is why we decided to keep it header-only (not because of templates). **Pros**: - Enables us to statically compile libcurl into `libkvikio.so`. * Required by our wheels. If we don’t do this, we will have to find another solution to https://github.com/rapidsai/kvikio/issues/512 - Avoid having to compile libcurl in downstream projects. Currently, cudf must compile libcurl in every build. - Reduce compile time in CI, both for KvikIO and downstream projects. - Ease development by not having to rebuild downstream projects like cudf every time KvikIO is modified. **Cons**: - Projects can no longer vendor KvikIO as a header-only project. As far as we know, nobody does this. Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) - Paul Taylor (https://github.com/trxcllnt) Approvers: - Robert Maynard (https://github.com/robertmaynard) - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/527 --- .devcontainer/Dockerfile | 2 +- README.md | 1 - ci/build_wheel.sh | 26 ++ ci/build_wheel_cpp.sh | 35 ++- ci/build_wheel_python.sh | 34 +-- ci/test_wheel.sh | 10 +- cpp/CMakeLists.txt | 239 +++++++----------- cpp/cmake/thirdparty/get_libcurl.cmake | 1 + cpp/doxygen/main_page.md | 6 +- cpp/examples/downstream/CMakeLists.txt | 4 +- cpp/include/kvikio/remote_handle.hpp | 160 +----------- cpp/include/kvikio/shim/utils.hpp | 9 +- cpp/src/remote_handle.cpp | 201 +++++++++++++++ cpp/tests/CMakeLists.txt | 6 +- dependencies.yaml | 26 ++ .../kvikio/cmake/thirdparty/get_nvcomp.cmake | 6 +- python/kvikio/kvikio/__init__.py | 11 + python/kvikio/pyproject.toml | 1 + python/libkvikio/CMakeLists.txt | 14 + python/libkvikio/libkvikio/__init__.py | 3 +- python/libkvikio/libkvikio/load.py | 45 ++++ 21 files changed, 488 insertions(+), 352 deletions(-) create mode 100755 ci/build_wheel.sh create mode 100644 cpp/src/remote_handle.cpp create mode 100644 python/libkvikio/libkvikio/load.py diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 9d35e3f97f..5d1d536704 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -26,5 +26,5 @@ ENV PYTHONDONTWRITEBYTECODE="1" ENV SCCACHE_REGION="us-east-2" ENV SCCACHE_BUCKET="rapids-sccache-devs" -ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai" +ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs" ENV HISTFILE="/home/coder/.cache/._bash_history" diff --git a/README.md b/README.md index e0e26ac560..50e4328fbd 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ KvikIO (pronounced "kuh-VICK-eye-oh", see [here](https://ordnet.dk/ddo_en/dict?q bindings to 
[cuFile](https://docs.nvidia.com/gpudirect-storage/api-reference-guide/index.html), which enables [GPUDirect Storage (GDS)](https://developer.nvidia.com/blog/gpudirect-storage/). KvikIO also works efficiently when GDS isn't available and can read/write both host and device data seamlessly. -The C++ library is header-only making it easy to include in [existing projects](https://github.com/rapidsai/kvikio/blob/HEAD/cpp/examples/downstream/). ### Features diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh new file mode 100755 index 0000000000..b1ede832da --- /dev/null +++ b/ci/build_wheel.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_name=$1 +package_dir=$2 + +source rapids-configure-sccache +source rapids-date-string + +rapids-generate-version > ./VERSION + +cd "${package_dir}" + +sccache --zero-stats + +rapids-logger "Building '${package_name}' wheel" +python -m pip wheel \ + -w dist \ + -v \ + --no-deps \ + --disable-pip-version-check \ + . 
+ +sccache --show-adv-stats diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh index b11cdf6677..0367842a8c 100755 --- a/ci/build_wheel_cpp.sh +++ b/ci/build_wheel_cpp.sh @@ -6,19 +6,34 @@ set -euo pipefail package_name="libkvikio" package_dir="python/libkvikio" -source rapids-configure-sccache -source rapids-date-string +rapids-logger "Generating build requirements" -rapids-generate-version > ./VERSION +rapids-dependency-file-generator \ + --output requirements \ + --file-key "py_build_${package_name}" \ + --file-key "py_rapids_build_${package_name}" \ + --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION};cuda_suffixed=true" \ +| tee /tmp/requirements-build.txt -cd "${package_dir}" +rapids-logger "Installing build requirements" +python -m pip install \ + -v \ + --prefer-binary \ + -r /tmp/requirements-build.txt -sccache --zero-stats +# build with '--no-build-isolation', for better sccache hit rate +# 0 really means "add --no-build-isolation" (ref: https://github.com/pypa/pip/issues/5735) +export PIP_NO_BUILD_ISOLATION=0 -python -m pip install wheel -python -m pip wheel . 
-w dist -v --no-deps --disable-pip-version-check - -sccache --show-adv-stats +export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" +./ci/build_wheel.sh "${package_name}" "${package_dir}" RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp dist + +mkdir -p ${package_dir}/final_dist +python -m auditwheel repair \ + --exclude libnvcomp.so.4 \ + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* + +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh index 44cb76586c..0c709f2fe6 100755 --- a/ci/build_wheel_python.sh +++ b/ci/build_wheel_python.sh @@ -6,35 +6,23 @@ set -euo pipefail package_name="kvikio" package_dir="python/kvikio" -source rapids-configure-sccache -source rapids-date-string - RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -rapids-generate-version > ./VERSION - -CPP_WHEELHOUSE=$(RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libkvikio_dist) - -cd "${package_dir}" - -# ensure 'kvikio' wheel builds always use the 'libkvikio' just built in the same CI run +# Ensure 'kvikio' wheel builds always use the 'libkvikio' just built in the same CI run # -# using env variable PIP_CONSTRAINT is necessary to ensure the constraints +# Using env variable PIP_CONSTRAINT is necessary to ensure the constraints # are used when creating the isolated build environment -echo "libkvikio-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo ${CPP_WHEELHOUSE}/libkvikio_*.whl)" > ./constraints.txt - -sccache --zero-stats - -PIP_CONSTRAINT="${PWD}/constraints.txt" \ -SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" \ - python -m pip wheel . 
-w dist -v --no-deps --disable-pip-version-check +RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp /tmp/libkvikio_dist +echo "libkvikio-${RAPIDS_PY_CUDA_SUFFIX} @ file://$(echo /tmp/libkvikio_dist/libkvikio_*.whl)" > /tmp/constraints.txt +export PIP_CONSTRAINT="/tmp/constraints.txt" -sccache --show-adv-stats +export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON" +./ci/build_wheel.sh "${package_name}" "${package_dir}" -mkdir -p final_dist python -m auditwheel repair \ + --exclude libkvikio.so \ --exclude libnvcomp.so.4 \ - -w final_dist \ - dist/* + -w ${package_dir}/final_dist \ + ${package_dir}/dist/* -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python final_dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index a3f014ca3f..46ae5cbcf2 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -4,10 +4,14 @@ set -eou pipefail RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -WHEELHOUSE="${PWD}/dist/" -RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python "${WHEELHOUSE}" -python -m pip install "$(echo ${WHEELHOUSE}/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" +# Download and install the libkvikio and kvikio wheels built in the previous step +RAPIDS_PY_WHEEL_NAME="libkvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./dist +RAPIDS_PY_WHEEL_NAME="kvikio_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./dist + +python -m pip install -v \ + "$(echo ./dist/libkvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \ + "$(echo ./dist/kvikio_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" # If running CUDA 11.8 on arm64, we skip tests marked "cufile" since # cuFile didn't support arm until 12.4 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 786ccb9266..772a97e560 100644 
--- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -35,14 +35,23 @@ rapids_cmake_write_version_file(include/kvikio/version_config.hpp) # Set a default build type if none was specified rapids_cmake_build_type(Release) -# build options -option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON) +# ################################################################################################## +# * build options ---------------------------------------------------------------------------------- + +option(BUILD_SHARED_LIBS "Build KvikIO shared library" ON) option(KvikIO_BUILD_EXAMPLES "Configure CMake to build examples" ON) option(KvikIO_BUILD_TESTS "Configure CMake to build tests" ON) +option(KvikIO_REMOTE_SUPPORT "Configure CMake to build with remote IO support" ON) +option(KvikIO_CUDA_SUPPORT "Configure CMake to build with CUDA support" ON) +option(KvikIO_EXPORT_NVCOMP "Export NVCOMP as a dependency" ON) +# ################################################################################################## +# * conda environment ------------------------------------------------------------------------------ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) -# find packages we depend on +# ################################################################################################## +# * dependencies ----------------------------------------------------------------------------------- + rapids_cpm_init() rapids_find_package( @@ -55,99 +64,95 @@ if(KvikIO_REMOTE_SUPPORT) include(cmake/thirdparty/get_libcurl.cmake) endif() +if(KvikIO_CUDA_SUPPORT) + rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET kvikio-exports + INSTALL_EXPORT_SET kvikio-exports + ) + include(cmake/thirdparty/get_nvtx.cmake) +endif() + rapids_find_package( - CUDAToolkit + cuFile BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports ) -if(CUDAToolkit_FOUND) - rapids_find_package( - cuFile - BUILD_EXPORT_SET kvikio-exports - 
INSTALL_EXPORT_SET kvikio-exports +if(NOT cuFile_FOUND) + message( + WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" ) - if(NOT cuFile_FOUND) - message( - WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" - ) +else() + # Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND) + file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) + string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) + if(cuFileBatchIOSetUp_location EQUAL "-1") + set(cuFile_BATCH_API_FOUND FALSE) else() - file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) - string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) - if(cuFileBatchIOSetUp_location EQUAL "-1") - set(cuFile_BATCH_API_FOUND FALSE) - else() - set(cuFile_BATCH_API_FOUND TRUE) - endif() - message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") - string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) - if(cuFileReadAsync_location EQUAL "-1") - set(cuFile_STREAM_API_FOUND FALSE) - else() - set(cuFile_STREAM_API_FOUND TRUE) - endif() - message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") + set(cuFile_BATCH_API_FOUND TRUE) endif() - - include(cmake/thirdparty/get_nvtx.cmake) + message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") + string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) + if(cuFileReadAsync_location EQUAL "-1") + set(cuFile_STREAM_API_FOUND FALSE) + else() + set(cuFile_STREAM_API_FOUND TRUE) + endif() + message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") endif() include(cmake/thirdparty/get_thread_pool.cmake) -# library targets -add_library(kvikio INTERFACE) -add_library(kvikio::kvikio ALIAS kvikio) +# ################################################################################################## +# * library targets 
-------------------------------------------------------------------------------- -# We enable CUDA and cuFile both here and in the FINAL_CODE_BLOCK export block. While the code block -# below (in FINAL_CODE_BLOCK) sets this information when KvikIO is imported from a -# kvikio-config.cmake file, this code block is intended to be used by projects that include KvikIO's -# source directory in their own CMake build. -# -# Normally we would just set the below without using $, and without the -# final_code_string, but in this case we want to conditionally set these things at import time, not -# export time, since KvikIO is a header-only library that can adapt to different build environments. - -# Enable CUDA in KvikIO -if(CUDAToolkit_FOUND) - if(CUDA_STATIC_RUNTIME) - target_link_libraries(kvikio INTERFACE $) - else() - target_link_libraries(kvikio INTERFACE $) - endif() - target_compile_definitions(kvikio INTERFACE $) -else() - message(WARNING "Building KvikIO without CUDA") -endif() +file(GLOB SOURCES "src/*.cpp") +add_library(kvikio ${SOURCES}) -# Enable supported cuFile features in KvikIO -if(cuFile_FOUND) - target_link_libraries(kvikio INTERFACE $) - target_compile_definitions(kvikio INTERFACE $) - if(cuFile_BATCH_API_FOUND) - target_compile_definitions( - kvikio INTERFACE $ - ) - endif() - if(cuFile_STREAM_API_FOUND) - target_compile_definitions( - kvikio INTERFACE $ - ) - endif() -endif() +# To avoid symbol conflicts when statically linking to libcurl.a (see get_libcurl.cmake) and its +# dependency OpenSSL, we exclude them when building libkvikio.so. This way, libkvikio.so will not +# expose any OpenSSL symbols that could conflict with downstream users like CPython that also links +# to (another version of) OpenSSL. 
+target_link_options(kvikio PRIVATE "LINKER:--exclude-libs,ALL") + +add_library(kvikio::kvikio ALIAS kvikio) target_include_directories( - kvikio INTERFACE "$" - "$" + kvikio + PUBLIC "$" "${CUDAToolkit_INCLUDE_DIRS}" + "${cuFile_INCLUDE_DIRS}" + INTERFACE "$" ) + +# Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. target_link_libraries( - kvikio INTERFACE Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} - $ + kvikio + PUBLIC Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} $ + PRIVATE $ ) -if(TARGET CURL::libcurl) - target_link_libraries(kvikio INTERFACE $) - target_compile_definitions(kvikio INTERFACE $) -endif() -target_compile_features(kvikio INTERFACE cxx_std_17) + +target_compile_definitions( + kvikio + PUBLIC $<$:KVIKIO_LIBCURL_FOUND> + $<$:KVIKIO_CUDA_FOUND> + $<$:KVIKIO_CUFILE_FOUND> + $<$:KVIKIO_CUFILE_BATCH_API_FOUND> + $<$:KVIKIO_CUFILE_STREAM_API_FOUND> +) + +set_target_properties( + kvikio + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) + +# ################################################################################################## +# * add examples ----------------------------------------------------------------------------------- # optionally build examples if(KvikIO_BUILD_EXAMPLES) @@ -168,10 +173,21 @@ if(CUDAToolkit_FOUND add_subdirectory(tests) endif() +# ################################################################################################## +# * install targets -------------------------------------------------------------------------------- + +rapids_cmake_install_lib_dir(lib_dir) include(CPack) +include(GNUInstallDirs) + +set(CMAKE_INSTALL_DEFAULT_COMPONENT_NAME kvikio) + +install( + TARGETS kvikio + DESTINATION ${lib_dir} + EXPORT kvikio-exports +) -# install export targets -install(TARGETS kvikio EXPORT kvikio-exports) install(DIRECTORY 
include/kvikio/ DESTINATION include/kvikio) install(FILES ${KvikIO_BINARY_DIR}/include/kvikio/version_config.hpp DESTINATION include/kvikio) @@ -185,68 +201,7 @@ rapids_export_find_package_file( set(doc_string [=[ -Provide targets for KvikIO: C++ bindings for cuFile. -]=] -) - -set(final_code_string - [=[ -get_property(already_set_kvikio DIRECTORY PROPERTY kvikio_already_set_defines SET) -if(NOT already_set_kvikio) - set_property(DIRECTORY PROPERTY kvikio_already_set_defines "ON") - - find_package(CUDAToolkit QUIET) - if(CUDAToolkit_FOUND) - if(CUDA_STATIC_RUNTIME) - target_link_libraries(kvikio::kvikio INTERFACE CUDA::cudart_static) - else() - target_link_libraries(kvikio::kvikio INTERFACE CUDA::cudart) - endif() - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUDA_FOUND) - else() - message(WARNING "Building KvikIO without CUDA") - endif() - - # Find cuFile and determine which features are supported - find_package(cuFile QUIET) - if(NOT cuFile_FOUND) - message(WARNING "KvikIO: cuFile not found") - else() - file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) - string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) - if(cuFileBatchIOSetUp_location EQUAL "-1") - set(cuFile_BATCH_API_FOUND FALSE) - else() - set(cuFile_BATCH_API_FOUND TRUE) - endif() - message(STATUS "KvikIO: Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") - string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) - if(cuFileReadAsync_location EQUAL "-1") - set(cuFile_STREAM_API_FOUND FALSE) - else() - set(cuFile_STREAM_API_FOUND TRUE) - endif() - message(STATUS "KvikIO: Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") - endif() - - # Enable supported cuFile features in KvikIO - if(cuFile_FOUND) - target_link_libraries(kvikio::kvikio INTERFACE cufile::cuFile_interface) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUFILE_FOUND) - if(cuFile_BATCH_API_FOUND) - target_compile_definitions(kvikio::kvikio INTERFACE 
KVIKIO_CUFILE_BATCH_API_FOUND) - endif() - if(cuFile_STREAM_API_FOUND) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_CUFILE_STREAM_API_FOUND) - endif() - endif() - - if(TARGET CURL::libcurl) - target_link_libraries(kvikio::kvikio INTERFACE CURL::libcurl) - target_compile_definitions(kvikio::kvikio INTERFACE KVIKIO_LIBCURL_FOUND) - endif() - -endif() +Provide targets for KvikIO. ]=] ) @@ -256,7 +211,6 @@ rapids_export( GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string - FINAL_CODE_BLOCK final_code_string ) rapids_export( @@ -265,5 +219,4 @@ rapids_export( GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string - FINAL_CODE_BLOCK final_code_string ) diff --git a/cpp/cmake/thirdparty/get_libcurl.cmake b/cpp/cmake/thirdparty/get_libcurl.cmake index 5694494b9a..6b137bbde2 100644 --- a/cpp/cmake/thirdparty/get_libcurl.cmake +++ b/cpp/cmake/thirdparty/get_libcurl.cmake @@ -31,6 +31,7 @@ function(find_and_configure_libcurl) GIT_TAG curl-7_87_0 OPTIONS "BUILD_CURL_EXE OFF" "BUILD_SHARED_LIBS OFF" "BUILD_TESTING OFF" "CURL_USE_LIBPSL OFF" "CURL_DISABLE_LDAP ON" "CMAKE_POSITION_INDEPENDENT_CODE ON" + EXCLUDE_FROM_ALL YES # Don't install libcurl.a (only needed when building libkvikio.so) ) if(DEFINED CACHE_HAS_BUILD_TESTING) set(BUILD_TESTING diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md index 21a33b1d45..497fb3e13e 100644 --- a/cpp/doxygen/main_page.md +++ b/cpp/doxygen/main_page.md @@ -5,7 +5,7 @@ bindings to [cuFile](https://docs.nvidia.com/gpudirect-storage/api-reference-gui which enables [GPUDirect Storage (GDS)](https://developer.nvidia.com/blog/gpudirect-storage/). KvikIO also works efficiently when GDS isn't available and can read/write both host and device data seamlessly. -KvikIO C++ is a header-only library that is part of the [RAPIDS](https://rapids.ai/) suite of open-source software libraries for GPU-accelerated data science. 
+KvikIO C++ is part of the [RAPIDS](https://rapids.ai/) suite of open-source software libraries for GPU-accelerated data science. --- **Notice** this is the documentation for the C++ library. For the Python documentation, see under [kvikio](https://docs.rapids.ai/api/kvikio/nightly/). @@ -23,9 +23,7 @@ KvikIO C++ is a header-only library that is part of the [RAPIDS](https://rapids. ## Installation -KvikIO is a header-only library and as such doesn't need installation. -However, for convenience we release Conda packages that makes it easy -to include KvikIO in your CMake projects. +For convenience we release Conda packages that makes it easy to include KvikIO in your CMake projects. ### Conda/Mamba diff --git a/cpp/examples/downstream/CMakeLists.txt b/cpp/examples/downstream/CMakeLists.txt index a80d0ba44f..5dddd30441 100644 --- a/cpp/examples/downstream/CMakeLists.txt +++ b/cpp/examples/downstream/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -28,6 +28,4 @@ include(cmake/get_kvikio.cmake) add_executable(downstream_example downstream_example.cpp) -# Notice, even though KvikIO is a header-only library, we link to it here. Linking to -# `kvikio::kvikio` makes CMake include the headers of KvikIO when building. 
target_link_libraries(downstream_example PRIVATE kvikio::kvikio) diff --git a/cpp/include/kvikio/remote_handle.hpp b/cpp/include/kvikio/remote_handle.hpp index 5bb18f6396..8ac2798f31 100644 --- a/cpp/include/kvikio/remote_handle.hpp +++ b/cpp/include/kvikio/remote_handle.hpp @@ -30,7 +30,6 @@ #include #include #include -#include #include namespace kvikio { @@ -128,78 +127,10 @@ class BounceBufferH2D { } }; -/** - * @brief Context used by the "CURLOPT_WRITEFUNCTION" callbacks. - */ -struct CallbackContext { - char* buf; // Output buffer to read into. - std::size_t size; // Total number of bytes to read. - std::ptrdiff_t offset; // Offset into `buf` to start reading. - bool overflow_error; // Flag to indicate overflow. - CallbackContext(void* buf, std::size_t size) - : buf{static_cast(buf)}, size{size}, offset{0}, overflow_error{0} - { - } - BounceBufferH2D* bounce_buffer{nullptr}; // Only used by callback_device_memory -}; - -/** - * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output host buffer. - * - * See . - * - * @param data Data downloaded by libcurl that is ready for consumption. - * @param size Size of each element in `nmemb`; size is always 1. - * @param nmemb Size of the data in `nmemb`. - * @param context A pointer to an instance of `CallbackContext`. - */ -inline std::size_t callback_host_memory(char* data, - std::size_t size, - std::size_t nmemb, - void* context) -{ - auto ctx = reinterpret_cast(context); - std::size_t const nbytes = size * nmemb; - if (ctx->size < ctx->offset + nbytes) { - ctx->overflow_error = true; - return CURL_WRITEFUNC_ERROR; - } - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes); - std::memcpy(ctx->buf + ctx->offset, data, nbytes); - ctx->offset += nbytes; - return nbytes; -} - -/** - * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output device buffer. - * - * See . - * - * @param data Data downloaded by libcurl that is ready for consumption. 
- * @param size Size of each element in `nmemb`; size is always 1. - * @param nmemb Size of the data in `nmemb`. - * @param context A pointer to an instance of `CallbackContext`. - */ -inline std::size_t callback_device_memory(char* data, - std::size_t size, - std::size_t nmemb, - void* context) -{ - auto ctx = reinterpret_cast(context); - std::size_t const nbytes = size * nmemb; - if (ctx->size < ctx->offset + nbytes) { - ctx->overflow_error = true; - return CURL_WRITEFUNC_ERROR; - } - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes); - - ctx->bounce_buffer->write(data, nbytes); - ctx->offset += nbytes; - return nbytes; -} - } // namespace detail +class CurlHandle; // Prototype + /** * @brief Abstract base class for remote endpoints. * @@ -243,7 +174,7 @@ class HttpEndpoint : public RemoteEndpoint { * @param url The full http url to the remote file. */ HttpEndpoint(std::string url) : _url{std::move(url)} {} - void setopt(CurlHandle& curl) override { curl.setopt(CURLOPT_URL, _url.c_str()); } + void setopt(CurlHandle& curl) override; std::string str() const override { return _url; } ~HttpEndpoint() override = default; }; @@ -424,12 +355,7 @@ class S3Endpoint : public RemoteEndpoint { { } - void setopt(CurlHandle& curl) override - { - curl.setopt(CURLOPT_URL, _url.c_str()); - curl.setopt(CURLOPT_AWS_SIGV4, _aws_sigv4.c_str()); - curl.setopt(CURLOPT_USERPWD, _aws_userpwd.c_str()); - } + void setopt(CurlHandle& curl) override; std::string str() const override { return _url; } ~S3Endpoint() override = default; }; @@ -461,23 +387,7 @@ class RemoteHandle { * * @param endpoint Remote endpoint used for subsequently IO. 
*/ - RemoteHandle(std::unique_ptr endpoint) - { - auto curl = create_curl_handle(); - - endpoint->setopt(curl); - curl.setopt(CURLOPT_NOBODY, 1L); - curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); - curl.perform(); - curl_off_t cl; - curl.getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl); - if (cl < 0) { - throw std::runtime_error("cannot get size of " + endpoint->str() + - ", content-length not provided by the server"); - } - _nbytes = cl; - _endpoint = std::move(endpoint); - } + RemoteHandle(std::unique_ptr endpoint); // A remote handle is moveable but not copyable. RemoteHandle(RemoteHandle&& o) = default; @@ -513,53 +423,7 @@ class RemoteHandle { * @param file_offset File offset in bytes. * @return Number of bytes read, which is always `size`. */ - std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0) - { - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size); - - if (file_offset + size > _nbytes) { - std::stringstream ss; - ss << "cannot read " << file_offset << "+" << size << " bytes into a " << _nbytes - << " bytes file (" << _endpoint->str() << ")"; - throw std::invalid_argument(ss.str()); - } - bool const is_host_mem = is_host_memory(buf); - auto curl = create_curl_handle(); - _endpoint->setopt(curl); - - std::string const byte_range = - std::to_string(file_offset) + "-" + std::to_string(file_offset + size - 1); - curl.setopt(CURLOPT_RANGE, byte_range.c_str()); - - if (is_host_mem) { - curl.setopt(CURLOPT_WRITEFUNCTION, detail::callback_host_memory); - } else { - curl.setopt(CURLOPT_WRITEFUNCTION, detail::callback_device_memory); - } - detail::CallbackContext ctx{buf, size}; - curl.setopt(CURLOPT_WRITEDATA, &ctx); - - try { - if (is_host_mem) { - curl.perform(); - } else { - PushAndPopContext c(get_context_from_pointer(buf)); - // We use a bounce buffer to avoid many small memory copies to device. Libcurl has a - // maximum chunk size of 16kb (`CURL_MAX_WRITE_SIZE`) but chunks are often much smaller. 
- detail::BounceBufferH2D bounce_buffer(detail::StreamsByThread::get(), buf); - ctx.bounce_buffer = &bounce_buffer; - curl.perform(); - } - } catch (std::runtime_error const& e) { - if (ctx.overflow_error) { - std::stringstream ss; - ss << "maybe the server doesn't support file ranges? [" << e.what() << "]"; - throw std::overflow_error(ss.str()); - } - throw; - } - return size; - } + std::size_t read(void* buf, std::size_t size, std::size_t file_offset = 0); /** * @brief Read from remote source into buffer (host or device memory) in parallel. @@ -576,17 +440,7 @@ class RemoteHandle { std::future pread(void* buf, std::size_t size, std::size_t file_offset = 0, - std::size_t task_size = defaults::task_size()) - { - KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size); - auto task = [this](void* devPtr_base, - std::size_t size, - std::size_t file_offset, - std::size_t devPtr_offset) -> std::size_t { - return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); - }; - return parallel_io(task, buf, size, file_offset, task_size, 0); - } + std::size_t task_size = defaults::task_size()); }; } // namespace kvikio diff --git a/cpp/include/kvikio/shim/utils.hpp b/cpp/include/kvikio/shim/utils.hpp index f805ed8ee3..7a3c439899 100644 --- a/cpp/include/kvikio/shim/utils.hpp +++ b/cpp/include/kvikio/shim/utils.hpp @@ -24,11 +24,10 @@ namespace kvikio { // Macros used for defining symbol visibility. -// Since KvikIO is header-only, we rely on the linker to disambiguate inline functions -// and static methods that have (or return) static references. To do this, the relevant -// function/method must have `__attribute__((visibility("default")))`. If not, then if -// KvikIO is used in two different DSOs, the function will appear twice, and there will -// be two static objects. +// Since KvikIO declares global default values in headers, we rely on the linker to disambiguate +// inline and static methods that have (or return) static references. 
To do this, the relevant +// function/method must have `__attribute__((visibility("default")))`. If not, then if KvikIO is +// used in two different DSOs, the function will appear twice, and there will be two static objects. // See and . #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MINGW32__) && !defined(__MINGW64__) #define KVIKIO_EXPORT __attribute__((visibility("default"))) diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp new file mode 100644 index 0000000000..527811e143 --- /dev/null +++ b/cpp/src/remote_handle.cpp @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace kvikio { + +void HttpEndpoint::setopt(CurlHandle& curl) { curl.setopt(CURLOPT_URL, _url.c_str()); } + +void S3Endpoint::setopt(CurlHandle& curl) +{ + curl.setopt(CURLOPT_URL, _url.c_str()); + curl.setopt(CURLOPT_AWS_SIGV4, _aws_sigv4.c_str()); + curl.setopt(CURLOPT_USERPWD, _aws_userpwd.c_str()); +} + +RemoteHandle::RemoteHandle(std::unique_ptr endpoint) +{ + auto curl = create_curl_handle(); + + endpoint->setopt(curl); + curl.setopt(CURLOPT_NOBODY, 1L); + curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); + curl.perform(); + curl_off_t cl; + curl.getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &cl); + if (cl < 0) { + throw std::runtime_error("cannot get size of " + endpoint->str() + + ", content-length not provided by the server"); + } + _nbytes = cl; + _endpoint = std::move(endpoint); +} + +namespace { + +/** + * @brief Context used by the "CURLOPT_WRITEFUNCTION" callbacks. + */ +struct CallbackContext { + char* buf; // Output buffer to read into. + std::size_t size; // Total number of bytes to read. + std::ptrdiff_t offset; // Offset into `buf` to start reading. + bool overflow_error; // Flag to indicate overflow. + CallbackContext(void* buf, std::size_t size) + : buf{static_cast(buf)}, size{size}, offset{0}, overflow_error{0} + { + } + detail::BounceBufferH2D* bounce_buffer{nullptr}; // Only used by callback_device_memory +}; + +/** + * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output host buffer. + * + * See . + * + * @param data Data downloaded by libcurl that is ready for consumption. + * @param size Size of each element in `nmemb`; size is always 1. + * @param nmemb Size of the data in `nmemb`. + * @param context A pointer to an instance of `CallbackContext`. 
+ */ +inline std::size_t callback_host_memory(char* data, + std::size_t size, + std::size_t nmemb, + void* context) +{ + auto ctx = reinterpret_cast(context); + std::size_t const nbytes = size * nmemb; + if (ctx->size < ctx->offset + nbytes) { + ctx->overflow_error = true; + return CURL_WRITEFUNC_ERROR; + } + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_host_memory()", nbytes); + std::memcpy(ctx->buf + ctx->offset, data, nbytes); + ctx->offset += nbytes; + return nbytes; +} + +/** + * @brief A "CURLOPT_WRITEFUNCTION" to copy downloaded data to the output device buffer. + * + * See . + * + * @param data Data downloaded by libcurl that is ready for consumption. + * @param size Size of each element in `nmemb`; size is always 1. + * @param nmemb Size of the data in `nmemb`. + * @param context A pointer to an instance of `CallbackContext`. + */ +inline std::size_t callback_device_memory(char* data, + std::size_t size, + std::size_t nmemb, + void* context) +{ + auto ctx = reinterpret_cast(context); + std::size_t const nbytes = size * nmemb; + if (ctx->size < ctx->offset + nbytes) { + ctx->overflow_error = true; + return CURL_WRITEFUNC_ERROR; + } + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle - callback_device_memory()", nbytes); + + ctx->bounce_buffer->write(data, nbytes); + ctx->offset += nbytes; + return nbytes; +} +} // namespace + +std::size_t RemoteHandle::read(void* buf, std::size_t size, std::size_t file_offset) +{ + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::read()", size); + + if (file_offset + size > _nbytes) { + std::stringstream ss; + ss << "cannot read " << file_offset << "+" << size << " bytes into a " << _nbytes + << " bytes file (" << _endpoint->str() << ")"; + throw std::invalid_argument(ss.str()); + } + bool const is_host_mem = is_host_memory(buf); + auto curl = create_curl_handle(); + _endpoint->setopt(curl); + + std::string const byte_range = + std::to_string(file_offset) + "-" + std::to_string(file_offset + size - 1); + curl.setopt(CURLOPT_RANGE, 
byte_range.c_str()); + + if (is_host_mem) { + curl.setopt(CURLOPT_WRITEFUNCTION, callback_host_memory); + } else { + curl.setopt(CURLOPT_WRITEFUNCTION, callback_device_memory); + } + CallbackContext ctx{buf, size}; + curl.setopt(CURLOPT_WRITEDATA, &ctx); + + try { + if (is_host_mem) { + curl.perform(); + } else { + PushAndPopContext c(get_context_from_pointer(buf)); + // We use a bounce buffer to avoid many small memory copies to device. Libcurl has a + // maximum chunk size of 16kb (`CURL_MAX_WRITE_SIZE`) but chunks are often much smaller. + detail::BounceBufferH2D bounce_buffer(detail::StreamsByThread::get(), buf); + ctx.bounce_buffer = &bounce_buffer; + curl.perform(); + } + } catch (std::runtime_error const& e) { + if (ctx.overflow_error) { + std::stringstream ss; + ss << "maybe the server doesn't support file ranges? [" << e.what() << "]"; + throw std::overflow_error(ss.str()); + } + throw; + } + return size; +} + +std::future RemoteHandle::pread(void* buf, + std::size_t size, + std::size_t file_offset, + std::size_t task_size) +{ + KVIKIO_NVTX_SCOPED_RANGE("RemoteHandle::pread()", size); + auto task = [this](void* devPtr_base, + std::size_t size, + std::size_t file_offset, + std::size_t devPtr_offset) -> std::size_t { + return read(static_cast(devPtr_base) + devPtr_offset, size, file_offset); + }; + return parallel_io(task, buf, size, file_offset, task_size, 0); +} + +} // namespace kvikio diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 429bd8b722..e9024795f5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -12,9 +12,6 @@ # the License. 
# ============================================================================= -# ################################################################################################## -# enable testing ----------------------------------------------------------------------------------- -# ################################################################################################## enable_testing() include(rapids-test) @@ -32,7 +29,8 @@ set_target_properties( CUDA_STANDARD 17 CUDA_STANDARD_REQUIRED ON ) -target_link_libraries(cpp_tests PRIVATE kvikio::kvikio GTest::gmock GTest::gtest) +target_link_libraries(cpp_tests PRIVATE kvikio::kvikio GTest::gmock GTest::gtest CUDA::cudart) + rapids_test_add( NAME cpp_tests COMMAND cpp_tests diff --git a/dependencies.yaml b/dependencies.yaml index ae99fb5d83..b80ed69337 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -64,6 +64,7 @@ files: includes: - depends_on_cupy - depends_on_nvcomp + - depends_on_libkvikio - run py_rapids_build_libkvikio: output: pyproject @@ -282,6 +283,31 @@ dependencies: - matrix: packages: - nvidia-nvcomp==4.1.0.6 + depends_on_libkvikio: + common: + - output_types: conda + packages: + - &libkvikio_unsuffixed libkvikio==24.12.*,>=0.0.0a0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + # This index is needed for libkvikio-cu{11,12}. 
+ - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: + cuda: "12.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu12==24.12.*,>=0.0.0a0 + - matrix: + cuda: "11.*" + cuda_suffixed: "true" + packages: + - libkvikio-cu11==24.12.*,>=0.0.0a0 + - {matrix: null, packages: [*libkvikio_unsuffixed]} docs: common: - output_types: [conda, requirements] diff --git a/python/kvikio/cmake/thirdparty/get_nvcomp.cmake b/python/kvikio/cmake/thirdparty/get_nvcomp.cmake index 9361624c07..a2c6326e76 100644 --- a/python/kvikio/cmake/thirdparty/get_nvcomp.cmake +++ b/python/kvikio/cmake/thirdparty/get_nvcomp.cmake @@ -18,7 +18,11 @@ set(KVIKIO_USE_PROPRIETARY_BINARY ON) function(find_and_configure_nvcomp) include(${rapids-cmake-dir}/cpm/nvcomp.cmake) - rapids_cpm_nvcomp(USE_PROPRIETARY_BINARY ${KVIKIO_USE_PROPRIETARY_BINARY}) + set(export_args) + if(KvikIO_EXPORT_NVCOMP) + set(export_args BUILD_EXPORT_SET kvikio-exports INSTALL_EXPORT_SET kvikio-exports) + endif() + rapids_cpm_nvcomp(${export_args} USE_PROPRIETARY_BINARY ${KVIKIO_USE_PROPRIETARY_BINARY}) # Per-thread default stream if(TARGET nvcomp AND PER_THREAD_DEFAULT_STREAM) diff --git a/python/kvikio/kvikio/__init__.py b/python/kvikio/kvikio/__init__.py index a2bfffaf48..f4db6d1d05 100644 --- a/python/kvikio/kvikio/__init__.py +++ b/python/kvikio/kvikio/__init__.py @@ -1,6 +1,17 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. # See file LICENSE for terms. +# If libkvikio was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. 
+try: + import libkvikio +except ModuleNotFoundError: + pass +else: + libkvikio.load_library() + del libkvikio + + from kvikio._version import __git_commit__, __version__ from kvikio.cufile import CuFile from kvikio.remote_file import RemoteFile, is_remote_file_available diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index cb9491e75e..5921e4b762 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -20,6 +20,7 @@ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ "cupy-cuda11x>=12.0.0", + "libkvikio==24.12.*,>=0.0.0a0", "numcodecs !=0.12.0", "numpy>=1.23,<3.0a0", "nvidia-nvcomp==4.1.0.6", diff --git a/python/libkvikio/CMakeLists.txt b/python/libkvikio/CMakeLists.txt index 278e09f462..270e8ff5be 100644 --- a/python/libkvikio/CMakeLists.txt +++ b/python/libkvikio/CMakeLists.txt @@ -38,6 +38,20 @@ unset(kvikio_FOUND) set(KvikIO_BUILD_EXAMPLES OFF) set(KvikIO_BUILD_TESTS OFF) +if(USE_NVCOMP_RUNTIME_WHEEL) + set(KvikIO_EXPORT_NVCOMP OFF) +endif() set(CUDA_STATIC_RUNTIME ON) add_subdirectory(../../cpp kvikio-cpp) + +if(USE_NVCOMP_RUNTIME_WHEEL) + set(rpaths "$ORIGIN/../../nvidia/nvcomp") + foreach(tgt IN LISTS RAPIDS_CYTHON_CREATED_TARGETS) + set_property( + TARGET ${tgt} + PROPERTY INSTALL_RPATH ${rpaths} + APPEND + ) + endforeach() +endif() diff --git a/python/libkvikio/libkvikio/__init__.py b/python/libkvikio/libkvikio/__init__.py index 995cd1027d..a221295d4c 100644 --- a/python/libkvikio/libkvikio/__init__.py +++ b/python/libkvikio/libkvikio/__init__.py @@ -13,5 +13,6 @@ # limitations under the License. 
from libkvikio._version import __git_commit__, __version__ +from libkvikio.load import load_library -__all__ = ["__git_commit__", "__version__"] +__all__ = ["__git_commit__", "__version__", "load_library"] diff --git a/python/libkvikio/libkvikio/load.py b/python/libkvikio/libkvikio/load.py new file mode 100644 index 0000000000..a6b0898e18 --- /dev/null +++ b/python/libkvikio/libkvikio/load.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import ctypes +import os + + +def load_library(): + # Dynamically load libkvikio.so. Prefer a system library if one is present to + # avoid clobbering symbols that other packages might expect, but if no + # other library is present use the one in the wheel. + libkvikio_lib = None + try: + libkvikio_lib = ctypes.CDLL("libkvikio.so", ctypes.RTLD_GLOBAL) + except OSError: + # If neither of these directories contain the library, we assume we are in an + # environment where the C++ library is already installed somewhere else and the + # CMake build of the libkvikio Python package was a no-op. + # + # Note that this approach won't work for real editable installs of the libkvikio + # package. scikit-build-core has limited support for importlib.resources so + # there isn't a clean way to support that case yet. 
+ for lib_dir in ("lib", "lib64"): + if os.path.isfile( + lib := os.path.join(os.path.dirname(__file__), lib_dir, "libkvikio.so") + ): + libkvikio_lib = ctypes.CDLL(lib, ctypes.RTLD_GLOBAL) + break + + # The caller almost never needs to do anything with this library, but no + # harm in offering the option since this object at least provides a handle + # to inspect where libkvikio was loaded from. + return libkvikio_lib From 12ca83b2e8d3d0293b74e5fc973f05e9cba94312 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 6 Nov 2024 17:11:45 +0100 Subject: [PATCH 04/10] Moving details in file_handle.hpp to .cpp (#539) ... also don't build `remote_handle.cpp` if `KvikIO_REMOTE_SUPPORT=OFF`, which fixes https://github.com/rapidsai/kvikio/issues/538 Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/539 --- cpp/CMakeLists.txt | 7 +- cpp/include/kvikio/file_handle.hpp | 130 +----------------------- cpp/src/file_handle.cpp | 158 +++++++++++++++++++++++++++++ cpp/src/remote_handle.cpp | 2 - 4 files changed, 167 insertions(+), 130 deletions(-) create mode 100644 cpp/src/file_handle.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 772a97e560..3a42e44401 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -107,7 +107,12 @@ include(cmake/thirdparty/get_thread_pool.cmake) # ################################################################################################## # * library targets -------------------------------------------------------------------------------- -file(GLOB SOURCES "src/*.cpp") +set(SOURCES "src/file_handle.cpp") + +if(KvikIO_REMOTE_SUPPORT) + list(APPEND SOURCES "src/remote_handle.cpp") +endif() + add_library(kvikio ${SOURCES}) # To avoid symbol conflicts when statically linking to libcurl.a (see get_libcurl.cmake) and its diff --git 
a/cpp/include/kvikio/file_handle.hpp b/cpp/include/kvikio/file_handle.hpp index 97c0ba9748..141c17371a 100644 --- a/cpp/include/kvikio/file_handle.hpp +++ b/cpp/include/kvikio/file_handle.hpp @@ -15,14 +15,11 @@ */ #pragma once -#include #include #include -#include #include #include -#include #include #include @@ -37,96 +34,6 @@ #include namespace kvikio { -namespace detail { - -/** - * @brief Parse open file flags given as a string and return oflags - * - * @param flags The flags - * @param o_direct Append O_DIRECT to the open flags - * @return oflags - * - * @throw std::invalid_argument if the specified flags are not supported. - * @throw std::invalid_argument if `o_direct` is true, but `O_DIRECT` is not supported. - */ -inline int open_fd_parse_flags(const std::string& flags, bool o_direct) -{ - int file_flags = -1; - if (flags.empty()) { throw std::invalid_argument("Unknown file open flag"); } - switch (flags[0]) { - case 'r': - file_flags = O_RDONLY; - if (flags[1] == '+') { file_flags = O_RDWR; } - break; - case 'w': - file_flags = O_WRONLY; - if (flags[1] == '+') { file_flags = O_RDWR; } - file_flags |= O_CREAT | O_TRUNC; - break; - case 'a': throw std::invalid_argument("Open flag 'a' isn't supported"); - default: throw std::invalid_argument("Unknown file open flag"); - } - file_flags |= O_CLOEXEC; - if (o_direct) { -#if defined(O_DIRECT) - file_flags |= O_DIRECT; -#else - throw std::invalid_argument("'o_direct' flag unsupported on this platform"); -#endif - } - return file_flags; -} - -/** - * @brief Open file using `open(2)` - * - * @param flags Open flags given as a string - * @param o_direct Append O_DIRECT to `flags` - * @param mode Access modes - * @return File descriptor - */ -inline int open_fd(const std::string& file_path, - const std::string& flags, - bool o_direct, - mode_t mode) -{ - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) - int fd = ::open(file_path.c_str(), open_fd_parse_flags(flags, o_direct), mode); - if (fd == -1) { throw 
std::system_error(errno, std::generic_category(), "Unable to open file"); } - return fd; -} - -/** - * @brief Get the flags of the file descriptor (see `open(2)`) - * - * @return Open flags - */ -[[nodiscard]] inline int open_flags(int fd) -{ - int ret = fcntl(fd, F_GETFL); // NOLINT(cppcoreguidelines-pro-type-vararg) - if (ret == -1) { - throw std::system_error(errno, std::generic_category(), "Unable to retrieve open flags"); - } - return ret; -} - -/** - * @brief Get file size from file descriptor `fstat(3)` - * - * @param file_descriptor Open file descriptor - * @return The number of bytes - */ -[[nodiscard]] inline std::size_t get_file_size(int file_descriptor) -{ - struct stat st {}; - int ret = fstat(file_descriptor, &st); - if (ret == -1) { - throw std::system_error(errno, std::generic_category(), "Unable to query file size"); - } - return static_cast(st.st_size); -} - -} // namespace detail /** * @brief Handle of an open file registered with cufile. @@ -166,33 +73,7 @@ class FileHandle { FileHandle(const std::string& file_path, const std::string& flags = "r", mode_t mode = m644, - bool compat_mode = defaults::compat_mode()) - : _fd_direct_off{detail::open_fd(file_path, flags, false, mode)}, - _initialized{true}, - _compat_mode{compat_mode} - { - if (_compat_mode) { - return; // Nothing to do in compatibility mode - } - - // Try to open the file with the O_DIRECT flag. Fall back to compatibility mode, if it fails. - try { - _fd_direct_on = detail::open_fd(file_path, flags, true, mode); - } catch (const std::system_error&) { - _compat_mode = true; - } catch (const std::invalid_argument&) { - _compat_mode = true; - } - - // Create a cuFile handle, if not in compatibility mode - if (!_compat_mode) { - CUfileDescr_t desc{}; // It is important to set to zero! 
- desc.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD; - // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) - desc.handle.fd = _fd_direct_on; - CUFILE_TRY(cuFileAPI::instance().HandleRegister(&_handle, &desc)); - } - } + bool compat_mode = defaults::compat_mode()); /** * @brief FileHandle support move semantic but isn't copyable @@ -274,7 +155,7 @@ class FileHandle { * * @return File descriptor */ - [[nodiscard]] int fd_open_flags() const { return detail::open_flags(_fd_direct_off); } + [[nodiscard]] int fd_open_flags() const; /** * @brief Get the file size @@ -283,12 +164,7 @@ class FileHandle { * * @return The number of bytes */ - [[nodiscard]] std::size_t nbytes() const - { - if (closed()) { return 0; } - if (_nbytes == 0) { _nbytes = detail::get_file_size(_fd_direct_off); } - return _nbytes; - } + [[nodiscard]] std::size_t nbytes() const; /** * @brief Reads specified bytes from the file into the device memory. diff --git a/cpp/src/file_handle.cpp b/cpp/src/file_handle.cpp new file mode 100644 index 0000000000..c5b7ada59a --- /dev/null +++ b/cpp/src/file_handle.cpp @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace kvikio { + +namespace { + +/** + * @brief Parse open file flags given as a string and return oflags + * + * @param flags The flags + * @param o_direct Append O_DIRECT to the open flags + * @return oflags + * + * @throw std::invalid_argument if the specified flags are not supported. + * @throw std::invalid_argument if `o_direct` is true, but `O_DIRECT` is not supported. + */ +int open_fd_parse_flags(const std::string& flags, bool o_direct) +{ + int file_flags = -1; + if (flags.empty()) { throw std::invalid_argument("Unknown file open flag"); } + switch (flags[0]) { + case 'r': + file_flags = O_RDONLY; + if (flags[1] == '+') { file_flags = O_RDWR; } + break; + case 'w': + file_flags = O_WRONLY; + if (flags[1] == '+') { file_flags = O_RDWR; } + file_flags |= O_CREAT | O_TRUNC; + break; + case 'a': throw std::invalid_argument("Open flag 'a' isn't supported"); + default: throw std::invalid_argument("Unknown file open flag"); + } + file_flags |= O_CLOEXEC; + if (o_direct) { +#if defined(O_DIRECT) + file_flags |= O_DIRECT; +#else + throw std::invalid_argument("'o_direct' flag unsupported on this platform"); +#endif + } + return file_flags; +} + +/** + * @brief Open file using `open(2)` + * + * @param flags Open flags given as a string + * @param o_direct Append O_DIRECT to `flags` + * @param mode Access modes + * @return File descriptor + */ +int open_fd(const std::string& file_path, const std::string& flags, bool o_direct, mode_t mode) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-vararg) + int fd = ::open(file_path.c_str(), open_fd_parse_flags(flags, o_direct), mode); + if (fd == -1) { throw std::system_error(errno, std::generic_category(), "Unable to open file"); } + return fd; +} + +/** + * @brief Get the flags of the file descriptor (see `open(2)`) + * + * @return Open flags + */ +[[nodiscard]] int open_flags(int fd) +{ + int ret = fcntl(fd, F_GETFL); 
// NOLINT(cppcoreguidelines-pro-type-vararg) + if (ret == -1) { + throw std::system_error(errno, std::generic_category(), "Unable to retrieve open flags"); + } + return ret; +} + +/** + * @brief Get file size from file descriptor `fstat(3)` + * + * @param file_descriptor Open file descriptor + * @return The number of bytes + */ +[[nodiscard]] std::size_t get_file_size(int file_descriptor) +{ + struct stat st {}; + int ret = fstat(file_descriptor, &st); + if (ret == -1) { + throw std::system_error(errno, std::generic_category(), "Unable to query file size"); + } + return static_cast(st.st_size); +} + +} // namespace + +FileHandle::FileHandle(const std::string& file_path, + const std::string& flags, + mode_t mode, + bool compat_mode) + : _fd_direct_off{open_fd(file_path, flags, false, mode)}, + _initialized{true}, + _compat_mode{compat_mode} +{ + if (_compat_mode) { + return; // Nothing to do in compatibility mode + } + + // Try to open the file with the O_DIRECT flag. Fall back to compatibility mode, if it fails. + try { + _fd_direct_on = open_fd(file_path, flags, true, mode); + } catch (const std::system_error&) { + _compat_mode = true; + } catch (const std::invalid_argument&) { + _compat_mode = true; + } + + // Create a cuFile handle, if not in compatibility mode + if (!_compat_mode) { + CUfileDescr_t desc{}; // It is important to set to zero! 
+ desc.type = CU_FILE_HANDLE_TYPE_OPAQUE_FD; + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-union-access) + desc.handle.fd = _fd_direct_on; + CUFILE_TRY(cuFileAPI::instance().HandleRegister(&_handle, &desc)); + } +} + +[[nodiscard]] int FileHandle::fd_open_flags() const { return open_flags(_fd_direct_off); } + +[[nodiscard]] std::size_t FileHandle::nbytes() const +{ + if (closed()) { return 0; } + if (_nbytes == 0) { _nbytes = get_file_size(_fd_direct_off); } + return _nbytes; +} + +} // namespace kvikio diff --git a/cpp/src/remote_handle.cpp b/cpp/src/remote_handle.cpp index 527811e143..adcf56befc 100644 --- a/cpp/src/remote_handle.cpp +++ b/cpp/src/remote_handle.cpp @@ -19,8 +19,6 @@ #include #include #include -#include -#include #include #include #include From dac08038b8234fafbdb82c8fe2d91b688f3a1d29 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 6 Nov 2024 13:17:42 -0500 Subject: [PATCH 05/10] Disallow cuda-python 12.6.1 and 11.8.4 (#537) Due to a bug in cuda-python we must disallow cuda-python 12.6.1 and 11.8.4. This PR disallows those versions. See https://github.com/rapidsai/build-planning/issues/116 for more information. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - https://github.com/jakirkham URL: https://github.com/rapidsai/kvikio/pull/537 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 4 ++-- python/kvikio/pyproject.toml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index 54cbb12072..daeaa4272c 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -9,7 +9,7 @@ dependencies: - boto3>=1.21.21 - c-compiler - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,!=11.8.4 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index ff3da55951..0a9517431b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,7 +9,7 @@ dependencies: - boto3>=1.21.21 - c-compiler - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0 +- cuda-python>=11.7.1,<12.0a0,!=11.8.4 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index 41f6e6b3a3..bbf2c33a6f 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -10,7 +10,7 @@ dependencies: - c-compiler - cmake>=3.26.4,!=3.30.0 - cuda-nvcc -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,!=12.6.1 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 
cab86c6fdc..261017db97 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - c-compiler - cmake>=3.26.4,!=3.30.0 - cuda-nvcc -- cuda-python>=12.0,<13.0a0 +- cuda-python>=12.0,<13.0a0,!=12.6.1 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index b80ed69337..7ec7ab6b8a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -384,7 +384,7 @@ dependencies: - matrix: cuda: "12.*" packages: - - cuda-python>=12.0,<13.0a0 + - cuda-python>=12.0,<13.0a0,!=12.6.1 - matrix: # All CUDA 11 versions packages: - - cuda-python>=11.7.1,<12.0a0 + - cuda-python>=11.7.1,<12.0a0,!=11.8.4 diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index 5921e4b762..79ad94ecde 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -41,7 +41,7 @@ classifiers = [ [project.optional-dependencies] test = [ "boto3>=1.21.21", - "cuda-python>=11.7.1,<12.0a0", + "cuda-python>=11.7.1,<12.0a0,!=11.8.4", "dask>=2022.05.2", "moto[server]>=4.0.8", "pytest", From 130db9684c15211b3f7a1c1820dc2dc044c1cc29 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 6 Nov 2024 14:13:43 -0500 Subject: [PATCH 06/10] Fix exporting of include directories (#540) The CUDA and cufile include directories should not be exported as absolute paths when installing, only in the build tree. Add a `$<BUILD_INTERFACE:...>` generator expression.
Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/540 --- cpp/CMakeLists.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3a42e44401..8e3302f991 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -125,15 +125,19 @@ add_library(kvikio::kvikio ALIAS kvikio) target_include_directories( kvikio - PUBLIC "$" "${CUDAToolkit_INCLUDE_DIRS}" - "${cuFile_INCLUDE_DIRS}" + PUBLIC "$" INTERFACE "$" ) # Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. target_link_libraries( kvikio - PUBLIC Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} $ + PUBLIC Threads::Threads + BS::thread_pool + ${CMAKE_DL_LIBS} + $ + $> + $> PRIVATE $ ) From b6a2655e0c93560ef18a462f006608af05ba63c7 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 6 Nov 2024 21:49:39 -0600 Subject: [PATCH 07/10] Put a ceiling on cuda-python (#543) Follow-up to #537 Contributes to https://github.com/rapidsai/build-planning/issues/116 That PR used `!=` requirements to skip a particular version of `cuda-python` that `kvikio` was incompatible with. A newer version of `cuda-python` (12.6.2 for CUDA 12, 11.8.5 for CUDA 11) was just released, and it also causes some build issues for RAPIDS libraries: https://github.com/rapidsai/cuvs/pull/445#issuecomment-2461146449 To unblock CI across RAPIDS, this proposes **temporarily** switching to ceilings on the `cuda-python` dependency here. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/kvikio/pull/543 --- conda/environments/all_cuda-118_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-aarch64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 4 ++-- python/kvikio/pyproject.toml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-aarch64.yaml b/conda/environments/all_cuda-118_arch-aarch64.yaml index daeaa4272c..2de4597657 100644 --- a/conda/environments/all_cuda-118_arch-aarch64.yaml +++ b/conda/environments/all_cuda-118_arch-aarch64.yaml @@ -9,7 +9,7 @@ dependencies: - boto3>=1.21.21 - c-compiler - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0,!=11.8.4 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 0a9517431b..8b63a6726b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,7 +9,7 @@ dependencies: - boto3>=1.21.21 - c-compiler - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0,!=11.8.4 +- cuda-python>=11.7.1,<12.0a0,<=11.8.3 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-aarch64.yaml b/conda/environments/all_cuda-125_arch-aarch64.yaml index bbf2c33a6f..87759faf53 100644 --- a/conda/environments/all_cuda-125_arch-aarch64.yaml +++ b/conda/environments/all_cuda-125_arch-aarch64.yaml @@ -10,7 +10,7 @@ dependencies: - c-compiler - cmake>=3.26.4,!=3.30.0 - cuda-nvcc -- cuda-python>=12.0,<13.0a0,!=12.6.1 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml 
b/conda/environments/all_cuda-125_arch-x86_64.yaml index 261017db97..e0a10af655 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -10,7 +10,7 @@ dependencies: - c-compiler - cmake>=3.26.4,!=3.30.0 - cuda-nvcc -- cuda-python>=12.0,<13.0a0,!=12.6.1 +- cuda-python>=12.0,<13.0a0,<=12.6.0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index 7ec7ab6b8a..fe85506ce5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -384,7 +384,7 @@ dependencies: - matrix: cuda: "12.*" packages: - - cuda-python>=12.0,<13.0a0,!=12.6.1 + - cuda-python>=12.0,<13.0a0,<=12.6.0 - matrix: # All CUDA 11 versions packages: - - cuda-python>=11.7.1,<12.0a0,!=11.8.4 + - cuda-python>=11.7.1,<12.0a0,<=11.8.3 diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index 79ad94ecde..b002569646 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -41,7 +41,7 @@ classifiers = [ [project.optional-dependencies] test = [ "boto3>=1.21.21", - "cuda-python>=11.7.1,<12.0a0,!=11.8.4", + "cuda-python>=11.7.1,<12.0a0,<=11.8.3", "dask>=2022.05.2", "moto[server]>=4.0.8", "pytest", From d66e3fa9dd949e59b4860435b3276c823487572b Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 8 Nov 2024 11:39:03 -0600 Subject: [PATCH 08/10] remove WheelHelpers.cmake (#545) Related to https://github.com/rapidsai/build-planning/issues/33 and https://github.com/rapidsai/build-planning/issues/74 The last use of CMake function `install_aliased_imported_targets()` here was removed in #478. This proposes removing the file holding its definition. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/kvikio/pull/545 --- python/kvikio/cmake/CMakeLists.txt | 2 - .../cmake/thirdparty/WheelHelpers.cmake | 59 ------------------- 2 files changed, 61 deletions(-) delete mode 100644 python/kvikio/cmake/thirdparty/WheelHelpers.cmake diff --git a/python/kvikio/cmake/CMakeLists.txt b/python/kvikio/cmake/CMakeLists.txt index fa94bc3f8e..d3882b5ab3 100644 --- a/python/kvikio/cmake/CMakeLists.txt +++ b/python/kvikio/cmake/CMakeLists.txt @@ -13,5 +13,3 @@ # ============================================================================= include(thirdparty/get_nvcomp.cmake) -# Needed for install_aliased_imported_targets -include(thirdparty/WheelHelpers.cmake) diff --git a/python/kvikio/cmake/thirdparty/WheelHelpers.cmake b/python/kvikio/cmake/thirdparty/WheelHelpers.cmake deleted file mode 100644 index 3abe98a064..0000000000 --- a/python/kvikio/cmake/thirdparty/WheelHelpers.cmake +++ /dev/null @@ -1,59 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= -include_guard(GLOBAL) - -# Making libraries available inside wheels by installing the associated targets. 
-function(install_aliased_imported_targets) - list(APPEND CMAKE_MESSAGE_CONTEXT "install_aliased_imported_targets") - - set(options "") - set(one_value "DESTINATION") - set(multi_value "TARGETS") - cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN}) - - message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__DESTINATION}'") - - foreach(target IN LISTS __TARGETS) - - if(NOT TARGET ${target}) - message(VERBOSE "No target named ${target}") - continue() - endif() - - get_target_property(alias_target ${target} ALIASED_TARGET) - if(alias_target) - set(target ${alias_target}) - endif() - - get_target_property(is_imported ${target} IMPORTED) - if(NOT is_imported) - # If the target isn't imported, install it into the wheel - install(TARGETS ${target} DESTINATION ${__DESTINATION}) - message(VERBOSE "install(TARGETS ${target} DESTINATION ${__DESTINATION})") - else() - # If the target is imported, make sure it's global - get_target_property(type ${target} TYPE) - if(${type} STREQUAL "UNKNOWN_LIBRARY") - install(FILES $ DESTINATION ${__DESTINATION}) - message(VERBOSE "install(FILES $ DESTINATION ${__DESTINATION})") - else() - install(IMPORTED_RUNTIME_ARTIFACTS ${target} DESTINATION ${__DESTINATION}) - message( - VERBOSE - "install(IMPORTED_RUNTIME_ARTIFACTS $ DESTINATION ${__DESTINATION})" - ) - endif() - endif() - endforeach() -endfunction() From 0f7c0c7b0f1e01295c5e5e52009eec8e3fa3c6ba Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Sat, 9 Nov 2024 11:40:15 -0500 Subject: [PATCH 09/10] Replace FindcuFile with upstream FindCUDAToolkit support (#542) CMake's `FindCUDAToolkit` has supported cuFile since 3.25. Use this support and remove the custom `FindcuFile` module. 
Authors: - Kyle Edwards (https://github.com/KyleFromNVIDIA) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/kvikio/pull/542 --- cpp/CMakeLists.txt | 101 ++++++++++++++---------- cpp/cmake/Modules/FindcuFile.cmake | 120 ----------------------------- python/kvikio/CMakeLists.txt | 3 - 3 files changed, 61 insertions(+), 163 deletions(-) delete mode 100644 cpp/cmake/Modules/FindcuFile.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8e3302f991..9c1450518b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -64,6 +64,7 @@ if(KvikIO_REMOTE_SUPPORT) include(cmake/thirdparty/get_libcurl.cmake) endif() +set(cuFile_FOUND 0) if(KvikIO_CUDA_SUPPORT) rapids_find_package( CUDAToolkit REQUIRED @@ -71,35 +72,44 @@ if(KvikIO_CUDA_SUPPORT) INSTALL_EXPORT_SET kvikio-exports ) include(cmake/thirdparty/get_nvtx.cmake) -endif() - -rapids_find_package( - cuFile - BUILD_EXPORT_SET kvikio-exports - INSTALL_EXPORT_SET kvikio-exports -) -if(NOT cuFile_FOUND) - message( - WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" - ) -else() - # Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND) - file(READ "${cuFile_INCLUDE_DIRS}/cufile.h" CUFILE_H_STR) - string(FIND "${CUFILE_H_STR}" "cuFileBatchIOSetUp" cuFileBatchIOSetUp_location) - if(cuFileBatchIOSetUp_location EQUAL "-1") - set(cuFile_BATCH_API_FOUND FALSE) + if(NOT TARGET CUDA::cuFile) + message( + WARNING "Cannot find cuFile - KvikIO will still work but won't use GPUDirect Storage (GDS)" + ) else() - set(cuFile_BATCH_API_FOUND TRUE) + set(cuFile_FOUND 1) + + # Check batch and stream API support (cuFile_BATCH_API_FOUND and cuFile_STREAM_API_FOUND) + try_compile( + cuFile_BATCH_API_FOUND SOURCE_FROM_CONTENT + batch.cpp + [[#include <cufile.h> + int main() { + cuFileBatchIOSetUp(nullptr, 0); + return 0; + } + ]] + LINK_LIBRARIES CUDA::cuFile rt ${CMAKE_DL_LIBS} + OUTPUT_VARIABLE batch_output + ) + message(STATUS
"Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") + try_compile( + cuFile_STREAM_API_FOUND SOURCE_FROM_CONTENT + stream.cpp + [[#include <cufile.h> + int main() { + CUfileHandle_t fh; + CUstream stream; + cuFileReadAsync(fh, nullptr, nullptr, nullptr, nullptr, nullptr, stream); + return 0; + } + ]] + LINK_LIBRARIES CUDA::cuFile rt ${CMAKE_DL_LIBS} + OUTPUT_VARIABLE stream_output + ) + message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") endif() - message(STATUS "Found cuFile Batch API: ${cuFile_BATCH_API_FOUND}") - string(FIND "${CUFILE_H_STR}" "cuFileReadAsync" cuFileReadAsync_location) - if(cuFileReadAsync_location EQUAL "-1") - set(cuFile_STREAM_API_FOUND FALSE) - else() - set(cuFile_STREAM_API_FOUND TRUE) - endif() - message(STATUS "Found cuFile Stream API: ${cuFile_STREAM_API_FOUND}") endif() include(cmake/thirdparty/get_thread_pool.cmake) @@ -126,18 +136,14 @@ add_library(kvikio::kvikio ALIAS kvikio) target_include_directories( kvikio PUBLIC "$" + "$:${CUDAToolkit_INCLUDE_DIRS}>>" INTERFACE "$" ) # Notice, we do not link to cuda or cufile since KvikIO opens them manually using `dlopen()`. target_link_libraries( kvikio - PUBLIC Threads::Threads - BS::thread_pool - ${CMAKE_DL_LIBS} - $ - $> - $> + PUBLIC Threads::Threads BS::thread_pool ${CMAKE_DL_LIBS} $ PRIVATE $ ) @@ -200,26 +206,40 @@ install( install(DIRECTORY include/kvikio/ DESTINATION include/kvikio) install(FILES ${KvikIO_BINARY_DIR}/include/kvikio/version_config.hpp DESTINATION include/kvikio) -include("${rapids-cmake-dir}/export/find_package_file.cmake") -rapids_export_find_package_file( - BUILD "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET kvikio-exports -) -rapids_export_find_package_file( - INSTALL "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/FindcuFile.cmake" EXPORT_SET kvikio-exports -) - set(doc_string [=[ Provide targets for KvikIO.
]=] ) +set(final_code_string + " +set(KvikIO_CUDA_SUPPORT [=[${KvikIO_CUDA_SUPPORT}]=]) +set(KvikIO_CUFILE_SUPPORT [=[${cuFile_FOUND}]=]) +" +) +string( + APPEND + final_code_string + [=[ +if(KvikIO_CUDA_SUPPORT) + find_package(CUDAToolkit REQUIRED QUIET) + target_include_directories(kvikio::kvikio INTERFACE ${CUDAToolkit_INCLUDE_DIRS}) + + if(KvikIO_CUFILE_SUPPORT AND NOT TARGET CUDA::cuFile) + message(FATAL_ERROR "Compiled with cuFile support but cuFile not found") + endif() +endif() +]=] +) + rapids_export( INSTALL kvikio EXPORT_SET kvikio-exports GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string + FINAL_CODE_BLOCK final_code_string ) rapids_export( @@ -228,4 +248,5 @@ rapids_export( GLOBAL_TARGETS kvikio NAMESPACE kvikio:: DOCUMENTATION doc_string + FINAL_CODE_BLOCK final_code_string ) diff --git a/cpp/cmake/Modules/FindcuFile.cmake b/cpp/cmake/Modules/FindcuFile.cmake deleted file mode 100644 index 1df4f12d23..0000000000 --- a/cpp/cmake/Modules/FindcuFile.cmake +++ /dev/null @@ -1,120 +0,0 @@ -# ============================================================================= -# Copyright (c) 2020-2022, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. - -#[=======================================================================[.rst: -FindcuFile ----------- - -Find cuFile headers and libraries. - -Imported Targets -^^^^^^^^^^^^^^^^ - -``cufile::cuFile`` - The cuFile library, if found. 
-``cufile::cuFileRDMA`` - The cuFile RDMA library, if found. - -Result Variables -^^^^^^^^^^^^^^^^ - -This will define the following variables in your project: - -``cuFile_FOUND`` - true if (the requested version of) cuFile is available. -``cuFile_VERSION`` - the version of cuFile. -``cuFile_LIBRARIES`` - the libraries to link against to use cuFile. -``cuFileRDMA_LIBRARIES`` - the libraries to link against to use cuFile RDMA. -``cuFile_INCLUDE_DIRS`` - where to find the cuFile headers. -``cuFile_COMPILE_OPTIONS`` - this should be passed to target_compile_options(), if the - target is not used for linking - -#]=======================================================================] - -# use pkg-config to get the directories and then use these values in the FIND_PATH() and -# FIND_LIBRARY() calls -find_package(PkgConfig QUIET) -pkg_check_modules(PKG_cuFile QUIET cuFile) - -set(cuFile_COMPILE_OPTIONS ${PKG_cuFile_CFLAGS_OTHER}) -set(cuFile_VERSION ${PKG_cuFile_VERSION}) - -# Find the location of the CUDA Toolkit -find_package(CUDAToolkit QUIET) -find_path( - cuFile_INCLUDE_DIR - NAMES cufile.h - HINTS ${PKG_cuFile_INCLUDE_DIRS} ${CUDAToolkit_INCLUDE_DIRS} -) - -find_library( - cuFile_LIBRARY - NAMES cufile - HINTS ${PKG_cuFile_LIBRARY_DIRS} ${CUDAToolkit_LIBRARY_DIR} -) - -find_library( - cuFileRDMA_LIBRARY - NAMES cufile_rdma - HINTS ${PKG_cuFile_LIBRARY_DIRS} ${CUDAToolkit_LIBRARY_DIR} -) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args( - cuFile - FOUND_VAR cuFile_FOUND - REQUIRED_VARS cuFile_LIBRARY cuFileRDMA_LIBRARY cuFile_INCLUDE_DIR - VERSION_VAR cuFile_VERSION -) - -if(cuFile_INCLUDE_DIR AND NOT TARGET cufile::cuFile_interface) - add_library(cufile::cuFile_interface INTERFACE IMPORTED GLOBAL) - target_include_directories( - cufile::cuFile_interface INTERFACE "$" - ) - target_compile_options(cufile::cuFile_interface INTERFACE "${cuFile_COMPILE_OPTIONS}") - target_compile_definitions(cufile::cuFile_interface INTERFACE CUFILE_FOUND) 
-endif() - -if(cuFile_FOUND AND NOT TARGET cufile::cuFile) - add_library(cufile::cuFile UNKNOWN IMPORTED GLOBAL) - set_target_properties( - cufile::cuFile - PROPERTIES IMPORTED_LOCATION "${cuFile_LIBRARY}" - INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" - INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" - ) -endif() - -if(cuFile_FOUND AND NOT TARGET cufile::cuFileRDMA) - add_library(cufile::cuFileRDMA UNKNOWN IMPORTED GLOBAL) - set_target_properties( - cufile::cuFileRDMA - PROPERTIES IMPORTED_LOCATION "${cuFileRDMA_LIBRARY}" - INTERFACE_COMPILE_OPTIONS "${cuFile_COMPILE_OPTIONS}" - INTERFACE_INCLUDE_DIRECTORIES "${cuFile_INCLUDE_DIR}" - ) -endif() - -mark_as_advanced(cuFile_LIBRARY cuFileRDMA_LIBRARY cuFile_INCLUDE_DIR) - -if(cuFile_FOUND) - set(cuFile_LIBRARIES ${cuFile_LIBRARY}) - set(cuFileRDMA_LIBRARIES ${cuFileRDMA_LIBRARY}) - set(cuFile_INCLUDE_DIRS ${cuFile_INCLUDE_DIR}) -endif() diff --git a/python/kvikio/CMakeLists.txt b/python/kvikio/CMakeLists.txt index 4db09123d4..6e54a5dff5 100644 --- a/python/kvikio/CMakeLists.txt +++ b/python/kvikio/CMakeLists.txt @@ -28,9 +28,6 @@ project( option(USE_NVCOMP_RUNTIME_WHEEL "Use the nvcomp wheel at runtime instead of the system library" OFF) -# TODO: Should we symlink FindcuFile.cmake into python/cmake? find cuFile -include(../../cpp/cmake/Modules/FindcuFile.cmake) - find_package(kvikio REQUIRED "${RAPIDS_VERSION}") find_package(CUDAToolkit REQUIRED) From 2e5996f2281c312f6a4cbf12f698abc1e0c45a64 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 12 Nov 2024 20:48:57 +0100 Subject: [PATCH 10/10] S3 benchmark: adding cudf-kvikio and cudf-fsspec (#509) Authors: - Mads R. B. 
Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/kvikio/pull/509 --- python/kvikio/kvikio/benchmarks/s3_io.py | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/python/kvikio/kvikio/benchmarks/s3_io.py b/python/kvikio/kvikio/benchmarks/s3_io.py index 7941462650..5e1846a1e5 100644 --- a/python/kvikio/kvikio/benchmarks/s3_io.py +++ b/python/kvikio/kvikio/benchmarks/s3_io.py @@ -99,9 +99,33 @@ def run() -> float: yield run() +def run_cudf(args, kvikio_remote_io: bool): + import cudf + + cudf.set_option("kvikio_remote_io", kvikio_remote_io) + url = f"s3://{args.bucket}/data" + + # Upload data to S3 server + create_client_and_bucket() + data = cupy.random.rand(args.nelem).astype(args.dtype) + df = cudf.DataFrame({"a": data}) + df.to_parquet(url) + + def run() -> float: + t0 = time.perf_counter() + cudf.read_parquet(url) + t1 = time.perf_counter() + return t1 - t0 + + for _ in range(args.nruns): + yield run() + + API = { "cupy": partial(run_numpy_like, xp=cupy), "numpy": partial(run_numpy_like, xp=numpy), + "cudf-kvikio": partial(run_cudf, kvikio_remote_io=True), + "cudf-fsspec": partial(run_cudf, kvikio_remote_io=False), } @@ -135,7 +159,7 @@ def main(args): def pprint_api_res(name, samples): samples = [args.nbytes / s for s in samples] # Convert to throughput mean = statistics.harmonic_mean(samples) if len(samples) > 1 else samples[0] - ret = f"{api}-{name}".ljust(12) + ret = f"{api}-{name}".ljust(18) ret += f"| {format_bytes(mean).rjust(10)}/s".ljust(14) if len(samples) > 1: stdev = statistics.stdev(samples) / mean * 100