diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh
index 0367842a8c..ca27717769 100755
--- a/ci/build_wheel_cpp.sh
+++ b/ci/build_wheel_cpp.sh
@@ -36,4 +36,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
+./ci/validate_wheel.sh ${package_dir} final_dist
+
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist"
diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh
index 0c709f2fe6..088e8e8e8f 100755
--- a/ci/build_wheel_python.sh
+++ b/ci/build_wheel_python.sh
@@ -25,4 +25,6 @@ python -m auditwheel repair \
     -w ${package_dir}/final_dist \
     ${package_dir}/dist/*
 
+./ci/validate_wheel.sh ${package_dir} final_dist
+
 RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist
diff --git a/ci/validate_wheel.sh b/ci/validate_wheel.sh
new file mode 100755
index 0000000000..5910a5c59f
--- /dev/null
+++ b/ci/validate_wheel.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+package_dir=$1
+wheel_dir_relative_path=$2
+
+cd "${package_dir}"
+
+rapids-logger "validate packages with 'pydistcheck'"
+
+pydistcheck \
+    --inspect \
+    "$(echo ${wheel_dir_relative_path}/*.whl)"
+
+rapids-logger "validate packages with 'twine'"
+
+twine check \
+    --strict \
+    "$(echo ${wheel_dir_relative_path}/*.whl)"
diff --git a/python/kvikio/kvikio/benchmarks/s3_io.py b/python/kvikio/kvikio/benchmarks/s3_io.py
index 7941462650..5e1846a1e5 100644
--- a/python/kvikio/kvikio/benchmarks/s3_io.py
+++ b/python/kvikio/kvikio/benchmarks/s3_io.py
@@ -99,9 +99,33 @@ def run() -> float:
         yield run()
 
 
+def run_cudf(args, kvikio_remote_io: bool):
+    import cudf
+
+    cudf.set_option("kvikio_remote_io", kvikio_remote_io)
+    url = f"s3://{args.bucket}/data"
+
+    # Upload data to S3 server
+    create_client_and_bucket()
+    data = cupy.random.rand(args.nelem).astype(args.dtype)
+    df = cudf.DataFrame({"a": data})
+    df.to_parquet(url)
+
+    def run() -> float:
+        t0 = time.perf_counter()
+        cudf.read_parquet(url)
+        t1 = time.perf_counter()
+        return t1 - t0
+
+    for _ in range(args.nruns):
+        yield run()
+
+
 API = {
     "cupy": partial(run_numpy_like, xp=cupy),
     "numpy": partial(run_numpy_like, xp=numpy),
+    "cudf-kvikio": partial(run_cudf, kvikio_remote_io=True),
+    "cudf-fsspec": partial(run_cudf, kvikio_remote_io=False),
 }
 
 
@@ -135,7 +159,7 @@ def main(args):
     def pprint_api_res(name, samples):
         samples = [args.nbytes / s for s in samples]  # Convert to throughput
         mean = statistics.harmonic_mean(samples) if len(samples) > 1 else samples[0]
-        ret = f"{api}-{name}".ljust(12)
+        ret = f"{api}-{name}".ljust(18)
         ret += f"| {format_bytes(mean).rjust(10)}/s".ljust(14)
         if len(samples) > 1:
             stdev = statistics.stdev(samples) / mean * 100
diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml
index b002569646..cafde598bd 100644
--- a/python/kvikio/pyproject.toml
+++ b/python/kvikio/pyproject.toml
@@ -139,6 +139,14 @@ provider = "scikit_build_core.metadata.regex"
 input = "kvikio/VERSION"
 regex = "(?P<value>.*)"
 
+[tool.pydistcheck]
+select = [
+    "distro-too-large-compressed",
+]
+
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
+
 [tool.pytest.ini_options]
 filterwarnings = [
     "error",
diff --git a/python/libkvikio/pyproject.toml b/python/libkvikio/pyproject.toml
index 3be7cbc0ae..9504cb3755 100644
--- a/python/libkvikio/pyproject.toml
+++ b/python/libkvikio/pyproject.toml
@@ -59,3 +59,11 @@ requires = [
 
 [project.entry-points."cmake.prefix"]
 libkvikio = "libkvikio"
+
+[tool.pydistcheck]
+select = [
+    "distro-too-large-compressed",
+]
+
+# PyPI limit is 100 MiB, fail CI before we get too close to that
+max_allowed_size_compressed = '75M'
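
For context, a minimal sketch (not part of the diff above) of how the new wheel-validation step could be reproduced by hand outside CI. It assumes pydistcheck and twine are installed from PyPI, that a wheel has already been built into dist/, and that pydistcheck picks up the [tool.pydistcheck] settings added above (including the 75M compressed-size limit) from the pyproject.toml in the working directory; rapids-logger is a RAPIDS CI helper and is replaced here with plain echo.

#!/bin/bash
set -euo pipefail

# Install the same validation tools the CI script uses.
pip install pydistcheck twine

echo "validate packages with 'pydistcheck'"
# Run from the package directory so pydistcheck can read the
# [tool.pydistcheck] table (select + max_allowed_size_compressed)
# from this package's pyproject.toml.
pydistcheck --inspect dist/*.whl

echo "validate packages with 'twine'"
# twine check validates the wheel's packaging metadata.
twine check --strict dist/*.whl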