From 2e5996f2281c312f6a4cbf12f698abc1e0c45a64 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 12 Nov 2024 20:48:57 +0100 Subject: [PATCH 1/2] S3 benchmark: adding cudf-kvikio and cudf-fsspec (#509) Authors: - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/kvikio/pull/509 --- python/kvikio/kvikio/benchmarks/s3_io.py | 26 +++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/python/kvikio/kvikio/benchmarks/s3_io.py b/python/kvikio/kvikio/benchmarks/s3_io.py index 7941462650..5e1846a1e5 100644 --- a/python/kvikio/kvikio/benchmarks/s3_io.py +++ b/python/kvikio/kvikio/benchmarks/s3_io.py @@ -99,9 +99,33 @@ def run() -> float: yield run() +def run_cudf(args, kvikio_remote_io: bool): + import cudf + + cudf.set_option("kvikio_remote_io", kvikio_remote_io) + url = f"s3://{args.bucket}/data" + + # Upload data to S3 server + create_client_and_bucket() + data = cupy.random.rand(args.nelem).astype(args.dtype) + df = cudf.DataFrame({"a": data}) + df.to_parquet(url) + + def run() -> float: + t0 = time.perf_counter() + cudf.read_parquet(url) + t1 = time.perf_counter() + return t1 - t0 + + for _ in range(args.nruns): + yield run() + + API = { "cupy": partial(run_numpy_like, xp=cupy), "numpy": partial(run_numpy_like, xp=numpy), + "cudf-kvikio": partial(run_cudf, kvikio_remote_io=True), + "cudf-fsspec": partial(run_cudf, kvikio_remote_io=False), } @@ -135,7 +159,7 @@ def main(args): def pprint_api_res(name, samples): samples = [args.nbytes / s for s in samples] # Convert to throughput mean = statistics.harmonic_mean(samples) if len(samples) > 1 else samples[0] - ret = f"{api}-{name}".ljust(12) + ret = f"{api}-{name}".ljust(18) ret += f"| {format_bytes(mean).rjust(10)}/s".ljust(14) if len(samples) > 1: stdev = statistics.stdev(samples) / mean * 100 From d40b7e4541a77842d0af911858b4f0708fe19225 Mon Sep 17 00:00:00 2001 From: James Lamb Date: 
Wed, 13 Nov 2024 09:11:32 -0600 Subject: [PATCH 2/2] enforce wheel size limits, README formatting in CI (#548) Contributes to https://github.com/rapidsai/build-planning/issues/110 Proposes adding 2 types of validation on wheels in CI, to ensure we continue to produce wheels that are suitable for PyPI. * checks on wheel size (compressed), - *to be sure they're under PyPI limits* - *and to prompt discussion on PRs that significantly increase wheel sizes* * checks on README formatting - *to ensure they'll render properly as the PyPI project homepages* - *e.g. like how https://github.com/scikit-learn/scikit-learn/blob/main/README.rst becomes https://pypi.org/project/scikit-learn/* Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/kvikio/pull/548 --- ci/build_wheel_cpp.sh | 2 ++ ci/build_wheel_python.sh | 2 ++ ci/validate_wheel.sh | 21 +++++++++++++++++++++ python/kvikio/pyproject.toml | 8 ++++++++ python/libkvikio/pyproject.toml | 8 ++++++++ 5 files changed, 41 insertions(+) create mode 100755 ci/validate_wheel.sh diff --git a/ci/build_wheel_cpp.sh b/ci/build_wheel_cpp.sh index 0367842a8c..ca27717769 100755 --- a/ci/build_wheel_cpp.sh +++ b/ci/build_wheel_cpp.sh @@ -36,4 +36,6 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* +./ci/validate_wheel.sh ${package_dir} final_dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp "${package_dir}/final_dist" diff --git a/ci/build_wheel_python.sh b/ci/build_wheel_python.sh index 0c709f2fe6..088e8e8e8f 100755 --- a/ci/build_wheel_python.sh +++ b/ci/build_wheel_python.sh @@ -25,4 +25,6 @@ python -m auditwheel repair \ -w ${package_dir}/final_dist \ ${package_dir}/dist/* +./ci/validate_wheel.sh ${package_dir} final_dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 python ${package_dir}/final_dist diff --git 
a/ci/validate_wheel.sh b/ci/validate_wheel.sh new file mode 100755 index 0000000000..5910a5c59f --- /dev/null +++ b/ci/validate_wheel.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +package_dir=$1 +wheel_dir_relative_path=$2 + +cd "${package_dir}" + +rapids-logger "validate packages with 'pydistcheck'" + +pydistcheck \ + --inspect \ + "$(echo ${wheel_dir_relative_path}/*.whl)" + +rapids-logger "validate packages with 'twine'" + +twine check \ + --strict \ + "$(echo ${wheel_dir_relative_path}/*.whl)" diff --git a/python/kvikio/pyproject.toml b/python/kvikio/pyproject.toml index b002569646..cafde598bd 100644 --- a/python/kvikio/pyproject.toml +++ b/python/kvikio/pyproject.toml @@ -139,6 +139,14 @@ provider = "scikit_build_core.metadata.regex" input = "kvikio/VERSION" regex = "(?P<value>.*)" +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M' + [tool.pytest.ini_options] filterwarnings = [ "error", diff --git a/python/libkvikio/pyproject.toml b/python/libkvikio/pyproject.toml index 3be7cbc0ae..9504cb3755 100644 --- a/python/libkvikio/pyproject.toml +++ b/python/libkvikio/pyproject.toml @@ -59,3 +59,11 @@ requires = [ [project.entry-points."cmake.prefix"] libkvikio = "libkvikio" + +[tool.pydistcheck] +select = [ + "distro-too-large-compressed", +] + +# PyPI limit is 100 MiB, fail CI before we get too close to that +max_allowed_size_compressed = '75M'