From 55d678dcf34f78ab2f1eacbc39d1f4ae78b9c316 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Thu, 16 Nov 2023 00:30:55 -0500 Subject: [PATCH] bump CUDA version to 12.2 for pre-built packages (#2960) TensorFlow 2.15 bumps the CUDA version to 12.2. See https://github.com/tensorflow/tensorflow/commit/3de44168950a5972ba4cfa7e3c6cbf4cffa67fe6. --------- Signed-off-by: Jinzhe Zeng --- .github/workflows/build_cc.yml | 2 +- .github/workflows/test_cuda.yml | 13 +++++++------ backend/find_tensorflow.py | 9 +++++++++ doc/install/easy-install-dev.md | 4 ++-- doc/install/easy-install.md | 8 ++++---- doc/install/install-from-c-library.md | 2 +- pyproject.toml | 9 ++++----- source/install/docker/Dockerfile | 2 +- source/install/docker_package_c.sh | 2 +- 9 files changed, 30 insertions(+), 21 deletions(-) diff --git a/.github/workflows/build_cc.yml b/.github/workflows/build_cc.yml index 964a11ce37..e6377f4fab 100644 --- a/.github/workflows/build_cc.yml +++ b/.github/workflows/build_cc.yml @@ -37,7 +37,7 @@ jobs: wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ - && sudo apt-get -y install cuda-cudart-dev-12-0 cuda-nvcc-12-0 + && sudo apt-get -y install cuda-cudart-dev-12-2 cuda-nvcc-12-2 if: matrix.variant == 'cuda120' env: DEBIAN_FRONTEND: noninteractive diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 5e754226ae..d8eddaa44f 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -11,7 +11,7 @@ jobs: runs-on: nvidia # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 + image: nvidia/cuda:12.2.0-devel-ubuntu22.04 options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: @@ -31,16 +31,17 @@ jobs: wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ - && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8 + && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2 if: false # skip as we use nvidia image - name: Set PyPI mirror for Aliyun cloud machine run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" + - run: python -m pip install "tensorflow>=2.15.0rc0" + - run: python -m pip install -v -e .[gpu,test,lmp,cu12] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: DP_BUILD_TESTING: 1 DP_VARIANT: cuda - CUDA_PATH: /usr/local/cuda-11.8 + CUDA_PATH: /usr/local/cuda-12.2 - run: dp --version - run: python -m pytest -s --cov=deepmd --cov=deepmd_utils source/tests --durations=0 - run: source/install/test_cc_local.sh @@ -52,7 +53,7 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - CUDA_PATH: /usr/local/cuda-11.8 + CUDA_PATH: /usr/local/cuda-12.2 - run: | export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH @@ -63,7 +64,7 @@ jobs: TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - CUDA_PATH: /usr/local/cuda-11.8 + CUDA_PATH: /usr/local/cuda-12.2 - uses: codecov/codecov-action@v3 with: gcov: true diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index 6d7ce5087d..fbbe0e56c0 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -87,6 +87,13 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # TypeError if submodule_search_locations are None # IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): + if os.environ.get("CIBUILDWHEEL", "0") == "1": + # CUDA 12.2 + requires.extend( + [ + "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", + ] + ) requires.extend(get_tf_requirement()["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None @@ -129,6 +136,8 @@ def get_tf_requirement(tf_version: str = "") -> dict: "cpu": [ "tensorflow-cpu; platform_machine!='aarch64' and (platform_machine!='arm64' or platform_system != 'Darwin')", "tensorflow; platform_machine=='aarch64' or (platform_machine=='arm64' and platform_system == 'Darwin')", + # https://github.com/tensorflow/tensorflow/issues/61830 + "tensorflow-cpu<2.15; platform_system=='Windows'", *extra_requires, ], "gpu": [ diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index dd943c37af..f3d4fa1a32 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -17,10 +17,10 @@ docker pull ghcr.io/deepmodeling/deepmd-kit:devel Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: ```sh -pip install -U --pre deepmd-kit[gpu,cu11,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple +pip install -U --pre deepmd-kit[gpu,cu12,lmp] --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple ``` -`cu11` and `lmp` are optional, which is the same as the stable version. +`cu12` and `lmp` are optional, which is the same as the stable version. ## Download pre-compiled C Library diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index f033310f8f..7bd632694b 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -84,13 +84,13 @@ docker pull deepmodeling/dpmdkit-rocm:dp2.0.3-rocm4.5.2-tf2.6-lmp29Sep2021 ## Install Python interface with pip -If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 11 supported: +If you have no existing TensorFlow installed, you can use `pip` to install the pre-built package of the Python interface with CUDA 12 supported: ```bash -pip install deepmd-kit[gpu,cu11] +pip install deepmd-kit[gpu,cu12] ``` -`cu11` is required only when CUDA Toolkit and cuDNN were not installed. +`cu12` is required only when CUDA Toolkit and cuDNN were not installed. Or install the CPU version without CUDA supported: ```bash @@ -99,7 +99,7 @@ pip install deepmd-kit[cpu] [The LAMMPS module](../third-party/lammps-command.md) and [the i-Pi driver](../third-party/ipi.md) are only provided on Linux and macOS. To install LAMMPS and/or i-Pi, add `lmp` and/or `ipi` to extras: ```bash -pip install deepmd-kit[gpu,cu11,lmp,ipi] +pip install deepmd-kit[gpu,cu12,lmp,ipi] ``` MPICH is required for parallel running. (The macOS arm64 package doesn't support MPI yet.) diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md index 343446888c..04b71234db 100644 --- a/doc/install/install-from-c-library.md +++ b/doc/install/install-from-c-library.md @@ -2,7 +2,7 @@ DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own. -The library is built in Linux (GLIBC 2.17) with CUDA 11.8. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. +The library is built in Linux (GLIBC 2.17) with CUDA 12.2. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. ## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch diff --git a/pyproject.toml b/pyproject.toml index 4ba3bb81e1..e9ee563960 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ classifiers = [ "Programming Language :: C", "Programming Language :: C++", "Programming Language :: Python :: 3 :: Only", - "Environment :: GPU :: NVIDIA CUDA :: 11.8", + "Environment :: GPU :: NVIDIA CUDA :: 12 :: 12.2", "Intended Audience :: Science/Research", "Programming Language :: Python :: 3.7", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", @@ -133,9 +133,8 @@ test-command = [ test-extras = ["cpu", "test", "lmp", "ipi"] build = ["cp310-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] -# TODO: bump to "latest" tag when CUDA supports GCC 12 -manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" -manylinux-aarch64-image = "quay.io/pypa/manylinux_2_28_aarch64:2022-11-19-1b19e81" +manylinux-x86_64-image = "manylinux_2_28" +manylinux-aarch64-image = "manylinux_2_28" [tool.cibuildwheel.macos] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1" } @@ -152,7 +151,7 @@ repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 environment-pass = ["CIBW_BUILD", "DP_VARIANT"] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ - """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-11-8 cuda-cudart-devel-11-8; fi }""", + """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-12-2 cuda-cudart-devel-12-2; fi }""", "yum install -y mpich-devel", ] diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile index c5fa878e2a..9ac905dcd0 100644 --- a/source/install/docker/Dockerfile +++ b/source/install/docker/Dockerfile @@ -4,7 +4,7 @@ RUN python -m venv /opt/deepmd-kit ENV PATH="/opt/deepmd-kit/bin:$PATH" # Install package COPY dist /dist -RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu11,lmp,ipi]" \ +RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu12,lmp,ipi]" \ && dp -h \ && lmp -h \ && dp_ipi \ diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index d6fb269acd..75f2d1138b 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -3,7 +3,7 @@ set -e SCRIPT_PATH=$(dirname $(realpath -s $0)) docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ - tensorflow/build:2.13-python3.11 \ + tensorflow/build:2.15-python3.11 \ /bin/sh -c "pip install tensorflow cmake \ && cd /root/deepmd-kit/source/install \ && CC=/dt9/usr/bin/gcc \