From c03416d0be874ebda9fc831d001817c3eec159a8 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sun, 26 Nov 2023 04:42:41 -0500 Subject: [PATCH] add cu11 prebuilt packages (#3002) Signed-off-by: Jinzhe Zeng --- .github/workflows/build_wheel.yml | 24 ++++++++++++++++++++++-- .github/workflows/package_c.yml | 18 ++++++++++++++++-- backend/find_tensorflow.py | 23 +++++++++++++++++------ doc/install/easy-install-dev.md | 4 +++- doc/install/easy-install.md | 6 ++++++ doc/install/install-from-c-library.md | 2 +- pyproject.toml | 14 +++++++++++--- source/install/docker/Dockerfile | 8 +++++--- source/install/docker_package_c.sh | 4 ++-- 9 files changed, 83 insertions(+), 20 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 2ff3ade215..c58a5925bf 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -33,6 +33,13 @@ jobs: python: 311 platform_id: manylinux_x86_64 dp_variant: cuda + cuda_version: 12.2 + - os: ubuntu-latest + python: 311 + platform_id: manylinux_x86_64 + dp_variant: cuda + cuda_version: 11.8 + dp_pkg_name: deepmd-kit-cu11 # macos-x86-64 - os: macos-latest python: 311 @@ -68,6 +75,8 @@ jobs: CIBW_ARCHS: all CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} DP_VARIANT: ${{ matrix.dp_variant }} + CUDA_VERSION: ${{ matrix.cuda_version }} + DP_PKG_NAME: ${{ matrix.dp_pkg_name }} - uses: actions/upload-artifact@v3 with: path: ./wheelhouse/*.whl @@ -109,6 +118,14 @@ jobs: # use the already built wheels to build docker needs: [build_wheels] runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + include: + - variant: "" + cuda_version: "12" + - variant: "_cu11" + cuda_version: "11" steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v3 @@ -133,8 +150,11 @@ jobs: with: context: source/install/docker push: ${{ github.repository_owner == 'deepmodeling' && github.event_name == 'push' }} - tags: ${{ steps.meta.outputs.tags }} + tags: ${{ 
steps.meta.outputs.tags }}${{ matrix.variant }} labels: ${{ steps.meta.outputs.labels }} + build-args: | + VARIANT=${{ matrix.variant }} + CUDA_VERSION=${{ matrix.cuda_version }} build_pypi_index: needs: [build_wheels, build_sdist] @@ -173,7 +193,7 @@ jobs: pass: name: Pass testing build wheels - needs: [build_wheels, build_sdist] + needs: [build_wheels, build_sdist, build_docker, build_pypi_index] runs-on: ubuntu-latest if: always() steps: diff --git a/.github/workflows/package_c.yml b/.github/workflows/package_c.yml index ada205be00..2b5f74b97d 100644 --- a/.github/workflows/package_c.yml +++ b/.github/workflows/package_c.yml @@ -8,23 +8,37 @@ jobs: build_c: name: Build C library runs-on: ubuntu-22.04 + strategy: + matrix: + include: + - tensorflow_build_version: "2.15" + tensorflow_version: "" + filename: libdeepmd_c.tar.gz + - tensorflow_build_version: "2.14" + tensorflow_version: ">=2.5.0rc0,<2.15" + filename: libdeepmd_c_cu11.tar.gz steps: - uses: actions/checkout@v4 - name: Package C library run: ./source/install/docker_package_c.sh + env: + TENSORFLOW_VERSION: ${{ matrix.tensorflow_version }} + TENSORFLOW_BUILD_VERSION: ${{ matrix.tensorflow_build_version }} + - run: cp libdeepmd_c.tar.gz ${{ matrix.filename }} + if: matrix.filename != 'libdeepmd_c.tar.gz' # for download and debug - name: Upload artifact uses: actions/upload-artifact@v3 with: name: libdeepmd_c - path: ./libdeepmd_c.tar.gz + path: ${{ matrix.filename }} - name: Test C library run: ./source/install/docker_test_package_c.sh - name: Release uses: softprops/action-gh-release@v1 if: startsWith(github.ref, 'refs/tags/') with: - files: libdeepmd_c.tar.gz + files: ${{ matrix.filename }} test_c: name: Test building from C library needs: [build_c] diff --git a/backend/find_tensorflow.py b/backend/find_tensorflow.py index fbbe0e56c0..08a73f7252 100644 --- a/backend/find_tensorflow.py +++ b/backend/find_tensorflow.py @@ -88,12 +88,23 @@ def find_tensorflow() -> Tuple[Optional[str], List[str]]: # 
IndexError if submodule_search_locations is an empty list except (AttributeError, TypeError, IndexError): if os.environ.get("CIBUILDWHEEL", "0") == "1": - # CUDA 12.2 - requires.extend( - [ - "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", - ] - ) + cuda_version = os.environ.get("CUDA_VERSION", "12.2") + if cuda_version == "" or cuda_version in SpecifierSet(">=12,<13"): + # CUDA 12.2 + requires.extend( + [ + "tensorflow-cpu>=2.15.0rc0; platform_machine=='x86_64' and platform_system == 'Linux'", + ] + ) + elif cuda_version in SpecifierSet(">=11,<12"): + # CUDA 11.8 + requires.extend( + [ + "tensorflow-cpu>=2.5.0rc0,<2.15; platform_machine=='x86_64' and platform_system == 'Linux'", + ] + ) + else: + raise RuntimeError("Unsupported CUDA version") requires.extend(get_tf_requirement()["cpu"]) # setuptools will re-find tensorflow after installing setup_requires tf_install_dir = None diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md index f3d4fa1a32..6fd9171730 100644 --- a/doc/install/easy-install-dev.md +++ b/doc/install/easy-install-dev.md @@ -6,12 +6,14 @@ The following is the way to install the pre-compiled packages without [building ## Install with docker -The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA support: +The [`devel` tag](https://github.com/deepmodeling/deepmd-kit/pkgs/container/deepmd-kit/131827568?tag=devel) is used to mark the latest development version with CUDA 12.2 support: ```bash docker pull ghcr.io/deepmodeling/deepmd-kit:devel ``` +For CUDA 11.8 support, use the `devel_cu11` tag. 
+ ## Install with pip Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`: diff --git a/doc/install/easy-install.md b/doc/install/easy-install.md index 741ef632a8..3bc1f4b944 100644 --- a/doc/install/easy-install.md +++ b/doc/install/easy-install.md @@ -92,6 +92,12 @@ pip install deepmd-kit[gpu,cu12] `cu12` is required only when CUDA Toolkit and cuDNN were not installed. +To install the package built against CUDA 11.8, use + +```bash +pip install deepmd-kit-cu11[gpu,cu11] +``` + Or install the CPU version without CUDA supported: ```bash pip install deepmd-kit[cpu] diff --git a/doc/install/install-from-c-library.md b/doc/install/install-from-c-library.md index 04b71234db..eb89538277 100644 --- a/doc/install/install-from-c-library.md +++ b/doc/install/install-from-c-library.md @@ -2,7 +2,7 @@ DeePMD-kit provides pre-compiled C library package (`libdeepmd_c.tar.gz`) in each [release](https://github.com/deepmodeling/deepmd-kit/releases). It can be used to build the [LAMMPS plugin](./install-lammps.md) and [GROMACS patch](./install-gromacs.md), as well as many [third-party software packages](../third-party/out-of-deepmd-kit.md), without building TensorFlow and DeePMD-kit on one's own. -The library is built in Linux (GLIBC 2.17) with CUDA 12.2. It's noted that this package does not contain CUDA Toolkit and cuDNN, so one needs to download them from the NVIDIA website. +The library is built in Linux (GLIBC 2.17) with CUDA 12.2 (`libdeepmd_c.tar.gz`) or CUDA 11.8 (`libdeepmd_c_cu11.tar.gz`). Note that this package does not contain the CUDA Toolkit or cuDNN, so one needs to download them from the NVIDIA website. 
## Use Pre-compiled C Library to build the LAMMPS plugin and GROMACS patch diff --git a/pyproject.toml b/pyproject.toml index e9ee563960..04bcc69f75 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -133,7 +133,9 @@ test-command = [ test-extras = ["cpu", "test", "lmp", "ipi"] build = ["cp310-*"] skip = ["*-win32", "*-manylinux_i686", "*-musllinux*"] -manylinux-x86_64-image = "manylinux_2_28" +# TODO: uncomment when CUDA 11 is deprecated +# manylinux-x86_64-image = "manylinux_2_28" +manylinux-x86_64-image = "quay.io/pypa/manylinux_2_28_x86_64:2022-11-19-1b19e81" manylinux-aarch64-image = "manylinux_2_28" [tool.cibuildwheel.macos] @@ -148,10 +150,16 @@ repair-wheel-command = """if [[ "$CIBW_BUILD" == *macosx_arm64* ]]; then rm -rf [tool.cibuildwheel.linux] repair-wheel-command = "auditwheel repair --exclude libtensorflow_framework.so.2 --exclude libtensorflow_framework.so.1 --exclude libtensorflow_framework.so --exclude _pywrap_tensorflow_internal.so --exclude libtensorflow_cc.so.2 -w {dest_dir} {wheel}" -environment-pass = ["CIBW_BUILD", "DP_VARIANT"] +environment-pass = [ + "CIBW_BUILD", + "DP_VARIANT", + "CUDA_VERSION", + "DP_PKG_NAME", +] environment = { PIP_PREFER_BINARY="1", DP_LAMMPS_VERSION="stable_2Aug2023_update1", DP_ENABLE_IPI="1", MPI_HOME="/usr/lib64/mpich", PATH="/usr/lib64/mpich/bin:$PATH" } before-all = [ - """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-12-2 cuda-cudart-devel-12-2; fi }""", + """if [ ! 
-z "${DP_PKG_NAME}" ]; then sed -i "s/name = \\"deepmd-kit\\"/name = \\"${DP_PKG_NAME}\\"/g" pyproject.toml; fi""", + """{ if [ "$(uname -m)" = "x86_64" ] ; then yum config-manager --add-repo http://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && yum install -y cuda-nvcc-${CUDA_VERSION/./-} cuda-cudart-devel-${CUDA_VERSION/./-}; fi }""", "yum install -y mpich-devel", ] diff --git a/source/install/docker/Dockerfile b/source/install/docker/Dockerfile index 9ac905dcd0..26b7be9f19 100644 --- a/source/install/docker/Dockerfile +++ b/source/install/docker/Dockerfile @@ -1,16 +1,18 @@ -FROM python:3.10 AS compile-image +FROM python:3.11 AS compile-image +ARG VARIANT="" +ARG CUDA_VERSION="12" RUN python -m venv /opt/deepmd-kit # Make sure we use the virtualenv ENV PATH="/opt/deepmd-kit/bin:$PATH" # Install package COPY dist /dist -RUN pip install "$(ls /dist/deepmd_kit-*manylinux*_x86_64.whl)[gpu,cu12,lmp,ipi]" \ +RUN pip install "$(ls /dist/deepmd_kit${VARIANT}-*manylinux*_x86_64.whl)[gpu,cu${CUDA_VERSION},lmp,ipi]" \ && dp -h \ && lmp -h \ && dp_ipi \ && python -m deepmd -h -FROM python:3.10 AS build-image +FROM python:3.11 AS build-image COPY --from=compile-image /opt/deepmd-kit /opt/deepmd-kit ENV PATH="/opt/deepmd-kit/bin:$PATH" CMD ["/bin/bash"] diff --git a/source/install/docker_package_c.sh b/source/install/docker_package_c.sh index 75f2d1138b..544c175a0a 100755 --- a/source/install/docker_package_c.sh +++ b/source/install/docker_package_c.sh @@ -3,8 +3,8 @@ set -e SCRIPT_PATH=$(dirname $(realpath -s $0)) docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \ - tensorflow/build:2.15-python3.11 \ - /bin/sh -c "pip install tensorflow cmake \ + tensorflow/build:${TENSORFLOW_BUILD_VERSION:-2.15}-python3.11 \ + /bin/sh -c "pip install \"tensorflow${TENSORFLOW_VERSION}\" cmake \ && cd /root/deepmd-kit/source/install \ && CC=/dt9/usr/bin/gcc \ CXX=/dt9/usr/bin/g++ \