Skip to content

Commit

Permalink
add test cuda workflow (#2848)
Browse files Browse the repository at this point in the history
Signed-off-by: Jinzhe Zeng <[email protected]>
  • Loading branch information
njzjz authored Sep 21, 2023
1 parent 80b2195 commit 544875e
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 5 deletions.
18 changes: 18 additions & 0 deletions .github/workflows/remove_test_cuda_label.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Removes the `Test CUDA` label from a pull request as soon as it is applied,
# so the label can be applied again to re-trigger the workflow without
# removing it by hand first.
name: Test CUDA
on:
  pull_request_target:
    types: [labeled]
jobs:
  remove_label:
    # Only act in the upstream organization and only for the `Test CUDA` label.
    if: >-
      github.repository_owner == 'deepmodeling'
      && github.event.label.name == 'Test CUDA'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      pull-requests: write
    steps:
      - uses: actions-ecosystem/action-remove-labels@v1
        with:
          labels: 'Test CUDA'
          number: ${{ github.event.pull_request.number }}
60 changes: 60 additions & 0 deletions .github/workflows/test_cuda.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Runs the Python and C++ test suites on the `nvidia` runner with CUDA 11.8.
# Triggered manually (workflow_dispatch) or when a pull request receives the
# `Test CUDA` label.
on:
  # manually trigger
  workflow_dispatch:
  pull_request:
    types:
      - "labeled"
name: Test CUDA
jobs:
  test_cuda:
    name: Test Python and C++ on CUDA
    runs-on: nvidia
    # The parentheses matter: `&&` binds tighter than `||`, so without them a
    # manual dispatch would bypass the repository-owner check.
    if: github.repository_owner == 'deepmodeling' && (github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch')
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'
          cache: 'pip'
      - name: Setup MPI
        uses: mpi4py/setup-mpi@v1
        with:
          mpi: mpich
      - uses: lukka/get-cmake@latest
      # Install the CUDA 11.8 toolkit and a matching cuDNN from NVIDIA's apt
      # repository. The cuDNN version pattern is quoted so the shell passes it
      # to apt-get verbatim instead of attempting pathname expansion.
      - run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \
          && sudo dpkg -i cuda-keyring_1.0-1_all.deb \
          && sudo apt-get update \
          && sudo apt-get -y install cuda-11-8 'libcudnn8=8.9.5.*-1+cuda11.8'
      - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
      # Editable install with the CUDA variant; the extras list is quoted
      # because `[...]` is a shell glob pattern.
      - run: pip install -v -e ".[gpu,test,lmp,cu11]" "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip"
        env:
          DP_BUILD_TESTING: 1
          DP_VARIANT: cuda
          CUDA_PATH: /usr/local/cuda-11.8
      - run: dp --version
      # Python tests
      - run: pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0
      # C++ build and tests; DP_VARIANT=cuda makes the script pass
      # -DUSE_CUDA_TOOLKIT=TRUE to CMake.
      - run: source/install/test_cc_local.sh
        env:
          OMP_NUM_THREADS: 1
          TF_INTRA_OP_PARALLELISM_THREADS: 1
          TF_INTER_OP_PARALLELISM_THREADS: 1
          LMP_CXX11_ABI_0: 1
          CMAKE_GENERATOR: Ninja
          DP_VARIANT: cuda
          DP_USE_MPICH2: 1
          CUDA_PATH: /usr/local/cuda-11.8
      # LAMMPS and i-PI tests run against the artifacts installed under
      # dp_test by the previous step.
      - run: |
          export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH
          export PATH=${{ github.workspace }}/dp_test/bin:$PATH
          pytest -s --cov=deepmd source/lmp/tests
          pytest -s --cov=deepmd source/ipi/tests
        env:
          OMP_NUM_THREADS: 1
          TF_INTRA_OP_PARALLELISM_THREADS: 1
          TF_INTER_OP_PARALLELISM_THREADS: 1
          LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp
          CUDA_PATH: /usr/local/cuda-11.8
      - uses: codecov/codecov-action@v3
        with:
          gcov: true
15 changes: 15 additions & 0 deletions doc/development/cicd.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CI/CD

<!-- TODO: To be written... -->

## CI

<!-- TODO: To be written... -->

### Test CUDA

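The `Test CUDA` workflow runs the tests on a self-hosted runner equipped with an NVIDIA GPU. It is not triggered automatically for every PR. A developer with permission to manage labels can apply the `Test CUDA` label to a PR to trigger it. The label is removed automatically once applied, so the workflow can be re-triggered by applying the label again.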

<!-- ## CD -->

<!-- TODO: To be written... -->
8 changes: 5 additions & 3 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
.. toctree::
:maxdepth: 2
:caption: Tutorial
:glob:

Tutorials <https://tutorials.deepmodeling.com/>
Publications <https://deepmodeling.com/blog/papers/deepmd-kit/>
Expand All @@ -62,9 +61,12 @@ DeePMD-kit is a package written in Python/C++, designed to minimize the effort r
.. toctree::
:maxdepth: 5
:caption: Developer Guide
:glob:

development/*
development/cmake
development/create-a-model
development/type-embedding
development/coding-conventions
development/cicd
api_py/api_py
api_op
API_CC/api_cc
Expand Down
8 changes: 7 additions & 1 deletion source/install/test_cc.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
set -e

# Pick the CMake toolkit switch that matches the requested build variant.
# Any other DP_VARIANT value leaves CUDA_ARGS untouched.
case "$DP_VARIANT" in
cuda) CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE" ;;
rocm) CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE" ;;
esac

#------------------

SCRIPT_PATH=$(dirname $(realpath -s $0))
Expand All @@ -11,7 +17,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
mkdir -p ${BUILD_TMP_DIR}
cd ${BUILD_TMP_DIR}
cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ..
cmake -DINSTALL_TENSORFLOW=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DTENSORFLOW_ROOT=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ${CUDA_ARGS} ..
cmake --build . -j${NPROC}
cmake --install .
ctest --output-on-failure
8 changes: 7 additions & 1 deletion source/install/test_cc_local.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
set -e

# Pick the CMake toolkit switch that matches the requested build variant.
# Any other DP_VARIANT value leaves CUDA_ARGS untouched.
case "$DP_VARIANT" in
cuda) CUDA_ARGS="-DUSE_CUDA_TOOLKIT=TRUE" ;;
rocm) CUDA_ARGS="-DUSE_ROCM_TOOLKIT=TRUE" ;;
esac

#------------------

SCRIPT_PATH=$(dirname $(realpath -s $0))
Expand All @@ -12,7 +18,7 @@ INSTALL_PREFIX=${SCRIPT_PATH}/../../dp_test
BUILD_TMP_DIR=${SCRIPT_PATH}/../build_tests
mkdir -p ${BUILD_TMP_DIR}
cd ${BUILD_TMP_DIR}
cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ..
cmake -DINSTALL_TENSORFLOW=FALSE -DUSE_TF_PYTHON_LIBS=TRUE -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DBUILD_TESTING:BOOL=TRUE -DLAMMPS_VERSION=stable_2Aug2023 ${CUDA_ARGS} ..
cmake --build . -j${NPROC}
cmake --install .
ctest --output-on-failure
5 changes: 5 additions & 0 deletions source/lmp/plugin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ if(DEFINED LAMMPS_SOURCE_ROOT OR DEFINED LAMMPS_VERSION)

target_include_directories(lammps_interface INTERFACE ${LAMMPS_HEADER_DIR})

# Opt-in via the DP_USE_MPICH2 environment variable: when it is exactly "1",
# set MPI_EXECUTABLE_SUFFIX to ".mpich" before MPI is searched for.
# NOTE(review): presumably this makes FindMPI prefer the ".mpich"-suffixed
# wrapper executables on systems with several MPI flavours -- confirm against
# the FindMPI documentation.
if("$ENV{DP_USE_MPICH2}" STREQUAL "1")
# See https://stackoverflow.com/a/47976518/9567349
set(MPI_EXECUTABLE_SUFFIX ".mpich")
endif()

find_package(MPI)
if(MPI_FOUND)
set(LAMMPS_MPI_INCLUDE_DIRS ${MPI_CXX_INCLUDE_DIRS})
Expand Down

0 comments on commit 544875e

Please sign in to comment.