From 5a341844cb0d8d5935bea4d9cec252feee4b7310 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 02:52:16 -0400 Subject: [PATCH 1/9] run Test CUDA in container See https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index adc20c27a9..2850290d85 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -9,6 +9,10 @@ jobs: test_cuda: name: Test Python and C++ on CUDA runs-on: nvidia + # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 + container: + image: python:3.11 + options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: - uses: actions/checkout@v4 From 3354e821b3a79222197ae43d44def7387d7b208d Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 03:18:51 -0400 Subject: [PATCH 2/9] Update test_cuda.yml Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 2850290d85..8c2569928f 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -15,11 +15,13 @@ jobs: options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: + - name: Make sudo work + run: apt-get update && apt-get install -y sudo - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.11' + # cache: 'pip' - name: Setup MPI uses: mpi4py/setup-mpi@v1 with: @@ -30,6 +32,8 @@ jobs: && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8 + - name: Set PyPI mirror for Aliyun cloud machine + run: python -m pip config --user set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - run: pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip" env: From fe54bdeef5974dbd6bf3c9dca7c799ef590cfb30 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 03:37:38 -0400 Subject: [PATCH 3/9] use ubuntu image Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 8c2569928f..a57d57763b 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -11,17 +11,17 @@ jobs: runs-on: nvidia # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: python:3.11 + image: ubuntu:22.04 options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: - name: Make sudo work run: apt-get update && apt-get install -y sudo - uses: actions/checkout@v4 - # - uses: actions/setup-python@v4 - # with: - # python-version: '3.11' - # cache: 'pip' + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + # cache: 'pip' - name: Setup MPI uses: mpi4py/setup-mpi@v1 with: From b748719b0bba5c5365483f7ed4d52a2c2abee811 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 03:44:03 -0400 Subject: [PATCH 4/9] install wget --- .github/workflows/test_cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index a57d57763b..0b10efc8a8 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -16,7 +16,7 @@ jobs: if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: - name: Make sudo work - run: apt-get update && apt-get install -y sudo + run: apt-get update && apt-get install -y sudo wget - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: From e11450d79b1976dcd6b422224724165e34bb9fd7 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 14:11:50 -0400 Subject: [PATCH 5/9] use nvidia image Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 0b10efc8a8..53bdc07ac1 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -11,12 +11,12 @@ jobs: runs-on: nvidia # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: ubuntu:22.04 + image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: - name: Make sudo work - run: apt-get update && apt-get install -y sudo wget + run: apt-get update && apt-get install -y sudo - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: @@ -32,6 +32,7 @@ jobs: && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8 + if: false # skip as we use nvidia image - name: Set PyPI mirror for Aliyun cloud machine run: python -m pip config --user set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" From 3ae2da60b134bed9015a63b91c598a14d12e07e0 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 14:33:12 -0400 Subject: [PATCH 6/9] fix cmd Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 53bdc07ac1..b8189d6cb1 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -34,9 +34,9 @@ jobs: && sudo apt-get -y install cuda-11-8 libcudnn8=8.9.5.*-1+cuda11.8 if: false # skip as we use nvidia image - name: Set PyPI mirror for Aliyun cloud machine - run: python -m pip config --user set global.index-url http://mirrors.cloud.aliyuncs.com/pypi/simple/ + run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - - run: pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip" + - run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip" env: DP_BUILD_TESTING: 1 DP_VARIANT: cuda @@ -52,7 +52,7 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - CUDA_PATH: /usr/local/cuda-11.8 + # CUDA_PATH: /usr/local/cuda-11.8 - run: | export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH export PATH=${{ github.workspace }}/dp_test/bin:$PATH @@ -63,7 +63,7 @@ jobs: TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - CUDA_PATH: /usr/local/cuda-11.8 + # CUDA_PATH: /usr/local/cuda-11.8 - uses: codecov/codecov-action@v3 with: gcov: true From 06f4eb36fb38859e948816b14bc87f7b68e46885 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 15:18:33 -0400 Subject: [PATCH 7/9] install git Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index b8189d6cb1..d5da92ae21 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -15,8 +15,8 @@ jobs: options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: - - name: Make sudo work - run: apt-get update && apt-get install -y sudo + - name: Make sudo and git work + run: apt-get update && apt-get install -y sudo git - uses: actions/checkout@v4 - uses: actions/setup-python@v4 with: From 42bf3a8218e6d608f710a6d95e403e743f42fa1c Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 15:29:05 -0400 Subject: [PATCH 8/9] not clear where pytest is installed... Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index d5da92ae21..a9c3586e27 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -42,7 +42,7 @@ jobs: DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-11.8 - run: dp --version - - run: pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0 + - run: python -m pytest -s --cov=deepmd --cov=deepmd_cli source/tests --durations=0 - run: source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 @@ -52,18 +52,18 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - # CUDA_PATH: /usr/local/cuda-11.8 + CUDA_PATH: /usr/local/cuda-11.8 - run: | export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH export PATH=${{ github.workspace }}/dp_test/bin:$PATH - pytest -s --cov=deepmd source/lmp/tests - pytest -s --cov=deepmd source/ipi/tests + python -m pytest -s --cov=deepmd source/lmp/tests + python -m pytest -s --cov=deepmd source/ipi/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - # CUDA_PATH: /usr/local/cuda-11.8 + CUDA_PATH: /usr/local/cuda-11.8 - uses: codecov/codecov-action@v3 with: gcov: true From 67511be823c00a550b9eb412994c9fc971e4b2eb Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Tue, 3 Oct 2023 16:12:45 -0400 Subject: [PATCH 9/9] use $GITHUB_WORKSPACE See https://github.com/actions/runner/issues/2058 Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index a9c3586e27..7b95e6d37b 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -54,8 +54,8 @@ jobs: DP_USE_MPICH2: 1 CUDA_PATH: /usr/local/cuda-11.8 - run: | - export LD_LIBRARY_PATH=${{ github.workspace }}/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH - export PATH=${{ github.workspace }}/dp_test/bin:$PATH + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH + export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH python -m pytest -s --cov=deepmd source/lmp/tests python -m pytest -s --cov=deepmd source/ipi/tests env: