From 9f4603f9b36280da9a543f40ade56b64d1aea297 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Sat, 17 Feb 2024 07:31:03 -0500 Subject: [PATCH] speed up cuda test (#3284) Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 23 +++++++++-------------- source/tests/tf/test_tabulate.py | 4 ++-- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 7c08e50912..2f06e4a572 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -11,7 +11,7 @@ jobs: runs-on: nvidia # https://github.com/deepmodeling/deepmd-kit/pull/2884#issuecomment-1744216845 container: - image: nvidia/cuda:12.2.0-devel-ubuntu22.04 + image: nvidia/cuda:12.3.1-devel-ubuntu22.04 options: --gpus all if: github.repository_owner == 'deepmodeling' && github.event.label.name == 'Test CUDA' || github.event_name == 'workflow_dispatch' steps: @@ -27,24 +27,24 @@ jobs: with: mpi: mpich - uses: lukka/get-cmake@latest + with: + useLocalCache: true + useCloudCache: false - run: | wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && sudo dpkg -i cuda-keyring_1.0-1_all.deb \ && sudo apt-get update \ - && sudo apt-get -y install cuda-12-2 libcudnn8=8.9.5.*-1+cuda12.2 + && sudo apt-get -y install cuda-12-3 libcudnn8=8.9.5.*-1+cuda12.3 if: false # skip as we use nvidia image - - name: Set PyPI mirror for Aliyun cloud machine - run: python -m pip config --user set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple/ - run: python -m pip install -U "pip>=21.3.1,!=23.0.0" - run: python -m pip install "tensorflow>=2.15.0rc0" "torch>=2.2.0" - run: python -m pip install -v -e .[gpu,test,lmp,cu12,torch] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: - DP_BUILD_TESTING: 1 DP_VARIANT: cuda - CUDA_PATH: /usr/local/cuda-12.2 NUM_WORKERS: 0 + DP_ENABLE_NATIVE_OPTIMIZATION: 1 - run: dp --version - - run: python -m pytest --cov=deepmd source/tests --durations=0 + - run: python -m pytest source/tests --durations=0 - run: source/install/test_cc_local.sh env: OMP_NUM_THREADS: 1 @@ -54,18 +54,13 @@ jobs: CMAKE_GENERATOR: Ninja DP_VARIANT: cuda DP_USE_MPICH2: 1 - CUDA_PATH: /usr/local/cuda-12.2 - run: | export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/dp_test/lib:$CUDA_PATH/lib64:$LD_LIBRARY_PATH export PATH=$GITHUB_WORKSPACE/dp_test/bin:$PATH - python -m pytest --cov=deepmd source/lmp/tests - python -m pytest --cov=deepmd source/ipi/tests + python -m pytest source/lmp/tests + python -m pytest source/ipi/tests env: OMP_NUM_THREADS: 1 TF_INTRA_OP_PARALLELISM_THREADS: 1 TF_INTER_OP_PARALLELISM_THREADS: 1 LAMMPS_PLUGIN_PATH: ${{ github.workspace }}/dp_test/lib/deepmd_lmp - CUDA_PATH: /usr/local/cuda-12.2 - - uses: codecov/codecov-action@v4 - env: - CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/source/tests/tf/test_tabulate.py b/source/tests/tf/test_tabulate.py index 2ffb5e19c6..0d46293b62 100644 --- a/source/tests/tf/test_tabulate.py +++ b/source/tests/tf/test_tabulate.py @@ -58,7 +58,7 @@ def test_op_tanh(self): ] ) - places = 18 + places = 15 np.testing.assert_almost_equal(dy_array, answer, places) def test_op_gelu(self): @@ -104,7 +104,7 @@ def test_op_gelu(self): ] ) - places = 18 + places = 15 np.testing.assert_almost_equal(dy_array, answer, places)