From 0e5827ac235089bfd52b3f28467f74de0073d96b Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Feb 2024 01:07:27 -0500 Subject: [PATCH 1/3] debug test cuda Signed-off-by: Jinzhe Zeng --- .github/workflows/test_cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index 4e9725103a..a294a10666 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -40,7 +40,7 @@ jobs: - run: python -m pip install -v -e .[gpu,test,lmp,cu12,torch] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: DP_BUILD_TESTING: 1 - DP_VARIANT: cuda + DP_VARIANT: cpu CUDA_PATH: /usr/local/cuda-12.2 NUM_WORKERS: 0 - run: dp --version @@ -52,7 +52,7 @@ jobs: TF_INTER_OP_PARALLELISM_THREADS: 1 LMP_CXX11_ABI_0: 1 CMAKE_GENERATOR: Ninja - DP_VARIANT: cuda + DP_VARIANT: cpu DP_USE_MPICH2: 1 CUDA_PATH: /usr/local/cuda-12.2 - run: | From f87d1f3a911367b2c94123aad7340b9159fbb325 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Feb 2024 02:30:55 -0500 Subject: [PATCH 2/3] Revert "debug test cuda" This reverts commit 0e5827ac235089bfd52b3f28467f74de0073d96b. --- .github/workflows/test_cuda.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml index a294a10666..4e9725103a 100644 --- a/.github/workflows/test_cuda.yml +++ b/.github/workflows/test_cuda.yml @@ -40,7 +40,7 @@ jobs: - run: python -m pip install -v -e .[gpu,test,lmp,cu12,torch] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz" env: DP_BUILD_TESTING: 1 - DP_VARIANT: cpu + DP_VARIANT: cuda CUDA_PATH: /usr/local/cuda-12.2 NUM_WORKERS: 0 - run: dp --version @@ -52,7 +52,7 @@ jobs: TF_INTER_OP_PARALLELISM_THREADS: 1 LMP_CXX11_ABI_0: 1 CMAKE_GENERATOR: Ninja - DP_VARIANT: cpu + DP_VARIANT: cuda DP_USE_MPICH2: 1 CUDA_PATH: /usr/local/cuda-12.2 - run: | From 89dc97b4a1a4bb1777908d798987091f7a24f5e6 Mon Sep 17 00:00:00 2001 From: Jinzhe Zeng Date: Fri, 2 Feb 2024 02:39:01 -0500 Subject: [PATCH 3/3] skip registering ProdEnvMat to locate the issue Signed-off-by: Jinzhe Zeng --- source/op/prod_env_mat_multi_device.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/op/prod_env_mat_multi_device.cc b/source/op/prod_env_mat_multi_device.cc index 7037a00a6c..ca2778a20b 100644 --- a/source/op/prod_env_mat_multi_device.cc +++ b/source/op/prod_env_mat_multi_device.cc @@ -1765,7 +1765,7 @@ REGISTER_CPU(double); // Register the GPU kernels. // Compatible with v1.3 -#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM +#if 0 // GOOGLE_CUDA || TENSORFLOW_USE_ROCM #define REGISTER_GPU(T) \ REGISTER_KERNEL_BUILDER(Name("ProdEnvMatA") \ .Device(DEVICE_GPU) \