diff --git a/.github/workflows/hyperstack_tfhe_gpu_tests.yml b/.github/workflows/hyperstack_tfhe_gpu_tests.yml index 9808dc5f2b..33195ca045 100644 --- a/.github/workflows/hyperstack_tfhe_gpu_tests.yml +++ b/.github/workflows/hyperstack_tfhe_gpu_tests.yml @@ -144,20 +144,20 @@ jobs: - name: Run core crypto and internal CUDA backend tests run: | - make test_core_crypto_gpu - make test_cuda_backend + BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu + BIG_TESTS_INSTANCE=TRUE make test_cuda_backend - name: Run user docs tests run: | - make test_user_doc_gpu + BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu - name: Test C API run: | - make test_c_api_gpu + BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu - name: Run High Level API Tests run: | - make test_high_level_api_gpu + BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu slack-notify: name: Slack Notification diff --git a/.github/workflows/hyperstack_tfhe_signed_integer_gpu_tests.yml b/.github/workflows/hyperstack_tfhe_signed_integer_gpu_tests.yml index 661df21d37..2b3fb27e28 100644 --- a/.github/workflows/hyperstack_tfhe_signed_integer_gpu_tests.yml +++ b/.github/workflows/hyperstack_tfhe_signed_integer_gpu_tests.yml @@ -144,11 +144,11 @@ jobs: - name: Run signed integer tests run: | - make test_signed_integer_gpu_ci + BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci - name: Run signed integer multi-bit tests run: | - make test_signed_integer_multi_bit_gpu_ci + BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci slack-notify: name: Slack Notification diff --git a/.github/workflows/hyperstack_tfhe_unsigned_integer_gpu_tests.yml b/.github/workflows/hyperstack_tfhe_unsigned_integer_gpu_tests.yml index 625b991320..e82fffdd63 100644 --- a/.github/workflows/hyperstack_tfhe_unsigned_integer_gpu_tests.yml +++ b/.github/workflows/hyperstack_tfhe_unsigned_integer_gpu_tests.yml @@ -144,11 +144,11 @@ jobs: - name: Run unsigned integer tests run: | - make test_unsigned_integer_gpu_ci + BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci - name: Run unsigned integer multi-bit tests run: | - make test_unsigned_integer_multi_bit_gpu_ci + BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci slack-notify: name: Slack Notification diff --git a/backends/tfhe-cuda-backend/cuda/src/device.cu b/backends/tfhe-cuda-backend/cuda/src/device.cu index 29bea24de6..eb881fedcd 100644 --- a/backends/tfhe-cuda-backend/cuda/src/device.cu +++ b/backends/tfhe-cuda-backend/cuda/src/device.cu @@ -247,5 +247,14 @@ int cuda_get_max_shared_memory(uint32_t gpu_index) { cudaDeviceGetAttribute(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, gpu_index); check_cuda_error(cudaGetLastError()); +#if CUDA_ARCH == 900 + max_shared_memory = 226000; +#elif CUDA_ARCH == 890 + max_shared_memory = 127000; +#elif CUDA_ARCH == 800 + max_shared_memory = 163000; +#elif CUDA_ARCH == 700 + max_shared_memory = 95000; +#endif return max_shared_memory; } diff --git a/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh b/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh index fc81a409c5..2e9d1f3e55 100644 --- a/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh +++ b/backends/tfhe-cuda-backend/cuda/src/integer/multiplication.cuh @@ -234,7 +234,12 @@ __host__ void host_integer_sum_ciphertexts_vec_kb( int32_t h_smart_copy_in[r * num_blocks]; int32_t h_smart_copy_out[r * num_blocks]; - auto max_shared_memory = cuda_get_max_shared_memory(gpu_indexes[0]); + /// Here it is important to query the default max shared memory on device 0 + /// instead of cuda_get_max_shared_memory, + /// to avoid bugs with tree_add_chunks trying to use too much shared memory + int max_shared_memory = 0; + check_cuda_error(cudaDeviceGetAttribute( + &max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock, 0)); // create lut object for message and carry // we allocate luts_message_carry in the host function (instead of scratch) diff --git a/scripts/integer-tests.sh b/scripts/integer-tests.sh index 2c44dd9b10..e6a5e6d980 100755 --- a/scripts/integer-tests.sh +++ b/scripts/integer-tests.sh @@ -129,8 +129,13 @@ fi # Override test-threads number to avoid Out-of-memory issues on GPU instances if [[ "${backend}" == "gpu" ]]; then - test_threads=5 - doctest_threads=5 + if [[ "${BIG_TESTS_INSTANCE}" == TRUE ]]; then + test_threads=5 + doctest_threads=5 + else + test_threads=3 + doctest_threads=3 + fi fi filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})