Skip to content

Commit

Permalink
chore(ci): add randomized long run tests on CPU and GPU
Browse files Browse the repository at this point in the history
  • Loading branch information
agnesLeroy committed Dec 19, 2024
1 parent d6e4585 commit 9c19e28
Show file tree
Hide file tree
Showing 17 changed files with 4,274 additions and 84 deletions.
13 changes: 7 additions & 6 deletions .github/workflows/gpu_integer_long_run_tests.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: AWS Long Run Tests on GPU
name: Long Run Tests on GPU

env:
CARGO_TERM_COLOR: always
Expand All @@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"

jobs:
setup-instance:
Expand All @@ -36,10 +36,10 @@ jobs:
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: 2-h100
profile: multi-gpu-test

cuda-tests:
name: Long run GPU H100 tests
name: Long run GPU tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
Expand All @@ -53,6 +53,7 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
Expand Down Expand Up @@ -87,7 +88,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests.result }}
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

teardown-instance:
name: Teardown instance (gpu-tests)
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/integer_long_run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"

jobs:
setup-instance:
Expand Down Expand Up @@ -45,6 +45,7 @@ jobs:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
Expand Down
23 changes: 14 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,13 @@ test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::

.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_long_run_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"

.PHONY: test_integer_compression
test_integer_compression: install_rs_build_toolchain
Expand Down Expand Up @@ -768,11 +771,13 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"

.PHONY: test_integer_long_run # Run the long run tests for integer
test_integer_long_run: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run

.PHONY: test_integer_long_run # Run the long run integer tests
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"

.PHONY: test_safe_serialization # Run the tests for safe serialization
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
Expand Down
20 changes: 17 additions & 3 deletions scripts/integer-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ function usage() {
echo "--multi-bit Run multi-bit tests only: default off"
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
echo "--fast-tests Run integer set but skip a subset of longer tests"
echo "--long-tests Run only long run integer tests"
echo "--cargo-profile The cargo profile used to build tests"
echo "--backend Backend to use with tfhe-rs"
echo "--avx512-support Set to ON to enable avx512"
Expand All @@ -21,6 +24,7 @@ RUST_TOOLCHAIN="+stable"
multi_bit_argument=
sign_argument=
fast_tests_argument=
long_tests_argument=
nightly_tests_argument=
no_big_params_argument=
cargo_profile="release"
Expand Down Expand Up @@ -91,6 +95,10 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fast_tests_argument=--fast-tests
fi

if [[ "${LONG_TESTS}" == TRUE ]]; then
long_tests_argument=--long-tests
fi

if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
nightly_tests_argument=--nightly-tests
fi
Expand Down Expand Up @@ -137,18 +145,24 @@ if [[ "${backend}" == "gpu" ]]; then
fi
fi

filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})

if [[ "${FAST_TESTS}" == "TRUE" ]]; then
echo "Running 'fast' test set"
else
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
echo "Running 'slow' test set"
fi

if [[ "${LONG_TESTS}" == "TRUE" ]]; then
echo "Running 'long run' test set"
fi

if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
echo "Running 'nightly' test set"
fi

echo "${filter_expression}"

cargo "${RUST_TOOLCHAIN}" nextest run \
--tests \
--cargo-profile "${cargo_profile}" \
Expand All @@ -158,7 +172,7 @@ cargo "${RUST_TOOLCHAIN}" nextest run \
--test-threads "${test_threads}" \
-E "$filter_expression"

if [[ -z ${multi_bit_argument} ]]; then
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
Expand Down
95 changes: 54 additions & 41 deletions scripts/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
action="store_true",
help="Run only a small subset of test suite",
)
parser.add_argument(
"--long-tests",
dest="long_tests",
action="store_true",
help="Run only the long tests suite",
)
parser.add_argument(
"--nightly-tests",
dest="nightly_tests",
Expand Down Expand Up @@ -80,6 +86,7 @@
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*::tests_long_run::.*/",
]

# skip default_div, default_rem which are covered by default_div_rem
Expand All @@ -94,55 +101,61 @@
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
]


def filter_integer_tests(input_args):
(multi_bit_filter, group_filter) = (
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
)
backend_filter = ""
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"

filter_expression = [f"test(/^integer::{backend_filter}.*/)"]

if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")

if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")

if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
if not input_args.long_tests:
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"

filter_expression = [f"test(/^integer::{backend_filter}.*/)"]

if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")

if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")

if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
filter_expression.append(f"not test({pattern})")

if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")

if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
else:
if input_args.backend == "gpu":
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
elif input_args.backend == "cpu":
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]

excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")

return " and ".join(filter_expression)

Expand Down
1 change: 0 additions & 1 deletion tfhe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ nightly-avx512 = ["tfhe-fft/nightly", "tfhe-ntt/nightly", "pulp/nightly"]

# Private features
__profiling = []
__long_run_tests = []

software-prng = ["tfhe-csprng/software-prng"]

Expand Down
2 changes: 1 addition & 1 deletion tfhe/src/integer/gpu/server_key/radix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ mod scalar_sub;
mod shift;
mod sub;

#[cfg(all(test, feature = "__long_run_tests"))]
#[cfg(test)]
mod tests_long_run;
#[cfg(test)]
mod tests_signed;
Expand Down
Loading

0 comments on commit 9c19e28

Please sign in to comment.