Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(ci): add randomized long run tests on CPU and GPU #1848

Merged
merged 1 commit into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions .github/workflows/gpu_integer_long_run_tests.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: AWS Long Run Tests on GPU
name: Long Run Tests on GPU

env:
CARGO_TERM_COLOR: always
Expand All @@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"

jobs:
setup-instance:
Expand All @@ -36,10 +36,10 @@ jobs:
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: 2-h100
profile: multi-gpu-test

cuda-tests:
name: Long run GPU H100 tests
name: Long run GPU tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
Expand All @@ -53,6 +53,7 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
Expand Down Expand Up @@ -87,7 +88,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests.result }}
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"

teardown-instance:
name: Teardown instance (gpu-tests)
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/integer_long_run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"

jobs:
setup-instance:
Expand Down Expand Up @@ -45,6 +45,7 @@ jobs:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
Expand Down
23 changes: 14 additions & 9 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -583,10 +583,13 @@ test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::

.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_long_run_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"

.PHONY: test_integer_compression
test_integer_compression: install_rs_build_toolchain
Expand Down Expand Up @@ -768,11 +771,13 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"

.PHONY: test_integer_long_run # Run the long run tests for integer
test_integer_long_run: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run

.PHONY: test_integer_long_run # Run the long run integer tests
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"

.PHONY: test_safe_serialization # Run the tests for safe serialization
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
Expand Down
20 changes: 17 additions & 3 deletions scripts/integer-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ function usage() {
echo "--multi-bit Run multi-bit tests only: default off"
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
echo "--fast-tests Run integer set but skip a subset of longer tests"
echo "--long-tests Run only long run integer tests"
echo "--cargo-profile The cargo profile used to build tests"
echo "--backend Backend to use with tfhe-rs"
echo "--avx512-support Set to ON to enable avx512"
Expand All @@ -21,6 +24,7 @@ RUST_TOOLCHAIN="+stable"
multi_bit_argument=
sign_argument=
fast_tests_argument=
long_tests_argument=
nightly_tests_argument=
no_big_params_argument=
cargo_profile="release"
Expand Down Expand Up @@ -91,6 +95,10 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fast_tests_argument=--fast-tests
fi

if [[ "${LONG_TESTS}" == TRUE ]]; then
long_tests_argument=--long-tests
fi

if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
nightly_tests_argument=--nightly-tests
fi
Expand Down Expand Up @@ -137,18 +145,24 @@ if [[ "${backend}" == "gpu" ]]; then
fi
fi

filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})

if [[ "${FAST_TESTS}" == "TRUE" ]]; then
echo "Running 'fast' test set"
else
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
echo "Running 'slow' test set"
fi

if [[ "${LONG_TESTS}" == "TRUE" ]]; then
echo "Running 'long run' test set"
fi

if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
echo "Running 'nightly' test set"
fi

echo "${filter_expression}"

cargo "${RUST_TOOLCHAIN}" nextest run \
--tests \
--cargo-profile "${cargo_profile}" \
Expand All @@ -158,7 +172,7 @@ cargo "${RUST_TOOLCHAIN}" nextest run \
--test-threads "${test_threads}" \
-E "$filter_expression"

if [[ -z ${multi_bit_argument} ]]; then
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
Expand Down
95 changes: 54 additions & 41 deletions scripts/test_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
action="store_true",
help="Run only a small subset of test suite",
)
parser.add_argument(
"--long-tests",
dest="long_tests",
action="store_true",
help="Run only the long tests suite",
)
parser.add_argument(
"--nightly-tests",
dest="nightly_tests",
Expand Down Expand Up @@ -80,6 +86,7 @@
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*::tests_long_run::.*/",
]

# skip default_div, default_rem which are covered by default_div_rem
Expand All @@ -94,55 +101,61 @@
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
]


def filter_integer_tests(input_args):
(multi_bit_filter, group_filter) = (
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
)
backend_filter = ""
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"

filter_expression = [f"test(/^integer::{backend_filter}.*/)"]

if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")

if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")

if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
if not input_args.long_tests:
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"

filter_expression = [f"test(/^integer::{backend_filter}.*/)"]

if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")

if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")

if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
filter_expression.append(f"not test({pattern})")

if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")

if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
else:
if input_args.backend == "gpu":
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
elif input_args.backend == "cpu":
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]

excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")

return " and ".join(filter_expression)

Expand Down
1 change: 0 additions & 1 deletion tfhe/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ nightly-avx512 = ["tfhe-fft/nightly", "tfhe-ntt/nightly", "pulp/nightly"]

# Private features
__profiling = []
__long_run_tests = []

software-prng = ["tfhe-csprng/software-prng"]

Expand Down
2 changes: 1 addition & 1 deletion tfhe/src/integer/gpu/server_key/radix/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ mod scalar_sub;
mod shift;
mod sub;

#[cfg(all(test, feature = "__long_run_tests"))]
#[cfg(test)]
mod tests_long_run;
#[cfg(test)]
mod tests_signed;
Expand Down
Loading
Loading