From c519f1fae8267ef5bf48be001534689ff3c993aa Mon Sep 17 00:00:00 2001 From: Chakru <161002324+chakru-r@users.noreply.github.com> Date: Fri, 6 Dec 2024 20:44:48 +0530 Subject: [PATCH] test(smoke-tests): parallelise smoke test execution --- .github/workflows/docker-unified.yml | 50 +++++++++++++------ smoke-test/.gitignore | 4 +- smoke-test/build.gradle | 38 ++++++-------- smoke-test/conftest.py | 52 ++++++++++++++++++++ smoke-test/smoke.sh | 25 ++++++---- smoke-test/tests/cypress/integration_test.py | 49 ++++++++++++------ 6 files changed, 155 insertions(+), 63 deletions(-) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 03a9b3afc3bc5..47c26068347c0 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1011,18 +1011,39 @@ jobs: needs: setup outputs: matrix: ${{ steps.set-matrix.outputs.matrix }} + cypress_batch_count: ${{ steps.set-batch-count.outputs.cypress_batch_count }} + python_batch_count: ${{ steps.set-batch-count.outputs.python_batch_count }} steps: + - id: set-batch-count + # Tests are split simply to ensure the configured number of batches for parallelization. This may need some + # increase as a new tests added increase the duration where an additional parallel batch helps. + # python_batch_count is used to split pytests in the smoke-test (batches of actual test functions) + # cypress_batch_count is used to split the collection of cypress test specs into batches. + run: | + echo "cypress_batch_count=11" >> "$GITHUB_OUTPUT" + echo "python_batch_count=5" >> "$GITHUB_OUTPUT" + - id: set-matrix + # For m batches for python and n batches for cypress, we need a test matrix of python x m + cypress x n. + # while the github action matrix generation can handle these two parts individually, there isnt a way to use the + # two generated matrices for the same job. So, produce that matrix with scripting and use the include directive + # to add it to the test matrix. run: | - if [ '${{ needs.setup.outputs.frontend_only }}' == 'true' ]; then - echo 'matrix=["cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT" - elif [ '${{ needs.setup.outputs.ingestion_only }}' == 'true' ]; then - echo 'matrix=["no_cypress_suite0","no_cypress_suite1"]' >> "$GITHUB_OUTPUT" - elif [[ '${{ needs.setup.outputs.backend_change }}' == 'true' || '${{ needs.setup.outputs.smoke_test_change }}' == 'true' ]]; then - echo 'matrix=["no_cypress_suite0","no_cypress_suite1","cypress_suite1","cypress_rest"]' >> "$GITHUB_OUTPUT" - else - echo 'matrix=[]' >> "$GITHUB_OUTPUT" + python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }} + python_matrix=$(printf "{\"test_strategy\":\"pytests\",\"batch\":\"0\",\"batch_count\":\"$python_batch_count\"}"; for ((i=1;i> "$GITHUB_OUTPUT" smoke_test: name: Run Smoke Tests @@ -1043,8 +1064,7 @@ jobs: ] strategy: fail-fast: false - matrix: - test_strategy: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix) }} + matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix) }} if: ${{ always() && !failure() && !cancelled() && needs.smoke_test_matrix.outputs.matrix != '[]' }} steps: - name: Free up disk space @@ -1220,6 +1240,8 @@ jobs: CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }} CLEANUP_DATA: "false" TEST_STRATEGY: ${{ matrix.test_strategy }} + BATCH_COUNT: ${{ matrix.batch_count }} + BATCH_NUMBER: ${{ matrix.batch }} run: | echo "$DATAHUB_VERSION" ./gradlew --stop @@ -1230,25 +1252,25 @@ jobs: if: failure() run: | docker ps -a - TEST_STRATEGY="-${{ matrix.test_strategy }}" + TEST_STRATEGY="-${{ matrix.test_strategy }}-${{ matrix.batch }}" source .github/scripts/docker_logs.sh - name: Upload logs uses: actions/upload-artifact@v3 if: failure() with: - name: docker-logs-${{ matrix.test_strategy }} + name: docker-logs-${{ matrix.test_strategy }}-${{ matrix.batch }} path: "docker_logs/*.log" retention-days: 5 - name: Upload screenshots uses: actions/upload-artifact@v3 if: failure() with: - name: cypress-snapshots-${{ matrix.test_strategy }} + name: cypress-snapshots-${{ matrix.test_strategy }}-${{ matrix.batch }} path: smoke-test/tests/cypress/cypress/screenshots/ - uses: actions/upload-artifact@v3 if: always() with: - name: Test Results (smoke tests) ${{ matrix.test_strategy }} + name: Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }} path: | **/build/reports/tests/test/** **/build/test-results/test/** diff --git a/smoke-test/.gitignore b/smoke-test/.gitignore index b8af2eef535a0..d8cfd65ff81b9 100644 --- a/smoke-test/.gitignore +++ b/smoke-test/.gitignore @@ -29,6 +29,8 @@ share/python-wheels/ .installed.cfg *.egg MANIFEST +**/cypress/node_modules + # PyInstaller # Usually these files are written by a python script from a template @@ -132,4 +134,4 @@ dmypy.json # Pyre type checker .pyre/ junit* -tests/cypress/onboarding.json \ No newline at end of file +tests/cypress/onboarding.json diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 73ecdcb08ea14..60d08e0206cda 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -91,39 +91,31 @@ task pythonLintFix(type: Exec, dependsOn: installDev) { * The following tasks assume an already running quickstart. * ./gradlew quickstart (or another variation `quickstartDebug`) */ -task noCypressSuite0(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { - environment 'RUN_QUICKSTART', 'false' - environment 'TEST_STRATEGY', 'no_cypress_suite0' - - workingDir = project.projectDir - commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && set -x && " + - "./smoke.sh" -} +// ./gradlew :smoke-test:pytest -PbatchNumber=2 (default 0) +task pytest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { + // Get BATCH_NUMBER from command line argument with default value of 0 + def batchNumber = project.hasProperty('batchNumber') ? project.property('batchNumber') : '0' -task noCypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'TEST_STRATEGY', 'no_cypress_suite1' + environment 'TEST_STRATEGY', 'pytests' + environment 'BATCH_COUNT', 5 + environment 'BATCH_NUMBER', batchNumber workingDir = project.projectDir commandLine 'bash', '-c', "source ${venv_name}/bin/activate && set -x && " + - "./smoke.sh" + "./smoke.sh" } -task cypressSuite1(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { - environment 'RUN_QUICKSTART', 'false' - environment 'TEST_STRATEGY', 'cypress_suite1' - - workingDir = project.projectDir - commandLine 'bash', '-c', - "source ${venv_name}/bin/activate && set -x && " + - "./smoke.sh" -} +// ./gradlew :smoke-test:cypressTest -PbatchNumber=2 (default 0) +task cypressTest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { + // Get BATCH_NUMBER from command line argument with default value of 0 + def batchNumber = project.hasProperty('batchNumber') ? project.property('batchNumber') : '0' -task cypressRest(type: Exec, dependsOn: [installDev, ':metadata-ingestion:installDev']) { environment 'RUN_QUICKSTART', 'false' - environment 'TEST_STRATEGY', 'cypress_rest' + environment 'TEST_STRATEGY', 'cypress' + environment 'BATCH_COUNT', 11 + environment 'BATCH_NUMBER', batchNumber workingDir = project.projectDir commandLine 'bash', '-c', diff --git a/smoke-test/conftest.py b/smoke-test/conftest.py index 6d148db9886a4..d48a92b22ab48 100644 --- a/smoke-test/conftest.py +++ b/smoke-test/conftest.py @@ -1,6 +1,8 @@ import os import pytest +from typing import List, Tuple +from _pytest.nodes import Item import requests from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph @@ -45,3 +47,53 @@ def graph_client(auth_session) -> DataHubGraph: def pytest_sessionfinish(session, exitstatus): """whole test run finishes.""" send_message(exitstatus) + + +def get_batch_start_end(num_tests: int) -> Tuple[int, int]: + batch_count_env = os.getenv("BATCH_COUNT", 1) + batch_count = int(batch_count_env) + + batch_number_env = os.getenv("BATCH_NUMBER", 0) + batch_number = int(batch_number_env) + + if batch_count == 0 or batch_count > num_tests: + raise ValueError( + f"Invalid batch count {batch_count}: must be >0 and <= {num_tests} (num_tests)" + ) + if batch_number >= batch_count: + raise ValueError( + f"Invalid batch number: {batch_number}, must be less than {batch_count} (zer0 based index)" + ) + + batch_size = round(num_tests / batch_count) + + batch_start = batch_size * batch_number + batch_end = batch_start + batch_size + # We must have exactly as many batches as specified by BATCH_COUNT. + if ( + num_tests - batch_end < batch_size + ): # We must have exactly as many batches as specified by BATCH_COUNT, put the remaining in the last batch. + batch_end = num_tests + + if batch_count > 0: + print(f"Running tests for batch {batch_number} of {batch_count}") + + return batch_start, batch_end + + +def pytest_collection_modifyitems( + session: pytest.Session, config: pytest.Config, items: List[Item] +) -> None: + if os.getenv("TEST_STRATEGY") == "cypress": + return # We launch cypress via pytests, but needs a different batching mechanism at cypress level. + + # If BATCH_COUNT and BATCH_ENV vars are set, splits the pytests to batches and runs filters only the BATCH_NUMBER + # batch for execution. Enables multiple parallel launches. Current implementation assumes all test are of equal + # weight for batching. TODO. A weighted batching method can help make batches more equal sized by cost. + # this effectively is a no-op if BATCH_COUNT=1 + start_index, end_index = get_batch_start_end(num_tests=len(items)) + + items.sort(key=lambda x: x.nodeid) # we want the order to be stable across batches + # replace items with the filtered list + print(f"Running tests for batch {start_index}-{end_index}") + items[:] = items[start_index:end_index] diff --git a/smoke-test/smoke.sh b/smoke-test/smoke.sh index 888a60f488e1f..ec8188ebf5f4d 100755 --- a/smoke-test/smoke.sh +++ b/smoke-test/smoke.sh @@ -34,15 +34,20 @@ source ./set-cypress-creds.sh # set environment variables for the test source ./set-test-env-vars.sh -# no_cypress_suite0, no_cypress_suite1, cypress_suite1, cypress_rest -if [[ -z "${TEST_STRATEGY}" ]]; then - pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke.xml +# TEST_STRATEGY: +# if set to pytests, runs all pytests, skips cypress tests(though cypress test launch is via a pytest). +# if set tp cypress, runs all cypress tests +# if blank, runs all. +# When invoked via the github action, BATCH_COUNT and BATCH_NUM env vars are set to run a slice of those tests per +# worker for parallelism. docker-unified.yml generates a test matrix of pytests/cypress in batches. As number of tests +# increase, the batch_count config (in docker-unified.yml) may need adjustment. +if [[ "${TEST_STRATEGY}" == "pytests" ]]; then + #pytests only - github test matrix runs pytests in one of the runners when applicable. + pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke-pytests.xml -k 'not test_run_cypress' +elif [[ "${TEST_STRATEGY}" == "cypress" ]]; then + # run only cypress tests. The test inspects BATCH_COUNT and BATCH_NUMBER and runs only a subset of tests in that batch. + # github workflow test matrix will invoke this in multiple runners for each batch. + pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke-cypress${BATCH_NUMBER}.xml tests/cypress/integration_test.py else - if [ "$TEST_STRATEGY" == "no_cypress_suite0" ]; then - pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke_non_cypress.xml -k 'not test_run_cypress' -m 'not no_cypress_suite1' - elif [ "$TEST_STRATEGY" == "no_cypress_suite1" ]; then - pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke_non_cypress.xml -m 'no_cypress_suite1' - else - pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke_cypress_${TEST_STRATEGY}.xml tests/cypress/integration_test.py - fi + pytest -rP --durations=20 -vv --continue-on-collection-errors --junit-xml=junit.smoke-all.xml fi diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 0d824a96810d0..33c67a923c278 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -1,10 +1,11 @@ import datetime import os import subprocess -from typing import List, Set +from typing import List import pytest +from conftest import get_batch_start_end from tests.setup.lineage.ingest_time_lineage import ( get_time_lineage_urns, ingest_time_lineage, @@ -169,10 +170,29 @@ def ingest_cleanup_data(auth_session, graph_client): print("deleted onboarding data") -def _get_spec_map(items: Set[str]) -> str: - if len(items) == 0: - return "" - return ",".join([f"**/{item}/*.js" for item in items]) +def _get_js_files(base_path: str): + file_paths = [] + for root, dirs, files in os.walk(base_path): + for file in files: + if file.endswith(".js"): + file_paths.append(os.path.relpath(os.path.join(root, file), base_path)) + return sorted(file_paths) # sort to make the order stable across batch runs + + +def _get_cypress_tests_batch(): + """ + Batching is configured via env vars BATCH_COUNT and BATCH_NUMBER. All cypress tests are split into exactly + BATCH_COUNT batches. When BATCH_NUMBER env var is set (zero based index), that batch alone is run. + Github workflow via test_matrix, runs all batches in parallel to speed up the test elapsed time. + If either of these vars are not set, all tests are run sequentially. + :return: + """ + all_tests = _get_js_files("tests/cypress/cypress/e2e") + + batch_start, batch_end = get_batch_start_end(num_tests=len(all_tests)) + + return all_tests[batch_start:batch_end] + # return test_batches[int(batch_number)] #if BATCH_NUMBER was set, we this test just runs that one batch. def test_run_cypress(auth_session): @@ -182,24 +202,23 @@ def test_run_cypress(auth_session): test_strategy = os.getenv("TEST_STRATEGY", None) if record_key: record_arg = " --record " - tag_arg = f" --tag {test_strategy} " + batch_number = os.getenv("BATCH_NUMBER") + batch_count = os.getenv("BATCH_COUNT") + if batch_number and batch_count: + batch_suffix = f"-{batch_number}{batch_count}" + else: + batch_suffix = "" + tag_arg = f" --tag {test_strategy}{batch_suffix}" else: record_arg = " " rest_specs = set(os.listdir("tests/cypress/cypress/e2e")) cypress_suite1_specs = {"mutations", "search", "views"} rest_specs.difference_update(set(cypress_suite1_specs)) - strategy_spec_map = { - "cypress_suite1": cypress_suite1_specs, - "cypress_rest": rest_specs, - } print(f"test strategy is {test_strategy}") test_spec_arg = "" - if test_strategy is not None: - specs = strategy_spec_map.get(test_strategy) - assert specs is not None - specs_str = _get_spec_map(specs) - test_spec_arg = f" --spec '{specs_str}' " + specs_str = ",".join([f"**/{f}" for f in _get_cypress_tests_batch()]) + test_spec_arg = f" --spec '{specs_str}' " print("Running Cypress tests with command") command = f"NO_COLOR=1 npx cypress run {record_arg} {test_spec_arg} {tag_arg}"