test: improve performance of tests by caching repos #3484

Open · wants to merge 5 commits into base: develop
3 changes: 2 additions & 1 deletion .github/workflows/test_deploy.yml
@@ -14,7 +14,8 @@ on:
branches:
- "**"
- "!master"

env:
RENKU_TEST_RECREATE_CACHE: "${{ (endsWith(github.ref, 'master') || endsWith(github.ref, 'develop') || startsWith(github.ref, 'refs/tags/') || startsWith(github.ref, 'refs/heads/release/' ) ) && '1' || '0' }}"
jobs:
set-matrix:
runs-on: ubuntu-latest
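The `RENKU_TEST_RECREATE_CACHE` expression above resolves to "1" only on master, develop, tag, and release refs, so cached test repositories are rebuilt on those runs and simply reused on feature branches. A minimal Python sketch of the same branch logic, for illustration only (the function name and example refs are not part of the PR):

```python
def should_recreate_cache(ref: str) -> str:
    """Mirror the workflow expression: rebuild the cache only on mainline refs."""
    recreate = (
        ref.endswith("master")
        or ref.endswith("develop")
        or ref.startswith("refs/tags/")
        or ref.startswith("refs/heads/release/")
    )
    return "1" if recreate else "0"


assert should_recreate_cache("refs/heads/develop") == "1"
assert should_recreate_cache("refs/heads/my-feature-branch") == "0"
```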
2 changes: 2 additions & 0 deletions conftest.py
@@ -78,10 +78,12 @@ def pytest_configure(config):
os.environ["RENKU_DISABLE_VERSION_CHECK"] = "1"
# NOTE: Set an env var during tests to mark that Renku is running in a test session.
os.environ["RENKU_RUNNING_UNDER_TEST"] = "1"
os.environ["RENKU_SKIP_HOOK_CHECKS"] = "1"


def pytest_unconfigure(config):
"""Hook that is called by pytest after all tests are executed."""
os.environ.pop("RENKU_SKIP_MIN_VERSION_CHECK", None)
os.environ.pop("RENKU_DISABLE_VERSION_CHECK", None)
os.environ.pop("RENKU_RUNNING_UNDER_TEST", None)
os.environ.pop("RENKU_SKIP_HOOK_CHECKS", None)
4 changes: 4 additions & 0 deletions renku/data/pre-commit.sh
@@ -19,6 +19,10 @@
# RENKU HOOK. DO NOT REMOVE OR MODIFY.
######################################

if [ "$RENKU_SKIP_HOOK_CHECKS" == "1" ]; then
exit 0
fi

# Find all modified or added files, and do nothing if there aren't any.
export RENKU_DISABLE_VERSION_CHECK=true

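Together with the conftest.py change above, this makes the Renku pre-commit hook a no-op for the whole test session: pytest exports RENKU_SKIP_HOOK_CHECKS=1 and the hook exits immediately. Tests that specifically exercise hook behaviour opt back in through an `enable_precommit_hook` fixture (used in tests/cli/test_datasets.py below). That fixture is not shown in this diff; the sketch below is only an assumed minimal implementation of such an opt-in fixture:

```python
import os

import pytest


@pytest.fixture
def enable_precommit_hook():
    """Re-enable Renku pre-commit hook checks for a single test."""
    previous = os.environ.pop("RENKU_SKIP_HOOK_CHECKS", None)
    try:
        yield
    finally:
        if previous is not None:
            os.environ["RENKU_SKIP_HOOK_CHECKS"] = previous
```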
98 changes: 98 additions & 0 deletions tests/cli/fixtures/cli_projects.py
@@ -24,7 +24,9 @@

from renku.core.config import set_value
from renku.infrastructure.repository import Repository
from renku.ui.cli import cli
from tests.fixtures.repository import RenkuProject
from tests.utils import format_result_exception


@pytest.fixture()
@@ -131,3 +133,99 @@ def workflow_file_project(project, request) -> Generator[RenkuWorkflowFileProjec
(project.path / "data" / "collection" / "colors.csv").write_text("\n".join(f"color-{i}" for i in range(99)))

yield workflow_file_project


@pytest.fixture
def project_with_merge_conflict(runner, project, directory_tree, run_shell, cache_test_project):
"""Project with a merge conflict."""
if not cache_test_project.setup():
result = runner.invoke(cli, ["mergetool", "install"])

assert 0 == result.exit_code, format_result_exception(result)

# create a common dataset
result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "shared-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a common workflow
output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')

assert b"" == output[0]
assert output[1] is None

# switch to a new branch
output = run_shell("git checkout -b remote-branch")

assert b"Switched to a new branch 'remote-branch'\n" == output[0]
assert output[1] is None

# edit the dataset
result = runner.invoke(cli, ["dataset", "edit", "-d", "remote description", "shared-dataset"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "remote-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a new workflow
output = run_shell('renku run --name "remote-workflow" echo "a unique string" > remote_output_file')

assert b"" == output[0]
assert output[1] is None

# Create a downstream workflow
output = run_shell('renku run --name "remote-downstream-workflow" cp my_output_file my_remote_downstream')

assert b"" == output[0]
assert output[1] is None

# Create another downstream workflow
output = run_shell('renku run --name "remote-downstream-workflow2" cp remote_output_file my_remote_downstream2')

assert b"" == output[0]
assert output[1] is None

# Edit the project metadata
result = runner.invoke(cli, ["project", "edit", "-k", "remote"])

assert 0 == result.exit_code, format_result_exception(result)

# Switch back to master
output = run_shell("git checkout master")

assert b"Switched to branch 'master'\n" == output[0]
assert output[1] is None

# Add a new dataset
result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "local-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a local workflow
output = run_shell('renku run --name "local-workflow" echo "a unique string" > local_output_file')

assert b"" == output[0]
assert output[1] is None

# Create a local downstream workflow
output = run_shell('renku run --name "local-downstream-workflow" cp my_output_file my_local_downstream')

assert b"" == output[0]
assert output[1] is None

# Create another local downstream workflow
output = run_shell('renku run --name "local-downstream-workflow2" cp local_output_file my_local_downstream2')

assert b"" == output[0]
assert output[1] is None

# Edit the project in master as well
result = runner.invoke(cli, ["project", "edit", "-k", "local"])

assert 0 == result.exit_code, format_result_exception(result)
cache_test_project.save()
yield project
47 changes: 33 additions & 14 deletions tests/cli/fixtures/cli_workflow.py
@@ -17,21 +17,40 @@

import pytest

from renku.ui.cli import cli


@pytest.fixture
def workflow_graph(run_shell, project):
def workflow_graph(run_shell, project, cache_test_project):
"""Setup a project with a workflow graph."""
cache_test_project.set_name("workflow_graph_fixture")
if not cache_test_project.setup():

def _run_workflow(name, command, extra_args=""):
output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
# Assert not allocated stderr.
assert output[1] is None

_run_workflow("r1", "echo 'test' > A")
_run_workflow("r2", "tee B C < A")
_run_workflow("r3", "cp A Z")
_run_workflow("r4", "cp B X")
_run_workflow("r5", "cat C Z > Y")
_run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
_run_workflow("r7", "echo 'other' > H")
_run_workflow("r8", "tee I J < H")
cache_test_project.save()


@pytest.fixture
def project_with_dataset_and_workflows(runner, run_shell, project, directory_tree, cache_test_project):
"""Project with a dataset and some workflows."""
if not cache_test_project.setup():
assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code
assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree)]).exit_code

def _run_workflow(name, command, extra_args=""):
output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
# Assert not allocated stderr.
assert output[1] is None

_run_workflow("r1", "echo 'test' > A")
_run_workflow("r2", "tee B C < A")
_run_workflow("r3", "cp A Z")
_run_workflow("r4", "cp B X")
_run_workflow("r5", "cat C Z > Y")
_run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
_run_workflow("r7", "echo 'other' > H")
_run_workflow("r8", "tee I J < H")
assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
cache_test_project.save()
yield project
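The fixtures above all follow the same caching contract: `cache_test_project.setup()` returns True when a previously saved repository could be restored (so the expensive build is skipped), `cache_test_project.save()` stores the freshly built repository for later runs, and `set_name()` selects an explicit cache key. The fixture itself is defined elsewhere in the PR; the sketch below only illustrates the assumed contract, with the class name, path handling, and method bodies as assumptions:

```python
import shutil
from pathlib import Path


class CacheTestProject:
    """Illustrative stand-in for the object returned by the cache_test_project fixture."""

    def __init__(self, project_path: Path, cache_dir: Path, name: str):
        self.project_path = project_path
        self.cache_dir = cache_dir
        self.name = name

    def set_name(self, name: str) -> None:
        """Use an explicit cache key so several fixtures can share one cached repo."""
        self.name = name

    def setup(self) -> bool:
        """Restore a cached repository if one exists; return True on a cache hit."""
        cached = self.cache_dir / self.name
        if not cached.exists():
            return False
        shutil.rmtree(self.project_path, ignore_errors=True)
        shutil.copytree(cached, self.project_path)
        return True

    def save(self) -> None:
        """Persist the freshly built repository for reuse by later test runs."""
        cached = self.cache_dir / self.name
        if not cached.exists():
            shutil.copytree(self.project_path, cached)
```

Guarding save() with an existence check keeps it safe to call unconditionally, even after a cache hit.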
8 changes: 4 additions & 4 deletions tests/cli/test_datasets.py
@@ -1790,7 +1790,7 @@ def test_pull_data_from_lfs(runner, project, tmpdir, subdirectory, no_lfs_size_l
assert 0 == result.exit_code, format_result_exception(result)


def test_lfs_hook(project_with_injection, subdirectory, large_file):
def test_lfs_hook(project_with_injection, subdirectory, large_file, enable_precommit_hook):
"""Test committing large files to Git."""
filenames = {"large-file", "large file with whitespace", "large*file?with wildcards"}

@@ -1819,7 +1819,7 @@ def test_lfs_hook(project_with_injection, subdirectory, large_file):


@pytest.mark.parametrize("use_env_var", [False, True])
def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var):
def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var, enable_precommit_hook):
"""Test committing large files to Git gets automatically added to lfs."""
if use_env_var:
os.environ["AUTOCOMMIT_LFS"] = "true"
@@ -1851,7 +1851,7 @@ def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var):
assert filenames == tracked_lfs_files


def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file):
def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file, enable_precommit_hook):
"""Test committing large files to Git."""
result = runner.invoke(
cli, ["--no-external-storage", "dataset", "add", "--copy", "-c", "my-dataset", str(large_file)]
@@ -1860,7 +1860,7 @@ def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file):
assert "OK" in result.output


def test_datadir_hook(runner, project, subdirectory):
def test_datadir_hook(runner, project, subdirectory, enable_precommit_hook):
"""Test pre-commit hook fir checking datadir files."""
set_value(section="renku", key="check_datadir_files", value="true", global_only=True)

67 changes: 17 additions & 50 deletions tests/cli/test_graph.py
@@ -15,26 +15,16 @@
# limitations under the License.
"""Test ``graph`` command."""

import os

import pytest

from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR
from renku.domain_model.dataset import Url
from renku.ui.cli import cli
from tests.utils import format_result_exception, modified_environ, with_dataset


@pytest.mark.parametrize("revision", ["", "HEAD", "HEAD^", "HEAD^..HEAD"])
def test_graph_export_validation(runner, project, directory_tree, run, revision):
def test_graph_export_validation(runner, project_with_dataset_and_workflows, revision):
"""Test graph validation when exporting."""
assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code

file1 = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
file2 = project.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"
assert 0 == run(["run", "head", str(file1)], stdout="out1")
assert 0 == run(["run", "tail", str(file2)], stdout="out2")

result = runner.invoke(cli, ["graph", "export", "--format", "json-ld", "--strict", "--revision", revision])

assert 0 == result.exit_code, format_result_exception(result)
@@ -51,17 +41,17 @@ def test_graph_export_validation(runner, project, directory_tree, run, revision)
assert "https://renkulab.io" in result.output

# Make sure that nothing has changed during export which is a read-only operation
assert not project.repository.is_dirty()
assert not project_with_dataset_and_workflows.repository.is_dirty()


@pytest.mark.serial
@pytest.mark.shelled
def test_graph_export_strict_run(runner, project, run_shell):
"""Test graph export output of run command."""
# Run a shell command with pipe.
assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
def test_graph_export_strict(
runner,
project_with_dataset_and_workflows,
run_shell,
):
"""Test strict graph export output command."""

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--full", "--strict", "--format=json-ld"])
@@ -71,55 +61,32 @@ def test_graph_export_strict_run(runner, project, run_shell):
assert "my_output_file2" in result.output
assert "my-composite-plan" in result.output

assert run_shell("renku workflow remove composite")[1] is None
assert run_shell("renku workflow remove run2")[1] is None

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)


def test_graph_export_strict_dataset(tmpdir, runner, project, subdirectory):
"""Test output of graph export for dataset add."""
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
assert 0 == result.exit_code, format_result_exception(result)
paths = []
test_paths = []
for i in range(3):
new_file = tmpdir.join(f"file_{i}")
new_file.write(str(i))
paths.append(str(new_file))
test_paths.append(os.path.relpath(str(new_file), str(project.path)))

# add data
result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths)
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld", "--revision", "HEAD^^^^"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld", "--revision", "HEAD"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that only most recent dataset is exported
assert 1 == result.output.count("http://schema.org/Dataset")

# NOTE: Don't pass ``--full`` to check it's the default action.
assert run_shell("renku workflow remove composite")[1] is None
assert run_shell("renku workflow remove run2")[1] is None

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that all datasets are exported
assert all(p in result.output for p in ["my_output_file2", "my_output_file"]), result.output

assert 2 == result.output.count("http://schema.org/Dataset")

# remove and readd dataset
result = runner.invoke(cli, ["dataset", "rm", "my-dataset"])
result = runner.invoke(cli, ["dataset", "rm", "my-data"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
result = runner.invoke(cli, ["dataset", "create", "my-data"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that all datasets are exported
assert 4 == result.output.count("http://schema.org/Dataset")