test: improve performance of tests by caching repos #3484

Open · wants to merge 5 commits into base: develop
3 changes: 2 additions & 1 deletion .github/workflows/test_deploy.yml
@@ -14,7 +14,8 @@ on:
branches:
- "**"
- "!master"

env:
RENKU_TEST_RECREATE_CACHE: "${{ (endsWith(github.ref, 'master') || endsWith(github.ref, 'develop') || startsWith(github.ref, 'refs/tags/') || startsWith(github.ref, 'refs/heads/release/' ) ) && '1' || '0' }}"
jobs:
set-matrix:
runs-on: ubuntu-latest
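The `RENKU_TEST_RECREATE_CACHE` expression above resolves to "1" only on master, develop, tag, and release refs, so cached test repositories are rebuilt on those runs and simply reused on feature branches. A minimal Python sketch of the same branch logic, for illustration only (the function name and example refs are not part of the PR):

```python
def should_recreate_cache(ref: str) -> str:
    """Mirror the workflow expression: rebuild the cache only on mainline refs."""
    recreate = (
        ref.endswith("master")
        or ref.endswith("develop")
        or ref.startswith("refs/tags/")
        or ref.startswith("refs/heads/release/")
    )
    return "1" if recreate else "0"


assert should_recreate_cache("refs/heads/develop") == "1"
assert should_recreate_cache("refs/heads/my-feature-branch") == "0"
```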
2 changes: 2 additions & 0 deletions conftest.py
@@ -78,10 +78,12 @@ def pytest_configure(config):
os.environ["RENKU_DISABLE_VERSION_CHECK"] = "1"
# NOTE: Set an env var during tests to mark that Renku is running in a test session.
os.environ["RENKU_RUNNING_UNDER_TEST"] = "1"
os.environ["RENKU_SKIP_HOOK_CHECKS"] = "1"


def pytest_unconfigure(config):
"""Hook that is called by pytest after all tests are executed."""
os.environ.pop("RENKU_SKIP_MIN_VERSION_CHECK", None)
os.environ.pop("RENKU_DISABLE_VERSION_CHECK", None)
os.environ.pop("RENKU_RUNNING_UNDER_TEST", None)
os.environ.pop("RENKU_SKIP_HOOK_CHECKS", None)
4 changes: 4 additions & 0 deletions renku/data/pre-commit.sh
@@ -19,6 +19,10 @@
# RENKU HOOK. DO NOT REMOVE OR MODIFY.
######################################

if [ "$RENKU_SKIP_HOOK_CHECKS" == "1" ]; then
exit 0
fi

# Find all modified or added files, and do nothing if there aren't any.
export RENKU_DISABLE_VERSION_CHECK=true

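Together with the conftest.py change above, this makes the Renku pre-commit hook a no-op for the whole test session: pytest exports RENKU_SKIP_HOOK_CHECKS=1 and the hook exits immediately. Tests that specifically exercise hook behaviour opt back in through an `enable_precommit_hook` fixture (used in tests/cli/test_datasets.py below). That fixture is not shown in this diff; the sketch below is only an assumed minimal implementation of such an opt-in fixture:

```python
import os

import pytest


@pytest.fixture
def enable_precommit_hook():
    """Re-enable Renku pre-commit hook checks for a single test."""
    previous = os.environ.pop("RENKU_SKIP_HOOK_CHECKS", None)
    try:
        yield
    finally:
        if previous is not None:
            os.environ["RENKU_SKIP_HOOK_CHECKS"] = previous
```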
98 changes: 98 additions & 0 deletions tests/cli/fixtures/cli_projects.py
@@ -24,7 +24,9 @@

from renku.core.config import set_value
from renku.infrastructure.repository import Repository
from renku.ui.cli import cli
from tests.fixtures.repository import RenkuProject
from tests.utils import format_result_exception


@pytest.fixture()
@@ -131,3 +133,99 @@ def workflow_file_project(project, request) -> Generator[RenkuWorkflowFileProjec
(project.path / "data" / "collection" / "colors.csv").write_text("\n".join(f"color-{i}" for i in range(99)))

yield workflow_file_project


@pytest.fixture
def project_with_merge_conflict(runner, project, directory_tree, run_shell, cache_test_project):
"""Project with a merge conflict."""
if not cache_test_project.setup():
result = runner.invoke(cli, ["mergetool", "install"])

assert 0 == result.exit_code, format_result_exception(result)

# create a common dataset
result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "shared-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a common workflow
output = run_shell('renku run --name "shared-workflow" echo "a unique string" > my_output_file')

assert b"" == output[0]
assert output[1] is None

# switch to a new branch
output = run_shell("git checkout -b remote-branch")

assert b"Switched to a new branch 'remote-branch'\n" == output[0]
assert output[1] is None

# edit the dataset
result = runner.invoke(cli, ["dataset", "edit", "-d", "remote description", "shared-dataset"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "remote-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a new workflow
output = run_shell('renku run --name "remote-workflow" echo "a unique string" > remote_output_file')

assert b"" == output[0]
assert output[1] is None

# Create a downstream workflow
output = run_shell('renku run --name "remote-downstream-workflow" cp my_output_file my_remote_downstream')

assert b"" == output[0]
assert output[1] is None

# Create another downstream workflow
output = run_shell('renku run --name "remote-downstream-workflow2" cp remote_output_file my_remote_downstream2')

assert b"" == output[0]
assert output[1] is None

# Edit the project metadata
result = runner.invoke(cli, ["project", "edit", "-k", "remote"])

assert 0 == result.exit_code, format_result_exception(result)

# Switch back to master
output = run_shell("git checkout master")

assert b"Switched to branch 'master'\n" == output[0]
assert output[1] is None

# Add a new dataset
result = runner.invoke(
cli, ["dataset", "add", "--copy", "--create", "local-dataset", str(directory_tree)], catch_exceptions=False
)
assert 0 == result.exit_code, format_result_exception(result)

# Create a local workflow
output = run_shell('renku run --name "local-workflow" echo "a unique string" > local_output_file')

assert b"" == output[0]
assert output[1] is None

# Create a local downstream workflow
output = run_shell('renku run --name "local-downstream-workflow" cp my_output_file my_local_downstream')

assert b"" == output[0]
assert output[1] is None

# Create another local downstream workflow
output = run_shell('renku run --name "local-downstream-workflow2" cp local_output_file my_local_downstream2')

assert b"" == output[0]
assert output[1] is None

# Edit the project in master as well
result = runner.invoke(cli, ["project", "edit", "-k", "local"])

assert 0 == result.exit_code, format_result_exception(result)
cache_test_project.save()
yield project
47 changes: 33 additions & 14 deletions tests/cli/fixtures/cli_workflow.py
@@ -17,21 +17,40 @@

import pytest

from renku.ui.cli import cli


@pytest.fixture
def workflow_graph(run_shell, project):
def workflow_graph(run_shell, project, cache_test_project):
"""Setup a project with a workflow graph."""
cache_test_project.set_name("workflow_graph_fixture")
if not cache_test_project.setup():

def _run_workflow(name, command, extra_args=""):
output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
# Assert not allocated stderr.
assert output[1] is None

_run_workflow("r1", "echo 'test' > A")
_run_workflow("r2", "tee B C < A")
_run_workflow("r3", "cp A Z")
_run_workflow("r4", "cp B X")
_run_workflow("r5", "cat C Z > Y")
_run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
_run_workflow("r7", "echo 'other' > H")
_run_workflow("r8", "tee I J < H")
cache_test_project.save()


@pytest.fixture
def project_with_dataset_and_workflows(runner, run_shell, project, directory_tree, cache_test_project):
"""Project with a dataset and some workflows."""
if not cache_test_project.setup():
assert 0 == runner.invoke(cli, ["dataset", "create", "my-data"]).exit_code
assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "my-data", str(directory_tree)]).exit_code

def _run_workflow(name, command, extra_args=""):
output = run_shell(f"renku run --name {name} {extra_args} -- {command}")
# Assert not allocated stderr.
assert output[1] is None

_run_workflow("r1", "echo 'test' > A")
_run_workflow("r2", "tee B C < A")
_run_workflow("r3", "cp A Z")
_run_workflow("r4", "cp B X")
_run_workflow("r5", "cat C Z > Y")
_run_workflow("r6", "bash -c 'cat X Y | tee R S'", extra_args="--input X --input Y --output R --output S")
_run_workflow("r7", "echo 'other' > H")
_run_workflow("r8", "tee I J < H")
assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
cache_test_project.save()
yield project
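The fixtures above all follow the same caching contract: `cache_test_project.setup()` returns True when a previously saved repository could be restored (so the expensive build is skipped), `cache_test_project.save()` stores the freshly built repository for later runs, and `set_name()` selects an explicit cache key. The fixture itself is defined elsewhere in the PR; the sketch below only illustrates the assumed contract, with the class name, path handling, and method bodies as assumptions:

```python
import shutil
from pathlib import Path


class CacheTestProject:
    """Illustrative stand-in for the object returned by the cache_test_project fixture."""

    def __init__(self, project_path: Path, cache_dir: Path, name: str):
        self.project_path = project_path
        self.cache_dir = cache_dir
        self.name = name

    def set_name(self, name: str) -> None:
        """Use an explicit cache key so several fixtures can share one cached repo."""
        self.name = name

    def setup(self) -> bool:
        """Restore a cached repository if one exists; return True on a cache hit."""
        cached = self.cache_dir / self.name
        if not cached.exists():
            return False
        shutil.rmtree(self.project_path, ignore_errors=True)
        shutil.copytree(cached, self.project_path)
        return True

    def save(self) -> None:
        """Persist the freshly built repository for reuse by later test runs."""
        cached = self.cache_dir / self.name
        if not cached.exists():
            shutil.copytree(self.project_path, cached)
```

Guarding save() with an existence check keeps it safe to call unconditionally, even after a cache hit.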
8 changes: 4 additions & 4 deletions tests/cli/test_datasets.py
@@ -1790,7 +1790,7 @@ def test_pull_data_from_lfs(runner, project, tmpdir, subdirectory, no_lfs_size_l
assert 0 == result.exit_code, format_result_exception(result)


def test_lfs_hook(project_with_injection, subdirectory, large_file):
def test_lfs_hook(project_with_injection, subdirectory, large_file, enable_precommit_hook):
"""Test committing large files to Git."""
filenames = {"large-file", "large file with whitespace", "large*file?with wildcards"}

@@ -1819,7 +1819,7 @@ def test_lfs_hook(project_with_injection, subdirectory, large_file):


@pytest.mark.parametrize("use_env_var", [False, True])
def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var):
def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var, enable_precommit_hook):
"""Test committing large files to Git gets automatically added to lfs."""
if use_env_var:
os.environ["AUTOCOMMIT_LFS"] = "true"
@@ -1851,7 +1851,7 @@ def test_lfs_hook_autocommit(runner, project, subdirectory, large_file, use_env_var):
assert filenames == tracked_lfs_files


def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file):
def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file, enable_precommit_hook):
"""Test committing large files to Git."""
result = runner.invoke(
cli, ["--no-external-storage", "dataset", "add", "--copy", "-c", "my-dataset", str(large_file)]
@@ -1860,7 +1860,7 @@ def test_lfs_hook_can_be_avoided(runner, project, subdirectory, large_file):
assert "OK" in result.output


def test_datadir_hook(runner, project, subdirectory):
def test_datadir_hook(runner, project, subdirectory, enable_precommit_hook):
"""Test pre-commit hook fir checking datadir files."""
set_value(section="renku", key="check_datadir_files", value="true", global_only=True)

67 changes: 17 additions & 50 deletions tests/cli/test_graph.py
@@ -15,26 +15,16 @@
# limitations under the License.
"""Test ``graph`` command."""

import os

import pytest

from renku.core.constant import DEFAULT_DATA_DIR as DATA_DIR
from renku.domain_model.dataset import Url
from renku.ui.cli import cli
from tests.utils import format_result_exception, modified_environ, with_dataset


@pytest.mark.parametrize("revision", ["", "HEAD", "HEAD^", "HEAD^..HEAD"])
def test_graph_export_validation(runner, project, directory_tree, run, revision):
def test_graph_export_validation(runner, project_with_dataset_and_workflows, revision):
"""Test graph validation when exporting."""
assert 0 == runner.invoke(cli, ["dataset", "add", "--copy", "-c", "my-data", str(directory_tree)]).exit_code

file1 = project.path / DATA_DIR / "my-data" / directory_tree.name / "file1"
file2 = project.path / DATA_DIR / "my-data" / directory_tree.name / "dir1" / "file2"
assert 0 == run(["run", "head", str(file1)], stdout="out1")
assert 0 == run(["run", "tail", str(file2)], stdout="out2")

result = runner.invoke(cli, ["graph", "export", "--format", "json-ld", "--strict", "--revision", revision])

assert 0 == result.exit_code, format_result_exception(result)
@@ -51,17 +41,17 @@ def test_graph_export_validation(runner, project, directory_tree, run, revision)
assert "https://renkulab.io" in result.output

# Make sure that nothing has changed during export which is a read-only operation
assert not project.repository.is_dirty()
assert not project_with_dataset_and_workflows.repository.is_dirty()


@pytest.mark.serial
@pytest.mark.shelled
def test_graph_export_strict_run(runner, project, run_shell):
"""Test graph export output of run command."""
# Run a shell command with pipe.
assert run_shell('renku run --name run1 echo "my input string" > my_output_file')[1] is None
assert run_shell("renku run --name run2 cp my_output_file my_output_file2")[1] is None
assert run_shell("renku workflow compose my-composite-plan run1 run2")[1] is None
def test_graph_export_strict(
runner,
project_with_dataset_and_workflows,
run_shell,
):
"""Test strict graph export output command."""

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--full", "--strict", "--format=json-ld"])
@@ -71,55 +61,32 @@ def test_graph_export_strict_run(runner, project, run_shell):
assert "my_output_file2" in result.output
assert "my-composite-plan" in result.output

assert run_shell("renku workflow remove composite")[1] is None
assert run_shell("renku workflow remove run2")[1] is None

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)


def test_graph_export_strict_dataset(tmpdir, runner, project, subdirectory):
"""Test output of graph export for dataset add."""
result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
assert 0 == result.exit_code, format_result_exception(result)
paths = []
test_paths = []
for i in range(3):
new_file = tmpdir.join(f"file_{i}")
new_file.write(str(i))
paths.append(str(new_file))
test_paths.append(os.path.relpath(str(new_file), str(project.path)))

# add data
result = runner.invoke(cli, ["dataset", "add", "--copy", "my-dataset"] + paths)
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld", "--revision", "HEAD^^^^"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld", "--revision", "HEAD"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that only most recent dataset is exported
assert 1 == result.output.count("http://schema.org/Dataset")

# NOTE: Don't pass ``--full`` to check it's the default action.
assert run_shell("renku workflow remove composite")[1] is None
assert run_shell("renku workflow remove run2")[1] is None

# Assert created output file.
result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that all datasets are exported
assert all(p in result.output for p in ["my_output_file2", "my_output_file"]), result.output

assert 2 == result.output.count("http://schema.org/Dataset")

# remove and readd dataset
result = runner.invoke(cli, ["dataset", "rm", "my-dataset"])
result = runner.invoke(cli, ["dataset", "rm", "my-data"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["dataset", "create", "my-dataset"])
result = runner.invoke(cli, ["dataset", "create", "my-data"])
assert 0 == result.exit_code, format_result_exception(result)

result = runner.invoke(cli, ["graph", "export", "--strict", "--format=json-ld"])
assert 0 == result.exit_code, format_result_exception(result)
assert all(p in result.output for p in test_paths), result.output

# check that all datasets are exported
assert 4 == result.output.count("http://schema.org/Dataset")