Commit

DEBUG: Added dummy module to decrease coverage
cpelley committed Jul 2, 2024
1 parent cbece8d commit f466ff6
Showing 3 changed files with 316 additions and 14 deletions.
34 changes: 21 additions & 13 deletions .github/workflows/tests.yml
@@ -42,7 +42,7 @@ jobs:
# TESTS (inc. test coverage)
- name: Run pytest + coverage report gen
run: pytest --cov=dagrunner --cov-report=term | tee coverage_output.txt
run: pytest --cov=dagrunner --cov-report=term --cov-report=html | tee coverage_output.txt

# TESTS (main branch)
- name: Cache ref branch coverage report
@@ -67,35 +67,43 @@ jobs:
# TESTS (compare coverage)
- name: Compare coverage
id: comp-coverage
run: |
echo "pr_coverage_total=$(grep TOTAL coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')" | tee -a $GITHUB_ENV
echo "ref_coverage_total=$(grep TOTAL ref/coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')" | tee -a $GITHUB_ENV
if (( $pr_coverage_total > $ref_coverage_total )); then
echo "COVERAGE_DECREASED=true" | tee -a $GITHUB_ENV
pr_coverage_total=$(grep TOTAL coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')
echo "pr_coverage_total=$pr_coverage_total" | tee -a $GITHUB_OUTPUT
ref_coverage_total=$(grep TOTAL ref/coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')
echo "ref_coverage_total=$ref_coverage_total" | tee -a $GITHUB_OUTPUT
if (( pr_coverage_total < ref_coverage_total )); then
echo "coverage_decreased=true" | tee -a $GITHUB_OUTPUT
else
echo "COVERAGE_DECREASED=false" | tee -a $GITHUB_ENV
echo "coverage_decreased=false" | tee -a $GITHUB_OUTPUT
fi
- name: Comment coverage report
if: env.COVERAGE_DECREASED == 'true'
if: steps.comp-coverage.outputs.coverage_decreased == 'true'
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const comment = "The test coverage has decreased from '${{ env.main_coverage_total }}%' to '${{ env.pr_coverage_total }}%'.\nPlease review test coverage. Summary report uploaded as artifact.";
github.issues.createComment({
let comment = String();
comment = "The test coverage has decreased from '${{ steps.comp-coverage.outputs.ref_coverage_total }}%' to '${{ steps.comp-coverage.outputs.pr_coverage_total }}%' (commit SHA: ${{ github.event.pull_request.head.sha }})."
comment += "\nPlease review test coverage. Report uploaded as artifact.";
console.log(comment)
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: comment
});
- name: Upload coverage report
if: env.COVERAGE_DECREASED == 'true'
if: steps.comp-coverage.outputs.coverage_decreased == 'true'
uses: actions/upload-artifact@v4
with:
name: coverage-report-pr
path: coverage_output.txt
path: |
coverage_output.txt
htmlcov/
# PRE-COMMIT

@@ -126,7 +134,7 @@ jobs:
- name: Check if documentation has changed
id: check-docs
run: |
git diff --quiet --exit-code || echo "::set-output name=changed::true"
echo "changed=$(git diff --quiet --exit-code || echo true)" | tee -a $GITHUB_OUTPUT
# https://github.com/orgs/community/discussions/26560#discussioncomment-3531273
- name: Commit and push documentation changes
@@ -137,4 +145,4 @@
git commit -am "Automated reference documentation update for PR ${{ github.event.number }} [skip ci]"
git push
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
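
For context, the "Compare coverage" step above takes the trailing percentage from the TOTAL line of each pytest-cov terminal report and records whether the pull-request total has dropped below the reference total. A minimal Python sketch of that comparison logic follows (illustration only, not part of this commit; the report file names and TOTAL-line format are taken from the workflow above):

# Sketch of the "Compare coverage" logic: read the last field of the TOTAL
# line from each coverage report, strip the trailing '%', and compare.
def total_percentage(path):
    with open(path) as report:
        for line in report:
            if line.startswith("TOTAL"):
                return float(line.split()[-1].rstrip("%"))
    raise ValueError(f"no TOTAL line found in {path}")

pr_total = total_percentage("coverage_output.txt")
ref_total = total_percentage("ref/coverage_output.txt")
coverage_decreased = pr_total < ref_total
print(f"pr={pr_total}% ref={ref_total}% decreased={coverage_decreased}")
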
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
[![GitHub Tag](https://img.shields.io/github/v/tag/MetOffice/dagrunner)](https://github.com/MetOffice/dagrunner/tags)
[![wGitHub Tag](https://img.shields.io/github/v/tag/MetOffice/dagrunner)](https://github.com/MetOffice/dagrunner/tags)
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
![Experimental](https://img.shields.io/badge/status-experimental-orange)
![Python Project](https://img.shields.io/badge/language-Python-blue?logo=python&logoColor=white)
294 changes: 294 additions & 0 deletions dagrunner/execute_graph2.py
@@ -0,0 +1,294 @@
#!/usr/bin/env python3
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'dagrunner' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
import inspect
import logging
import warnings
from functools import partial

import importlib
import networkx as nx

import dask
from dask.base import tokenize
from dask.utils import apply
from dagrunner.utils import (
TimeIt,
function_to_argparse,
)
from dagrunner.plugin_framework import NodeAwarePlugin
from dagrunner.runner.schedulers import SCHEDULERS
from dagrunner.utils.visualisation import visualise_graph
from dagrunner.utils import logger


class SkipBranch(Exception):
"""
This exception is used to skip a branch of the execution graph.
To be used in combination with one of the multiprocessing schedulers.
In the single-threaded scheduler, Dask executes tasks sequentially, and
exceptions will propagate as they occur, potentially halting the execution of
subsequent tasks.
"""

pass


def plugin_executor(
*args,
call=None,
verbose=False,
dry_run=False,
common_kwargs=None,
**node_properties,
):
"""
Executes a plugin function or method with the provided arguments and keyword arguments.
Args:
- `*args`: Positional arguments to be passed to the plugin function or method.
- `call`: A tuple containing the callable object or python dot path to one, and its keyword arguments.
- `verbose`: A boolean indicating whether to print verbose output.
- `dry_run`: A boolean indicating whether to perform a dry run without executing the plugin.
- `common_kwargs`: A dictionary of optional keyword arguments to apply to all applicable plugins.
That is, they are passed to the plugin call only if the plugin expects such keywords.
This is a useful alternative to global or environment variable usage.
- `**node_properties`: Node properties. These will be passed to 'node-aware' plugins.
Returns:
- The result of executing the plugin function or method.
Raises:
- ValueError: If the `call` argument is not provided.
"""
logger.client_attach_socket_handler()

args = [
arg for arg in args if arg is not None
] # support plugins that have no return value
if call is None:
raise ValueError("call is a required argument")
if verbose:
print(f"args: {args}")
print(f"call: {call}")
callable_obj, callable_kwargs = call

if isinstance(callable_obj, str):
# import callable if a string is provided
module_name, function_name = callable_obj.rsplit(".", 1)
module = importlib.import_module(module_name)
if verbose:
print(f"imported module '{module}', callable '{function_name}'")
callable_obj = getattr(module, function_name)

with dask.config.set(scheduler="single-threaded"):
call_msg = ""
obj_name = callable_obj.__name__
if isinstance(callable_obj, type):
if issubclass(callable_obj, NodeAwarePlugin):
callable_kwargs["node_properties"] = node_properties
callable_obj = callable_obj()
call_msg = "()"
callable_kwargs = callable_kwargs | {
key: value for key, value in common_kwargs.items() if key in callable_kwargs
} # based on overriding arguments
callable_kwargs = callable_kwargs | {
key: value
for key, value in {"verbose": verbose, "dry_run": dry_run}.items()
if key in inspect.signature(callable_obj).parameters
} # based on function signature

msg = f"{obj_name}{call_msg}(*{args}, **{callable_kwargs})"
if verbose:
print(msg)
with TimeIt() as timer:
res = callable_obj(*args, **callable_kwargs)
logging.info(f"{str(timer)}; {msg}")

if verbose:
print(f"result: {res}")
return res


def _attempt_visualise_graph(graph, graph_output):
"""Visualise graph but if fails, turn into a warning."""
try:
visualise_graph(graph, graph_output)
except Exception as err:
warnings.warn(f"{err}. Skipping execution graph visualisation.")


def _process_nodes(node):
"""Filter missing attributes and copy properties over as attributes."""
return {k: v for k, v in vars(node).items() if v is not None}


def _get_networkx(networkx_graph):
"""
Converts the input `networkx_graph` into a NetworkX DiGraph object.
Args:
networkx_graph (networkx.DiGraph, callable or str):
A networkx graph; dot path to a networkx graph or callable that returns
one (str); tuple representing (edges, nodes) or callable object that
returns a networkx graph.
Returns:
nxgraph (networkx.DiGraph): The NetworkX DiGraph object.
Raises:
ValueError: If the `networkx_graph` parameter is not recognized.
"""
if isinstance(networkx_graph, nx.DiGraph) or callable(networkx_graph):
return networkx_graph
elif isinstance(networkx_graph, str):
parts = networkx_graph.split(".")
module = importlib.import_module(".".join(parts[:-1]))
networkx_graph = parts[-1]
nxgraph = getattr(module, networkx_graph)
elif callable(networkx_graph):
nxgraph = networkx_graph()
else:
try:
edges, nodes = networkx_graph
nodes = {k: nodes[k] | _process_nodes(k) for k in nodes.keys()}.items()
nxgraph = nx.DiGraph()
nxgraph.add_edges_from(edges)
nxgraph.add_nodes_from(nodes)
except ValueError:
raise ValueError(
"Not recognised 'networkx_graph' parameter, see ExecuteGraph docstring."
)
return nxgraph


class ExecuteGraph:
def __init__(
self,
networkx_graph: str,
plugin_executor: callable = plugin_executor,
scheduler: str = "processes",
num_workers: int = 1,
profiler_filepath: str = None,
dry_run: bool = False,
verbose: bool = False,
sqlite_filepath: str = None,
**kwargs,
):
"""
Execute a networkx graph using a chosen scheduler.
Args:
- `networkx_graph` (networkx.DiGraph, callable or str):
A networkx graph; dot path to a networkx graph or callable that returns
one; tuple representing (edges, nodes) or callable object that
returns a networkx graph.
- `plugin_executor` (callable):
A callable object that executes a plugin function or method with the provided
arguments and keyword arguments. By default, uses the `plugin_executor` function.
Optional.
- `scheduler` (str):
Accepted values include "ray", "multiprocessing" and those recognised
by dask: "threads", "processes" and "single-threaded" (useful for debugging).
See https://docs.dask.org/en/latest/scheduling.html. Optional.
- `num_workers` (int):
Number of processes or threads to use. Optional.
- `dry_run` (bool):
Print executed commands but don't actually run them. Optional.
- `profiler_filepath` (str):
Output html profile filepath if supported by the chosen scheduler.
See https://docs.dask.org/en/latest/diagnostics-local.html
Optional.
- `verbose` (bool):
Print executed commands. Optional.
- `sqlite_filepath` (str):
Filepath to a SQLite database to store log records. Optional.
- `**kwargs`:
Optional global keyword arguments to apply to all applicable plugins.
"""
self._nxgraph = _get_networkx(networkx_graph)
self._plugin_executor = plugin_executor
if scheduler not in SCHEDULERS:
raise ValueError(
f"scheduler '{scheduler}' not recognised, please choose from {list(SCHEDULERS.keys())}"
)
self._scheduler = SCHEDULERS[scheduler]
self._num_workers = num_workers
self._profiler_output = profiler_filepath
self._kwargs = kwargs | {"verbose": verbose, "dry_run": dry_run}
self._exec_graph = self._process_graph()
self._sqlite_filepath = sqlite_filepath

@property
def nxgraph(self):
return self._nxgraph

def _process_graph(self):
"""
Create a flattened dictionary describing the relationship between each of our nodes.
Here we wrap our nodes to ensure common parameters are shared across all
executed nodes (e.g. dry-run, verbose).
TODO: Potentially support 'clobber' i.e. partial graph execution when recovering from a graph failure.
"""
executor = partial(
self._plugin_executor,
verbose=self._kwargs.pop("verbose"),
dry_run=self._kwargs.pop("dry_run"),
common_kwargs=self._kwargs,
)

if callable(self._nxgraph):
self._nxgraph = self._nxgraph()

exec_graph = {}
for node_id, properties in self._nxgraph.nodes(data=True):
# don't use nodes in our graph as some schedulers (dask
# distributed as per dask.core.validate_key) support only a subset
# of types (tuples, bytes, int, float and str).
key = tokenize(node_id)
args = [tokenize(arg) for arg in self._nxgraph.predecessors(node_id)]
exec_graph[key] = (apply, executor, args, properties)

# handle_clobber(graph, workflow, no_clobber, verbose)
return exec_graph

def visualise(self, output_filepath: str):
_attempt_visualise_graph(self._exec_graph, output_filepath)

def __call__(self):
with logger.ServerContext(sqlite_filepath=self._sqlite_filepath), TimeIt(
verbose=True
), self._scheduler(
self._num_workers, profiler_filepath=self._profiler_output
) as scheduler:
try:
res = scheduler.run(self._exec_graph)
except SkipBranch:
pass
return res


def main():
"""
Entry point of the program.
Parses command line arguments and executes the graph using the ExecuteGraph class.
"""
parser = function_to_argparse(ExecuteGraph, exclude=["plugin_executor"])
args = parser.parse_args()
args = vars(args)
# positional arguments with '-' aren't converted to '_' by argparse.
args = {key.replace("-", "_"): value for key, value in args.items()}
if args.get("verbose", False):
print(f"CLI call arguments: {args}")
kwargs = args.pop("kwargs", None) or {}
ExecuteGraph(**args, **kwargs)()


if __name__ == "__main__":
main()
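
For reference, a minimal usage sketch of the ExecuteGraph class added above (illustration only, not part of this commit). The plugin functions and node names are invented for the example; each node's 'call' attribute is the (callable, kwargs) pair consumed by plugin_executor, edges pass predecessor results to successors as positional arguments, and the scheduler name is one of those listed in the ExecuteGraph docstring:

import networkx as nx

from dagrunner.execute_graph2 import ExecuteGraph


def make_data(value, verbose=False):
    # Plugin with no predecessors: simply return its configured value.
    return value


def double(data, verbose=False):
    # Plugin receiving its predecessor's result as a positional argument.
    return data * 2


graph = nx.DiGraph()
graph.add_node("source", call=(make_data, {"value": 21}))
graph.add_node("sink", call=(double, {}))
graph.add_edge("source", "sink")  # "source" feeds its result into "sink"

# Single-threaded scheduler keeps execution (and any exceptions) in-process.
ExecuteGraph(graph, scheduler="single-threaded", verbose=True)()
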
