Commit

DEBUG: Added dummy module to decrease coverage
cpelley committed Jul 2, 2024
1 parent cbece8d commit f466ff6
Showing 3 changed files with 316 additions and 14 deletions.
34 changes: 21 additions & 13 deletions .github/workflows/tests.yml
@@ -42,7 +42,7 @@ jobs:
# TESTS (inc. test coverage)
- name: Run pytest + coverage report gen
run: pytest --cov=dagrunner --cov-report=term | tee coverage_output.txt
run: pytest --cov=dagrunner --cov-report=term --cov-report=html | tee coverage_output.txt

# TESTS (main branch)
- name: Cache ref branch coverage report
@@ -67,35 +67,43 @@ jobs:
# TESTS (compare coverage)
- name: Compare coverage
id: comp-coverage
run: |
echo "pr_coverage_total=$(grep TOTAL coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')" | tee -a $GITHUB_ENV
echo "ref_coverage_total=$(grep TOTAL ref/coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')" | tee -a $GITHUB_ENV
if (( $pr_coverage_total > $ref_coverage_total )); then
echo "COVERAGE_DECREASED=true" | tee -a $GITHUB_ENV
pr_coverage_total=$(grep TOTAL coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')
echo "pr_coverage_total=$pr_coverage_total" | tee -a $GITHUB_OUTPUT
ref_coverage_total=$(grep TOTAL ref/coverage_output.txt | awk '{print $NF}' | awk '{print substr($0, 1, length($0)-1)}')
echo "ref_coverage_total=$ref_coverage_total" | tee -a $GITHUB_OUTPUT
if (( pr_coverage_total < ref_coverage_total )); then
echo "coverage_decreased=true" | tee -a $GITHUB_OUTPUT
else
echo "COVERAGE_DECREASED=false" | tee -a $GITHUB_ENV
echo "coverage_decreased=false" | tee -a $GITHUB_OUTPUT
fi
- name: Comment coverage report
if: env.COVERAGE_DECREASED == 'true'
if: steps.comp-coverage.outputs.coverage_decreased == 'true'
uses: actions/github-script@v7
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const comment = "The test coverage has decreased from '${{ env.main_coverage_total }}%' to '${{ env.pr_coverage_total }}%'.\nPlease review test coverage. Summary report uploaded as artifact.";
github.issues.createComment({
let comment = String();
comment = "The test coverage has decreased from '${{ steps.comp-coverage.outputs.ref_coverage_total }}%' to '${{ steps.comp-coverage.outputs.pr_coverage_total }}%' (commit SHA: ${{ github.event.pull_request.head.sha }})."
comment += "\nPlease review test coverage. Report uploaded as artifact.";
console.log(comment)
github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: comment
});
- name: Upload coverage report
if: env.COVERAGE_DECREASED == 'true'
if: steps.comp-coverage.outputs.coverage_decreased == 'true'
uses: actions/upload-artifact@v4
with:
name: coverage-report-pr
path: coverage_output.txt
path: |
coverage_output.txt
htmlcov/
# PRE-COMMIT

@@ -126,7 +134,7 @@ jobs:
- name: Check if documentation has changed
id: check-docs
run: |
git diff --quiet --exit-code || echo "::set-output name=changed::true"
echo "changed=$(git diff --quiet --exit-code || echo true)" | tee -a $GITHUB_OUTPUT
# https://github.com/orgs/community/discussions/26560#discussioncomment-3531273
- name: Commit and push documentation changes
@@ -137,4 +145,4 @@
git commit -am "Automated reference documentation update for PR ${{ github.event.number }} [skip ci]"
git push
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
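
For context, the "Compare coverage" step above takes the trailing percentage from the TOTAL line of each pytest-cov terminal report and records whether the pull-request total has dropped below the reference total. A minimal Python sketch of that comparison logic follows (illustration only, not part of this commit; the report file names and TOTAL-line format are taken from the workflow above):

# Sketch of the "Compare coverage" logic: read the last field of the TOTAL
# line from each coverage report, strip the trailing '%', and compare.
def total_percentage(path):
    with open(path) as report:
        for line in report:
            if line.startswith("TOTAL"):
                return float(line.split()[-1].rstrip("%"))
    raise ValueError(f"no TOTAL line found in {path}")

pr_total = total_percentage("coverage_output.txt")
ref_total = total_percentage("ref/coverage_output.txt")
coverage_decreased = pr_total < ref_total
print(f"pr={pr_total}% ref={ref_total}% decreased={coverage_decreased}")
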
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
[![GitHub Tag](https://img.shields.io/github/v/tag/MetOffice/dagrunner)](https://github.com/MetOffice/dagrunner/tags)
[![wGitHub Tag](https://img.shields.io/github/v/tag/MetOffice/dagrunner)](https://github.com/MetOffice/dagrunner/tags)
[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause)
![Experimental](https://img.shields.io/badge/status-experimental-orange)
![Python Project](https://img.shields.io/badge/language-Python-blue?logo=python&logoColor=white)
294 changes: 294 additions & 0 deletions dagrunner/execute_graph2.py
@@ -0,0 +1,294 @@
#!/usr/bin/env python3
# (C) Crown Copyright, Met Office. All rights reserved.
#
# This file is part of 'dagrunner' and is released under the BSD 3-Clause license.
# See LICENSE in the root of the repository for full licensing details.
import inspect
import logging
import warnings
from functools import partial

import importlib
import networkx as nx

import dask
from dask.base import tokenize
from dask.utils import apply
from dagrunner.utils import (
TimeIt,
function_to_argparse,
)
from dagrunner.plugin_framework import NodeAwarePlugin
from dagrunner.runner.schedulers import SCHEDULERS
from dagrunner.utils.visualisation import visualise_graph
from dagrunner.utils import logger


class SkipBranch(Exception):
"""
This exception is used to skip a branch of the execution graph.
To be used in combination with one of the multiprocessing schedulers.
In the single-threaded scheduler, Dask executes tasks sequentially, and
exceptions will propagate as they occur, potentially halting the execution of
subsequent tasks.
"""

pass


def plugin_executor(
*args,
call=None,
verbose=False,
dry_run=False,
common_kwargs=None,
**node_properties,
):
"""
Executes a plugin function or method with the provided arguments and keyword arguments.
Args:
- `*args`: Positional arguments to be passed to the plugin function or method.
- `call`: A tuple containing the callable object or python dot path to one, and its keyword arguments.
- `verbose`: A boolean indicating whether to print verbose output.
- `dry_run`: A boolean indicating whether to perform a dry run without executing the plugin.
- `common_kwargs`: A dictionary of optional keyword arguments to apply to all applicable plugins.
That is, they are passed to the plugin call only if the plugin expects such keywords.
This is a useful alternative to global or environment variable usage.
- `**node_properties`: Node properties. These will be passed to 'node-aware' plugins.
Returns:
- The result of executing the plugin function or method.
Raises:
- ValueError: If the `call` argument is not provided.
"""
logger.client_attach_socket_handler()

args = [
arg for arg in args if arg is not None
] # support plugins that have no return value
if call is None:
raise ValueError("call is a required argument")
if verbose:
print(f"args: {args}")
print(f"call: {call}")
callable_obj, callable_kwargs = call

if isinstance(callable_obj, str):
# import callable if a string is provided
module_name, function_name = callable_obj.rsplit(".", 1)
module = importlib.import_module(module_name)
if verbose:
print(f"imported module '{module}', callable '{function_name}'")
callable_obj = getattr(module, function_name)

with dask.config.set(scheduler="single-threaded"):
call_msg = ""
obj_name = callable_obj.__name__
if isinstance(callable_obj, type):
if issubclass(callable_obj, NodeAwarePlugin):
callable_kwargs["node_properties"] = node_properties
callable_obj = callable_obj()
call_msg = "()"
callable_kwargs = callable_kwargs | {
key: value for key, value in common_kwargs.items() if key in callable_kwargs
} # based on overriding arguments
callable_kwargs = callable_kwargs | {
key: value
for key, value in {"verbose": verbose, "dry_run": dry_run}.items()
if key in inspect.signature(callable_obj).parameters
} # based on function signature

msg = f"{obj_name}{call_msg}(*{args}, **{callable_kwargs})"
if verbose:
print(msg)
with TimeIt() as timer:
res = callable_obj(*args, **callable_kwargs)
logging.info(f"{str(timer)}; {msg}")

if verbose:
print(f"result: {res}")
return res


def _attempt_visualise_graph(graph, graph_output):
"""Visualise graph but if fails, turn into a warning."""
try:
visualise_graph(graph, graph_output)
except Exception as err:
warnings.warn(f"{err}. Skipping execution graph visualisation.")


def _process_nodes(node):
"""Filter missing attributes and copy properties over as attributes."""
return {k: v for k, v in vars(node).items() if v is not None}


def _get_networkx(networkx_graph):
"""
Converts the input `networkx_graph` into a NetworkX DiGraph object.
Args:
networkx_graph (networkx.DiGraph, callable or str):
A networkx graph; dot path to a networkx graph or callable that returns
one (str); tuple representing (edges, nodes) or callable object that
returns a networkx graph.
Returns:
nxgraph (networkx.DiGraph): The NetworkX DiGraph object.
Raises:
ValueError: If the `networkx_graph` parameter is not recognized.
"""
if isinstance(networkx_graph, nx.DiGraph) or callable(networkx_graph):
return networkx_graph
elif isinstance(networkx_graph, str):
parts = networkx_graph.split(".")
module = importlib.import_module(".".join(parts[:-1]))
networkx_graph = parts[-1]
nxgraph = getattr(module, networkx_graph)
elif callable(networkx_graph):
nxgraph = networkx_graph()
else:
try:
edges, nodes = networkx_graph
nodes = {k: nodes[k] | _process_nodes(k) for k in nodes.keys()}.items()
nxgraph = nx.DiGraph()
nxgraph.add_edges_from(edges)
nxgraph.add_nodes_from(nodes)
except ValueError:
raise ValueError(
"Not recognised 'networkx_graph' parameter, see ExecuteGraph docstring."
)
return nxgraph


class ExecuteGraph:
def __init__(
self,
networkx_graph: str,
plugin_executor: callable = plugin_executor,
scheduler: str = "processes",
num_workers: int = 1,
profiler_filepath: str = None,
dry_run: bool = False,
verbose: bool = False,
sqlite_filepath: str = None,
**kwargs,
):
"""
Execute a networkx graph using a chosen scheduler.
Args:
- `networkx_graph` (networkx.DiGraph, callable or str):
A networkx graph; dot path to a networkx graph or callable that returns
one; tuple representing (edges, nodes) or callable object that
returns a networkx graph.
- `plugin_executor` (callable):
A callable object that executes a plugin function or method with the provided
arguments and keyword arguments. By default, uses the `plugin_executor` function.
Optional.
- `scheduler` (str):
Accepted values include "ray", "multiprocessing" and those recognised
by dask: "threads", "processes" and "single-threaded" (useful for debugging).
See https://docs.dask.org/en/latest/scheduling.html. Optional.
- `num_workers` (int):
Number of processes or threads to use. Optional.
- `dry_run` (bool):
Print executed commands but don't actually run them. Optional.
- `profiler_filepath` (str):
Output html profile filepath if supported by the chosen scheduler.
See https://docs.dask.org/en/latest/diagnostics-local.html
Optional.
- `verbose` (bool):
Print executed commands. Optional.
- `sqlite_filepath` (str):
Filepath to a SQLite database to store log records. Optional.
- `**kwargs`:
Optional global keyword arguments to apply to all applicable plugins.
"""
self._nxgraph = _get_networkx(networkx_graph)
self._plugin_executor = plugin_executor
if scheduler not in SCHEDULERS:
raise ValueError(
f"scheduler '{scheduler}' not recognised, please choose from {list(SCHEDULERS.keys())}"
)
self._scheduler = SCHEDULERS[scheduler]
self._num_workers = num_workers
self._profiler_output = profiler_filepath
self._kwargs = kwargs | {"verbose": verbose, "dry_run": dry_run}
self._exec_graph = self._process_graph()
self._sqlite_filepath = sqlite_filepath

@property
def nxgraph(self):
return self._nxgraph

def _process_graph(self):
"""
Create a flattened dictionary describing the relationship between each of our nodes.
Here we wrap our nodes to ensure common parameters are shared across all
executed nodes (e.g. dry-run, verbose).
TODO: Potentially support 'clobber' i.e. partial graph execution when recovering from a graph failure.
"""
executor = partial(
self._plugin_executor,
verbose=self._kwargs.pop("verbose"),
dry_run=self._kwargs.pop("dry_run"),
common_kwargs=self._kwargs,
)

if callable(self._nxgraph):
self._nxgraph = self._nxgraph()

exec_graph = {}
for node_id, properties in self._nxgraph.nodes(data=True):
# don't use nodes in our graph as some schedulers (dask
# distributed as per dask.core.validate_key) support only a subset
# of types (tuples, bytes, int, float and str).
key = tokenize(node_id)
args = [tokenize(arg) for arg in self._nxgraph.predecessors(node_id)]
exec_graph[key] = (apply, executor, args, properties)

# handle_clobber(graph, workflow, no_clobber, verbose)
return exec_graph

def visualise(self, output_filepath: str):
_attempt_visualise_graph(self._exec_graph, output_filepath)

def __call__(self):
with logger.ServerContext(sqlite_filepath=self._sqlite_filepath), TimeIt(
verbose=True
), self._scheduler(
self._num_workers, profiler_filepath=self._profiler_output
) as scheduler:
try:
res = scheduler.run(self._exec_graph)
except SkipBranch:
pass
return res


def main():
"""
Entry point of the program.
Parses command line arguments and executes the graph using the ExecuteGraph class.
"""
parser = function_to_argparse(ExecuteGraph, exclude=["plugin_executor"])
args = parser.parse_args()
args = vars(args)
# positional arguments with '-' aren't converted to '_' by argparse.
args = {key.replace("-", "_"): value for key, value in args.items()}
if args.get("verbose", False):
print(f"CLI call arguments: {args}")
kwargs = args.pop("kwargs", None) or {}
ExecuteGraph(**args, **kwargs)()


if __name__ == "__main__":
main()
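
For reference, a minimal usage sketch of the ExecuteGraph class added above (illustration only, not part of this commit). The plugin functions and node names are invented for the example; each node's 'call' attribute is the (callable, kwargs) pair consumed by plugin_executor, edges pass predecessor results to successors as positional arguments, and the scheduler name is one of those listed in the ExecuteGraph docstring:

import networkx as nx

from dagrunner.execute_graph2 import ExecuteGraph


def make_data(value, verbose=False):
    # Plugin with no predecessors: simply return its configured value.
    return value


def double(data, verbose=False):
    # Plugin receiving its predecessor's result as a positional argument.
    return data * 2


graph = nx.DiGraph()
graph.add_node("source", call=(make_data, {"value": 21}))
graph.add_node("sink", call=(double, {}))
graph.add_edge("source", "sink")  # "source" feeds its result into "sink"

# Single-threaded scheduler keeps execution (and any exceptions) in-process.
ExecuteGraph(graph, scheduler="single-threaded", verbose=True)()
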
