Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New CLI functionalities: tree, report, job info #180

Merged
merged 6 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,16 @@ jobs:
pip install .[tests]

- name: Unit tests
run: pytest --cov=jobflow_remote --cov-report=xml --cov-config pyproject.toml --ignore tests/integration
run: COVERAGE_FILE=.coverage.1 pytest --cov=jobflow_remote --cov-report= --cov-config pyproject.toml --ignore tests/integration

- name: Integration tests
run: pytest --cov=jobflow_remote --cov-append --cov-report=xml --cov-config pyproject.toml tests/integration
run: COVERAGE_FILE=.coverage.2 pytest --cov=jobflow_remote --cov-report= --cov-config pyproject.toml tests/integration

# combining the reports with --cov-append did not seem to work
- name: Generate coverage report
run: |
coverage combine .coverage.1 .coverage.2
coverage xml

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v4
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ repos:
- tokenize-rt==4.1.0
- types-paramiko
- pydantic~=2.0
- types-python-dateutil
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
hooks:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ dependencies = [
"jobflow >= 0.1.14",
"psutil >= 5.9,< 7.0",
"pydantic ~= 2.4",
"python-dateutil>=2.8.2",
"qtoolkit ~= 0.1, >= 0.1.4",
"rich ~= 13.7",
"ruamel.yaml >= 0.17",
Expand Down
43 changes: 42 additions & 1 deletion src/jobflow_remote/cli/flow.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
from datetime import datetime
from typing import Annotated, Optional

import typer
from dateutil.tz import tzlocal
from jobflow.utils.graph import draw_graph
from rich.prompt import Confirm
from rich.text import Text

from jobflow_remote import SETTINGS
from jobflow_remote.cli.formatting import format_flow_info, get_flow_info_table
from jobflow_remote.cli.formatting import (
format_flow_info,
get_flow_info_table,
get_flow_report_components,
)
from jobflow_remote.cli.jf import app
from jobflow_remote.cli.jfr_typer import JFRTyper
from jobflow_remote.cli.types import (
Expand All @@ -31,6 +37,7 @@
verbosity_opt,
)
from jobflow_remote.cli.utils import (
ReportInterval,
SortOption,
check_incompatible_opt,
exit_with_error_msg,
Expand All @@ -42,6 +49,7 @@
out_console,
)
from jobflow_remote.jobs.graph import get_graph, plot_dash
from jobflow_remote.jobs.report import FlowsReport

app_flow = JFRTyper(
name="flow", help="Commands for managing the flows", no_args_is_help=True
Expand Down Expand Up @@ -306,3 +314,36 @@ def graph(
plt.savefig(file_path)
else:
plt.show()


@app_flow.command()
def report(
    interval: Annotated[
        ReportInterval,
        typer.Argument(
            help="The interval of the trends for the report",
            metavar="INTERVAL",
        ),
    ] = ReportInterval.DAYS,
    num_intervals: Annotated[
        Optional[int],
        typer.Argument(
            help="The number of intervals to consider. Default depends on the interval type",
            metavar="NUM_INTERVALS",
        ),
    ] = None,
):
    """
    Generate a report about the Flows in the database.
    """
    # NOTE: the docstring above is kept short on purpose, since the CLI
    # framework presumably exposes it as the help text of the command.
    controller = get_job_controller()

    # Name of the local timezone as reported by tzname() for the current time.
    # It is forwarded to the report generation and shown in the trends title.
    local_tz_name = datetime.now(tzlocal()).tzname()

    flows_report = FlowsReport.generate_report(
        job_controller=controller,
        interval=interval.value,
        num_intervals=num_intervals,
        timezone=local_tz_name,
    )

    # Each renderable component is passed to the console as a separate argument.
    renderables = get_flow_report_components(flows_report)
    out_console.print(*renderables)
227 changes: 225 additions & 2 deletions src/jobflow_remote/cli/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,21 @@
from typing import TYPE_CHECKING

from monty.json import jsanitize
from rich.panel import Panel
from rich.scope import render_scope
from rich.table import Table
from rich.text import Text

from jobflow_remote.cli.utils import ReprStr, fmt_datetime
from jobflow_remote.jobs.state import JobState
from jobflow_remote.jobs.state import FlowState, JobState
from jobflow_remote.utils.data import convert_utc_time

if TYPE_CHECKING:
from rich.console import RenderableType

from jobflow_remote.config.base import ExecutionConfig, WorkerBase
from jobflow_remote.jobs.data import FlowInfo, JobDoc, JobInfo
from jobflow_remote.jobs.report import FlowsReport, JobsReport


def get_job_info_table(jobs_info: list[JobInfo], verbosity: int):
Expand Down Expand Up @@ -136,6 +140,33 @@ def get_flow_info_table(flows_info: list[FlowInfo], verbosity: int):
return table


# Order in which the keys of a job's info are displayed by format_job_info.
# Only the keys listed here are rendered: format_job_info copies the entries
# following this order and ignores any key that is not in the list, so a new
# JobInfo field must be added here to be shown.
JOB_INFO_ORDER = [
    "db_id",
    "uuid",
    "index",
    "name",
    "state",
    "error",
    "remote",
    "previous_state",
    "job",
    "created_on",
    "updated_on",
    "start_time",
    "end_time",
    "metadata",
    "run_dir",
    "parents",
    "priority",
    "worker",
    "resources",
    "exec_config",
    "lock_id",
    "lock_time",
    "stored_data",
]


def format_job_info(
job_info: JobInfo | JobDoc, verbosity: int, show_none: bool = False
):
Expand All @@ -162,7 +193,14 @@ def format_job_info(
if remote_error:
d["remote"]["error"] = ReprStr(remote_error)

return render_scope(d)
# reorder the keys
# Do not check here that all the keys in JobInfo are in JOB_INFO_ORDER. Check in the tests
sorted_d = {}
for k in JOB_INFO_ORDER:
if k in d:
sorted_d[k] = d[k]

return render_scope(sorted_d, sort_keys=False)


def format_flow_info(flow_info: FlowInfo):
Expand Down Expand Up @@ -255,3 +293,188 @@ def get_worker_table(workers: dict[str, WorkerBase], verbosity: int = 0):
table.add_row(*row)

return table


def create_bar(count, max_count, size=30, color="white"):
    """
    Create a text-based bar for a histogram with a fixed color per state.

    The bar is always ``size`` characters wide (excluding the markup tags):
    a filled portion proportional to ``count / max_count`` followed by
    blank padding.

    Parameters
    ----------
    count
        The value represented by the bar.
    max_count
        The value corresponding to a completely filled bar. If it is not
        positive the bar is left empty instead of dividing by zero.
    size
        Total width of the bar in characters.
    color
        Name of the color used in the markup tag for the filled portion.

    Returns
    -------
    str
        The bar as a string with console markup tags for the colors.
    """
    if max_count > 0:
        filled = int(size * count / max_count)
    else:
        # Nothing to normalize against: draw an empty bar.
        filled = 0

    # Keep the filled portion within the bar, so that the total width is
    # constant even if count is negative or larger than max_count.
    filled = max(0, min(filled, size))

    bar_filled = "█" * filled
    bar_empty = " " * (size - filled)
    return f"[{color}]{bar_filled}[white]{bar_empty}"


def get_job_report_components(report: JobsReport) -> list[RenderableType]:
    """
    Build the list of renderable components describing a report of the Jobs.

    The components are, in order: a summary table, a histogram of the Job
    states (excluding COMPLETED), and, if the corresponding data is present
    in the report, the table of the longest running Jobs, the distribution
    of the Jobs among the workers and the table of the trends.

    Parameters
    ----------
    report
        The report of the Jobs to be displayed.

    Returns
    -------
    list
        The components that can be printed on a rich console.
    """
    components: list[RenderableType] = []

    # Summary of Key Metrics
    summary_table = Table(title="Job Summary", title_style="bold green")
    summary_table.add_column("Metric", style="cyan", justify="right")
    summary_table.add_column("Count", style="green", justify="center")

    summary_table.add_row("Completed Jobs", str(report.completed))
    summary_table.add_row("Running Jobs", str(report.running))
    summary_table.add_row("Error Jobs", str(report.error))
    summary_table.add_row("Active Jobs", str(report.active))

    components.append(summary_table)

    # Job State Distribution
    components.append(
        Panel("[bold green]Job State Distribution[/bold green]", expand=False)
    )

    # Remove COMPLETED, as this will likely account for most of the jobs present in the DB.
    # Work on a copy and do not fail if COMPLETED is not among the counted states.
    state_counts = dict(report.state_counts)
    state_counts.pop(JobState.COMPLETED, None)

    # Find the max count to normalize the histograms. The default handles the
    # case where no state is left, while the outer max avoids a zero normalization.
    max_count = max(max(state_counts.values(), default=0), 1)

    # Fall back to 1 to avoid dividing by zero when all the counts are zero.
    total_count = sum(state_counts.values()) or 1

    # Display job states in a histogram. The order of this dictionary
    # determines the order of the bars.
    state_colors = {
        JobState.WAITING: "grey39",
        JobState.READY: "cyan",
        JobState.CHECKED_OUT: "bright_cyan",
        JobState.UPLOADED: "deep_sky_blue1",
        JobState.SUBMITTED: "blue",
        JobState.RUNNING: "green",
        JobState.TERMINATED: "red",
        JobState.DOWNLOADED: "blue_violet",
        JobState.REMOTE_ERROR: "yellow",
        JobState.COMPLETED: "green",
        JobState.FAILED: "red",
        JobState.PAUSED: "magenta",
        JobState.STOPPED: "dark_orange",
        JobState.USER_STOPPED: "orange4",
        JobState.BATCH_SUBMITTED: "light_slate_blue",
        JobState.BATCH_RUNNING: "chartreuse3",
    }

    # Label and bar are separate components: a newline is prepended to each
    # label except the first one, so that every state starts on its own line.
    newline = ""
    for state, color in state_colors.items():
        if state not in state_counts:
            continue
        count = state_counts[state]
        percentage = round((count / total_count) * 100)
        bar = create_bar(count, max_count, color=color)
        components.extend(
            [f"{newline}{state.name:15} [{count:>3}] ({percentage:>3}%):", bar]
        )
        newline = "\n"

    # Longest Running Jobs
    if report.longest_running:
        longest_running_table = get_job_info_table(report.longest_running, verbosity=1)
        longest_running_table.title = "Longest running jobs"
        longest_running_table.title_style = "bold green"
        components.append(longest_running_table)

    # Worker Utilization
    if report.worker_utilization:
        worker_table = Table(title="Worker Jobs Distribution", title_style="bold green")
        worker_table.add_column("Worker", style="cyan", justify="center")
        worker_table.add_column("Job Count", style="green", justify="center")

        for worker, count in report.worker_utilization.items():
            worker_table.add_row(worker, str(count))

        components.append(worker_table)

    # Job Trends
    if report.trends:
        trends = report.trends
        trends_table = Table(
            title=f"Job Trends ({trends.num_intervals} {trends.interval}) [{trends.timezone}]",
            title_style="bold green",
        )
        trends_table.add_column("Date", justify="center", style="cyan", no_wrap=True)
        trends_table.add_column("Completed", justify="center", style="green")
        trends_table.add_column("Failed", justify="center", style="red")
        trends_table.add_column("Remote Error", justify="center", style="yellow")

        # One row per interval, reading the i-th element of each series.
        for i in range(trends.num_intervals):
            trends_table.add_row(
                trends.dates[i],
                f"{trends.completed[i]}",
                f"{trends.failed[i]}",
                f"{trends.remote_error[i]}",
            )

        components.append(trends_table)

    return components


def get_flow_report_components(report: FlowsReport) -> list[RenderableType]:
    """
    Build the list of renderable components describing a report of the Flows.

    The components are, in order: a summary table, a histogram of the Flow
    states (excluding COMPLETED) and, if present in the report, the table
    of the trends.

    Parameters
    ----------
    report
        The report of the Flows to be displayed.

    Returns
    -------
    list
        The components that can be printed on a rich console.
    """
    components: list[RenderableType] = []

    # Summary of Key Metrics
    summary_table = Table(title="Flow Summary", title_style="bold green")
    summary_table.add_column("Metric", style="cyan", justify="right")
    summary_table.add_column("Count", style="green", justify="center")

    summary_table.add_row("Completed Flows", str(report.completed))
    summary_table.add_row("Running Flows", str(report.running))
    summary_table.add_row("Error Flows", str(report.error))

    components.append(summary_table)

    # Flow State Distribution
    components.append(
        Panel("[bold green]Flow State Distribution[/bold green]", expand=False)
    )

    # Remove COMPLETED, as this will likely account for most of the flows present in the DB.
    # Work on a copy and do not fail if COMPLETED is not among the counted states.
    state_counts = dict(report.state_counts)
    state_counts.pop(FlowState.COMPLETED, None)

    # Find the max count to normalize the histograms. The default handles the
    # case where no state is left, while the outer max avoids a zero normalization.
    max_count = max(max(state_counts.values(), default=0), 1)

    # Fall back to 1 to avoid dividing by zero when all the counts are zero.
    total_count = sum(state_counts.values()) or 1

    # Display flow states in a histogram. The order of this dictionary
    # determines the order of the bars.
    state_colors = {
        FlowState.WAITING: "grey39",
        FlowState.READY: "cyan",
        FlowState.RUNNING: "green",
        FlowState.COMPLETED: "green",
        FlowState.FAILED: "red",
        FlowState.PAUSED: "magenta",
        FlowState.STOPPED: "dark_orange",
    }

    # Label and bar are separate components: a newline is prepended to each
    # label except the first one, so that every state starts on its own line.
    newline = ""
    for state, color in state_colors.items():
        if state not in state_counts:
            continue
        count = state_counts[state]
        percentage = round((count / total_count) * 100)
        bar = create_bar(count, max_count, color=color)
        components.extend(
            [f"{newline}{state.name:15} [{count:>3}] ({percentage:>3}%):", bar]
        )
        newline = "\n"

    # Flow Trends
    if report.trends:
        trends = report.trends
        trends_table = Table(
            title=f"Flow Trends ({trends.num_intervals} {trends.interval}) [{trends.timezone}]",
            title_style="bold green",
        )
        trends_table.add_column("Date", justify="center", style="cyan", no_wrap=True)
        trends_table.add_column("Completed", justify="center", style="green")
        trends_table.add_column("Failed", justify="center", style="red")

        # One row per interval, reading the i-th element of each series.
        for i in range(trends.num_intervals):
            trends_table.add_row(
                trends.dates[i],
                f"{trends.completed[i]}",
                f"{trends.failed[i]}",
            )

        components.append(trends_table)

    return components
2 changes: 2 additions & 0 deletions src/jobflow_remote/cli/jf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from rich.text import Text

from jobflow_remote.cli.jfr_typer import JFRTyper
from jobflow_remote.cli.types import tree_opt
from jobflow_remote.cli.utils import (
cleanup_job_controller,
complete_profiling,
Expand Down Expand Up @@ -65,6 +66,7 @@ def main(
hidden=True,
),
] = False,
print_tree: tree_opt = False, # If selected will print the tree of the CLI and exit
) -> None:
"""The controller CLI for jobflow-remote."""
from jobflow_remote import SETTINGS
Expand Down
Loading