Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add report of Pbench Agent version statistics #3624

Merged
merged 2 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 127 additions & 3 deletions lib/pbench/cli/server/report.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import defaultdict
from dataclasses import dataclass
import datetime
from operator import and_
from pathlib import Path
Expand Down Expand Up @@ -384,9 +385,6 @@ def summarize_dates(base_query: Query, options: dict[str, Any]):
since = options.get("since")
until = options.get("until")

if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

by_year = defaultdict(int)
by_month = defaultdict(int)
by_day = defaultdict(int)
Expand Down Expand Up @@ -514,6 +512,118 @@ def report_uploads(options: dict[str, Any]):
summarize_dates(rows, options)


def report_agent(options: dict[str, Any]):
    """Report dataset statistics by agent version

    Query the "metalog" metadata of all datasets for the `pbench.date` and
    `pbench.rpm-version` values, and report the number of datasets and the
    first and last dataset date seen for each Pbench Agent version.

    Versions matching the expected pattern are summarized by their
    "major.minor" prefix and, when the "detail" option is set, by the full
    version string as well. With "detail", version strings which don't match
    the pattern are also listed. Rows where the version isn't a string are
    only counted.

    Args:
        options: the command line options; this reads "since" and "until" (to
            bound the dataset dates which are considered) and "detail".
    """

    v_pattern = re.compile(r"(?P<major>\d+\.\d+)(?:\.\d+)?(?:-\w+)")

    # Placeholder for datasets without a usable date.
    #
    # NOTE(review): this assumes that the TZDateTime column type yields
    # timezone-aware (UTC) values -- confirm against its definition. Python
    # refuses to order naive and aware datetimes against each other, so a
    # naive placeholder would raise TypeError as soon as a version has both
    # dated and dateless datasets (in Daterange.add) or when sorting the
    # report rows. A fixed UTC epoch also makes the report independent of the
    # local timezone of the host.
    no_date = datetime.datetime.fromtimestamp(0.0, tz=datetime.timezone.utc)

    @dataclass
    class Daterange:
        # Earliest and latest dates seen so far; None until the first add()
        first: Optional[datetime.datetime] = None
        last: Optional[datetime.datetime] = None

        def add(self, date: datetime.datetime):
            """Widen the range, if necessary, to include the given date"""
            if self.first is None or date < self.first:
                self.first = date
            if self.last is None or date > self.last:
                self.last = date

    def print_versions(
        target: dict[str, Daterange], counts: dict[str, int], quote: bool = False
    ):
        """Print a table of versions, ordered by the last date seen

        Args:
            target: the date range observed for each version
            counts: the number of datasets observed for each version
            quote: True to wrap each version string in single quotes
        """
        cw = 10
        vw = 23
        dw = 11
        click.echo(
            f" {'Count':^{cw}s} {'Version':^{vw}s} {'First':^{dw}s} {'Last':^{dw}s}"
        )
        click.echo(f" {'':-<{cw}} {'':-<{vw}} {'':-<{dw}} {'':-<{dw}}")
        for version, dates in sorted(target.items(), key=lambda k: k[1].last):
            count = counts[version]
            first = humanize.naturaldate(dates.first)
            last = humanize.naturaldate(dates.last)
            v = "'" + version + "'" if quote else version
            click.echo(f" {count:>{cw},d} {v:^{vw}s} {first:>{dw}s} {last:>{dw}s}")

    watcher.update("analyzing version patterns")
    since = options.get("since")
    until = options.get("until")

    # Create a subquery from our basic select parameters so that we can use
    # the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
    # PostgreSQL doesn't allow filtering on renamed columns.)
    subquery = (
        Database.db_session.query(
            cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label(
                "date"
            ),
            Metadata.value["pbench", "rpm-version"].as_string().label("version"),
        )
        .filter(Metadata.key == "metalog")
        .subquery()
    )

    dateless = 0
    versionless = 0

    # Dataset counts and date ranges, keyed by the full version string, by the
    # "major.minor" prefix, and by unrecognized version strings.
    versions = defaultdict(int)
    majorversions = defaultdict(int)
    nonversions = defaultdict(int)
    fullrange = defaultdict(Daterange)
    majorrange = defaultdict(Daterange)
    nonversionrange = defaultdict(Daterange)

    filters = []

    query = Database.db_session.query(subquery.c.date, subquery.c.version).order_by(
        subquery.c.date
    )

    if since:
        verifier.status(f"Filter since {since}")
        filters.append(subquery.c.date >= since)
    if until:
        verifier.status(f"Filter until {until}")
        filters.append(subquery.c.date <= until)
    if filters:
        query = query.filter(*filters)
    rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

    for row in rows:
        date: datetime.datetime = row[0]
        version = row[1]
        if not isinstance(version, str):
            versionless += 1
            continue
        if not isinstance(date, datetime.datetime):
            # Still count the dataset against its version, using the epoch
            # placeholder as its date.
            dateless += 1
            date = no_date
        m = v_pattern.search(version)
        if m:
            maj = m.group("major")
            versions[version] += 1
            majorversions[maj] += 1
            fullrange[version].add(date)
            majorrange[maj].add(date)
        else:
            nonversions[version] += 1
            nonversionrange[version].add(date)

    click.echo("Dataset statistics by Pbench Agent version:")
    print_versions(majorrange, majorversions)
    if options.get("detail"):
        click.echo("Dataset statistics by full Pbench Agent version:")
        print_versions(fullrange, versions)
        click.echo("Datasets with nonsensical version metadata:")
        print_versions(nonversionrange, nonversions, quote=True)
    if dateless:
        click.echo(f"{dateless:,d} datasets lack a date")
    if versionless:
        click.echo(f"{versionless:,d} datasets lack a Pbench Agent version")


def report_audit():
"""Report audit log statistics."""

Expand Down Expand Up @@ -693,6 +803,12 @@ def report_states():

@click.command(name="pbench-report-generator")
@pass_cli_context
@click.option(
"--agent",
default=False,
is_flag=True,
help="Display Pbench Agent version statistics",
)
@click.option("--all", "-a", default=False, is_flag=True, help="Display full report")
@click.option(
"--archive", "-A", default=False, is_flag=True, help="Display archive statistics"
Expand Down Expand Up @@ -766,6 +882,12 @@ def report(context: object, **kwargs):
rv = 0

try:

since = kwargs.get("since")
until = kwargs.get("until")
if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

config = config_setup(context)
logger = get_pbench_logger("pbench-report-generator", config)
cache_m = CacheManager(config, logger)
Expand All @@ -790,6 +912,8 @@ def report(context: object, **kwargs):
else:
click.echo(f"Unexpected statistics option {stats}", err=True)
rv = 1
if kwargs.get("all") or kwargs.get("agent"):
report_agent(kwargs)
if kwargs.get("all") or kwargs.get("audit"):
report_audit()
if kwargs.get("all") or kwargs.get("sql"):
Expand Down
1 change: 1 addition & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ flask-sqlalchemy
gunicorn
humanfriendly
humanize
numpy<2.0 # Indirect: elasticsearch
webbnh marked this conversation as resolved.
Show resolved Hide resolved
pquisby
psycopg2
pyesbulk>=2.0.1
Expand Down
Loading