Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add report of Pbench Agent version statistics #3624

Merged
merged 2 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 34 additions & 21 deletions lib/pbench/cli/server/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,9 +385,6 @@ def summarize_dates(base_query: Query, options: dict[str, Any]):
since = options.get("since")
until = options.get("until")

if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

by_year = defaultdict(int)
by_month = defaultdict(int)
by_day = defaultdict(int)
Expand Down Expand Up @@ -518,7 +515,7 @@ def report_uploads(options: dict[str, Any]):
def report_agent(options: dict[str, Any]):
"""Report dataset statistics by agent version"""

v_pattern = re.compile(r"v?(?P<major>\d+\.\d+)(?:\.\d+)?(?:-\w+)")
v_pattern = re.compile(r"(?P<major>\d+\.\d+)(?:\.\d+)?(?:-\w+)")

@dataclass
class Daterange:
Expand All @@ -531,26 +528,40 @@ def add(self, date: datetime.datetime):
if self.last is None or date > self.last:
self.last = date

def print_versions(target: dict[str, Daterange], counts: dict[str, int]):
click.echo(f" {'Count':^10s} {'Version':^22s} {'First':^12s} {'Last':^12s}")
click.echo(f" {'':-<10} {'':-<22} {'':-<12} {'':-<12}")
def print_versions(
target: dict[str, Daterange], counts: dict[str, int], quote: bool = False
):
cw = 10
vw = 23
dw = 11
click.echo(
f" {'Count':^{cw}s} {'Version':^{vw}s} {'First':^{dw}s} {'Last':^{dw}s}"
)
click.echo(f" {'':-<{cw}} {'':-<{vw}} {'':-<{dw}} {'':-<{dw}}")
for version, dates in sorted(target.items(), key=lambda k: k[1].last):
count = counts[version]
first = humanize.naturaldate(dates.first)
last = humanize.naturaldate(dates.last)
click.echo(f" {count:>10,d} {version!r:^22s} {first:>12s} {last:>12s}")
v = "'" + version + "'" if quote else version
click.echo(f" {count:>{cw},d} {v:^{vw}s} {first:>{dw}s} {last:>{dw}s}")

watcher.update("analyzing version patterns")
since = options.get("since")
until = options.get("until")

if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

rows = Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label("date"),
Metadata.value["pbench", "rpm-version"].as_string().label("version"),
).filter(Metadata.key == "metalog")
# Create a subquery from our basic select parameters so that we can use
# the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
# PostgreSQL doesn't allow filtering on renamed columns.)
subquery = (
Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label(
"date"
),
Metadata.value["pbench", "rpm-version"].as_string().label("version"),
)
.filter(Metadata.key == "metalog")
.subquery()
)

count = 0
dateless = 0
Expand All @@ -565,10 +576,6 @@ def print_versions(target: dict[str, Daterange], counts: dict[str, int]):

filters = []

# Create a subquery from our basic select parameters so that we can use
# the label (SQL "AS date") in our WHERE filter clauses. (In a direct query
# PostgreSQL doesn't allow filtering on renamed columns.)
subquery = rows.subquery()
query = Database.db_session.query(subquery.c.date, subquery.c.version).order_by(
subquery.c.date
)
Expand All @@ -593,7 +600,7 @@ def print_versions(target: dict[str, Daterange], counts: dict[str, int]):
if not isinstance(date, datetime.datetime):
dateless += 1
date = datetime.datetime.fromtimestamp(0.0)
m = v_pattern.match(version)
m = v_pattern.search(version)
if m:
maj = m.group("major")
versions[version] += 1
Expand All @@ -610,7 +617,7 @@ def print_versions(target: dict[str, Daterange], counts: dict[str, int]):
click.echo("Dataset statistics by full Pbench Agent version:")
print_versions(range, versions)
click.echo("Datasets with nonsensical version metadata:")
print_versions(nonversionrange, nonversions)
print_versions(nonversionrange, nonversions, quote=True)
if dateless:
click.echo(f"{dateless:,d} datasets lack a date")
if versionless:
Expand Down Expand Up @@ -875,6 +882,12 @@ def report(context: object, **kwargs):
rv = 0

try:

since = kwargs.get("since")
until = kwargs.get("until")
if since and until and since > until:
raise Exception("The --until value must be later than the --since value")

config = config_setup(context)
logger = get_pbench_logger("pbench-report-generator", config)
cache_m = CacheManager(config, logger)
Expand Down
1 change: 1 addition & 0 deletions server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ flask-sqlalchemy
gunicorn
humanfriendly
humanize
numpy<2.0 # Indirect: elasticsearch
webbnh marked this conversation as resolved.
Show resolved Hide resolved
pquisby
psycopg2
pyesbulk>=2.0.1
Expand Down
Loading