Skip to content

Commit

Permalink
A small reporting hack
Browse files Browse the repository at this point in the history
I'd considered adding some way to report dataset create/upload statistics based
on a date range, but never got around to it. Today, while writing up the draft
report for April, I of course didn't have a report "for April", so I pasted in
one from April 25...

And then thought, well, how hard would it be to add `--since` and `--until` to
set the range?

It's not perfect, but it's a potentially useful twist. If you like it, we can
merge it; if not, maybe some evening or weekend I'll get around to working
on it some more.

```
$ pbench-report-generator --statistics=creation --since 2024-04-01 --until 2024-05-01
Dataset statistics by creation date:
  540 since 2024-04-01 00:00 until 2024-05-01 00:00
    540 in year 2024
    540 in month April 2024
    131 in week April 23 to April 30
    8 on 30 April 2024
 Total by year:
    2024:      540
 Total by month of year:
    Apr:      540
 Total by day of month:
    02:       25    03:       26    04:        2    06:        9
    07:        9    08:        3    09:       28    10:       29
    11:        9    12:       23    13:       47    14:       29
    15:       33    16:       24    17:       31    18:        6
    19:        2    20:       38    21:       36    23:       31
    24:       30    25:       11    26:        4    27:       23
    28:       23    29:        1    30:        8
 Total by day of week:
    Mon:       37    Tue:      116    Wed:      116    Thu:       28
    Fri:       29    Sat:      117    Sun:       97
 Total by hour of day:
    00:       24    01:       12    02:       10    03:       14
    04:       32    05:       51    06:       34    07:       14
    08:       26    09:       31    10:       30    11:       29
    12:       31    13:       38    14:       19    15:       26
    16:       17    17:       17    18:       17    19:       13
    20:       11    21:       19    22:       12    23:       13
```
  • Loading branch information
dbutenhof committed May 7, 2024
1 parent 81eb077 commit 66728cd
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 34 deletions.
17 changes: 10 additions & 7 deletions lib/pbench/cli/server/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,16 @@ class DateParser(ParamType):
def convert(
self, value: Any, param: Optional[Parameter], ctx: Optional[Context]
) -> Any:
if isinstance(value, datetime.datetime):
return value

try:
return parser.parse(value)
except Exception as e:
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
if isinstance(value, str):
try:
value = parser.parse(value)
except Exception as e:
self.fail(f"{value!r} cannot be converted to a datetime: {str(e)!r}")
if not isinstance(value, datetime.datetime):
self.fail(f"{value!r} ({type(value).__name__}) is unsupported.")
if value.tzinfo is None:
value = value.replace(tzinfo=datetime.timezone.utc)
return value


class Detail:
Expand Down
86 changes: 59 additions & 27 deletions lib/pbench/cli/server/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@
import re
import shutil
import time
from typing import Any, Iterator, Optional, Union
from typing import Any, Optional, Union

import click
import humanize
from sqlalchemy import cast, inspect, Row, select, text
from sqlalchemy import cast, inspect, select, text
from sqlalchemy.orm import Query

from pbench.cli import pass_cli_context
from pbench.cli.server import config_setup, Detail, Verify, Watch
from pbench.cli.server import config_setup, DateParser, Detail, Verify, Watch
from pbench.cli.server.options import common_options
from pbench.common.logger import get_pbench_logger
from pbench.server import BadConfig
Expand Down Expand Up @@ -368,16 +369,26 @@ def columnize(
click.echo(line)


def summarize_dates(rows: Iterator[Row], width: int = 80):
def summarize_dates(query: Query, options: dict[str, Any]):
width: int = options.get("width")
by_year = defaultdict(int)
by_month = defaultdict(int)
by_day = defaultdict(int)
by_weekday = defaultdict(int)
by_hour = defaultdict(int)

day = datetime.datetime.now(datetime.timezone.utc).replace(
hour=0, minute=0, second=0, microsecond=0
)
since = options.get("since")
until = options.get("until")

start = since if since else datetime.datetime.fromtimestamp(0.0)
end = until if until else datetime.datetime.now(datetime.timezone.utc)

# It's convenient to use `--until YYYY-MM-01` to see the preceding month
# (though technically that would include a timestamp of exactly
# YYYY-MM-01 00:00:00), but bucketing the day or week based on that boundary
# instant isn't very useful, so back up the "day" by one millisecond to move
# it into the last day of the previous month.
day = end - datetime.timedelta(milliseconds=1)
day = day.replace(hour=0, minute=0, second=0, microsecond=0)
month = day.replace(day=1)
year = month.replace(month=1)
week = day - datetime.timedelta(days=7)
Expand All @@ -386,6 +397,20 @@ def summarize_dates(rows: Iterator[Row], width: int = 80):
this_month = 0
this_week = 0
this_day = 0
in_range = 0

filters = []
subquery = query.subquery()
query = Database.db_session.query(subquery.c.date)
if since:
verifier.status(f"Filter since {since}")
filters.append(subquery.c.date >= since)
if until:
verifier.status(f"Filter until {until}")
filters.append(subquery.c.date <= until)
if filters:
query = query.filter(*filters)
rows = query.execution_options(stream_results=True).yield_per(SQL_CHUNK)

for row in rows:
date: datetime.datetime = row[0]
Expand All @@ -407,10 +432,16 @@ def summarize_dates(rows: Iterator[Row], width: int = 80):
if date >= day:
this_day += 1

click.echo(f" {this_year:,d} this year ({year:%Y})")
click.echo(f" {this_month:,d} this month ({month:%B %Y})")
click.echo(f" {this_week:,d} this week ({week:%B %d} to {day:%B %d})")
click.echo(f" {this_day:,d} today ({day:%d %B %Y})")
if date >= start and date < end:
in_range += 1

click.echo(
f" {in_range:,d} since {start:%Y-%m-%d %H:%M} until {end:%Y-%m-%d %H:%M}"
)
click.echo(f" {this_year:,d} in year {year:%Y}")
click.echo(f" {this_month:,d} in month {month:%B %Y}")
click.echo(f" {this_week:,d} in week {week:%B %d} to {day:%B %d}")
click.echo(f" {this_day:,d} on {day:%d %B %Y}")

click.echo(" Total by year:")
columnize(by_year, width)
Expand All @@ -429,30 +460,21 @@ def report_creation(options: dict[str, Any]):

watcher.update("analyzing upload patterns")

rows = (
Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime)
)
.filter(Metadata.key == "metalog")
.execution_options(stream_results=True)
.yield_per(SQL_CHUNK)
)
rows = Database.db_session.query(
cast(Metadata.value["pbench", "date"].as_string(), TZDateTime).label("date")
).filter(Metadata.key == "metalog")
click.echo("Dataset statistics by creation date:")
summarize_dates(rows, options.get("width"))
summarize_dates(rows, options)


def report_uploads(options: dict[str, Any]):
"""Report dataset statistics by upload date"""

watcher.update("analyzing upload patterns")

rows = (
Database.db_session.query(Dataset.uploaded)
.execution_options(stream_results=True)
.yield_per(SQL_CHUNK)
)
rows = Database.db_session.query(Dataset.uploaded.label("date"))
click.echo("Dataset statistics by upload date:")
summarize_dates(rows, options.get("width"))
summarize_dates(rows, options)


def report_audit():
Expand Down Expand Up @@ -664,6 +686,11 @@ def report_states():
@click.option(
"--progress", "-p", type=float, default=0.0, help="Show periodic progress messages"
)
@click.option(
"--since",
type=DateParser(),
help="Confine statistics to datasets uploaded/created since date/time",
)
@click.option("--sql", "-s", default=False, is_flag=True, help="Display SQL statistics")
@click.option(
"--states", "-S", default=False, is_flag=True, help="Display operational states"
Expand All @@ -673,6 +700,11 @@ def report_states():
type=click.Choice(["creation", "upload"], case_sensitive=False),
help="Show upload statistics",
)
@click.option(
"--until",
type=DateParser(),
help="Confine statistics to datasets uploaded/created until date/time",
)
@click.option(
"--verify", "-v", default=False, is_flag=True, help="Display intermediate messages"
)
Expand Down

0 comments on commit 66728cd

Please sign in to comment.