Skip to content

Commit

Permalink
Add simplified report for SQL statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
Raul Martinez committed Oct 22, 2023
1 parent 16dff3e commit 23f21f4
Show file tree
Hide file tree
Showing 7 changed files with 290 additions and 51 deletions.
6 changes: 3 additions & 3 deletions pg_stats_tools/pg/stats/buffers/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def table_cache_hits(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
schema: Annotated[
str,
typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
Expand All @@ -36,7 +36,7 @@ def index_cache_hits(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
schema: Annotated[
str,
typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
Expand All @@ -53,7 +53,7 @@ def usage(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
schema: Annotated[
str,
typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
Expand Down
4 changes: 2 additions & 2 deletions pg_stats_tools/pg/stats/indexes/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def index_usage_hints(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
schema: Annotated[
str,
typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
Expand All @@ -36,7 +36,7 @@ def index_usage(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
schema: Annotated[
str,
typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
Expand Down
74 changes: 67 additions & 7 deletions pg_stats_tools/pg/stats/sql/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,14 @@

from enum import Enum
from typing import Annotated, Any, Dict, List, Union
from datetime import datetime, timedelta

import typer

from pg_stats_tools.time_fn import parse_timestamp
from pg_stats_tools.format import TableFormatOption
from pg_stats_tools.pg.cli import pg_params
from pg_stats_tools.pg.stats.sql.reports import SQLStatsBySQLType, SQLTimeStatsBySQLType, ActiveLongRunningSQL
from pg_stats_tools.pg.stats.sql.reports import SQLStatsBySQLType, SQLTimeStatsBySQLType, ActiveLongRunningSQL, SQLStatsSimplifiedBySQLType

sql = typer.Typer(
help="""Performance reports for SQL statements based on pg_stat_statements
Expand Down Expand Up @@ -45,6 +47,24 @@ class SQLStatsFields(str, Enum):
blk_write_time = "blk_write_time"


# https://documentation.red-gate.com/sm13/postgresql-top-queries-199098901.html
class SQLSimplifiedStatsFields(str, Enum):
    """Column names selectable as the ranking field of the simplified SQL statistics report.

    Names prefixed with ``a`` are the averaged counterpart of the field that
    follows the prefix (for example ``atime`` is the average of ``time``).
    """

    # Number of executions and rows retrieved or affected.
    calls = "calls"
    rows = "rows"
    arows = "arows"
    # Execution time and I/O time.
    time = "time"
    atime = "atime"
    iotime = "iotime"
    aiotime = "aiotime"
    # Blocks read that were not available in shared buffers.
    blk_r = "blk_r"
    ablk_r = "ablk_r"
    # Blocks read from the buffer cache, plus their percentage.
    buff_blk_r = "buff_blk_r"
    abuff_blk_r = "abuff_blk_r"
    buff_blk_r_pct = "buff_blk_r_pct"
    # Blocks written.
    blk_w = "blk_w"
    ablk_w = "ablk_w"


class ActiveSQLStatsFields(str, Enum):
application_name = "application_name"
client_addr = "client_addr"
Expand Down Expand Up @@ -82,6 +102,11 @@ class SQLTypes(str, Enum):
TRANSACTION = "BEGIN"


class SortDir(str, Enum):
    """Sort direction accepted by the ``sort`` command line option."""

    ASC = "ASC"
    DESC = "DESC"


@sql.command(help=SQLTimeStatsBySQLType.get_help())
def sql_time_stats_by_type(
order_by: Annotated[
Expand Down Expand Up @@ -124,7 +149,7 @@ def top_sql_stats_by_type(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
dbname: Annotated[
str,
typer.Option(help="Database name"),
Expand Down Expand Up @@ -159,12 +184,49 @@ def top_sql_stats_by_type(
SQLStatsBySQLType(pg_conn_params=pg_params, sql_types=sql_types, fetch_fields=fetch_fields, **command_args).run()


@sql.command(help=SQLStatsSimplifiedBySQLType.get_help())
def top_sql_stats_simplified_by_type(
    top_stat_field: Annotated[
        SQLSimplifiedStatsFields,
        typer.Option(
            help="Field to use to obtain expensive SQL statements",
            case_sensitive=True,
        ),
    ] = SQLSimplifiedStatsFields.atime,
    format: Annotated[
        TableFormatOption,
        typer.Option(help="Output table format", case_sensitive=True),
    ] = TableFormatOption.github,
    dbname: Annotated[
        str,
        typer.Option(help="Database name"),
    ] = "_all",
    count: Annotated[
        int,
        typer.Option(help="Number of SQL statements to fetch (for each SQL type)"),
    ] = 10,
    sort: Annotated[
        SortDir,
        typer.Option(help="Sort direction. Defines the relevance of high/low values for top_stat_field"),
    ] = SortDir.DESC,
    sql_type: Annotated[
        List[SQLTypes],
        typer.Option(help="SQL Types"),
    ] = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE],
) -> None:
    """Run the simplified SQL statistics report once per selected SQL type.

    The CLI help shown to users is the explicit ``help=`` passed to the
    decorator, not this docstring.

    Args:
        top_stat_field: Statistic used to rank the statements.
        format: Table format used to print the result.
        dbname: Database to report on; the default is ``_all``.
        count: Number of statements fetched for each SQL type.
        sort: Direction applied when ranking by ``top_stat_field``.
        sql_type: SQL statement types to report on.
    """
    # Enum members are flattened to their plain values before being handed to the report.
    command_args: Dict[str, Any] = {
        "top_stat_field": top_stat_field.value,
        "sort": sort.value,
        "format": format.value,
        "dbname": dbname,
        "count": count,
    }
    # Use a distinct loop name so the comprehension does not shadow the `sql_type` parameter.
    sql_types = {selected.name: selected.value for selected in sql_type}
    SQLStatsSimplifiedBySQLType(pg_conn_params=pg_params, sql_types=sql_types, **command_args).run()


@sql.command(help=ActiveLongRunningSQL.get_help())
def active_sql_long_running(
format: Annotated[
TableFormatOption,
typer.Option(help="Output table format", case_sensitive=True),
] = TableFormatOption.grid,
] = TableFormatOption.github,
dbname: Annotated[
str,
typer.Option(help="Database name"),
Expand All @@ -178,14 +240,12 @@ def active_sql_long_running(
typer.Option(help="Additional field to be fechted"),
] = None,
sql_type: Annotated[
Union[List[SQLTypes], None],
List[SQLTypes],
typer.Option(help="SQL Types"),
] = None,
] = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE],
) -> None:
# frame: Union[FrameType, None] = inspect.currentframe()
# f_name = frame.f_code.co_name if frame else "unknown_function"
if not sql_type:
sql_type = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE]
if not fetch_field:
fetch_field = [
ActiveSQLStatsFields.application_name,
Expand Down
143 changes: 107 additions & 36 deletions pg_stats_tools/pg/stats/sql/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ def __init__(self, pg_conn_params: Dict[str, Any], **kvargs: Any) -> None:

@classmethod
def get_help(cls) -> str:
    """Return the help text of the report: a title line followed by one line per output column."""
    return """Time statistics for SQL statements grouped by SQL type
    Columns:
    - sql_type: The type of SQL statement
    - avg_time_ms: The average amount of time each SQL statement type took to run, in milliseconds
    - num_calls: The number of times each SQL statement type was called
    - total_time_ms: The total amount of time each SQL statement type took to run, in milliseconds
    - max_time_ms: The maximum amount of time each SQL statement type took to run, in milliseconds
    """

Expand Down Expand Up @@ -76,31 +76,31 @@ def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], fe

@classmethod
def get_help(cls) -> str:
    """Return the help text of the report: a title line followed by one line per output column.

    The number of lines is kept stable because the help panel height is
    derived from it.
    """
    return """Statistics for SQL statements grouped by SQL type
    Columns:
    - user: Name of the user who executed the statement
    - database: Database in which the statement was executed (shown only when dbname is _all)
    - queryid: Hash code to identify identical normalized queries.
    - query: Text of a representative statement (just first 50 chars displayed)
    - calls: Number of times the statement was executed
    - total_time: Total time spent executing the statement, in milliseconds
    - min_time: Minimum time spent executing the statement, in milliseconds
    - max_time: Maximum time spent executing the statement, in milliseconds
    - mean_time: Mean time spent executing the statement, in milliseconds
    - stddev_time: Population standard deviation of time spent executing the statement, in milliseconds
    - rows: Total number of rows retrieved or affected by the statement
    - shared_blks_hit: Total number of shared block cache hits by the statement
    - shared_blks_read: Total number of shared blocks read by the statement
    - shared_blks_dirtied: Total number of shared blocks dirtied by the statement
    - shared_blks_written: Total number of shared blocks written by the statement
    - local_blks_hit: Total number of local block cache hits by the statement
    - local_blks_read: Total number of local blocks read by the statement
    - local_blks_dirtied: Total number of local blocks dirtied by the statement
    - local_blks_written: Total number of local blocks written by the statement
    - temp_blks_read: Total number of temp blocks read by the statement
    - temp_blks_written: Total number of temp blocks written by the statement
    - blk_read_time: Total time the statement spent reading data file blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)
    - blk_write_time: Total time the statement spent writing data file blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)
    """

def get_name(self) -> str:
Expand All @@ -120,7 +120,78 @@ def execute_sql(self, sql_type: str) -> pd.DataFrame:

def print_header(self) -> None:
    """Print the help panel, then a panel echoing the command arguments and SQL types."""
    help_panel = Panel(self.get_help(), title="Help", height=len(self.get_help().splitlines()))
    # Label and separate the two parts: printing both dict reprs back to back
    # ("...}{...") makes the input panel hard to read.
    input_summary = f"Args: {self._command_args} --- SQL Types: {self._sql_types}"
    input_panel = Panel(Pretty(input_summary), title="Input", height=len(self.get_args()) + 3)
    print(help_panel)
    print(input_panel)

def print_data(self, sql_type: str, data: pd.DataFrame) -> None:
    """Print a separator, the SQL type label and the data rendered as a table.

    Args:
        sql_type: Label printed above the table.
        data: Rows to render; column names are used as table headers.
    """
    separator = "-" * 50
    print(separator)
    print(f"SQL Type: {sql_type}")
    # The table format is the one chosen on the command line.
    table_format = self._command_args["format"]
    print(tabulate(data, headers="keys", tablefmt=table_format))  # pyright: ignore

def run(self) -> None:
    """Print the report header, then query and print the data for each configured SQL type."""
    self.print_header()
    # Keys label the printed output; values are what the query is executed with.
    for type_label, type_value in self._sql_types.items():
        self.print_data(sql_type=type_label, data=self.execute_sql(sql_type=type_value))


class SQLStatsSimplifiedBySQLType(Report):
    """Simplified SQL statistics report, executed once per SQL type."""

    def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], **kvargs: Any) -> None:
        """Store the connection parameters, the SQL types and the remaining command arguments.

        Args:
            pg_conn_params: Keyword arguments forwarded to ``execute_sql``.
            sql_types: SQL types to report on; keys are used as labels.
            **kvargs: Command arguments forwarded to the SQL input reader.
        """
        self._pg_conn_params = pg_conn_params
        self._command_args = kvargs
        self._sql_types = sql_types

    @classmethod
    def get_help(cls) -> str:
        """Return the help text of the report: a title line followed by the output columns.

        The number of lines is kept stable because ``print_header`` derives
        the help panel height from it.
        """
        return """Statistics for SQL statements grouped by SQL type
        Columns:
        - user: OID of user who executed the statement
        - database: Database in which the statement was executed. Displayed only if dbname is not specified
        - queryid: Hash code to identify identical normalized queries
        - calls: Number of times the statement was executed
        - rows: Total number of rows retrieved or affected by the statement
        - arows: Average number of rows per execution
        - time: Total time spent executing the statement, in milliseconds
        - atime: Average time spent executing the statement, in milliseconds
        - iotime: Total io time spent executing the statement. This is the sum of blk_read_time and blk_write_time
        - aiotime: Average io time spent executing the statement. This is the average of iotime
        - blk_r: The number of blocks read that were requested from the Operating System because they weren't available in shared buffers
        - ablk_r: Average of blk_r
        - buff_blk_r: The number of blocks read from the buffer cache. This is typically the fastest way blocks can be read
        This is the sum of shared_blks_hit and local_blks_hit.
        - abuff_blk_r: Average of buff_blk_r
        - buff_blk_r_pct: Percentage of buff_blk_r
        - blk_w: The number of blocks written. This is the sum of shared_blks_written, local_blks_written and temp_blks_written
        - ablk_w: Average of blk_w
        """

    def get_name(self) -> str:
        """Return the report name passed to the SQL input reader."""
        return "top_sql_stats_simplified_by_type"

    def get_args(self) -> Dict[str, Any]:
        """Return the command arguments received at construction time."""
        return self._command_args

    def read_sql(self, sql_type: str) -> str:
        """Return the SQL text for ``sql_type`` built from the report name and command arguments."""
        return read_sql_input(self.get_name(), sql_type=sql_type, **self.get_args())

    def execute_sql(self, sql_type: str) -> pd.DataFrame:
        """Execute the SQL for ``sql_type`` with the stored connection parameters."""
        return execute_sql(
            sql=self.read_sql(sql_type=sql_type),
            **self._pg_conn_params,
        )

    def print_header(self) -> None:
        """Print the help panel, then a panel echoing the command arguments and SQL types."""
        help_text = self.get_help()
        help_panel = Panel(help_text, title="Help", height=len(help_text.splitlines()) + 1)
        input_panel = Panel(
            Pretty(f"""Args: {self._command_args} --- SQL Types: {" ".join(self._sql_types.values())}"""),
            title="Input",
            height=len(self.get_args()) + 3,
        )
        print(help_panel)
        print(input_panel)

Expand Down Expand Up @@ -149,8 +220,8 @@ def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], fe

@classmethod
def get_help(cls) -> str:
return """Active long running SQL queries \n
Columns:\n
return """Active long running SQL queries
Columns:
- datid: OID of the database this backend is connected to
- datname: Name of the database this backend is connected to
Expand Down Expand Up @@ -225,7 +296,7 @@ def execute_sql(self, sql_type: str) -> pd.DataFrame:

def print_header(self) -> None:
    """Print the help panel, then a panel echoing the command arguments and SQL types."""
    help_panel = Panel(self.get_help(), title="Help", height=len(self.get_help().splitlines()))
    # Label and separate the two parts: printing both dict reprs back to back
    # ("...}{...") makes the input panel hard to read.
    input_summary = f"Args: {self._command_args} --- SQL Types: {self._sql_types}"
    input_panel = Panel(Pretty(input_summary), title="Input", height=len(self.get_args()) + 3)
    print(help_panel)
    print(input_panel)

Expand Down
6 changes: 3 additions & 3 deletions pg_stats_tools/reports_inputs/top_sql_stats_by_type.sql
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
-- Executed for each sql_type
SELECT
pg_user.usename AS user,
pg_stat_database.datname AS database,
{% if dbname =="_all" %} pg_stat_database.datname AS database,{% endif %}
queryid,
LEFT(query, 50) AS query,
{{top_stat_field}}
Expand All @@ -12,9 +12,9 @@ FROM pg_stat_statements
JOIN pg_catalog.pg_user ON pg_stat_statements.userid = pg_catalog.pg_user.usesysid
JOIN pg_stat_database ON pg_stat_statements.dbid = pg_stat_database.datid
WHERE
-- Match the statement type at the start of the query text, ignoring case and leading whitespace
query ~* '^\s*{{sql_type}}'
{% if dbname !="_all" %}
-- Compare against the same `dbname` variable tested above, quoted as a string literal
AND pg_stat_database.datname = '{{dbname}}'
{% endif %}
ORDER BY {{top_stat_field}} DESC
LIMIT {{count}};
Loading

0 comments on commit 23f21f4

Please sign in to comment.