Add simplified report for SQL statistics

raulmartinezr · Oct 22, 2023 · 23f21f4 · 23f21f4
1 parent 16dff3e
commit 23f21f4
Show file tree

Hide file tree

Showing 7 changed files with 290 additions and 51 deletions.
diff --git a/pg_stats_tools/pg/stats/buffers/cli.py b/pg_stats_tools/pg/stats/buffers/cli.py
@@ -19,7 +19,7 @@ def table_cache_hits(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     schema: Annotated[
         str,
         typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
@@ -36,7 +36,7 @@ def index_cache_hits(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     schema: Annotated[
         str,
         typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
@@ -53,7 +53,7 @@ def usage(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     schema: Annotated[
         str,
         typer.Option(help="Schema. Default: public. Use _all to get all schemas"),

diff --git a/pg_stats_tools/pg/stats/indexes/cli.py b/pg_stats_tools/pg/stats/indexes/cli.py
@@ -19,7 +19,7 @@ def index_usage_hints(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     schema: Annotated[
         str,
         typer.Option(help="Schema. Default: public. Use _all to get all schemas"),
@@ -36,7 +36,7 @@ def index_usage(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     schema: Annotated[
         str,
         typer.Option(help="Schema. Default: public. Use _all to get all schemas"),

diff --git a/pg_stats_tools/pg/stats/sql/cli.py b/pg_stats_tools/pg/stats/sql/cli.py
@@ -3,12 +3,14 @@
 
 from enum import Enum
 from typing import Annotated, Any, Dict, List, Union
+from datetime import datetime, timedelta
 
 import typer
 
+from pg_stats_tools.time_fn import parse_timestamp
 from pg_stats_tools.format import TableFormatOption
 from pg_stats_tools.pg.cli import pg_params
-from pg_stats_tools.pg.stats.sql.reports import SQLStatsBySQLType, SQLTimeStatsBySQLType, ActiveLongRunningSQL
+from pg_stats_tools.pg.stats.sql.reports import SQLStatsBySQLType, SQLTimeStatsBySQLType, ActiveLongRunningSQL, SQLStatsSimplifiedBySQLType
 
 sql = typer.Typer(
     help="""Performance reports for SQL statements based on pg_stat_statements
@@ -45,6 +47,24 @@ class SQLStatsFields(str, Enum):
     blk_write_time = "blk_write_time"
 
 
+# https://documentation.red-gate.com/sm13/postgresql-top-queries-199098901.html
+class SQLSimplifiedStatsFields(str, Enum):
+    calls = "calls"
+    rows = "rows"
+    arows = "arows"
+    time = "time"
+    atime = "atime"
+    iotime = "iotime"
+    aiotime = "aiotime"
+    blk_r = "blk_r"
+    ablk_r = "ablk_r"
+    buff_blk_r = "buff_blk_r"
+    abuff_blk_r = "abuff_blk_r"
+    buff_blk_r_pct = "buff_blk_r_pct"
+    blk_w = "blk_w"
+    ablk_w = "ablk_w"
+
+
 class ActiveSQLStatsFields(str, Enum):
     application_name = "application_name"
     client_addr = "client_addr"
@@ -82,6 +102,11 @@ class SQLTypes(str, Enum):
     TRANSACTION = "BEGIN"
 
 
+class SortDir(str, Enum):
+    ASC = "ASC"
+    DESC = "DESC"
+
+
 @sql.command(help=SQLTimeStatsBySQLType.get_help())
 def sql_time_stats_by_type(
     order_by: Annotated[
@@ -124,7 +149,7 @@ def top_sql_stats_by_type(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     dbname: Annotated[
         str,
         typer.Option(help="Database name"),
@@ -159,12 +184,49 @@ def top_sql_stats_by_type(
     SQLStatsBySQLType(pg_conn_params=pg_params, sql_types=sql_types, fetch_fields=fetch_fields, **command_args).run()
 
 
+@sql.command(help=SQLStatsSimplifiedBySQLType.get_help())
+def top_sql_stats_simplified_by_type(
+    top_stat_field: Annotated[
+        SQLSimplifiedStatsFields,
+        typer.Option(
+            help="Field to use to obtain expensive SQL statements",
+            case_sensitive=True,
+        ),
+    ] = SQLSimplifiedStatsFields.atime,
+    format: Annotated[
+        TableFormatOption,
+        typer.Option(help="Output table format", case_sensitive=True),
+    ] = TableFormatOption.github,
+    dbname: Annotated[
+        str,
+        typer.Option(help="Database name"),
+    ] = "_all",
+    count: Annotated[
+        int,
+        typer.Option(help="Number of SQL statements to fetch (for each SQL type)"),
+    ] = 10,
+    sort: Annotated[
+        SortDir,
+        typer.Option(help="Sort direction. Defines the relevance of high/low values for top_stat_field"),
+    ] = SortDir.DESC,
+    sql_type: Annotated[
+        List[SQLTypes],
+        typer.Option(help="SQL Types"),
+    ] = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE],
+) -> None:
+    # frame: Union[FrameType, None] = inspect.currentframe()
+    # f_name = frame.f_code.co_name if frame else "unknown_function"
+    command_args: Dict[str, Any] = {"top_stat_field": top_stat_field.value, "sort": sort.value, "format": format.value, "dbname": dbname, "count": count}
+    sql_types = {sql_type.name: sql_type.value for sql_type in sql_type}
+    SQLStatsSimplifiedBySQLType(pg_conn_params=pg_params, sql_types=sql_types, **command_args).run()
+
+
 @sql.command(help=ActiveLongRunningSQL.get_help())
 def active_sql_long_running(
     format: Annotated[
         TableFormatOption,
         typer.Option(help="Output table format", case_sensitive=True),
-    ] = TableFormatOption.grid,
+    ] = TableFormatOption.github,
     dbname: Annotated[
         str,
         typer.Option(help="Database name"),
@@ -178,14 +240,12 @@ def active_sql_long_running(
         typer.Option(help="Additional field to be fechted"),
     ] = None,
     sql_type: Annotated[
-        Union[List[SQLTypes], None],
+        List[SQLTypes],
         typer.Option(help="SQL Types"),
-    ] = None,
+    ] = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE],
 ) -> None:
     # frame: Union[FrameType, None] = inspect.currentframe()
     # f_name = frame.f_code.co_name if frame else "unknown_function"
-    if not sql_type:
-        sql_type = [SQLTypes.SELECT, SQLTypes.INSERT, SQLTypes.UPDATE, SQLTypes.DELETE]
     if not fetch_field:
         fetch_field = [
             ActiveSQLStatsFields.application_name,

diff --git a/pg_stats_tools/pg/stats/sql/reports.py b/pg_stats_tools/pg/stats/sql/reports.py
@@ -24,13 +24,13 @@ def __init__(self, pg_conn_params: Dict[str, Any], **kvargs: Any) -> None:
 
     @classmethod
     def get_help(cls) -> str:
-        return """Time statistics for SQL statements grouped by SQL type \n
-        Columns:\n
-            - sql_type: The type of SQL statement\n
-            - avg_time_ms: The average amount of time each SQL statement type took to run, in milliseconds\n
-            - num_calls: The number of times each SQL statement type was called\n
-            - total_time_ms: The total amount of time each SQL statement type took to run, in milliseconds\n
-            - max_time_ms: The maximum amount of time each SQL statement  type took to run, in millisecondsq\n
+        return """Time statistics for SQL statements grouped by SQL type
+        Columns:
+            - sql_type: The type of SQL statement
+            - avg_time_ms: The average amount of time each SQL statement type took to run, in milliseconds
+            - num_calls: The number of times each SQL statement type was called
+            - total_time_ms: The total amount of time each SQL statement type took to run, in milliseconds
+            - max_time_ms: The maximum amount of time each SQL statement type took to run, in milliseconds
 
         """
 
@@ -76,31 +76,31 @@ def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], fe
 
     @classmethod
     def get_help(cls) -> str:
-        return """Statistics for SQL statements grouped by SQL type \n
-        Columns:\n
-            - user: OID of user who executed the statement\n
-            - database: Database in which the statement was executed\n
-            - queryid:Hash code to identify identical normalized queries.\n
-            - query: Text of a representative statement (just first 15 chars displayed)\n
-            - calls: Number of times the statement was executed\n
-            - total_time: Total time spent executing the statement, in milliseconds\n
-            - min_time: Minimum time spent executing the statement, in milliseconds\n
-            - max_time: Maximum time spent executing the statement, in milliseconds\n
-            - mean_time: Mean time spent executing the statement, in milliseconds\n
-            - stddev_time: Population standard deviation of time spent executing the statement, in milliseconds\n
-            - rows: Total number of rows retrieved or affected by the statement\n
-            - shared_blks_hit: Total number of shared block cache hits by the statement\n
-            - shared_blks_read: Total number of shared blocks read by the statement\n
-            - shared_blks_dirtied: Total number of shared blocks dirtied by the statementd\n
-            - shared_blks_written: Total number of shared blocks written by the statement\n
-            - local_blks_hit: Total number of local block cache hits by the statement\n
-            - local_blks_read: Total number of local blocks read by the statement\n
-            - local_blks_dirtied: Total number of local blocks dirtied by the statement\n
-            - local_blks_written: Total number of local blocks written by the statement\n
-            - temp_blks_read: Total number of temp blocks read by the statement\n
-            - temp_blks_written: Total number of temp blocks written by the statement\n
-            - blk_read_time: Total time the statement spent reading data file blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)\n
-            - blk_write_time: Number of times the statement was executed\n
+        return """Statistics for SQL statements grouped by SQL type
+        Columns:
+            - user: OID of user who executed the statement
+            - database: Database in which the statement was executed
+            - queryid: Hash code to identify identical normalized queries.
+            - query: Text of a representative statement (just first 50 chars displayed)
+            - calls: Number of times the statement was executed
+            - total_time: Total time spent executing the statement, in milliseconds
+            - min_time: Minimum time spent executing the statement, in milliseconds
+            - max_time: Maximum time spent executing the statement, in milliseconds
+            - mean_time: Mean time spent executing the statement, in milliseconds
+            - stddev_time: Population standard deviation of time spent executing the statement, in milliseconds
+            - rows: Total number of rows retrieved or affected by the statement
+            - shared_blks_hit: Total number of shared block cache hits by the statement
+            - shared_blks_read: Total number of shared blocks read by the statement
+            - shared_blks_dirtied: Total number of shared blocks dirtied by the statement
+            - shared_blks_written: Total number of shared blocks written by the statement
+            - local_blks_hit: Total number of local block cache hits by the statement
+            - local_blks_read: Total number of local blocks read by the statement
+            - local_blks_dirtied: Total number of local blocks dirtied by the statement
+            - local_blks_written: Total number of local blocks written by the statement
+            - temp_blks_read: Total number of temp blocks read by the statement
+            - temp_blks_written: Total number of temp blocks written by the statement
+            - blk_read_time: Total time the statement spent reading data file blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)
+            - blk_write_time: Total time the statement spent writing data file blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)
         """
 
     def get_name(self) -> str:
@@ -120,7 +120,78 @@ def execute_sql(self, sql_type: str) -> pd.DataFrame:
 
     def print_header(self) -> None:
         help_panel = Panel(self.get_help(), title="Help", height=len(self.get_help().splitlines()))
-        input_panel = Panel(Pretty(f"Args: {self._command_args}\n{self._sql_types}"), title="Input", height=len(self.get_args()) + 3)
+        input_panel = Panel(Pretty(f"Args: {self._command_args} --- SQL Types: {self._sql_types}"), title="Input", height=len(self.get_args()) + 3)
+        print(help_panel)
+        print(input_panel)
+
+    def print_data(self, sql_type: str, data: pd.DataFrame) -> None:
+        print("-" * 50)
+        print(f"SQL Type: {sql_type}")
+        print(tabulate(data, headers="keys", tablefmt=self._command_args["format"]))  # pyright: ignore
+
+    def run(self) -> None:
+        self.print_header()
+        for k, v in self._sql_types.items():
+            data = self.execute_sql(sql_type=v)
+            self.print_data(sql_type=k, data=data)
+
+
+class SQLStatsSimplifiedBySQLType(Report):
+    """
+    Standard SQL Report
+    """
+
+    def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], **kvargs: Any) -> None:
+        self._pg_conn_params = pg_conn_params
+        self._command_args = kvargs
+        self._sql_types = sql_types
+
+    @classmethod
+    def get_help(cls) -> str:
+        return """Statistics for SQL statements grouped by SQL type
+        Columns:
+            - user: OID of user who executed the statement
+            - database: Database in which the statement was executed. Displayed only if dbname is not specified
+            - queryid: Hash code to identify identical normalized queries
+            - calls: Number of times the statement was executed
+            - rows: Total number of rows retrieved or affected by the statement
+            - arows: Average of rows per execution
+            - time: Total time spent executing the statement, in milliseconds
+            - atime: Average time spent executing the statement, in milliseconds
+            - iotime: Total io time spent executing the statement. This is the sum of blk_read_time and blk_write_time
+            - aiotime: Average of iotime
+            - blk_r: The number of blocks read that were requested from the Operating System because they weren't available in shared buffers
+            - ablk_r: Average of blk_r
+            - buff_blk_r: The number of blocks read from the buffer cache. This is typically the fastest way blocks can be read
+                          This is the sum of shared_blks_hit and local_blks_hit.
+            - abuff_blk_r: Average of buff_blk_r
+            - buff_blk_r_pct: Percentage of buff_blk_r
+            - blk_w: The number of blocks written. This is the sum of shared_blks_written, local_blks_written and temp_blks_written
+            - ablk_w: Average of blk_w
+        """
+
+    def get_name(self) -> str:
+        return "top_sql_stats_simplified_by_type"
+
+    def get_args(self) -> Dict[str, Any]:
+        return self._command_args
+
+    def read_sql(self, sql_type: str) -> str:
+        return read_sql_input(self.get_name(), sql_type=sql_type, **self.get_args())
+
+    def execute_sql(self, sql_type: str) -> pd.DataFrame:
+        return execute_sql(
+            sql=self.read_sql(sql_type=sql_type),
+            **self._pg_conn_params,
+        )
+
+    def print_header(self) -> None:
+        help_panel = Panel(self.get_help(), title="Help", height=len(self.get_help().splitlines()) + 1)
+        input_panel = Panel(
+            Pretty(f"""Args: {self._command_args} --- SQL Types: {" ".join(self._sql_types.values())}"""),
+            title="Input",
+            height=len(self.get_args()) + 3,
+        )
         print(help_panel)
         print(input_panel)
 
@@ -149,8 +220,8 @@ def __init__(self, pg_conn_params: Dict[str, Any], sql_types: Dict[str, str], fe
 
     @classmethod
     def get_help(cls) -> str:
-        return """Active long running SQL queries \n
-        Columns:\n
+        return """Active long running SQL queries
+        Columns:
         - datid: OID of the database this backend is connected to
 
         - datname: Name of the database this backend is connected to
@@ -225,7 +296,7 @@ def execute_sql(self, sql_type: str) -> pd.DataFrame:
 
     def print_header(self) -> None:
         help_panel = Panel(self.get_help(), title="Help", height=len(self.get_help().splitlines()))
-        input_panel = Panel(Pretty(f"Args: {self._command_args}\n{self._sql_types}"), title="Input", height=len(self.get_args()) + 3)
+        input_panel = Panel(Pretty(f"Args: {self._command_args} --- SQL Types: {self._sql_types}"), title="Input", height=len(self.get_args()) + 3)
         print(help_panel)
         print(input_panel)
 

diff --git a/pg_stats_tools/reports_inputs/top_sql_stats_by_type.sql b/pg_stats_tools/reports_inputs/top_sql_stats_by_type.sql
@@ -1,7 +1,7 @@
 -- Executed for each sql_type
 SELECT
     pg_user.usename AS user,
-    pg_stat_database.datname AS database,
+    {% if dbname =="_all" %} pg_stat_database.datname AS database,{% endif %}
     queryid,
     LEFT(query, 50) AS query,
     {{top_stat_field}}
@@ -12,9 +12,9 @@ FROM pg_stat_statements
 JOIN pg_catalog.pg_user ON pg_stat_statements.userid = pg_catalog.pg_user.usesysid
 JOIN pg_stat_database ON pg_stat_statements.dbid = pg_stat_database.datid
 WHERE
-    query ILIKE '{{sql_type}}%'
+    query ~*  '^\s*{{sql_type}}'
 {% if dbname !="_all" %}
-    AND database = {{database}}
+    AND pg_stat_database.datname = '{{dbname}}'
 {% endif %}
 ORDER BY {{top_stat_field}} DESC
 LIMIT {{count}};