From c5c905662d32b188f73637faa03ed925d178420a Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Sat, 14 Sep 2024 15:44:55 +0800 Subject: [PATCH] Track entry creation timestamp This is a direct adaptation of upstream commit for the same feature, commit reference dc9f8a798307244d791b71f666f50de319799e7c. --- expected/{pgsk.out => 01_basic.out} | 0 pg_stat_kcache--2.2.3--2.3.0.sql | 118 ++++++++++++++++++++++++++++ pg_stat_kcache--2.3.0.sql | 12 ++- pg_stat_kcache.c | 68 ++++++++++------ pg_stat_kcache.h | 3 +- test/sql/{pgsk.sql => 01_basic.sql} | 0 6 files changed, 173 insertions(+), 28 deletions(-) rename expected/{pgsk.out => 01_basic.out} (100%) rename test/sql/{pgsk.sql => 01_basic.sql} (100%) diff --git a/expected/pgsk.out b/expected/01_basic.out similarity index 100% rename from expected/pgsk.out rename to expected/01_basic.out diff --git a/pg_stat_kcache--2.2.3--2.3.0.sql b/pg_stat_kcache--2.2.3--2.3.0.sql index 7385211..267f8c0 100644 --- a/pg_stat_kcache--2.2.3--2.3.0.sql +++ b/pg_stat_kcache--2.2.3--2.3.0.sql @@ -7,3 +7,121 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION pg_stat_kcache" to load this file. 
\quit +DROP VIEW pg_stat_kcache; +DROP VIEW pg_stat_kcache_detail; +DROP FUNCTION pg_stat_kcache(); + +CREATE FUNCTION pg_stat_kcache( + OUT queryid bigint, + OUT top bool, + OUT userid oid, + OUT dbid oid, + /* planning time */ + OUT plan_reads bigint, /* total reads, in bytes */ + OUT plan_writes bigint, /* total writes, in bytes */ + OUT plan_user_time double precision, /* total user CPU time used */ + OUT plan_system_time double precision, /* total system CPU time used */ + OUT plan_minflts bigint, /* total page reclaims (soft page faults) */ + OUT plan_majflts bigint, /* total page faults (hard page faults) */ + OUT plan_nswaps bigint, /* total swaps */ + OUT plan_msgsnds bigint, /* total IPC messages sent */ + OUT plan_msgrcvs bigint, /* total IPC messages received */ + OUT plan_nsignals bigint, /* total signals received */ + OUT plan_nvcsws bigint, /* total voluntary context switches */ + OUT plan_nivcsws bigint, /* total involuntary context switches */ + /* execution time */ + OUT exec_reads bigint, /* total reads, in bytes */ + OUT exec_writes bigint, /* total writes, in bytes */ + OUT exec_user_time double precision, /* total user CPU time used */ + OUT exec_system_time double precision, /* total system CPU time used */ + OUT exec_minflts bigint, /* total page reclaims (soft page faults) */ + OUT exec_majflts bigint, /* total page faults (hard page faults) */ + OUT exec_nswaps bigint, /* total swaps */ + OUT exec_msgsnds bigint, /* total IPC messages sent */ + OUT exec_msgrcvs bigint, /* total IPC messages received */ + OUT exec_nsignals bigint, /* total signals received */ + OUT exec_nvcsws bigint, /* total voluntary context switches */ + OUT exec_nivcsws bigint, /* total involuntary context switches */ + /* metadata */ + OUT stats_since timestamptz /* entry creation time */ +) +RETURNS SETOF record +LANGUAGE c COST 1000 +AS '$libdir/pg_stat_kcache', 'pg_stat_kcache_2_3'; +GRANT ALL ON FUNCTION pg_stat_kcache() TO public; + +CREATE VIEW 
pg_stat_kcache_detail AS +SELECT s.query, k.top, d.datname, r.rolname, + k.plan_user_time, + k.plan_system_time, + k.plan_minflts, + k.plan_majflts, + k.plan_nswaps, + k.plan_reads AS plan_reads, + k.plan_reads/(current_setting('block_size')::integer) AS plan_reads_blks, + k.plan_writes AS plan_writes, + k.plan_writes/(current_setting('block_size')::integer) AS plan_writes_blks, + k.plan_msgsnds, + k.plan_msgrcvs, + k.plan_nsignals, + k.plan_nvcsws, + k.plan_nivcsws, + k.exec_user_time, + k.exec_system_time, + k.exec_minflts, + k.exec_majflts, + k.exec_nswaps, + k.exec_reads AS exec_reads, + k.exec_reads/(current_setting('block_size')::integer) AS exec_reads_blks, + k.exec_writes AS exec_writes, + k.exec_writes/(current_setting('block_size')::integer) AS exec_writes_blks, + k.exec_msgsnds, + k.exec_msgrcvs, + k.exec_nsignals, + k.exec_nvcsws, + k.exec_nivcsws, + k.stats_since + FROM pg_stat_kcache() k + JOIN pg_stat_statements s + ON k.queryid = s.queryid AND k.dbid = s.dbid AND k.userid = s.userid + JOIN pg_database d + ON d.oid = s.dbid + JOIN pg_roles r + ON r.oid = s.userid; +GRANT SELECT ON pg_stat_kcache_detail TO public; + +CREATE VIEW pg_stat_kcache AS +SELECT datname, + SUM(plan_user_time) AS plan_user_time, + SUM(plan_system_time) AS plan_system_time, + SUM(plan_minflts) AS plan_minflts, + SUM(plan_majflts) AS plan_majflts, + SUM(plan_nswaps) AS plan_nswaps, + SUM(plan_reads) AS plan_reads, + SUM(plan_reads_blks) AS plan_reads_blks, + SUM(plan_writes) AS plan_writes, + SUM(plan_writes_blks) AS plan_writes_blks, + SUM(plan_msgsnds) AS plan_msgsnds, + SUM(plan_msgrcvs) AS plan_msgrcvs, + SUM(plan_nsignals) AS plan_nsignals, + SUM(plan_nvcsws) AS plan_nvcsws, + SUM(plan_nivcsws) AS plan_nivcsws, + SUM(exec_user_time) AS exec_user_time, + SUM(exec_system_time) AS exec_system_time, + SUM(exec_minflts) AS exec_minflts, + SUM(exec_majflts) AS exec_majflts, + SUM(exec_nswaps) AS exec_nswaps, + SUM(exec_reads) AS exec_reads, + SUM(exec_reads_blks) AS 
exec_reads_blks, + SUM(exec_writes) AS exec_writes, + SUM(exec_writes_blks) AS exec_writes_blks, + SUM(exec_msgsnds) AS exec_msgsnds, + SUM(exec_msgrcvs) AS exec_msgrcvs, + SUM(exec_nsignals) AS exec_nsignals, + SUM(exec_nvcsws) AS exec_nvcsws, + SUM(exec_nivcsws) AS exec_nivcsws, + MIN(stats_since) AS stats_since + FROM pg_stat_kcache_detail + WHERE top IS TRUE + GROUP BY datname; +GRANT SELECT ON pg_stat_kcache TO public; diff --git a/pg_stat_kcache--2.3.0.sql b/pg_stat_kcache--2.3.0.sql index d315059..61b7600 100644 --- a/pg_stat_kcache--2.3.0.sql +++ b/pg_stat_kcache--2.3.0.sql @@ -39,11 +39,13 @@ CREATE FUNCTION pg_stat_kcache( OUT exec_msgrcvs bigint, /* total IPC messages received */ OUT exec_nsignals bigint, /* total signals received */ OUT exec_nvcsws bigint, /* total voluntary context switches */ - OUT exec_nivcsws bigint /* total involuntary context switches */ + OUT exec_nivcsws bigint, /* total involuntary context switches */ + /* metadata */ + OUT stats_since timestamptz /* entry creation time */ ) RETURNS SETOF record LANGUAGE c COST 1000 -AS '$libdir/pg_stat_kcache', 'pg_stat_kcache_2_2'; +AS '$libdir/pg_stat_kcache', 'pg_stat_kcache_2_3'; GRANT ALL ON FUNCTION pg_stat_kcache() TO public; CREATE FUNCTION pg_stat_kcache_reset() @@ -81,7 +83,8 @@ SELECT s.query, k.top, d.datname, r.rolname, k.exec_msgrcvs, k.exec_nsignals, k.exec_nvcsws, - k.exec_nivcsws + k.exec_nivcsws, + k.stats_since FROM pg_stat_kcache() k JOIN pg_stat_statements s ON k.queryid = s.queryid AND k.dbid = s.dbid AND k.userid = s.userid @@ -120,7 +123,8 @@ SELECT datname, SUM(exec_msgrcvs) AS exec_msgrcvs, SUM(exec_nsignals) AS exec_nsignals, SUM(exec_nvcsws) AS exec_nvcsws, - SUM(exec_nivcsws) AS exec_nivcsws + SUM(exec_nivcsws) AS exec_nivcsws, + MIN(stats_since) AS stats_since FROM pg_stat_kcache_detail WHERE top IS TRUE GROUP BY datname; diff --git a/pg_stat_kcache.c b/pg_stat_kcache.c index 7865d87..88bd9c8 100644 --- a/pg_stat_kcache.c +++ b/pg_stat_kcache.c @@ -63,6 +63,7 @@ 
#if PG_VERSION_NUM >= 160000 #include "utils/pg_rusage.h" #endif +#include "utils/timestamp.h" #include "pg_stat_kcache.h" @@ -108,10 +109,12 @@ typedef enum pgskVersion { PGSK_V2_0 = 0, PGSK_V2_1, - PGSK_V2_2 + PGSK_V2_2, + PGSK_V2_3 } pgskVersion; -static const uint32 PGSK_FILE_HEADER = 0x0d756e11; +/* Magic number identifying the stats file format */ +static const uint32 PGSK_FILE_HEADER = 0x20240914; static struct rusage exec_rusage_start[PGSK_MAX_NESTED_LEVEL]; #if PG_VERSION_NUM >= 130000 @@ -140,6 +143,7 @@ typedef struct pgskEntry pgskHashKey key; /* hash key of entry - MUST BE FIRST */ pgskCounters counters[PGSK_NUMKIND]; /* statistics for this query */ slock_t mutex; /* protects the counters only */ + TimestampTz stats_since; /* timestamp of entry allocation */ } pgskEntry; /* @@ -215,11 +219,13 @@ extern PGDLLEXPORT Datum pg_stat_kcache_reset(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_stat_kcache(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_stat_kcache_2_1(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_stat_kcache_2_2(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum pg_stat_kcache_2_3(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(pg_stat_kcache_reset); PG_FUNCTION_INFO_V1(pg_stat_kcache); PG_FUNCTION_INFO_V1(pg_stat_kcache_2_1); PG_FUNCTION_INFO_V1(pg_stat_kcache_2_2); +PG_FUNCTION_INFO_V1(pg_stat_kcache_2_3); static void pg_stat_kcache_internal(FunctionCallInfo fcinfo, pgskVersion api_version); @@ -546,11 +552,13 @@ pgsk_shmem_startup(void) if (fread(&temp, sizeof(pgskEntry), 1, file) != 1) goto error; + /* make the hashtable entry (discards old entries if too many) */ entry = pgsk_entry_alloc(&temp.key); /* copy in the actual stats */ entry->counters[0] = temp.counters[0]; entry->counters[1] = temp.counters[1]; + entry->stats_since = temp.stats_since; /* don't initialize spinlock, already done */ } @@ -815,6 +823,7 @@ static pgskEntry *pgsk_entry_alloc(pgskHashKey *key) entry->counters[0].usage = USAGE_INIT; /* re-initialize the mutex each time ... 
we assume no one using it */ SpinLockInit(&entry->mutex); + entry->stats_since = GetCurrentTimestamp(); } return entry; @@ -1186,6 +1195,14 @@ pg_stat_kcache_2_2(PG_FUNCTION_ARGS) return (Datum) 0; } +PGDLLEXPORT Datum +pg_stat_kcache_2_3(PG_FUNCTION_ARGS) +{ + pg_stat_kcache_internal(fcinfo, PGSK_V2_3); + + return (Datum) 0; +} + static void pg_stat_kcache_internal(FunctionCallInfo fcinfo, pgskVersion api_version) { @@ -1235,12 +1252,13 @@ pg_stat_kcache_internal(FunctionCallInfo fcinfo, pgskVersion api_version) { Datum values[PG_STAT_KCACHE_COLS]; bool nulls[PG_STAT_KCACHE_COLS]; - pgskCounters tmp; + pgskCounters tmp[PGSK_NUMKIND]; int i = 0; int kind, min_kind = 0; #ifdef HAVE_GETRUSAGE int64 reads, writes; #endif + TimestampTz stats_since; memset(values, 0, sizeof(values)); memset(nulls, 0, sizeof(nulls)); @@ -1255,39 +1273,40 @@ pg_stat_kcache_internal(FunctionCallInfo fcinfo, pgskVersion api_version) if (api_version < PGSK_V2_2) min_kind = 1; - for (kind = min_kind; kind < PGSK_NUMKIND; kind++) + /* copy counters to a local variable to keep locking time short */ { - /* copy counters to a local variable to keep locking time short */ - { - volatile pgskEntry *e = (volatile pgskEntry *) entry; + volatile pgskEntry *e = (volatile pgskEntry *) entry; - SpinLockAcquire(&e->mutex); - tmp = e->counters[kind]; - SpinLockRelease(&e->mutex); - } + SpinLockAcquire(&e->mutex); + memcpy(tmp, (pgskCounters *) e->counters, sizeof(tmp)); + stats_since = e->stats_since; + SpinLockRelease(&e->mutex); + } + for (kind = min_kind; kind < PGSK_NUMKIND; kind++) + { #ifdef HAVE_GETRUSAGE - reads = tmp.reads * RUSAGE_BLOCK_SIZE; - writes = tmp.writes * RUSAGE_BLOCK_SIZE; + reads = tmp[kind].reads * RUSAGE_BLOCK_SIZE; + writes = tmp[kind].writes * RUSAGE_BLOCK_SIZE; values[i++] = Int64GetDatumFast(reads); values[i++] = Int64GetDatumFast(writes); #else nulls[i++] = true; /* reads */ nulls[i++] = true; /* writes */ #endif - values[i++] = Float8GetDatumFast(tmp.utime); - values[i++] = Float8GetDatumFast(tmp.stime); + 
values[i++] = Float8GetDatumFast(tmp[kind].utime); + values[i++] = Float8GetDatumFast(tmp[kind].stime); if (api_version >= PGSK_V2_1) { #ifdef HAVE_GETRUSAGE - values[i++] = Int64GetDatumFast(tmp.minflts); - values[i++] = Int64GetDatumFast(tmp.majflts); - values[i++] = Int64GetDatumFast(tmp.nswaps); - values[i++] = Int64GetDatumFast(tmp.msgsnds); - values[i++] = Int64GetDatumFast(tmp.msgrcvs); - values[i++] = Int64GetDatumFast(tmp.nsignals); - values[i++] = Int64GetDatumFast(tmp.nvcsws); - values[i++] = Int64GetDatumFast(tmp.nivcsws); + values[i++] = Int64GetDatumFast(tmp[kind].minflts); + values[i++] = Int64GetDatumFast(tmp[kind].majflts); + values[i++] = Int64GetDatumFast(tmp[kind].nswaps); + values[i++] = Int64GetDatumFast(tmp[kind].msgsnds); + values[i++] = Int64GetDatumFast(tmp[kind].msgrcvs); + values[i++] = Int64GetDatumFast(tmp[kind].nsignals); + values[i++] = Int64GetDatumFast(tmp[kind].nvcsws); + values[i++] = Int64GetDatumFast(tmp[kind].nivcsws); #else nulls[i++] = true; /* minflts */ nulls[i++] = true; /* majflts */ @@ -1300,10 +1319,13 @@ pg_stat_kcache_internal(FunctionCallInfo fcinfo, pgskVersion api_version) #endif } } + if (api_version >= PGSK_V2_3) + values[i++] = TimestampTzGetDatum(stats_since); Assert(i == (api_version == PGSK_V2_0 ? PG_STAT_KCACHE_COLS_V2_0 : api_version == PGSK_V2_1 ? PG_STAT_KCACHE_COLS_V2_1 : api_version == PGSK_V2_2 ? PG_STAT_KCACHE_COLS_V2_2 : + api_version == PGSK_V2_3 ? 
PG_STAT_KCACHE_COLS_V2_3 : -1 /* fail if you forget to update this assert */ )); tuplestore_putvalues(tupstore, tupdesc, values, nulls); diff --git a/pg_stat_kcache.h b/pg_stat_kcache.h index d5329e8..e1d041c 100644 --- a/pg_stat_kcache.h +++ b/pg_stat_kcache.h @@ -4,7 +4,8 @@ #define PG_STAT_KCACHE_COLS_V2_0 7 #define PG_STAT_KCACHE_COLS_V2_1 15 #define PG_STAT_KCACHE_COLS_V2_2 28 -#define PG_STAT_KCACHE_COLS 28 /* maximum of above */ +#define PG_STAT_KCACHE_COLS_V2_3 29 +#define PG_STAT_KCACHE_COLS 29 /* maximum of above */ /* ru_inblock block size is 512 bytes with Linux * see http://lkml.indiana.edu/hypermail/linux/kernel/0703.2/0937.html diff --git a/test/sql/pgsk.sql b/test/sql/01_basic.sql similarity index 100% rename from test/sql/pgsk.sql rename to test/sql/01_basic.sql