From 52752a96812a0646dc7aa408b8c1fb32dafa113b Mon Sep 17 00:00:00 2001 From: Anthonin Bonnefoy Date: Fri, 29 Mar 2024 17:08:11 +0100 Subject: [PATCH] Generate spans from planstate (#4) * Generate spans from planstate During ExecutorEnd, we can process the executed plan to generate spans from the instrumented nodes. * Update tests with the expected planstate spans * Add GUC parameter to enable spans from planstate * pgindent * Correctly handle deparse offset * Output plan counters, parameters and deparse info for span nodes * Avoid cascade in cleanup test * Update wal test with span node expected values * Added tests for planstate * Fix full buffer test expectation * Remove unecessary prototype * Don't instrument query if planstate spans are not requested * Rename planstate spans parameter * Update doc with new parameters * Typo * pgindent * Export exec_nested_level * Correctly propagate parent end time to process_query_desc * Stop node instrumentation on error * Renamed planstate_start to traced_planstate and fix parent link * Update test expectations * Update nested test * Fix typo in deparse help text --- Makefile | 7 +- doc/pg_tracing.md | 11 +- expected/cleanup.out | 7 +- expected/extended.out | 15 +- expected/full_buffer.out | 4 +- expected/insert.out | 8 +- expected/nested.out | 160 ++++++++--- expected/planstate.out | 79 +++++ expected/sample.out | 11 +- expected/select.out | 41 ++- expected/subxact.out | 9 +- expected/trigger.out | 107 ++++--- expected/utility.out | 14 +- expected/wal.out | 33 ++- pg_tracing--0.1.0.sql | 3 +- pg_tracing.conf | 2 +- sql/cleanup.sql | 4 +- sql/full_buffer.sql | 4 +- sql/nested.sql | 119 +++++--- sql/planstate.sql | 31 ++ sql/trigger.sql | 8 +- sql/utility.sql | 2 +- src/pg_tracing.c | 122 +++++++- src/pg_tracing.h | 61 ++++ src/pg_tracing_explain.c | 559 ++++++++++++++++++++++++++++++++++++ src/pg_tracing_planstate.c | 571 +++++++++++++++++++++++++++++++++++++ src/pg_tracing_span.c | 14 + src/version_compat.h | 2 +- 28 files changed, 1818 insertions(+), 190 deletions(-) create mode 100644 expected/planstate.out create mode 100644 sql/planstate.sql create mode 100644 src/pg_tracing_explain.c create mode 100644 src/pg_tracing_planstate.c diff --git a/Makefile b/Makefile index fd00397..d29b13d 100644 --- a/Makefile +++ b/Makefile @@ -14,11 +14,14 @@ OBJS = \ src/pg_tracing.o \ src/pg_tracing_query_process.o \ src/pg_tracing_parallel.o \ + src/pg_tracing_planstate.o \ + src/pg_tracing_explain.o \ src/pg_tracing_span.o REGRESSCHECKS = utility select extended insert trigger sample \ - parallel subxact full_buffer nested wal cleanup -REGRESSCHECKS_OPTS = --no-locale --encoding=UTF8 --temp-config pg_tracing.conf --temp-instance=./tmp_check + planstate parallel subxact full_buffer nested wal \ + cleanup +REGRESSCHECKS_OPTS = --no-locale --encoding=UTF8 --temp-config pg_tracing.conf PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/doc/pg_tracing.md b/doc/pg_tracing.md index 7257265..6cf171a 100644 --- a/doc/pg_tracing.md +++ b/doc/pg_tracing.md @@ -85,6 +85,7 @@ The spans generated by the module are made available via a view named `pg_tracin | `jit_optimization_time` | double precision | Total time spent by the node on optimizing, in milliseconds | | `startup` | bigint | Time to the first tuple in nanoseconds | | `parameters` | text | Value of the query's parameters | +| `deparse_info` | text | Information extracted from deparsing a plan node | ## Functions @@ -120,6 +121,10 @@ Controls span buffer's behaviour when `pg_tracing.max_span` spans is reached. If Controls the fraction of statements with SQLCommenter tracecontext and an enabled sampled flag that will generate spans. The default value is 1. +### pg_tracing.deparse_plan (boolean) + +Controls whether the query plan should be deparsed. Deparsing plan allows to add more details in span's name. A `BitmapHeapScan` node may have `Recheck Cond: (a = 1)` as deparsing information. The default value is `on`. + ### pg_tracing.export_parameters (boolean) Controls whether the query's parameters should be exported in spans metadata. The default value is `on`. @@ -149,13 +154,17 @@ FROM pg_shmem_allocations WHERE name ='PgTracing Spans'; ``` +### pg_tracing.planstate_spans (boolean) + +Controls if spans should be generated from the executed query plan. The default value is true. + ### pg_tracing.sample_rate (real) Controls the fraction of statements that generate spans. Statements with tracecontext propagated with SQLCommenter and sampled flag enabled are not impacted by this parameter. For traces with nested statements, either all will be explained or none. The default value is 0. ### pg_tracing.trace_parallel_workers (boolean) -Controls whether spans should be generated for workers created by parallel queries. +Controls whether spans should be generated for workers created by parallel queries. The default value is true. ### pg_tracing.track (enum) diff --git a/expected/cleanup.out b/expected/cleanup.out index 7ab6696..0757394 100644 --- a/expected/cleanup.out +++ b/expected/cleanup.out @@ -2,7 +2,6 @@ DROP TABLE pg_tracing_test; DROP function test_function_project_set; DROP function test_function_result; -DROP EXTENSION pg_tracing CASCADE; -NOTICE: drop cascades to 2 other objects -DETAIL: drop cascades to view peek_spans_with_level -drop cascades to view peek_ordered_spans +DROP VIEW peek_ordered_spans; +DROP VIEW peek_spans_with_level; +DROP EXTENSION pg_tracing; diff --git a/expected/extended.out b/expected/extended.out index 90400d7..6155dc4 100644 --- a/expected/extended.out +++ b/expected/extended.out @@ -13,7 +13,8 @@ SELECT span_type, span_operation, parameters, lvl FROM peek_ordered_spans; Select query | SELECT $1, $2 | $1 = '1', $2 = '2' | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 -(3 rows) + Result | Result | | 3 +(4 rows) CALL clean_spans(); -- Trigger an error due to mismatching number of parameters @@ -60,12 +61,14 @@ SELECT span_type, span_operation, parameters, lvl FROM peek_ordered_spans; Select query | SELECT $1 | $1 = '1' | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 + Result | Result | | 3 Select query | SELECT $1, $2 | $1 = '2', $2 = '3' | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 + Result | Result | | 3 Utility query | COMMIT; | | 1 ProcessUtility | ProcessUtility | | 2 -(10 rows) +(12 rows) CALL clean_spans(); -- Mix extended protocol and simple protocol @@ -104,15 +107,18 @@ SELECT span_type, span_operation, parameters, lvl FROM peek_ordered_spans; Select query | SELECT $1 | $1 = '1' | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 + Result | Result | | 3 Select query | SELECT $1, $2, $3; | $1 = 5, $2 = 6, $3 = 7 | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 + Result | Result | | 3 Select query | SELECT $1, $2 | $1 = '2', $2 = '3' | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 + Result | Result | | 3 Utility query | COMMIT; | | 1 ProcessUtility | ProcessUtility | | 2 -(13 rows) +(16 rows) CALL clean_spans(); -- gdesc calls a single parse command then execute a query. Make sure we handle this case @@ -128,6 +134,7 @@ SELECT span_type, span_operation, parameters, lvl FROM peek_ordered_spans; Select query | SELECT name AS "Column", pg_catalog.format_type(tp, tpm) AS "Type"FROM (VALUES ($1, $2::pg_catalog.oid,$3)) s(name, tp, tpm) | $1 = '?column?', $2 = '23', $3 = -1 | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 -(3 rows) + Result | Result | | 3 +(4 rows) CALL clean_spans(); diff --git a/expected/full_buffer.out b/expected/full_buffer.out index 6079a27..5f81f55 100644 --- a/expected/full_buffer.out +++ b/expected/full_buffer.out @@ -25,7 +25,7 @@ select processed_traces, processed_spans, dropped_traces, dropped_spans from pg_ -- Saturate the span buffer. Each call should create at least 2 spans /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ CALL loop_select(20); -- Check that we have dropped spans. The trace was still partially processed -select processed_traces = 1, processed_spans = 40, dropped_traces = 0, dropped_spans > 0 from pg_tracing_info(); +select processed_traces = 1, processed_spans = 50, dropped_traces = 0, dropped_spans > 0 from pg_tracing_info(); ?column? | ?column? | ?column? | ?column? ----------+----------+----------+---------- t | t | t | t @@ -45,7 +45,7 @@ select processed_traces = 1, processed_spans = 40, dropped_traces = 0, dropped_s (1 row) -- We should have only one additional dropped trace -select processed_traces = 1, processed_spans = 40, dropped_traces = 1 from pg_tracing_info(); +select processed_traces = 1, processed_spans = 50, dropped_traces = 1 from pg_tracing_info(); ?column? | ?column? | ?column? ----------+----------+---------- t | t | t diff --git a/expected/insert.out b/expected/insert.out index e8b3c69..d429133 100644 --- a/expected/insert.out +++ b/expected/insert.out @@ -19,7 +19,9 @@ SELECT span_type, span_operation from peek_ordered_spans where trace_id='0000000 Insert query | INSERT INTO pg_tracing_test_table_with_constraint VALUES($1, $2); Planner | Planner Executor | ExecutorRun -(3 rows) + Insert | Insert on pg_tracing_test_table_with_constraint + Result | Result +(5 rows) -- Trigger constraint violation /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ INSERT INTO pg_tracing_test_table_with_constraint VALUES(1, 'aaa'); @@ -31,7 +33,9 @@ SELECT span_type, span_operation, sql_error_code, lvl from peek_ordered_spans wh Insert query | INSERT INTO pg_tracing_test_table_with_constraint VALUES($1, $2); | 23505 | 1 Planner | Planner | 00000 | 2 Executor | ExecutorRun | 23505 | 2 -(3 rows) + Insert | Insert on pg_tracing_test_table_with_constraint | 23505 | 3 + Result | Result | 23505 | 4 +(5 rows) -- Trigger an error while calling pg_tracing_peek_spans which resets tracing, nothing should be generated CALL clean_spans(); diff --git a/expected/nested.out b/expected/nested.out index 1270487..2012623 100644 --- a/expected/nested.out +++ b/expected/nested.out @@ -20,38 +20,62 @@ LANGUAGE plpgsql; (0 rows) -- The test function call will generate the following spans (non exhaustive list): --- +------------------------------------------------------------+ --- | A: Select test_function_project_set(1); | --- +-+----------+-+------------------------------------------+--+ --- |C: Planner| |E: ExecutorRun | --- +----------+ +-----------+------------------------------+ --- | I: Select a from b where... | --- +---+--------------+-----------+ --- |J: ExecutorRun| --- +--------------+ +-- +---------------------------------------------------------------------------------+ +-- | A: Select test_function_project_set(1); | +-- +-+----------++----------------++-------------------------------------------------+ +-- |B: Planner||C: ExecutorStart||D: ExecutorRun | +-- +----------++----------------+-+---------------------------------------------+ +-- |E: ProjectSet | +-- ++---------+------------------------------+-+ +-- |F: Result| G: Select a from b where... | +-- +---------+--------------+---------------+ +-- |H: ExecutorRun| +-- +--------------+ -- Gather span_id, span start and span end of function call statement SELECT span_id AS span_a_id, get_span_start(span_start) as span_a_start, get_span_end(span_start) as span_a_end from pg_tracing_peek_spans where parent_id='0000000000000051' \gset +SELECT span_id AS span_d_id, + get_span_start(span_start) as span_d_start, + get_span_end(span_start) as span_d_end + from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset SELECT span_id AS span_e_id, get_span_start(span_start) as span_e_start, get_span_end(span_start) as span_e_end - from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset -SELECT span_id AS span_i_id, - get_span_start(span_start) as span_i_start, - get_span_end(span_start) as span_i_end + from pg_tracing_peek_spans where parent_id=:'span_d_id' and span_type='ProjectSet' \gset +SELECT span_id AS span_g_id, + get_span_start(span_start) as span_g_start, + get_span_end(span_start) as span_g_end + from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Result' \gset +SELECT span_id AS span_g_id, + get_span_start(span_start) as span_g_start, + get_span_end(span_start) as span_g_end from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Select query' \gset -SELECT span_id AS span_j_id, - get_span_start(span_start) as span_j_start, - get_span_end(span_start) as span_j_end - from pg_tracing_peek_spans where parent_id=:'span_i_id' and span_operation='ExecutorRun' \gset +SELECT span_id AS span_h_id, + get_span_start(span_start) as span_h_start, + get_span_end(span_start) as span_h_end + from pg_tracing_peek_spans where parent_id=:'span_g_id' and span_operation='ExecutorRun' \gset -- Check that spans' start and end are within expection -SELECT :span_a_start <= :span_e_start AS top_query_before_run, - :span_a_end >= :span_e_end AS top_ends_after_run_end; - top_query_before_run | top_ends_after_run_end -----------------------+------------------------ - t | t +SELECT :span_a_start <= :span_d_start AS top_query_before_run, + :span_a_end >= :span_d_end AS top_ends_after_run_end, + :span_d_start <= :span_e_start AS top_run_starts_before_project, + :span_d_end >= :span_e_end AS top_run_ends_after_project_end, + :span_d_end >= :span_h_end AS top_run_ends_before_select_end, + :span_d_end >= :span_g_end AS top_run_ends_after_nested_run_end; + top_query_before_run | top_ends_after_run_end | top_run_starts_before_project | top_run_ends_after_project_end | top_run_ends_before_select_end | top_run_ends_after_nested_run_end +----------------------+------------------------+-------------------------------+--------------------------------+--------------------------------+----------------------------------- + t | t | t | t | t | t +(1 row) + +SELECT + :span_g_end >= :span_h_start AS nested_result_ends_before_parse, + :span_h_end <= :span_g_start AS nested_parse_ends_before_select, + :span_h_start >= :span_g_start AS run_starts_after_parent_select, + :span_h_end <= :span_g_end AS run_ends_after_select_end; + nested_result_ends_before_parse | nested_parse_ends_before_select | run_starts_after_parent_select | run_ends_after_select_end +---------------------------------+---------------------------------+--------------------------------+--------------------------- + t | t | t | t (1 row) -- Check that the root span is the longest one @@ -63,6 +87,13 @@ SELECT span_end = max_end.max from pg_tracing_peek_spans, max_end t (1 row) +-- Check that ExecutorRun is attached to the nested top span +SELECT span_operation, deparse_info from pg_tracing_peek_spans where parent_id=:'span_h_id' order by span_operation; + span_operation | deparse_info +----------------------------------------------------+------------------------------ + IndexOnlyScan using pg_class_oid_index on pg_class | Index Cond: (oid = '1'::oid) +(1 row) + -- Check tracking with top tracking SET pg_tracing.track = 'top'; /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000052-0000000000000052-01'*/ select test_function_project_set(1); @@ -73,7 +104,7 @@ SET pg_tracing.track = 'top'; SELECT count(*) from pg_tracing_consume_spans where trace_id='00000000000000000000000000000052'; count ------- - 3 + 5 (1 row) -- Check tracking with no tracking @@ -110,7 +141,8 @@ select span_operation, lvl FROM peek_ordered_spans where trace_id='0000000000000 Planner | 4 Planner | 4 ExecutorRun | 4 -(6 rows) + Result | 5 +(7 rows) -- Test again with utility tracking disabled SET pg_tracing.track_utility=off; @@ -137,17 +169,54 @@ select span_operation, lvl FROM peek_ordered_spans where trace_id='0000000000000 Planner | 2 Planner | 3 ExecutorRun | 2 + Result | 3 +(5 rows) + +-- Create function with generate series +CREATE OR REPLACE FUNCTION test_generate_series(IN anyarray, OUT x anyelement) + RETURNS SETOF anyelement + LANGUAGE sql + AS 'select * from pg_catalog.generate_series(array_lower($1, 1), array_upper($1, 1), 1)'; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000057-0000000000000057-01'*/ select test_generate_series('{1,2,3,4}'::int[]) FROM (VALUES (1,2)); + test_generate_series +---------------------- + 1 + 2 + 3 + 4 (4 rows) --- +-------------------------------------------------------------------+ --- | A: Select test_function(1); | --- +-+----------++----------------++-------------------------------+---+ --- |C: Planner||D: ExecutorStart||E: ExecutorRun | --- +----------++----------------+++------------------------------+ --- |H: Insert INTO... | --- +--+--------------+---+ --- |I: ExecutorRun| --- +--------------+ +SELECT span_id AS span_project_set_id, + get_span_start(span_start) as span_project_set_start, + get_span_end(span_start) as span_project_set_end + from pg_tracing_peek_spans where span_type='ProjectSet' \gset +SELECT span_id AS span_result_id, + get_span_start(span_start) as span_result_start, + get_span_end(span_start) as span_result_end + from pg_tracing_peek_spans where parent_id=:'span_project_set_id' and span_type='Result' \gset +select span_operation, lvl FROM peek_ordered_spans where trace_id='00000000000000000000000000000057'; + span_operation | lvl +----------------------------------------------------------------------------------------+----- + select test_generate_series($1::int[]) FROM (VALUES ($2,$3)); | 1 + Planner | 2 + ExecutorRun | 2 + ProjectSet | 3 + Result | 4 + select * from pg_catalog.generate_series(array_lower($1, $2), array_upper($1, $3), $4) | 4 + Planner | 5 +(7 rows) + +-- +-----------------------------------------------------------+ +-- | A: Select test_function(1); | +-- +-+----------+---+--------------------------------------+---+ +-- |B: Planner| |C: ExecutorRun | +-- +----------+ ++-------------------------------------+ +-- |D: Result | +-- +----+-----------------------------+-+ +-- |E: Insert INTO... | +-- +---+--------------+----------+ +-- |F: ExecutorRun| +-- +--------------+ -- Check function with result node /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000058-0000000000000058-01'*/ select test_function_result(1, 'test'); test_function_result @@ -160,24 +229,31 @@ SELECT span_id AS span_a_id, get_span_start(span_start) as span_a_start, get_span_end(span_start) as span_a_end from pg_tracing_peek_spans where parent_id='0000000000000058' \gset +SELECT span_id AS span_c_id, + get_span_start(span_start) as span_c_start, + get_span_end(span_start) as span_c_end + from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset +SELECT span_id AS span_d_id, + get_span_start(span_start) as span_d_start, + get_span_end(span_start) as span_d_end + from pg_tracing_peek_spans where parent_id=:'span_c_id' and span_type='Result' \gset SELECT span_id AS span_e_id, get_span_start(span_start) as span_e_start, get_span_end(span_start) as span_e_end - from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset -SELECT span_id AS span_h_id, - get_span_start(span_start) as span_h_start, - get_span_end(span_start) as span_h_end - from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Insert query' \gset + from pg_tracing_peek_spans where parent_id=:'span_d_id' and span_type='Insert query' \gset select span_operation, lvl FROM peek_ordered_spans where trace_id='00000000000000000000000000000058'; span_operation | lvl -------------------------------------------------+----- select test_function_result($1, $2); | 1 Planner | 2 ExecutorRun | 2 - INSERT INTO pg_tracing_test(a, b) VALUES (a, b) | 3 - Planner | 4 - ExecutorRun | 4 -(6 rows) + Result | 3 + INSERT INTO pg_tracing_test(a, b) VALUES (a, b) | 4 + Planner | 5 + ExecutorRun | 5 + Insert on pg_tracing_test | 6 + Result | 7 +(9 rows) -- Cleanup CALL clean_spans(); diff --git a/expected/planstate.out b/expected/planstate.out new file mode 100644 index 0000000..4ae2407 --- /dev/null +++ b/expected/planstate.out @@ -0,0 +1,79 @@ +-- Test with planstate_spans disabled +SET pg_tracing.planstate_spans = false; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; + ?column? +---------- + 0 +(1 row) + +SELECT span_type, span_operation, deparse_info FROM peek_ordered_spans where trace_id='00000000000000000000000000000001'; + span_type | span_operation | deparse_info +--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------- + Select query | SELECT s.relation_size + s.index_sizeFROM (SELECT pg_relation_size(C.oid) as relation_size, pg_indexes_size(C.oid) as index_size FROM pg_class C) as s limit $1; | + Planner | Planner | + Executor | ExecutorRun | +(3 rows) + +-- Test with planstate_spans enabled +SET pg_tracing.planstate_spans = true; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; + ?column? +---------- + 0 +(1 row) + +SELECT span_type, span_operation, deparse_info FROM peek_ordered_spans where trace_id='00000000000000000000000000000002'; + span_type | span_operation | deparse_info +--------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------- + Select query | SELECT s.relation_size + s.index_sizeFROM (SELECT pg_relation_size(C.oid) as relation_size, pg_indexes_size(C.oid) as index_size FROM pg_class C) as s limit $1; | + Planner | Planner | + Executor | ExecutorRun | + Limit | Limit | + SubqueryScan | SubqueryScan on s | + SeqScan | SeqScan on pg_class c | +(6 rows) + +-- Check generated spans when deparse is disabled +SET pg_tracing.deparse_plan=false; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ SELECT * from pg_tracing_test where a=1; + a | b +---+--- +(0 rows) + +SELECT span_operation, deparse_info, parameters, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; + span_operation | deparse_info | parameters | lvl +-------------------------------------------+--------------+------------+----- + SELECT * from pg_tracing_test where a=$1; | | $1 = 1 | 1 + Planner | | | 2 + ExecutorRun | | | 2 + BitmapHeapScan on pg_tracing_test | | | 3 + BitmapIndexScan on pg_tracing_index | | | 4 +(5 rows) + +-- Check generated spans when deparse is enabled +SET pg_tracing.deparse_plan=true; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ SELECT * from pg_tracing_test where a=1; + a | b +---+--- +(0 rows) + +SELECT span_operation, deparse_info, parameters, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; + span_operation | deparse_info | parameters | lvl +-------------------------------------------+-----------------------+------------+----- + SELECT * from pg_tracing_test where a=$1; | | $1 = 1 | 1 + Planner | | | 2 + ExecutorRun | | | 2 + BitmapHeapScan on pg_tracing_test | Recheck Cond: (a = 1) | | 3 + BitmapIndexScan on pg_tracing_index | Index Cond: (a = 1) | | 4 +(5 rows) + +-- Clean created spans +CALL clean_spans(); diff --git a/expected/sample.out b/expected/sample.out index c56c502..be4f4ce 100644 --- a/expected/sample.out +++ b/expected/sample.out @@ -68,16 +68,20 @@ select span_operation, parameters, lvl from peek_ordered_spans; SELECT $1; | $1 = 1 | 1 Planner | | 2 ExecutorRun | | 2 + Result | | 3 SELECT $1; | $1 = 2 | 1 Planner | | 2 ExecutorRun | | 2 + Result | | 3 SELECT $1; | $1 = 3 | 1 Planner | | 2 ExecutorRun | | 2 + Result | | 3 SELECT $1; | $1 = 4 | 1 Planner | | 2 ExecutorRun | | 2 -(12 rows) + Result | | 3 +(16 rows) -- Top spans should reuse generated ids and have trace_id = parent_id select span_operation, parameters from peek_ordered_spans where right(trace_id, 16) = parent_id; @@ -142,13 +146,16 @@ select span_operation, parameters, lvl from peek_ordered_spans; SELECT $1; | $1 = 1 | 1 Planner | | 2 ExecutorRun | | 2 + Result | | 3 SELECT $1; | $1 = 3 | 1 Planner | | 2 ExecutorRun | | 2 + Result | | 3 SELECT $1; | $1 = 4 | 1 Planner | | 2 ExecutorRun | | 2 -(9 rows) + Result | | 3 +(12 rows) -- Cleaning CALL clean_spans(); diff --git a/expected/select.out b/expected/select.out index ab87f92..2d7a69f 100644 --- a/expected/select.out +++ b/expected/select.out @@ -31,7 +31,8 @@ SELECT span_type, span_operation from pg_tracing_peek_spans where trace_id='0000 Select query | SELECT $1; Planner | Planner Executor | ExecutorRun -(3 rows) + Result | Result +(4 rows) -- Check userid SELECT userid = (SELECT usesysid FROM pg_user WHERE usename = current_user) FROM pg_tracing_peek_spans GROUP BY userid; @@ -70,7 +71,9 @@ SELECT span_type, span_operation, lvl FROM peek_ordered_spans where trace_id='00 Select query | SELECT count(*) from current_database(); | 1 Planner | Planner | 2 Executor | ExecutorRun | 2 -(3 rows) + Aggregate | Aggregate | 3 + FunctionScan | FunctionScan on current_database | 4 +(5 rows) -- Check expected reported number of trace SELECT processed_traces = :processed_traces + 1 from pg_tracing_info; @@ -97,7 +100,10 @@ SELECT span_type, span_operation, lvl FROM peek_ordered_spans where trace_id='00 Select query | SELECT s.relation_size + s.index_sizeFROM (SELECT pg_relation_size(C.oid) as relation_size, pg_indexes_size(C.oid) as index_size FROM pg_class C) as s limit $1; | 1 Planner | Planner | 2 Executor | ExecutorRun | 2 -(3 rows) + Limit | Limit | 3 + SubqueryScan | SubqueryScan on s | 4 + SeqScan | SeqScan on pg_class c | 5 +(6 rows) -- Check that we're in a correct state after a timeout set statement_timeout=200; @@ -110,7 +116,8 @@ SELECT span_type, span_operation, sql_error_code, lvl FROM peek_ordered_spans wh Select query | select * from pg_sleep($1); | 57014 | 1 Planner | Planner | 00000 | 2 Executor | ExecutorRun | 57014 | 2 -(3 rows) + FunctionScan | FunctionScan on pg_sleep | 57014 | 3 +(4 rows) -- Cleanup statement setting set statement_timeout=0; @@ -128,7 +135,8 @@ SELECT span_type, span_operation, sql_error_code, lvl FROM peek_ordered_spans wh Select query | select $1; | 00000 | 1 Planner | Planner | 00000 | 2 Executor | ExecutorRun | 00000 | 2 -(3 rows) + Result | Result | 00000 | 3 +(4 rows) -- Cleanup SET plan_cache_mode='auto'; @@ -144,7 +152,8 @@ SELECT span_operation, parameters, lvl from peek_ordered_spans where trace_id='0 select $1 limit $2; | $1 = 1, $2 = 0 | 1 Planner | | 2 ExecutorRun | | 2 -(3 rows) + Limit | | 3 +(4 rows) -- Test multiple statements in a single query /*dddbs='postgres.db',traceparent='00-0000000000000000000000000000000c-000000000000000c-01'*/ select 1; select 2; @@ -164,7 +173,8 @@ SELECT span_operation, parameters, lvl from peek_ordered_spans where trace_id='0 select $1; | $1 = 1 | 1 Planner | | 2 ExecutorRun | | 2 -(3 rows) + Result | | 3 +(4 rows) -- Check that parameters are not exported when disabled SET pg_tracing.export_parameters=false; @@ -180,7 +190,8 @@ SELECT span_operation, parameters, lvl from peek_ordered_spans where trace_id='0 select $1, $2, $3; | | 1 Planner | | 2 ExecutorRun | | 2 -(3 rows) + Result | | 3 +(4 rows) SET pg_tracing.export_parameters=true; -- Check multi statement query @@ -204,7 +215,8 @@ SELECT span_type, span_operation, parameters, lvl from peek_ordered_spans; Select query | SELECT $1; | $1 = 1 | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 -(3 rows) + Result | Result | | 3 +(4 rows) CALL clean_spans(); -- Check standalone trace @@ -221,7 +233,8 @@ SELECT count(span_id) from pg_tracing_consume_spans group by span_id; 1 1 1 -(3 rows) + 1 +(4 rows) -- Trigger a planner error SELECT '\xDEADBEEF'::bytea::text::int; @@ -258,9 +271,11 @@ SELECT span_type, span_operation, parameters, lvl from peek_ordered_spans; Select query | SELECT lazy_function($1::int[]) FROM (VALUES ($2,$3)); | $1 = '{1,2,3,4}', $2 = 1, $3 = 2 | 1 Planner | Planner | | 2 Executor | ExecutorRun | | 2 - Select query | select * from pg_catalog.generate_series(array_lower($1, $2), array_upper($1, $3), $4) | $1 = 1, $2 = 1, $3 = 1 | 3 - Planner | Planner | | 4 -(7 rows) + ProjectSet | ProjectSet | | 3 + Result | Result | | 4 + Select query | select * from pg_catalog.generate_series(array_lower($1, $2), array_upper($1, $3), $4) | $1 = 1, $2 = 1, $3 = 1 | 4 + Planner | Planner | | 5 +(9 rows) -- Cleanup SET pg_tracing.sample_rate = 0.0; diff --git a/expected/subxact.out b/expected/subxact.out index 471acf9..6644a19 100644 --- a/expected/subxact.out +++ b/expected/subxact.out @@ -25,19 +25,26 @@ select span_operation, parameters, subxact_count, lvl FROM peek_ordered_spans; INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | $1 = 1, $2 = 2, $3 = 'aaa' | 0 | 1 Planner | | 0 | 2 ExecutorRun | | 0 | 2 + Insert on pg_tracing_test | | 1 | 3 + ProjectSet | | 1 | 4 + Result | | 1 | 5 SAVEPOINT s2; | | 1 | 1 ProcessUtility | | 1 | 2 INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | $1 = 1, $2 = 2, $3 = 'aaa' | 1 | 1 Planner | | 1 | 2 ExecutorRun | | 1 | 2 + Insert on pg_tracing_test | | 2 | 3 + ProjectSet | | 2 | 4 + Result | | 2 | 5 SAVEPOINT s3; | | 2 | 1 ProcessUtility | | 2 | 2 SELECT $1; | $1 = 1 | 2 | 1 Planner | | 2 | 2 ExecutorRun | | 2 | 2 + Result | | 2 | 3 COMMIT; | | 2 | 1 ProcessUtility | | 2 | 2 -(19 rows) +(26 rows) -- Cleaning CALL clean_spans(); diff --git a/expected/trigger.out b/expected/trigger.out index 9c0b11a..ba34fc6 100644 --- a/expected/trigger.out +++ b/expected/trigger.out @@ -140,14 +140,20 @@ SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00 Insert query | INSERT INTO Employee VALUES($1,$2); | 1 Planner | Planner | 2 Executor | ExecutorRun | 2 + Insert | Insert on employee | 3 + Result | Result | 4 Executor | ExecutorFinish | 2 Insert query | INSERT INTO Employee_Audit (EmployeeId, LastName, EmpAdditionTime) VALUES(NEW.EmployeeId,NEW.LastName,current_date) | 3 Planner | Planner | 4 Executor | ExecutorRun | 4 + Insert | Insert on employee_audit | 5 + Result | Result | 6 Insert query | INSERT INTO Employee_Audit (EmployeeId, LastName, EmpAdditionTime) VALUES(NEW.EmployeeId,NEW.LastName,current_date) | 3 Planner | Planner | 4 Executor | ExecutorRun | 4 -(10 rows) + Insert | Insert on employee_audit | 5 + Result | Result | 6 +(16 rows) -- Check count of query_id SELECT count(distinct query_id) from pg_tracing_consume_spans where trace_id='00000000000000000000000000000001'; @@ -183,59 +189,72 @@ CREATE TRIGGER employee_delete_trigger -> Result (cost=0.00..0.01 rows=1 width=4) (2 rows) -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='fed00000000000000000000000000001'; - span_type | span_operation | lvl -----------------+------------------------------------------------------+----- - Utility query | explain INSERT INTO before_trigger_table VALUES($1); | 1 - ProcessUtility | ProcessUtility | 2 - Insert query | explain INSERT INTO before_trigger_table VALUES($1); | 3 - Planner | Planner | 4 +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='fed00000000000000000000000000001'; + trace_id | span_type | span_operation | lvl +----------------------------------+----------------+------------------------------------------------------+----- + fed00000000000000000000000000001 | Utility query | explain INSERT INTO before_trigger_table VALUES($1); | 1 + fed00000000000000000000000000001 | ProcessUtility | ProcessUtility | 2 + fed00000000000000000000000000001 | Insert query | explain INSERT INTO before_trigger_table VALUES($1); | 3 + fed00000000000000000000000000001 | Planner | Planner | 4 (4 rows) -- Call before trigger insert /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ INSERT INTO before_trigger_table SELECT generate_series(1,2); -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000002'; - span_type | span_operation | lvl ---------------+-----------------------------------------------------------------+----- - Insert query | INSERT INTO before_trigger_table SELECT generate_series($1,$2); | 1 - Planner | Planner | 2 - Executor | ExecutorRun | 2 - Select query | SELECT $1 | 3 - Planner | Planner | 4 - Executor | ExecutorRun | 4 - Select query | SELECT 'SELECT 1' | 3 - Executor | ExecutorRun | 4 -(8 rows) +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000002'; + trace_id | span_type | span_operation | lvl +----------------------------------+--------------+-----------------------------------------------------------------+----- + 00000000000000000000000000000002 | Insert query | INSERT INTO before_trigger_table SELECT generate_series($1,$2); | 1 + 00000000000000000000000000000002 | Planner | Planner | 2 + 00000000000000000000000000000002 | Executor | ExecutorRun | 2 + 00000000000000000000000000000002 | Insert | Insert on before_trigger_table | 3 + 00000000000000000000000000000002 | ProjectSet | ProjectSet | 4 + 00000000000000000000000000000002 | Result | Result | 5 + 00000000000000000000000000000002 | Select query | SELECT $1 | 5 + 00000000000000000000000000000002 | Planner | Planner | 6 + 00000000000000000000000000000002 | Executor | ExecutorRun | 6 + 00000000000000000000000000000002 | Result | Result | 7 + 00000000000000000000000000000002 | Select query | SELECT 'SELECT 1' | 5 + 00000000000000000000000000000002 | Executor | ExecutorRun | 6 + 00000000000000000000000000000002 | Result | Result | 7 +(13 rows) -- Call before trigger update /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ UPDATE before_trigger_table set a=1; -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; - span_type | span_operation | lvl ---------------+---------------------------------------+----- - Update query | UPDATE before_trigger_table set a=$1; | 1 - Planner | Planner | 2 - Executor | ExecutorRun | 2 - Select query | SELECT $1 | 3 - Planner | Planner | 4 - Executor | ExecutorRun | 4 - Select query | SELECT 'SELECT 1' | 3 - Executor | ExecutorRun | 4 -(8 rows) +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; + trace_id | span_type | span_operation | lvl +----------------------------------+--------------+---------------------------------------+----- + 00000000000000000000000000000003 | Update query | UPDATE before_trigger_table set a=$1; | 1 + 00000000000000000000000000000003 | Planner | Planner | 2 + 00000000000000000000000000000003 | Executor | ExecutorRun | 2 + 00000000000000000000000000000003 | Update | Update on before_trigger_table | 3 + 00000000000000000000000000000003 | SeqScan | SeqScan on before_trigger_table | 4 + 00000000000000000000000000000003 | Select query | SELECT $1 | 3 + 00000000000000000000000000000003 | Planner | Planner | 4 + 00000000000000000000000000000003 | Executor | ExecutorRun | 4 + 00000000000000000000000000000003 | Result | Result | 5 + 00000000000000000000000000000003 | Select query | SELECT 'SELECT 1' | 3 + 00000000000000000000000000000003 | Executor | ExecutorRun | 4 + 00000000000000000000000000000003 | Result | Result | 5 +(12 rows) -- Call before trigger delete /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ DELETE FROM before_trigger_table where a=1; -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; - span_type | span_operation | lvl ---------------+----------------------------------------------+----- - Delete query | DELETE FROM before_trigger_table where a=$1; | 1 - Planner | Planner | 2 - Executor | ExecutorRun | 2 - Select query | SELECT $1 | 3 - Planner | Planner | 4 - Executor | ExecutorRun | 4 - Select query | SELECT 'SELECT 1' | 3 - Executor | ExecutorRun | 4 -(8 rows) +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; + trace_id | span_type | span_operation | lvl +----------------------------------+--------------+----------------------------------------------+----- + 00000000000000000000000000000004 | Delete query | DELETE FROM before_trigger_table where a=$1; | 1 + 00000000000000000000000000000004 | Planner | Planner | 2 + 00000000000000000000000000000004 | Executor | ExecutorRun | 2 + 00000000000000000000000000000004 | Delete | Delete on before_trigger_table | 3 + 00000000000000000000000000000004 | SeqScan | SeqScan on before_trigger_table | 4 + 00000000000000000000000000000004 | Select query | SELECT $1 | 3 + 00000000000000000000000000000004 | Planner | Planner | 4 + 00000000000000000000000000000004 | Executor | ExecutorRun | 4 + 00000000000000000000000000000004 | Result | Result | 5 + 00000000000000000000000000000004 | Select query | SELECT 'SELECT 1' | 3 + 00000000000000000000000000000004 | Executor | ExecutorRun | 4 + 00000000000000000000000000000004 | Result | Result | 5 +(12 rows) -- Check count of query_id SELECT count(distinct query_id) from pg_tracing_consume_spans; diff --git a/expected/utility.out b/expected/utility.out index bf4d787..de9c85f 100644 --- a/expected/utility.out +++ b/expected/utility.out @@ -84,7 +84,7 @@ CREATE EXTENSION pg_tracing; SET pg_tracing.sample_rate = 0.0; -- View displaying spans with their nested level CREATE VIEW peek_spans_with_level AS - WITH RECURSIVE list_trace_spans(trace_id, parent_id, span_id, query_id, span_type, span_operation, span_start, span_end, sql_error_code, userid, dbid, pid, subxact_count, plan_startup_cost, plan_total_cost, plan_rows, plan_width, rows, nloops, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, blk_read_time, blk_write_time, temp_blks_read, temp_blks_written, temp_blk_read_time, temp_blk_write_time, wal_records, wal_fpi, wal_bytes, jit_functions, jit_generation_time, jit_inlining_time, jit_optimization_time, jit_emission_time, startup, parameters, lvl) AS ( + WITH RECURSIVE list_trace_spans(trace_id, parent_id, span_id, query_id, span_type, span_operation, span_start, span_end, sql_error_code, userid, dbid, pid, subxact_count, plan_startup_cost, plan_total_cost, plan_rows, plan_width, rows, nloops, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, blk_read_time, blk_write_time, temp_blks_read, temp_blks_written, temp_blk_read_time, temp_blk_write_time, wal_records, wal_fpi, wal_bytes, jit_functions, jit_generation_time, jit_inlining_time, jit_optimization_time, jit_emission_time, startup, parameters, deparse_info, lvl) AS ( SELECT p.*, 1 FROM pg_tracing_peek_spans p where not parent_id=ANY(SELECT span_id from pg_tracing_peek_spans) UNION ALL @@ -166,7 +166,8 @@ select span_operation, parameters, lvl from peek_ordered_spans; PREPARE test_prepared_one_param_2 (integer) AS SELECT $1; | $1 = '100' | 3 Planner | | 4 ExecutorRun | | 4 -(8 rows) + Result | | 5 +(9 rows) -- Check the top span (standalone top span has trace_id=parent_id) select span_operation, parameters, lvl from peek_ordered_spans where right(trace_id, 16) = parent_id; @@ -203,9 +204,10 @@ select span_operation, parameters, lvl from peek_ordered_spans; ProcessUtility | | 2 PREPARE test_prepared_one_param (integer) AS SELECT $1; | | 3 ExecutorRun | | 4 + Result | | 5 SET plan_cache_mode TO DEFAULT; | | 1 ProcessUtility | | 2 -(8 rows) +(9 rows) -- Check the top span (standalone top span has trace_id=parent_id) select span_operation, parameters, lvl from peek_ordered_spans where right(trace_id, 16) = parent_id; @@ -262,8 +264,10 @@ select trace_id, span_type, span_operation, sql_error_code, lvl from peek_ordere 00000000000000000000000000000004 | Select query | select function_with_error($1::int[]); | 42P13 | 1 00000000000000000000000000000004 | Planner | Planner | 00000 | 2 00000000000000000000000000000004 | Executor | ExecutorRun | 42P13 | 2 - 00000000000000000000000000000004 | Select query | select s from pg_catalog.generate_series($1, $2, $3) as g(s) | 00000 | 3 -(4 rows) + 00000000000000000000000000000004 | ProjectSet | ProjectSet | 42P13 | 3 + 00000000000000000000000000000004 | Result | Result | 42P13 | 4 + 00000000000000000000000000000004 | Select query | select s from pg_catalog.generate_series($1, $2, $3) as g(s) | 00000 | 4 +(6 rows) -- Cleanup CALL clean_spans(); diff --git a/expected/wal.out b/expected/wal.out index 9e8fa02..19c8b7c 100644 --- a/expected/wal.out +++ b/expected/wal.out @@ -7,18 +7,27 @@ SELECT trace_id, span_type, span_operation, wal_records > 0 as wal_records, wal_bytes > 0 as wal_bytes FROM peek_ordered_spans; - trace_id | span_type | span_operation | wal_records | wal_bytes -----------------------------------+--------------+------------------------------------------------------------------+-------------+----------- - 00000000000000000000000000000001 | Insert query | INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | t | t - 00000000000000000000000000000001 | Planner | Planner | f | f - 00000000000000000000000000000001 | Executor | ExecutorRun | | - 00000000000000000000000000000002 | Update query | UPDATE pg_tracing_test SET b = $1 WHERE a = $2; | t | t - 00000000000000000000000000000002 | Planner | Planner | f | f - 00000000000000000000000000000002 | Executor | ExecutorRun | | - 00000000000000000000000000000003 | Delete query | DELETE FROM pg_tracing_test WHERE a = $1; | t | t - 00000000000000000000000000000003 | Planner | Planner | f | f - 00000000000000000000000000000003 | Executor | ExecutorRun | | -(9 rows) + trace_id | span_type | span_operation | wal_records | wal_bytes +----------------------------------+-----------------+------------------------------------------------------------------+-------------+----------- + 00000000000000000000000000000001 | Insert query | INSERT INTO pg_tracing_test VALUES(generate_series($1, $2), $3); | t | t + 00000000000000000000000000000001 | Planner | Planner | f | f + 00000000000000000000000000000001 | Executor | ExecutorRun | | + 00000000000000000000000000000001 | Insert | Insert on pg_tracing_test | t | t + 00000000000000000000000000000001 | ProjectSet | ProjectSet | f | f + 00000000000000000000000000000001 | Result | Result | f | f + 00000000000000000000000000000002 | Update query | UPDATE pg_tracing_test SET b = $1 WHERE a = $2; | t | t + 00000000000000000000000000000002 | Planner | Planner | f | f + 00000000000000000000000000000002 | Executor | ExecutorRun | | + 00000000000000000000000000000002 | Update | Update on pg_tracing_test | t | t + 00000000000000000000000000000002 | BitmapHeapScan | BitmapHeapScan on pg_tracing_test | f | f + 00000000000000000000000000000002 | BitmapIndexScan | BitmapIndexScan on pg_tracing_index | f | f + 00000000000000000000000000000003 | Delete query | DELETE FROM pg_tracing_test WHERE a = $1; | t | t + 00000000000000000000000000000003 | Planner | Planner | f | f + 00000000000000000000000000000003 | Executor | ExecutorRun | | + 00000000000000000000000000000003 | Delete | Delete on pg_tracing_test | t | t + 00000000000000000000000000000003 | BitmapHeapScan | BitmapHeapScan on pg_tracing_test | f | f + 00000000000000000000000000000003 | BitmapIndexScan | BitmapIndexScan on pg_tracing_index | f | f +(18 rows) CALL clean_spans(); -- Cleanup diff --git a/pg_tracing--0.1.0.sql b/pg_tracing--0.1.0.sql index 6567007..3ce2c0a 100644 --- a/pg_tracing--0.1.0.sql +++ b/pg_tracing--0.1.0.sql @@ -77,7 +77,8 @@ CREATE FUNCTION pg_tracing_spans( -- Span node specific data OUT startup bigint, -- First tuple - OUT parameters text + OUT parameters text, + OUT deparse_info text ) RETURNS SETOF record AS 'MODULE_PATHNAME' diff --git a/pg_tracing.conf b/pg_tracing.conf index ab40cd2..3a91c1d 100644 --- a/pg_tracing.conf +++ b/pg_tracing.conf @@ -1,2 +1,2 @@ shared_preload_libraries = 'pg_tracing' -pg_tracing.max_span = 40 +pg_tracing.max_span = 50 diff --git a/sql/cleanup.sql b/sql/cleanup.sql index 43b5483..0757394 100644 --- a/sql/cleanup.sql +++ b/sql/cleanup.sql @@ -2,4 +2,6 @@ DROP TABLE pg_tracing_test; DROP function test_function_project_set; DROP function test_function_result; -DROP EXTENSION pg_tracing CASCADE; +DROP VIEW peek_ordered_spans; +DROP VIEW peek_spans_with_level; +DROP EXTENSION pg_tracing; diff --git a/sql/full_buffer.sql b/sql/full_buffer.sql index d046b32..ea130cf 100644 --- a/sql/full_buffer.sql +++ b/sql/full_buffer.sql @@ -17,14 +17,14 @@ select processed_traces, processed_spans, dropped_traces, dropped_spans from pg_ -- Saturate the span buffer. Each call should create at least 2 spans /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ CALL loop_select(20); -- Check that we have dropped spans. The trace was still partially processed -select processed_traces = 1, processed_spans = 40, dropped_traces = 0, dropped_spans > 0 from pg_tracing_info(); +select processed_traces = 1, processed_spans = 50, dropped_traces = 0, dropped_spans > 0 from pg_tracing_info(); -- Try to create new traces while buffer is full /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ SELECT 1; /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-00'*/ SELECT 1; -- We should have only one additional dropped trace -select processed_traces = 1, processed_spans = 40, dropped_traces = 1 from pg_tracing_info(); +select processed_traces = 1, processed_spans = 50, dropped_traces = 1 from pg_tracing_info(); -- Clean current spans CALL clean_spans(); diff --git a/sql/nested.sql b/sql/nested.sql index 1f0af18..02040e2 100644 --- a/sql/nested.sql +++ b/sql/nested.sql @@ -20,43 +20,69 @@ LANGUAGE plpgsql; /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000051-0000000000000051-01'*/ select test_function_project_set(1); -- The test function call will generate the following spans (non exhaustive list): --- +------------------------------------------------------------+ --- | A: Select test_function_project_set(1); | --- +-+----------+-+------------------------------------------+--+ --- |C: Planner| |E: ExecutorRun | --- +----------+ +-----------+------------------------------+ --- | I: Select a from b where... | --- +---+--------------+-----------+ --- |J: ExecutorRun| --- +--------------+ +-- +---------------------------------------------------------------------------------+ +-- | A: Select test_function_project_set(1); | +-- +-+----------++----------------++-------------------------------------------------+ +-- |B: Planner||C: ExecutorStart||D: ExecutorRun | +-- +----------++----------------+-+---------------------------------------------+ +-- |E: ProjectSet | +-- ++---------+------------------------------+-+ +-- |F: Result| G: Select a from b where... | +-- +---------+--------------+---------------+ +-- |H: ExecutorRun| +-- +--------------+ -- Gather span_id, span start and span end of function call statement SELECT span_id AS span_a_id, get_span_start(span_start) as span_a_start, get_span_end(span_start) as span_a_end from pg_tracing_peek_spans where parent_id='0000000000000051' \gset +SELECT span_id AS span_d_id, + get_span_start(span_start) as span_d_start, + get_span_end(span_start) as span_d_end + from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset SELECT span_id AS span_e_id, get_span_start(span_start) as span_e_start, get_span_end(span_start) as span_e_end - from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset -SELECT span_id AS span_i_id, - get_span_start(span_start) as span_i_start, - get_span_end(span_start) as span_i_end + from pg_tracing_peek_spans where parent_id=:'span_d_id' and span_type='ProjectSet' \gset +SELECT span_id AS span_g_id, + get_span_start(span_start) as span_g_start, + get_span_end(span_start) as span_g_end + from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Result' \gset +SELECT span_id AS span_g_id, + get_span_start(span_start) as span_g_start, + get_span_end(span_start) as span_g_end from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Select query' \gset -SELECT span_id AS span_j_id, - get_span_start(span_start) as span_j_start, - get_span_end(span_start) as span_j_end - from pg_tracing_peek_spans where parent_id=:'span_i_id' and span_operation='ExecutorRun' \gset +SELECT span_id AS span_h_id, + get_span_start(span_start) as span_h_start, + get_span_end(span_start) as span_h_end + from pg_tracing_peek_spans where parent_id=:'span_g_id' and span_operation='ExecutorRun' \gset -- Check that spans' start and end are within expection -SELECT :span_a_start <= :span_e_start AS top_query_before_run, - :span_a_end >= :span_e_end AS top_ends_after_run_end; +SELECT :span_a_start <= :span_d_start AS top_query_before_run, + :span_a_end >= :span_d_end AS top_ends_after_run_end, + + :span_d_start <= :span_e_start AS top_run_starts_before_project, + + :span_d_end >= :span_e_end AS top_run_ends_after_project_end, + :span_d_end >= :span_h_end AS top_run_ends_before_select_end, + :span_d_end >= :span_g_end AS top_run_ends_after_nested_run_end; + +SELECT + :span_g_end >= :span_h_start AS nested_result_ends_before_parse, + :span_h_end <= :span_g_start AS nested_parse_ends_before_select, + + :span_h_start >= :span_g_start AS run_starts_after_parent_select, + :span_h_end <= :span_g_end AS run_ends_after_select_end; -- Check that the root span is the longest one WITH max_end AS (select max(span_end) from pg_tracing_peek_spans) SELECT span_end = max_end.max from pg_tracing_peek_spans, max_end where span_id = :'span_a_id'; +-- Check that ExecutorRun is attached to the nested top span +SELECT span_operation, deparse_info from pg_tracing_peek_spans where parent_id=:'span_h_id' order by span_operation; + -- Check tracking with top tracking SET pg_tracing.track = 'top'; /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000052-0000000000000052-01'*/ select test_function_project_set(1); @@ -96,15 +122,38 @@ LANGUAGE sql IMMUTABLE; /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000056-0000000000000056-01'*/ select test_immutable_function(1); select span_operation, lvl FROM peek_ordered_spans where trace_id='00000000000000000000000000000056'; --- +-------------------------------------------------------------------+ --- | A: Select test_function(1); | --- +-+----------++----------------++-------------------------------+---+ --- |C: Planner||D: ExecutorStart||E: ExecutorRun | --- +----------++----------------+++------------------------------+ --- |H: Insert INTO... | --- +--+--------------+---+ --- |I: ExecutorRun| --- +--------------+ +-- Create function with generate series + +CREATE OR REPLACE FUNCTION test_generate_series(IN anyarray, OUT x anyelement) + RETURNS SETOF anyelement + LANGUAGE sql + AS 'select * from pg_catalog.generate_series(array_lower($1, 1), array_upper($1, 1), 1)'; + +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000057-0000000000000057-01'*/ select test_generate_series('{1,2,3,4}'::int[]) FROM (VALUES (1,2)); + +SELECT span_id AS span_project_set_id, + get_span_start(span_start) as span_project_set_start, + get_span_end(span_start) as span_project_set_end + from pg_tracing_peek_spans where span_type='ProjectSet' \gset + +SELECT span_id AS span_result_id, + get_span_start(span_start) as span_result_start, + get_span_end(span_start) as span_result_end + from pg_tracing_peek_spans where parent_id=:'span_project_set_id' and span_type='Result' \gset + +select span_operation, lvl FROM peek_ordered_spans where trace_id='00000000000000000000000000000057'; + +-- +-----------------------------------------------------------+ +-- | A: Select test_function(1); | +-- +-+----------+---+--------------------------------------+---+ +-- |B: Planner| |C: ExecutorRun | +-- +----------+ ++-------------------------------------+ +-- |D: Result | +-- +----+-----------------------------+-+ +-- |E: Insert INTO... | +-- +---+--------------+----------+ +-- |F: ExecutorRun| +-- +--------------+ -- Check function with result node /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000058-0000000000000058-01'*/ select test_function_result(1, 'test'); @@ -114,14 +163,18 @@ SELECT span_id AS span_a_id, get_span_start(span_start) as span_a_start, get_span_end(span_start) as span_a_end from pg_tracing_peek_spans where parent_id='0000000000000058' \gset +SELECT span_id AS span_c_id, + get_span_start(span_start) as span_c_start, + get_span_end(span_start) as span_c_end + from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset +SELECT span_id AS span_d_id, + get_span_start(span_start) as span_d_start, + get_span_end(span_start) as span_d_end + from pg_tracing_peek_spans where parent_id=:'span_c_id' and span_type='Result' \gset SELECT span_id AS span_e_id, get_span_start(span_start) as span_e_start, get_span_end(span_start) as span_e_end - from pg_tracing_peek_spans where parent_id=:'span_a_id' and span_type='Executor' and span_operation='ExecutorRun' \gset -SELECT span_id AS span_h_id, - get_span_start(span_start) as span_h_start, - get_span_end(span_start) as span_h_end - from pg_tracing_peek_spans where parent_id=:'span_e_id' and span_type='Insert query' \gset + from pg_tracing_peek_spans where parent_id=:'span_d_id' and span_type='Insert query' \gset select span_operation, lvl FROM peek_ordered_spans where trace_id='00000000000000000000000000000058'; diff --git a/sql/planstate.sql b/sql/planstate.sql new file mode 100644 index 0000000..74a96f9 --- /dev/null +++ b/sql/planstate.sql @@ -0,0 +1,31 @@ + +-- Test with planstate_spans disabled +SET pg_tracing.planstate_spans = false; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000001-0000000000000001-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; +SELECT span_type, span_operation, deparse_info FROM peek_ordered_spans where trace_id='00000000000000000000000000000001'; + +-- Test with planstate_spans enabled +SET pg_tracing.planstate_spans = true; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ SELECT s.relation_size + s.index_size +FROM (SELECT + pg_relation_size(C.oid) as relation_size, + pg_indexes_size(C.oid) as index_size + FROM pg_class C) as s limit 1; +SELECT span_type, span_operation, deparse_info FROM peek_ordered_spans where trace_id='00000000000000000000000000000002'; + +-- Check generated spans when deparse is disabled +SET pg_tracing.deparse_plan=false; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ SELECT * from pg_tracing_test where a=1; +SELECT span_operation, deparse_info, parameters, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; + +-- Check generated spans when deparse is enabled +SET pg_tracing.deparse_plan=true; +/*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ SELECT * from pg_tracing_test where a=1; +SELECT span_operation, deparse_info, parameters, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; + +-- Clean created spans +CALL clean_spans(); diff --git a/sql/trigger.sql b/sql/trigger.sql index bc995f2..5b14d5c 100644 --- a/sql/trigger.sql +++ b/sql/trigger.sql @@ -153,19 +153,19 @@ CREATE TRIGGER employee_delete_trigger -- Test explain on a before trigger -- This will go through ExecutorEnd without any parent executor run /*dddbs='postgres.db',traceparent='00-fed00000000000000000000000000001-0000000000000003-01'*/ explain INSERT INTO before_trigger_table VALUES(10); -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='fed00000000000000000000000000001'; +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='fed00000000000000000000000000001'; -- Call before trigger insert /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000002-0000000000000002-01'*/ INSERT INTO before_trigger_table SELECT generate_series(1,2); -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000002'; +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000002'; -- Call before trigger update /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000003-0000000000000003-01'*/ UPDATE before_trigger_table set a=1; -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000003'; -- Call before trigger delete /*dddbs='postgres.db',traceparent='00-00000000000000000000000000000004-0000000000000004-01'*/ DELETE FROM before_trigger_table where a=1; -SELECT span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; +SELECT trace_id, span_type, span_operation, lvl from peek_ordered_spans where trace_id='00000000000000000000000000000004'; -- Check count of query_id SELECT count(distinct query_id) from pg_tracing_consume_spans; diff --git a/sql/utility.sql b/sql/utility.sql index 118abe4..946da44 100644 --- a/sql/utility.sql +++ b/sql/utility.sql @@ -96,7 +96,7 @@ SET pg_tracing.sample_rate = 0.0; -- View displaying spans with their nested level CREATE VIEW peek_spans_with_level AS - WITH RECURSIVE list_trace_spans(trace_id, parent_id, span_id, query_id, span_type, span_operation, span_start, span_end, sql_error_code, userid, dbid, pid, subxact_count, plan_startup_cost, plan_total_cost, plan_rows, plan_width, rows, nloops, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, blk_read_time, blk_write_time, temp_blks_read, temp_blks_written, temp_blk_read_time, temp_blk_write_time, wal_records, wal_fpi, wal_bytes, jit_functions, jit_generation_time, jit_inlining_time, jit_optimization_time, jit_emission_time, startup, parameters, lvl) AS ( + WITH RECURSIVE list_trace_spans(trace_id, parent_id, span_id, query_id, span_type, span_operation, span_start, span_end, sql_error_code, userid, dbid, pid, subxact_count, plan_startup_cost, plan_total_cost, plan_rows, plan_width, rows, nloops, shared_blks_hit, shared_blks_read, shared_blks_dirtied, shared_blks_written, local_blks_hit, local_blks_read, local_blks_dirtied, local_blks_written, blk_read_time, blk_write_time, temp_blks_read, temp_blks_written, temp_blk_read_time, temp_blk_write_time, wal_records, wal_fpi, wal_bytes, jit_functions, jit_generation_time, jit_inlining_time, jit_optimization_time, jit_emission_time, startup, parameters, deparse_info, lvl) AS ( SELECT p.*, 1 FROM pg_tracing_peek_spans p where not parent_id=ANY(SELECT span_id from pg_tracing_peek_spans) UNION ALL diff --git a/src/pg_tracing.c b/src/pg_tracing.c index 10eec8b..d904817 100644 --- a/src/pg_tracing.c +++ b/src/pg_tracing.c @@ -60,6 +60,7 @@ #include "tcop/utility.h" #include "utils/builtins.h" #include "utils/varlena.h" +#include "utils/ruleutils.h" PG_MODULE_MAGIC; @@ -93,6 +94,12 @@ typedef struct pgTracingPerLevelBuffer { uint64 query_id; /* Query id by for this level when available */ pgTracingSpans *top_spans; /* top spans for the nested level */ + uint64 executor_run_span_id; /* executor run span id for this + * level. Executor run is used as + * parent for spans generated from + * planstate */ + TimestampTz executor_start; + TimestampTz executor_end; } pgTracingPerLevelBuffer; /* @@ -108,6 +115,10 @@ typedef struct pgTracingQueryIdFilter static int pg_tracing_max_span; /* Maximum number of spans to store */ static int pg_tracing_max_parameter_str; /* Maximum number of spans to * store */ +static bool pg_tracing_planstate_spans = true; /* Generate spans from the + * execution plan */ +static bool pg_tracing_deparse_plan = true; /* Deparse plan to generate more + * detailed spans */ static bool pg_tracing_trace_parallel_workers = true; /* True to generate * spans from parallel * workers */ @@ -149,6 +160,7 @@ static const struct config_enum_entry buffer_mode_options[] = #define pg_tracing_enabled(trace_context, level) \ (trace_context->traceparent.sampled && pg_tracking_level(level)) +#define US_IN_S INT64CONST(1000000) #define INT64_HEX_FORMAT "%016" INT64_MODIFIER "x" PG_FUNCTION_INFO_V1(pg_tracing_info); @@ -160,7 +172,7 @@ PG_FUNCTION_INFO_V1(pg_tracing_reset); */ /* Memory context for pg_tracing. */ -static MemoryContext pg_tracing_mem_ctx; +MemoryContext pg_tracing_mem_ctx; /* trace context at the root level of parse/planning hook */ static struct pgTracingTraceContext root_trace_context; @@ -206,7 +218,7 @@ static int max_nested_level = -1; static int plan_nested_level = 0; /* Current nesting depth of ExecutorRun+ProcessUtility calls */ -static int exec_nested_level = 0; +int exec_nested_level = 0; /* Number of spans initially allocated at the start of a trace. */ static int pg_tracing_initial_allocated_spans = 25; @@ -252,7 +264,6 @@ static pgTracingStats get_empty_pg_tracing_stats(void); static bool check_filter_query_ids(char **newval, void **extra, GucSource source); static void assign_filter_query_ids(const char *newval, void *extra); - static void cleanup_tracing(void); /* @@ -301,6 +312,28 @@ _PG_init(void) NULL, NULL); + DefineCustomBoolVariable("pg_tracing.deparse_plan", + "Deparse query plan to generate more details on a plan node.", + NULL, + &pg_tracing_deparse_plan, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + + DefineCustomBoolVariable("pg_tracing.planstate_spans", + "Generate spans from the executed plan.", + NULL, + &pg_tracing_planstate_spans, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + DefineCustomEnumVariable("pg_tracing.track", "Selects which statements are tracked by pg_tracing.", NULL, @@ -525,7 +558,7 @@ pg_tracing_shmem_request(void) /* * Append a string current_trace_text StringInfo */ -static int +int add_str_to_trace_buffer(const char *str, int str_len) { int position = current_trace_text->cursor; @@ -556,7 +589,7 @@ add_worker_name_to_trace_buffer(StringInfo str_info, int parallel_worker_number) /* * Store a span in the current_trace_spans buffer */ -static void +void store_span(const Span * span) { Assert(span->ended); @@ -821,7 +854,7 @@ pg_tracing_shmem_startup(void) */ static void process_query_desc(pgTracingTraceContext * trace_context, const QueryDesc *queryDesc, - int sql_error_code, TimestampTz *end_time) + int sql_error_code, TimestampTz parent_end) { NodeCounters *node_counters = &get_latest_top_span(exec_nested_level)->node_counters; @@ -836,6 +869,32 @@ process_query_desc(pgTracingTraceContext * trace_context, const QueryDesc *query if (queryDesc->estate->es_jit) node_counters->jit_usage = queryDesc->estate->es_jit->instr; node_counters->rows = queryDesc->estate->es_total_processed; + + + /* Process planstate */ + if (queryDesc->planstate && queryDesc->planstate->instrument != NULL && pg_tracing_planstate_spans) + { + Bitmapset *rels_used = NULL; + planstateTraceContext planstateTraceContext; + TimestampTz latest_end = 0; + uint64 parent_id = per_level_buffers[exec_nested_level].executor_run_span_id; + uint64 query_id = per_level_buffers[exec_nested_level].query_id; + TimestampTz parent_start = per_level_buffers[exec_nested_level].executor_start; + + planstateTraceContext.rtable_names = select_rtable_names_for_explain(queryDesc->plannedstmt->rtable, rels_used); + planstateTraceContext.trace_id = trace_context->traceparent.trace_id; + planstateTraceContext.ancestors = NULL; + planstateTraceContext.sql_error_code = sql_error_code; + /* Prepare the planstate context for deparsing */ + planstateTraceContext.deparse_ctx = NULL; + if (pg_tracing_deparse_plan) + planstateTraceContext.deparse_ctx = + deparse_context_for_plan_tree(queryDesc->plannedstmt, + planstateTraceContext.rtable_names); + + generate_span_from_planstate(queryDesc->planstate, &planstateTraceContext, + parent_id, query_id, parent_start, parent_end, &latest_end); + } } /* @@ -1028,6 +1087,7 @@ cleanup_tracing(void) max_nested_level = -1; current_trace_spans = NULL; per_level_buffers = NULL; + cleanup_planstarts(); } /* @@ -1125,7 +1185,7 @@ handle_pg_error(pgTracingTraceContext * trace_context, Span * ongoing_span, sql_error_code = geterrcode(); if (queryDesc != NULL) - process_query_desc(trace_context, queryDesc, sql_error_code, &span_end_time); + process_query_desc(trace_context, queryDesc, sql_error_code, span_end_time); /* End all ongoing top spans */ for (int i = 0; i <= max_nested_level; i++) @@ -1239,7 +1299,19 @@ begin_top_span(pgTracingTraceContext * trace_context, Span * top_span, /* Root top span, use the parent id from the trace context */ parent_id = trace_context->traceparent.parent_id; else - parent_id = get_latest_top_span(exec_nested_level - 1)->span_id; + { + TracedPlanstate *parent_traced_planstate = get_parent_traced_planstate(); + Span *latest_top_span = get_latest_top_span(exec_nested_level - 1); + + /* + * Both planstate and previous top span can be the parent for the new + * top span, we use the most recent as a parent + */ + if (parent_traced_planstate != NULL && parent_traced_planstate->node_start > latest_top_span->start) + parent_id = parent_traced_planstate->span_id; + else + parent_id = latest_top_span->span_id; + } begin_span(trace_context->traceparent.trace_id, top_span, command_type_to_span_type(commandType), @@ -1607,6 +1679,13 @@ pg_tracing_ExecutorStart(QueryDesc *queryDesc, int eflags) */ initialize_top_span(trace_context, queryDesc->operation, NULL, NULL, NULL, queryDesc->sourceText, start_span_time, false); + + /* + * We only need full instrumenation if we generate spans from + * planstate + */ + if (pg_tracing_planstate_spans) + queryDesc->instrument_options = INSTRUMENT_ALL; } if (prev_ExecutorStart) @@ -1650,6 +1729,8 @@ pg_tracing_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 cou begin_span(trace_context->traceparent.trace_id, executor_run_span, SPAN_EXECUTOR_RUN, NULL, parent_id, per_level_buffers[exec_nested_level].query_id, &span_start_time); + per_level_buffers[exec_nested_level].executor_run_span_id = executor_run_span->span_id; + per_level_buffers[exec_nested_level].executor_start = executor_run_span->start; /* * If this query starts parallel worker, push the trace context for @@ -1658,6 +1739,14 @@ pg_tracing_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 cou if (queryDesc->plannedstmt->parallelModeNeeded && pg_tracing_trace_parallel_workers) add_parallel_context(trace_context, executor_run_span->span_id, per_level_buffers[exec_nested_level].query_id); + + if (pg_tracing_planstate_spans) + + /* + * Setup ExecProcNode override to capture node start if planstate + * spans were requested + */ + setup_ExecProcNode_override(queryDesc, exec_nested_level); } exec_nested_level++; @@ -1694,6 +1783,7 @@ pg_tracing_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 cou { span_end_time = end_nested_level(); /* End ExecutorRun span and store it */ + per_level_buffers[exec_nested_level].executor_end = span_end_time; end_latest_top_span(&span_end_time, true); } else @@ -1790,7 +1880,12 @@ pg_tracing_ExecutorEnd(QueryDesc *queryDesc) bool executor_sampled = pg_tracing_enabled(trace_context, exec_nested_level) && queryDesc->totaltime != NULL; if (executor_sampled) - process_query_desc(trace_context, queryDesc, 0, NULL); + { + TimestampTz parent_end = per_level_buffers[exec_nested_level].executor_end; + + process_query_desc(trace_context, queryDesc, 0, parent_end); + drop_traced_planstate(exec_nested_level); + } /* No need to increment nested level here */ if (prev_ExecutorEnd) @@ -2056,7 +2151,7 @@ static void add_result_span(ReturnSetInfo *rsinfo, Span * span, const char *qbuffer, Size qbuffer_size) { -#define PG_TRACING_TRACES_COLS 43 +#define PG_TRACING_TRACES_COLS 44 Datum values[PG_TRACING_TRACES_COLS] = {0}; bool nulls[PG_TRACING_TRACES_COLS] = {0}; const char *span_type; @@ -2097,17 +2192,20 @@ add_result_span(ReturnSetInfo *rsinfo, Span * span, values[i++] = UInt8GetDatum(span->subxact_count); /* Only node and top spans have counters */ - if ((span->type >= SPAN_TOP_SELECT && span->type <= SPAN_TOP_UNKNOWN) + if ((span->type >= SPAN_NODE && span->type <= SPAN_TOP_UNKNOWN) || span->type == SPAN_PLANNER) { i = add_plan_counters(&span->plan_counters, i, values); i = add_node_counters(&span->node_counters, i, values); - values[i++] = Int64GetDatumFast(span->startup); + if (span->parameter_offset != -1 && qbuffer_size > 0 && qbuffer_size > span->parameter_offset) values[i++] = CStringGetTextDatum(qbuffer + span->parameter_offset); else nulls[i++] = 1; + + if (span->deparse_info_offset != -1 && qbuffer_size > 0 && qbuffer_size > span->deparse_info_offset) + values[i++] = CStringGetTextDatum(qbuffer + span->deparse_info_offset); } for (int j = i; j < PG_TRACING_TRACES_COLS; j++) diff --git a/src/pg_tracing.h b/src/pg_tracing.h index 13d7c0d..88c5a4b 100644 --- a/src/pg_tracing.h +++ b/src/pg_tracing.h @@ -57,6 +57,11 @@ typedef enum SpanType SPAN_EXECUTOR_RUN, /* Wraps Executor run hook */ SPAN_EXECUTOR_FINISH, /* Wraps Executor finish hook */ + /* Represents a node execution, generated from planstate */ + SPAN_NODE, + SPAN_NODE_INIT_PLAN, + SPAN_NODE_SUBPLAN, + /* Top Span types. They are created from the query cmdType */ SPAN_TOP_SELECT, SPAN_TOP_INSERT, @@ -117,6 +122,7 @@ typedef struct Span Size node_type_offset; /* name of the node type */ Size operation_name_offset; /* operation represented by the span */ Size parameter_offset; /* query parameters values */ + Size deparse_info_offset; /* info from deparsed plan */ PlanCounters plan_counters; /* Counters with plan costs */ NodeCounters node_counters; /* Counters with node costs (jit, wal, @@ -152,6 +158,16 @@ typedef struct pgTracingSharedState pgTracingStats stats; /* global statistics for pg_tracing */ } pgTracingSharedState; +/* Context needed when generating spans from planstate */ +typedef struct planstateTraceContext +{ + TraceId trace_id; + int sql_error_code; + List *ancestors; + List *deparse_ctx; + List *rtable_names; +} planstateTraceContext; + /* * Traceparent values propagated by the caller */ @@ -191,6 +207,11 @@ typedef struct pgTracingParallelWorkers pgTracingParallelContext trace_contexts[FLEXIBLE_ARRAY_MEMBER]; } pgTracingParallelWorkers; +/* pg_tracing_explain.c */ +extern const char *plan_to_node_type(const Plan *plan); +extern const char *plan_to_operation(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const char *spanName); +extern const char *plan_to_deparse_info(const planstateTraceContext * planstateTraceContext, const PlanState *planstate); + /* pg_tracing_parallel.c */ extern void pg_tracing_shmem_parallel_startup(void); extern void add_parallel_context(const struct pgTracingTraceContext *trace_context, @@ -198,6 +219,37 @@ extern void add_parallel_context(const struct pgTracingTraceContext *trace_conte extern void remove_parallel_context(void); extern void fetch_parallel_context(pgTracingTraceContext * trace_context); +/* pg_tracing_planstate.c */ + +/* + * Match a planstate to the first start of a node. + * This is needed to set the start for spans generated from planstate. + */ +typedef struct TracedPlanstate +{ + PlanState *planstate; + TimestampTz node_start; + uint64 span_id; + int nested_level; +} TracedPlanstate; + +extern Span +create_span_node(PlanState *planstate, const planstateTraceContext * planstateTraceContext, + uint64 *span_id, uint64 parent_id, uint64 queryId, SpanType span_type, + char *subplan_name, TimestampTz span_start, TimestampTz span_end); +extern TimestampTz + generate_span_from_planstate(PlanState *planstate, planstateTraceContext * planstateTraceContext, + uint64 parent_id, uint64 query_id, + TimestampTz parent_start, TimestampTz root_end, TimestampTz *latest_end); +extern void + setup_ExecProcNode_override(QueryDesc *queryDesc, int exec_nested_level); +extern void + cleanup_planstarts(void); +extern TracedPlanstate * +get_parent_traced_planstate(void); +extern void + drop_traced_planstate(int exec_nested_level); + /* pg_tracing_query_process.c */ extern const char *normalise_query_parameters(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p, char **param_str, @@ -220,4 +272,13 @@ extern const char *get_operation_name(const Span * span, const char *qbuffer, Si extern void adjust_file_offset(Span * span, Size file_position); extern bool traceid_zero(TraceId trace_id); + +/* pg_tracing.c */ +extern MemoryContext pg_tracing_mem_ctx; +extern int exec_nested_level; +extern void + store_span(const Span * span); +extern int + add_str_to_trace_buffer(const char *str, int str_len); + #endif diff --git a/src/pg_tracing_explain.c b/src/pg_tracing_explain.c new file mode 100644 index 0000000..68dfa92 --- /dev/null +++ b/src/pg_tracing_explain.c @@ -0,0 +1,559 @@ +/*------------------------------------------------------------------------- + * + * pg_tracing_explain.c + * pg_tracing plan explain functions. + * + * IDENTIFICATION + * contrib/pg_tracing/pg_tracing_explain.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/makefuncs.h" +#include "utils/builtins.h" +#include "parser/parsetree.h" +#include "nodes/extensible.h" +#include "pg_tracing.h" +#include "utils/lsyscache.h" +#include "utils/ruleutils.h" + + +/* + * Add the target relation of a scan or modity node to the stringinfo + */ +static void +ExplainTargetRel(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, Index rti, StringInfo str) +{ + char *objectname = NULL; + char *namespace = NULL; + RangeTblEntry *rte; + char *refname; + + List *rtable; + + rtable = planstate->state->es_range_table; + rte = rt_fetch(rti, rtable); + + refname = (char *) list_nth(planstateTraceContext->rtable_names, rti - 1); + if (refname == NULL) + refname = rte->eref->aliasname; + + switch (nodeTag(planstate->plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_IndexScan: + case T_IndexOnlyScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_ForeignScan: + case T_CustomScan: + case T_ModifyTable: + /* Assert it's on a real relation */ + Assert(rte->rtekind == RTE_RELATION); + objectname = get_rel_name(rte->relid); + break; + case T_FunctionScan: + { + FunctionScan *fscan = (FunctionScan *) planstate->plan; + + /* Assert it's on a RangeFunction */ + Assert(rte->rtekind == RTE_FUNCTION); + + /* + * If the expression is still a function call of a single + * function, we can get the real name of the function. + * Otherwise, punt. (Even if it was a single function call + * originally, the optimizer could have simplified it away.) + */ + if (list_length(fscan->functions) == 1) + { + RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(fscan->functions); + + if (IsA(rtfunc->funcexpr, FuncExpr)) + { + FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr; + Oid funcid = funcexpr->funcid; + + objectname = get_func_name(funcid); + } + } + } + break; + case T_TableFuncScan: + Assert(rte->rtekind == RTE_TABLEFUNC); + objectname = "xmltable"; + break; + case T_ValuesScan: + Assert(rte->rtekind == RTE_VALUES); + break; + case T_CteScan: + /* Assert it's on a non-self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(!rte->self_reference); + objectname = rte->ctename; + break; + case T_NamedTuplestoreScan: + Assert(rte->rtekind == RTE_NAMEDTUPLESTORE); + objectname = rte->enrname; + break; + case T_WorkTableScan: + /* Assert it's on a self-reference CTE */ + Assert(rte->rtekind == RTE_CTE); + Assert(rte->self_reference); + objectname = rte->ctename; + break; + default: + break; + } + + appendStringInfoString(str, " on"); + if (namespace != NULL) + appendStringInfo(str, " %s.%s", quote_identifier(namespace), + quote_identifier(objectname)); + else if (objectname != NULL) + appendStringInfo(str, " %s", quote_identifier(objectname)); + if (objectname == NULL || strcmp(refname, objectname) != 0) + appendStringInfo(str, " %s", quote_identifier(refname)); +} + +/* + * Add the target of a Scan node to the stringinfo + */ +static void +ExplainScanTarget(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const Scan *plan, StringInfo str) +{ + ExplainTargetRel(planstateTraceContext, planstate, plan->scanrelid, str); +} + +/* + * Add the target of a ModifyTable node to the stringinfo + */ +static void +ExplainModifyTarget(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const ModifyTable *plan, StringInfo str) +{ + ExplainTargetRel(planstateTraceContext, planstate, plan->nominalRelation, str); +} + +/* + * Add some additioanl details about an IndexScan or IndexOnlyScan to the stringinfo + */ +static void +ExplainIndexScanDetails(Oid indexId, ScanDirection indexorderdir, + StringInfo str) +{ + const char *indexname = get_rel_name(indexId); + + if (ScanDirectionIsBackward(indexorderdir)) + appendStringInfoString(str, " Backward"); + appendStringInfo(str, " using %s", quote_identifier(indexname)); +} + +/* + * Generate a relation name from a planstate and add it to the stringinfo + */ +static void +plan_to_rel_name(const PlanState *planstate, const planstateTraceContext * planstateTraceContext, StringInfo str) +{ + const Plan *plan = planstate->plan; + + switch (nodeTag(plan)) + { + case T_SeqScan: + case T_SampleScan: + case T_BitmapHeapScan: + case T_TidScan: + case T_TidRangeScan: + case T_SubqueryScan: + case T_FunctionScan: + case T_TableFuncScan: + case T_ValuesScan: + case T_CteScan: + case T_WorkTableScan: + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) plan, str); + break; + case T_ForeignScan: + case T_CustomScan: + if (((Scan *) plan)->scanrelid > 0) + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) plan, str); + break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + str); + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) indexscan, str); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + str); + ExplainScanTarget(planstateTraceContext, planstate, (Scan *) indexonlyscan, str); + } + break; + case T_BitmapIndexScan: + { + BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; + const char *indexname = get_rel_name(bitmapindexscan->indexid); + + appendStringInfo(str, " on %s", + quote_identifier(indexname)); + } + break; + case T_ModifyTable: + ExplainModifyTarget(planstateTraceContext, planstate, (ModifyTable *) plan, str); + break; + case T_NestLoop: + case T_MergeJoin: + case T_HashJoin: + { + const char *jointype; + + switch (((Join *) plan)->jointype) + { + case JOIN_INNER: + jointype = "Inner"; + break; + case JOIN_LEFT: + jointype = "Left"; + break; + case JOIN_FULL: + jointype = "Full"; + break; + case JOIN_RIGHT: + jointype = "Right"; + break; + case JOIN_SEMI: + jointype = "Semi"; + break; + case JOIN_ANTI: + jointype = "Anti"; + break; + case JOIN_RIGHT_ANTI: + jointype = "Right Anti"; + break; + default: + jointype = "???"; + break; + } + if (((Join *) plan)->jointype != JOIN_INNER) + appendStringInfo(str, " %s Join", jointype); + else if (!IsA(plan, NestLoop)) + appendStringInfoString(str, " Join"); + } + break; + case T_SetOp: + { + const char *setopcmd; + + switch (((SetOp *) plan)->cmd) + { + case SETOPCMD_INTERSECT: + setopcmd = "Intersect"; + break; + case SETOPCMD_INTERSECT_ALL: + setopcmd = "Intersect All"; + break; + case SETOPCMD_EXCEPT: + setopcmd = "Except"; + break; + case SETOPCMD_EXCEPT_ALL: + setopcmd = "Except All"; + break; + default: + setopcmd = "???"; + break; + } + appendStringInfo(str, " %s", setopcmd); + } + break; + default: + break; + } +} + +/* + * Generate scan qualifier from deparsed expression and add it to stringinfo + */ +static void +add_scan_qual(StringInfo str, const PlanState *planstate, List *qual, + const char *qlabel, List *ancestors, List *deparse_ctx, + bool useprefix) +{ + List *context; + char *exprstr; + Node *node; + + if (qual == NIL) + return; + + node = (Node *) make_ands_explicit(qual); + + Assert(deparse_ctx != NULL); + /* Set up deparsing context */ + context = set_deparse_context_plan(deparse_ctx, + planstate->plan, ancestors); + + /* Deparse the expression */ + exprstr = deparse_expression(node, context, useprefix, false); + if (str->len > 0) + appendStringInfoChar(str, '|'); + appendStringInfoString(str, qlabel); + appendStringInfoString(str, exprstr); +} + +const char * +plan_to_deparse_info(const planstateTraceContext * planstateTraceContext, const PlanState *planstate) +{ + StringInfo deparse_info = makeStringInfo(); + Plan const *plan = planstate->plan; + List *ancestors = planstateTraceContext->ancestors; + List *deparse_ctx = planstateTraceContext->deparse_ctx; + + if (deparse_ctx == NULL) + return ""; + + switch (nodeTag(plan)) + { + case T_IndexScan: + add_scan_qual(deparse_info, planstate, ((IndexScan *) plan)->indexqualorig, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_IndexOnlyScan: + add_scan_qual(deparse_info, planstate, ((IndexOnlyScan *) plan)->indexqual, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_BitmapIndexScan: + add_scan_qual(deparse_info, planstate, ((BitmapIndexScan *) plan)->indexqualorig, "Index Cond: ", + ancestors, deparse_ctx, false); + break; + case T_BitmapHeapScan: + add_scan_qual(deparse_info, planstate, ((BitmapHeapScan *) plan)->bitmapqualorig, "Recheck Cond: ", + ancestors, deparse_ctx, false); + break; + case T_SeqScan: + case T_ValuesScan: + case T_CteScan: + case T_NamedTuplestoreScan: + case T_WorkTableScan: + case T_SubqueryScan: + case T_Gather: + case T_GatherMerge: + case T_FunctionScan: + case T_TableFuncScan: + case T_TidRangeScan: + case T_ForeignScan: + case T_CustomScan: + case T_Agg: + case T_WindowAgg: + case T_Group: + case T_Result: + add_scan_qual(deparse_info, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case T_NestLoop: + add_scan_qual(deparse_info, planstate, ((NestLoop *) plan)->join.joinqual, "Join Filter : ", + ancestors, deparse_ctx, false); + add_scan_qual(deparse_info, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case T_MergeJoin: + add_scan_qual(deparse_info, planstate, ((MergeJoin *) plan)->mergeclauses, "Merge Cond: ", + ancestors, deparse_ctx, false); + add_scan_qual(deparse_info, planstate, ((MergeJoin *) plan)->join.joinqual, "Join Filter: ", + ancestors, deparse_ctx, false); + add_scan_qual(deparse_info, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + case T_HashJoin: + add_scan_qual(deparse_info, planstate, ((HashJoin *) plan)->hashclauses, "Hash Cond: ", + ancestors, deparse_ctx, false); + add_scan_qual(deparse_info, planstate, ((MergeJoin *) plan)->join.joinqual, "Join Filter: ", + ancestors, deparse_ctx, false); + add_scan_qual(deparse_info, planstate, plan->qual, "Filter : ", + ancestors, deparse_ctx, false); + break; + default: + break; + } + return deparse_info->data; +} + +/* + * Generate an operation name from a planstate + */ +char const * +plan_to_operation(const planstateTraceContext * planstateTraceContext, const PlanState *planstate, const char *span_type) +{ + StringInfo operation_name = makeStringInfo(); + Plan const *plan = planstate->plan; + + if (plan->parallel_aware) + appendStringInfoString(operation_name, "Parallel "); + if (plan->async_capable) + appendStringInfoString(operation_name, "Async "); + appendStringInfoString(operation_name, span_type); + + plan_to_rel_name(planstate, planstateTraceContext, operation_name); + + return operation_name->data; +} + +/* + * Get the node type name from a plan node + */ +const char * +plan_to_node_type(const Plan *plan) +{ + const char *custom_name; + + switch (nodeTag(plan)) + { + case T_Result: + return "Result"; + case T_ProjectSet: + return "ProjectSet"; + case T_ModifyTable: + switch (((ModifyTable *) plan)->operation) + { + case CMD_INSERT: + return "Insert"; + case CMD_UPDATE: + return "Update"; + case CMD_DELETE: + return "Delete"; + case CMD_MERGE: + return "Merge"; + default: + return "???"; + } + case T_Append: + return "Append"; + case T_MergeAppend: + return "MergeAppend"; + case T_RecursiveUnion: + return "RecursiveUnion"; + case T_BitmapAnd: + return "BitmapAnd"; + case T_BitmapOr: + return "BitmapOr"; + case T_NestLoop: + return "NestedLoop"; + case T_MergeJoin: + return "Merge"; /* "Join" gets added by jointype switch */ + case T_HashJoin: + return "Hash"; /* "Join" gets added by jointype switch */ + case T_SeqScan: + return "SeqScan"; + case T_SampleScan: + return "SampleScan"; + case T_Gather: + return "Gather"; + case T_GatherMerge: + return "GatherMerge"; + case T_IndexScan: + return "IndexScan"; + case T_IndexOnlyScan: + return "IndexOnlyScan"; + case T_BitmapIndexScan: + return "BitmapIndexScan"; + case T_BitmapHeapScan: + return "BitmapHeapScan"; + case T_TidScan: + return "TidScan"; + case T_TidRangeScan: + return "TidRangeScan"; + case T_SubqueryScan: + return "SubqueryScan"; + case T_FunctionScan: + return "FunctionScan"; + case T_TableFuncScan: + return "TableFunctionScan"; + case T_ValuesScan: + return "ValuesScan"; + case T_CteScan: + return "CTEScan"; + case T_NamedTuplestoreScan: + return "NamedTuplestoreScan"; + case T_WorkTableScan: + return "WorkTableScan"; + case T_ForeignScan: + switch (((ForeignScan *) plan)->operation) + { + case CMD_SELECT: + return "ForeignScan"; + case CMD_INSERT: + return "ForeignInsert"; + case CMD_UPDATE: + return "ForeignUpdate"; + case CMD_DELETE: + return "ForeignDelete"; + default: + return "???"; + } + case T_CustomScan: + custom_name = ((CustomScan *) plan)->methods->CustomName; + if (custom_name) + return psprintf("CustomScan (%s)", custom_name); + else + return "CustomScan"; + case T_Material: + return "Materialize"; + case T_Memoize: + return "Memoize"; + case T_Sort: + return "Sort"; + case T_IncrementalSort: + return "IncrementalSort"; + case T_Group: + return "Group"; + case T_Agg: + { + Agg *agg = (Agg *) plan; + + switch (agg->aggstrategy) + { + case AGG_PLAIN: + return "Aggregate"; + case AGG_SORTED: + return "GroupAggregate"; + case AGG_HASHED: + return "HashAggregate"; + case AGG_MIXED: + return "MixedAggregate"; + default: + return "Aggregate ???"; + } + } + case T_WindowAgg: + return "WindowAgg"; + case T_Unique: + return "Unique"; + case T_SetOp: + switch (((SetOp *) plan)->strategy) + { + case SETOP_SORTED: + return "SetOp"; + case SETOP_HASHED: + return "HashSetOp"; + default: + return "SetOp ???"; + } + case T_LockRows: + return "LockRows"; + case T_Limit: + return "Limit"; + case T_Hash: + return "Hash"; + default: + return "???"; + } +} diff --git a/src/pg_tracing_planstate.c b/src/pg_tracing_planstate.c new file mode 100644 index 0000000..ee1caf9 --- /dev/null +++ b/src/pg_tracing_planstate.c @@ -0,0 +1,571 @@ +/*------------------------------------------------------------------------- + * + * pg_tracing_planstate.c + * pg_tracing planstate manipulation functions. + * + * IDENTIFICATION + * contrib/pg_tracing/pg_tracing_planstate.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "common/pg_prng.h" +#include "pg_tracing.h" +#include "utils/timestamp.h" + +#define US_IN_S INT64CONST(1000000) + +/* List of planstate to start found for the current query */ +static TracedPlanstate * traced_planstates = NULL; +/* Previous value of the ExecProcNode pointer in planstates */ +static ExecProcNodeMtd previous_ExecProcNode = NULL; + +/* Current available slot in the traced_planstates array */ +static int index_planstart = 0; + +/* Maximum elements allocated in the traced_planstates array */ +static int max_planstart = 0; + +static void override_ExecProcNode(PlanState *planstate); + +/* + * Reset the traced_planstates array + */ +void +cleanup_planstarts(void) +{ + traced_planstates = NULL; + max_planstart = 0; + index_planstart = 0; +} + +/* + * Drop all traced_planstates after the provided nested level + */ +void +drop_traced_planstate(int exec_nested_level) +{ + int i; + int new_index_start; + + for (i = index_planstart; i > 0; i--) + { + if (traced_planstates[i - 1].nested_level <= exec_nested_level) + { + /* + * Found a new planstate from a previous nested level, we can stop + * here + */ + index_planstart = i; + return; + } + else + /* the traced_planstate should be dropped */ + new_index_start = i - 1; + } + index_planstart = new_index_start; +} + +/* + * If spans from planstate is requested, we override planstate's ExecProcNode's pointer with this function. It will track the time of the first node call needed to place the planstate span. + */ +static TupleTableSlot * +ExecProcNodeFirstPgTracing(PlanState *node) +{ + if (max_planstart == 0) + + /* + * Queries calling pg_tracing_spans will have aborted tracing and + * everything cleaned. + */ + goto exit; + + if (index_planstart >= max_planstart) + { + /* + * We need to extend the traced_planstates array, switch to pg_tracing + * memory context beforehand + */ + MemoryContext oldcxt; + int old_max_planstart = max_planstart; + + Assert(traced_planstates != NULL); + max_planstart += 5; + oldcxt = MemoryContextSwitchTo(pg_tracing_mem_ctx); + traced_planstates = repalloc0(traced_planstates, + old_max_planstart * sizeof(TracedPlanstate), + max_planstart * sizeof(TracedPlanstate)); + MemoryContextSwitchTo(oldcxt); + } + + /* Register planstate start */ + traced_planstates[index_planstart].planstate = node; + traced_planstates[index_planstart].node_start = GetCurrentTimestamp(); + traced_planstates[index_planstart].span_id = pg_prng_uint64(&pg_global_prng_state); + traced_planstates[index_planstart].nested_level = exec_nested_level; + index_planstart++; + +exit: + /* Restore previous exec proc */ + node->ExecProcNode = previous_ExecProcNode; + return node->ExecProcNode(node); +} + +/* + * Override ExecProcNode on all planstate members + */ +static void +override_member_nodes(PlanState **planstates, int nplans) +{ + int j; + TimestampTz child_end; + + for (j = 0; j < nplans; j++) + override_ExecProcNode(planstates[j]); +} + +/* + * Override ExecProcNode on all planstate of a custom scan + */ +static void +override_custom_scan(CustomScanState *css) +{ + ListCell *cell; + + foreach(cell, css->custom_ps) + override_ExecProcNode((PlanState *) lfirst(cell)); +} + +/* + * Walk the planstate and override all executor pointer + */ +static void +override_ExecProcNode(PlanState *planstate) +{ + ListCell *l; + + if (planstate->instrument == NULL) + /* No instrumentation set, do nothing */ + return; + + /* Walk the outerplan */ + if (outerPlanState(planstate)) + override_ExecProcNode(outerPlanState(planstate)); + /* Walk the innerplan */ + if (innerPlanState(planstate)) + override_ExecProcNode(innerPlanState(planstate)); + + /* Handle init plans */ + foreach(l, planstate->initPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + + override_ExecProcNode(splan); + } + + /* Handle sub plans */ + foreach(l, planstate->subPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + + override_ExecProcNode(splan); + } + + /* Handle special nodes with children nodes */ + switch (nodeTag(planstate->plan)) + { + case T_Append: + override_member_nodes(((AppendState *) planstate)->appendplans, + ((AppendState *) planstate)->as_nplans); + break; + case T_MergeAppend: + override_member_nodes(((MergeAppendState *) planstate)->mergeplans, + ((MergeAppendState *) planstate)->ms_nplans); + break; + case T_BitmapAnd: + override_member_nodes(((BitmapAndState *) planstate)->bitmapplans, + ((BitmapAndState *) planstate)->nplans); + break; + case T_BitmapOr: + override_member_nodes(((BitmapOrState *) planstate)->bitmapplans, + ((BitmapOrState *) planstate)->nplans); + break; + case T_SubqueryScan: + override_ExecProcNode(((SubqueryScanState *) planstate)->subplan); + break; + case T_CustomScan: + override_custom_scan((CustomScanState *) planstate); + break; + default: + break; + } + + planstate->ExecProcNode = ExecProcNodeFirstPgTracing; +} + +/* + * Override all ExecProcNode pointer of the planstate with ExecProcNodeFirstPgTracing to track first call of a node. + */ +void +setup_ExecProcNode_override(QueryDesc *queryDesc, int exec_nested_level) +{ + if (max_planstart == 0) + { + MemoryContext oldcxt; + + max_planstart = 10; + oldcxt = MemoryContextSwitchTo(pg_tracing_mem_ctx); + traced_planstates = palloc0(max_planstart * sizeof(TracedPlanstate)); + MemoryContextSwitchTo(oldcxt); + } + Assert(queryDesc->planstate->instrument); + /* Pointer should target ExecProcNodeFirst. Save it to restore it later. */ + previous_ExecProcNode = queryDesc->planstate->ExecProcNode; + override_ExecProcNode(queryDesc->planstate); +} + +/* + * Iterate over a list of planstate to generate span node + */ +static TimestampTz +generate_member_nodes(PlanState **planstates, int nplans, planstateTraceContext * planstateTraceContext, uint64 parent_id, + uint64 query_id, TimestampTz parent_start, TimestampTz root_end, TimestampTz *latest_end) +{ + int j; + TimestampTz child_end; + + for (j = 0; j < nplans; j++) + child_end = generate_span_from_planstate(planstates[j], planstateTraceContext, parent_id, query_id, parent_start, root_end, latest_end); + return child_end; +} + +/* + * Iterate over custom scan planstates to generate span node + */ +static TimestampTz +generate_span_from_custom_scan(CustomScanState *css, planstateTraceContext * planstateTraceContext, uint64 parent_id, + uint64 query_id, TimestampTz parent_start, TimestampTz root_end, TimestampTz *latest_end) +{ + ListCell *cell; + TimestampTz last_end; + + foreach(cell, css->custom_ps) + last_end = generate_span_from_planstate((PlanState *) lfirst(cell), planstateTraceContext, parent_id, query_id, parent_start, root_end, latest_end); + return last_end; +} + +/* + * Get end time for a span node from the provided planstate. + */ +static TimestampTz +get_span_end_from_planstate(PlanState *planstate, TimestampTz plan_start, TimestampTz root_end) +{ + TimestampTz span_end_time; + + if (!INSTR_TIME_IS_ZERO(planstate->instrument->starttime) && root_end > 0) + + /* + * Node was ongoing but aborted due to an error, use root end as the + * end + */ + span_end_time = root_end; + else if (planstate->instrument->total == 0) + span_end_time = GetCurrentTimestamp(); + else + { + span_end_time = plan_start + planstate->instrument->total * US_IN_S; + + /* + * Since we convert from double seconds to microseconds again, we can + * have a span_end_time greater to the root due to the loss of + * precision for long durations. Fallback to the root end in this + * case. + */ + if (span_end_time > root_end) + span_end_time = root_end; + } + return span_end_time; +} + +/* + * Fetch the node start of a planstate + */ +static TracedPlanstate * +get_traced_planstate(PlanState *planstate) +{ + for (int i = 0; i < index_planstart; i++) + { + if (planstate == traced_planstates[i].planstate) + return &traced_planstates[i]; + } + return NULL; +} + +/* + * Get possible parent traced_planstate + */ +TracedPlanstate * +get_parent_traced_planstate(void) +{ + TracedPlanstate *traced_planstate; + + if (index_planstart >= 2) + { + traced_planstate = &traced_planstates[index_planstart - 2]; + if (nodeTag(traced_planstate->planstate->plan) == T_ProjectSet) + return traced_planstate; + } + if (index_planstart >= 1) + { + traced_planstate = &traced_planstates[index_planstart - 1]; + if (nodeTag(traced_planstate->planstate->plan) == T_Result) + return traced_planstate; + } + return NULL; +} + +/* + * Walk through the planstate tree generating a node span for each node. + */ +TimestampTz +generate_span_from_planstate(PlanState *planstate, planstateTraceContext * planstateTraceContext, + uint64 parent_id, uint64 query_id, + TimestampTz parent_start, TimestampTz root_end, TimestampTz *latest_end) +{ + ListCell *l; + uint64 span_id; + Span span_node; + TracedPlanstate *traced_planstate = NULL; + TimestampTz span_start; + TimestampTz span_end; + TimestampTz outer_span_end = 0; + NodeTag node_tag; + + /* The node was never executed, skip it */ + if (planstate->instrument == NULL) + return 0; + + if (!planstate->state->es_finished && !INSTR_TIME_IS_ZERO(planstate->instrument->starttime)) + { + /* + * If the query is in an unfinished state, it means that we're in an + * error handler. Stop the node instrumentation to get the latest + * known state. + */ + InstrStopNode(planstate->instrument, planstate->state->es_processed); + } + + /* + * Make sure stats accumulation is done. Function is a no-op if if was + * already done. + */ + InstrEndLoop(planstate->instrument); + + if (planstate->instrument->total == 0) + /* The node was never executed, ignore it */ + return 0; + + switch (nodeTag(planstate->plan)) + { + case T_BitmapIndexScan: + case T_Hash: + case T_BitmapAnd: + case T_BitmapOr: + + /* + * Those nodes won't go through ExecProcNode so we won't have + * their start. Fallback to the parent start. TODO: Use the child + * start as a fallback instead + */ + span_start = parent_start; + break; + default: + traced_planstate = get_traced_planstate(planstate); + /* TODO: better fallback if planstate is not found */ + Assert(traced_planstate != NULL); + span_start = traced_planstate->node_start; + break; + } + Assert(span_start > 0); + + if (traced_planstate != NULL) + { + switch (nodeTag(traced_planstate->planstate->plan)) + { + case T_Result: + case T_ProjectSet: + span_id = traced_planstate->span_id; + break; + default: + span_id = pg_prng_uint64(&pg_global_prng_state); + } + } + else + span_id = pg_prng_uint64(&pg_global_prng_state); + + span_end = get_span_end_from_planstate(planstate, span_start, root_end); + + /* Keep track of the last child end */ + if (*latest_end < span_end) + *latest_end = span_end; + + planstateTraceContext->ancestors = lcons(planstate->plan, planstateTraceContext->ancestors); + + /* Walk the outerplan */ + if (outerPlanState(planstate)) + outer_span_end = generate_span_from_planstate(outerPlanState(planstate), planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + /* Walk the innerplan */ + if (innerPlanState(planstate)) + generate_span_from_planstate(innerPlanState(planstate), planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + + /* Handle init plans */ + foreach(l, planstate->initPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + Span init_span; + TracedPlanstate *initplan_traced_planstate; + TimestampTz initplan_span_end; + + if (splan->instrument->total == 0) + continue; + initplan_traced_planstate = get_traced_planstate(planstate); + initplan_span_end = get_span_end_from_planstate(planstate, initplan_traced_planstate->node_start, root_end); + init_span = create_span_node(splan, planstateTraceContext, + &initplan_traced_planstate->span_id, parent_id, query_id, + SPAN_NODE_INIT_PLAN, sstate->subplan->plan_name, initplan_traced_planstate->node_start, initplan_span_end); + store_span(&init_span); + generate_span_from_planstate(splan, planstateTraceContext, init_span.span_id, query_id, span_start, root_end, latest_end); + } + + /* Handle sub plans */ + foreach(l, planstate->subPlan) + { + SubPlanState *sstate = (SubPlanState *) lfirst(l); + PlanState *splan = sstate->planstate; + Span subplan_span; + TracedPlanstate *subplan_traced_planstate; + TimestampTz subplan_span_end; + + if (splan->instrument->total == 0) + continue; + subplan_traced_planstate = get_traced_planstate(planstate); + subplan_span_end = get_span_end_from_planstate(planstate, subplan_traced_planstate->node_start, root_end); + subplan_span = create_span_node(splan, planstateTraceContext, + &subplan_traced_planstate->span_id, parent_id, query_id, + SPAN_NODE_SUBPLAN, sstate->subplan->plan_name, subplan_traced_planstate->node_start, subplan_span_end); + store_span(&subplan_span); + generate_span_from_planstate(splan, planstateTraceContext, subplan_span.span_id, query_id, span_start, root_end, latest_end); + } + + /* Handle special nodes with children nodes */ + switch (nodeTag(planstate->plan)) + { + case T_Append: + generate_member_nodes(((AppendState *) planstate)->appendplans, + ((AppendState *) planstate)->as_nplans, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + case T_MergeAppend: + generate_member_nodes(((MergeAppendState *) planstate)->mergeplans, + ((MergeAppendState *) planstate)->ms_nplans, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + case T_BitmapAnd: + generate_member_nodes(((BitmapAndState *) planstate)->bitmapplans, + ((BitmapAndState *) planstate)->nplans, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + case T_BitmapOr: + generate_member_nodes(((BitmapOrState *) planstate)->bitmapplans, + ((BitmapOrState *) planstate)->nplans, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + case T_SubqueryScan: + generate_span_from_planstate(((SubqueryScanState *) planstate)->subplan, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + case T_CustomScan: + generate_span_from_custom_scan((CustomScanState *) planstate, planstateTraceContext, span_id, query_id, span_start, root_end, latest_end); + break; + default: + break; + } + planstateTraceContext->ancestors = list_delete_first(planstateTraceContext->ancestors); + + /* If node had no duration, use the latest end of its child */ + if (planstate->instrument->total == 0) + span_end = *latest_end; + + span_node = create_span_node(planstate, planstateTraceContext, &span_id, parent_id, query_id, SPAN_NODE, NULL, span_start, span_end); + store_span(&span_node); + return span_end; +} + +/* + * Create span node for the provided planstate + */ +Span +create_span_node(PlanState *planstate, const planstateTraceContext * planstateTraceContext, + uint64 *span_id, uint64 parent_id, uint64 query_id, SpanType span_type, + char *subplan_name, TimestampTz span_start, TimestampTz span_end) +{ + Span span; + Plan const *plan = planstate->plan; + + /* + * Make sure stats accumulation is done. Function is a no-op if if was + * already done. + */ + InstrEndLoop(planstate->instrument); + + /* We only create span node on node that were executed */ + Assert(planstate->instrument->total > 0); + + begin_span(planstateTraceContext->trace_id, &span, span_type, span_id, parent_id, + query_id, &span_start); + + /* first tuple time */ + span.startup = planstate->instrument->startup * NS_PER_S; + + if (span_type == SPAN_NODE) + { + /* Generate node specific variable strings and store them */ + const char *node_type; + const char *deparse_info; + char const *operation_name; + int deparse_info_len; + + node_type = plan_to_node_type(plan); + span.node_type_offset = add_str_to_trace_buffer(node_type, strlen(node_type)); + + operation_name = plan_to_operation(planstateTraceContext, planstate, node_type); + span.operation_name_offset = add_str_to_trace_buffer(operation_name, strlen(operation_name)); + + /* deparse_ctx is NULL if deparsing was disabled */ + if (planstateTraceContext->deparse_ctx != NULL) + { + deparse_info = plan_to_deparse_info(planstateTraceContext, planstate); + deparse_info_len = strlen(deparse_info); + if (deparse_info_len > 0) + span.deparse_info_offset = add_str_to_trace_buffer(deparse_info, deparse_info_len); + } + } + else if (subplan_name != NULL) + span.operation_name_offset = add_str_to_trace_buffer(subplan_name, strlen(subplan_name)); + + span.node_counters.rows = (int64) planstate->instrument->ntuples / planstate->instrument->nloops; + span.node_counters.nloops = (int64) planstate->instrument->nloops; + span.node_counters.buffer_usage = planstate->instrument->bufusage; + span.node_counters.wal_usage = planstate->instrument->walusage; + + span.plan_counters.startup_cost = plan->startup_cost; + span.plan_counters.total_cost = plan->total_cost; + span.plan_counters.plan_rows = plan->plan_rows; + span.plan_counters.plan_width = plan->plan_width; + + if (!planstate->state->es_finished) + span.sql_error_code = planstateTraceContext->sql_error_code; + Assert(planstate->instrument->total > 0); + end_span(&span, &span_end); + + return span; +} diff --git a/src/pg_tracing_span.c b/src/pg_tracing_span.c index f97adc4..946f280 100644 --- a/src/pg_tracing_span.c +++ b/src/pg_tracing_span.c @@ -69,6 +69,7 @@ initialize_span_fields(Span * span, SpanType type, TraceId trace_id, uint64 *spa span->node_type_offset = -1; span->operation_name_offset = -1; span->parameter_offset = -1; + span->deparse_info_offset = -1; span->sql_error_code = 0; span->startup = 0; span->be_pid = MyProcPid; @@ -197,6 +198,13 @@ get_span_type(const Span * span, const char *qbuffer, Size qbuffer_size) return "Nothing query"; case SPAN_TOP_UNKNOWN: return "Unknown query"; + + case SPAN_NODE_INIT_PLAN: + return "InitPlan"; + case SPAN_NODE_SUBPLAN: + return "SubPlan"; + case SPAN_NODE: + return "Node"; } return "???"; } @@ -233,6 +241,10 @@ get_operation_name(const Span * span, const char *qbuffer, Size qbuffer_size) case SPAN_TOP_NOTHING: case SPAN_TOP_UNKNOWN: return "Top"; + case SPAN_NODE_INIT_PLAN: + case SPAN_NODE_SUBPLAN: + case SPAN_NODE: + return "Node"; } return "Unknown type"; } @@ -249,6 +261,8 @@ adjust_file_offset(Span * span, Size file_position) span->operation_name_offset += file_position; if (span->parameter_offset != -1) span->parameter_offset += file_position; + if (span->deparse_info_offset != -1) + span->deparse_info_offset += file_position; } /* diff --git a/src/version_compat.h b/src/version_compat.h index 909bad7..6fef79c 100644 --- a/src/version_compat.h +++ b/src/version_compat.h @@ -25,4 +25,4 @@ #endif -#endif /* VERSION_COMPAT_H */ +#endif /* VERSION_COMPAT_H */