From d3490068619eab79998982d3a357469a9c8b3139 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 15 Oct 2024 06:18:13 +0300 Subject: [PATCH 01/69] Initial solution --- gpcontrib/arenadata_toolkit/Makefile | 25 +- .../arenadata_toolkit--1.6--1.7.sql | 98 ++ .../arenadata_toolkit.control | 2 +- .../expected/arenadata_toolkit_guc.out | 233 ++++ .../expected/arenadata_toolkit_test.out | 182 ++- .../expected/arenadata_toolkit_tracking.out | 217 +++ .../expected/upgrade_test.out | 7 +- .../sql/arenadata_toolkit_guc.sql | 144 ++ .../sql/arenadata_toolkit_tracking.sql | 103 ++ .../arenadata_toolkit/src/arenadata_toolkit.c | 43 + .../src/arenadata_toolkit_guc.c | 295 ++++ .../src/arenadata_toolkit_worker.c | 450 ++++++ gpcontrib/arenadata_toolkit/src/bloom.c | 80 ++ gpcontrib/arenadata_toolkit/src/bloom_set.c | 280 ++++ .../{arenadata_toolkit.c => src/dbsize.c} | 100 +- gpcontrib/arenadata_toolkit/src/drops_track.c | 223 +++ gpcontrib/arenadata_toolkit/src/file_hook.c | 147 ++ .../src/include/arenadata_toolkit_guc.h | 22 + .../src/include/arenadata_toolkit_worker.h | 6 + .../arenadata_toolkit/src/include/bloom.h | 23 + .../arenadata_toolkit/src/include/bloom_set.h | 41 + .../arenadata_toolkit/src/include/dbsize.h | 6 + .../src/include/drops_track.h | 16 + .../arenadata_toolkit/src/include/file_hook.h | 7 + .../arenadata_toolkit/src/include/tf_shmem.h | 18 + gpcontrib/arenadata_toolkit/src/tf_shmem.c | 56 + gpcontrib/arenadata_toolkit/src/track_files.c | 1228 +++++++++++++++++ 27 files changed, 3951 insertions(+), 101 deletions(-) create mode 100644 gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql create mode 100644 gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out create mode 100644 gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out create mode 100644 gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql create mode 100644 gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql create mode 100644 
gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c create mode 100644 gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c create mode 100644 gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c create mode 100644 gpcontrib/arenadata_toolkit/src/bloom.c create mode 100644 gpcontrib/arenadata_toolkit/src/bloom_set.c rename gpcontrib/arenadata_toolkit/{arenadata_toolkit.c => src/dbsize.c} (85%) create mode 100644 gpcontrib/arenadata_toolkit/src/drops_track.c create mode 100644 gpcontrib/arenadata_toolkit/src/file_hook.c create mode 100644 gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/bloom.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/bloom_set.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/dbsize.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/drops_track.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/file_hook.h create mode 100644 gpcontrib/arenadata_toolkit/src/include/tf_shmem.h create mode 100644 gpcontrib/arenadata_toolkit/src/tf_shmem.c create mode 100644 gpcontrib/arenadata_toolkit/src/track_files.c diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index 446afcbc399d..665c4ae401e5 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -3,7 +3,7 @@ MODULES = arenadata_toolkit EXTENSION = arenadata_toolkit -EXTENSION_VERSION = 1.6 +EXTENSION_VERSION = 1.7 DATA = \ arenadata_toolkit--1.0.sql \ arenadata_toolkit--1.0--1.1.sql \ @@ -12,15 +12,29 @@ DATA = \ arenadata_toolkit--1.3--1.4.sql \ arenadata_toolkit--1.4--1.5.sql \ arenadata_toolkit--1.5--1.6.sql \ + arenadata_toolkit--1.6--1.7.sql \ DATA_built = $(EXTENSION)--$(EXTENSION_VERSION).sql -$(DATA_built): $(DATA) - cat $(DATA) > $(DATA_built) +MODULE_big = arenadata_toolkit +OBJS = \ + 
src/arenadata_toolkit_guc.o \ + src/bloom.o \ + src/bloom_set.o \ + src/drops_track.o \ + src/file_hook.o \ + src/tf_shmem.o \ + src/arenadata_toolkit.o \ + src/arenadata_toolkit_worker.o \ + src/track_files.o \ + src/dbsize.o \ + +PG_CFLAGS = -I$(libpq_srcdir) -I$(CURDIR)/src/include REGRESS = arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ adb_collect_table_stats_test adb_vacuum_strategy_test adb_relation_storage_size_test \ - tablespace_location upgrade_test adb_hba_file_rules_view_test + tablespace_location upgrade_test adb_hba_file_rules_view_test \ + arenadata_toolkit_guc arenadata_toolkit_tracking REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS @@ -33,3 +47,6 @@ top_builddir = ../.. include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif + +$(DATA_built): $(DATA) + cat $(DATA) > $(DATA_built) diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql new file mode 100644 index 000000000000..9038b32c8ef9 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql @@ -0,0 +1,98 @@ +/* gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql */ + +CREATE FUNCTION arenadata_toolkit.tracking_register_db(dbid OID default 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_register_db' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db(dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID default 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_unregister_db' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_register_schema(schemaname NAME, dbid OID default 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_register_schema' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON 
FUNCTION arenadata_toolkit.tracking_register_schema(schema NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID default 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_unregister_schema' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_relkinds(relkinds NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_relkinds' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_relkinds(relkinds NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_relstorages(relstorages NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_relstorages' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_relstorages(relstorages NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_snapshot_on_recovery(val BOOL, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_snapshot_on_recovery' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_snapshot_on_recovery(val BOOL, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_trigger_initial_snapshot(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_trigger_initial_snapshot' LANGUAGE C; + +CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_is_initial_snapshot_triggered' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered(dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_master(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', 
+'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON master; + +CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON ALL segments; + +CREATE FUNCTION arenadata_toolkit.tracking_is_segment_initialized() +returns TABLE(segindex INT, is_initialized BOOL) AS '$libdir/arenadata_toolkit', +'tracking_is_segment_initialized' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_is_segment_initialized() FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track_main() +RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, +relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', +'tracking_get_track_main' LANGUAGE C; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track() +RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, +relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', +'tracking_get_track' LANGUAGE C EXECUTE ON master; + +CREATE VIEW arenadata_toolkit.tables_track AS +SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid +FROM arenadata_toolkit.tracking_get_track() AS t +LEFT JOIN pg_class AS c + ON c.reltoastrelid = t.relid AND t.relkind = 't' +LEFT JOIN pg_index AS i + ON i.indexrelid = t.relid AND t.relkind = 'i' +LEFT JOIN pg_catalog.pg_appendonly AS vm + ON vm.visimaprelid = t.relid AND t.relkind = 'M' +LEFT JOIN pg_catalog.pg_appendonly AS blk + ON blk.blkdirrelid = t.relid AND t.relkind = 'b' +LEFT JOIN pg_catalog.pg_appendonly AS seg + ON seg.segrelid = t.relid AND t.relkind = 'o'; + +CREATE VIEW arenadata_toolkit.is_initial_snapshot_triggered AS +SELECT CASE + WHEN TRUE = ALL(select arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments()) + AND + 
arenadata_toolkit.tracking_is_initial_snapshot_triggered_master() + THEN 1 ELSE NULL END AS is_triggered; diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control index 9d7f496cca3d..cf6ec33cb566 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control @@ -1,5 +1,5 @@ # arenadata_toolkit extension comment = 'extension is used for manipulation of objects created by adb-bundle' -default_version = '1.6' +default_version = '1.7' module_pathname = '$libdir/arenadata_toolkit' relocatable = false diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out new file mode 100644 index 000000000000..017768eae89d --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -0,0 +1,233 @@ +-- start_matchsubs +-- +-- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs +-- Test database registering GUC. 
+CREATE DATABASE tracking1; +\c tracking1; +CREATE EXTENSION arenadata_toolkit; +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +---------+----------- +(0 rows) + +\c -; +\c tracking1; +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=t} +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +\c -; +\c tracking1; +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f} +(1 row) + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_is_db_tracked = true; +ERROR: cannot change tracking status outside the tracking_register_db function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_is_db_tracked = true; +ERROR: cannot change tracking status outside the tracking_register_db function +-- Test limit of tracking databases. 
+SHOW arenadata_toolkit.tracking_db_track_count; + arenadata_toolkit.tracking_db_track_count +------------------------------------------- + 5 +(1 row) + +CREATE DATABASE tracking2; +CREATE DATABASE tracking3; +CREATE DATABASE tracking4; +CREATE DATABASE tracking5; +CREATE DATABASE tracking6; +SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN +('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5'); +ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:XXX) +SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN +('tracking6'); +ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:XXX) +SELECT arenadata_toolkit.tracking_unregister_db(oid) FROM pg_database WHERE datname IN +('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5', 'tracking6'); + tracking_unregister_db +------------------------ + t + t + t + t + t + t +(6 rows) + +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; +-- Test arenadata_toolkit.tracking_snapshot_on_recovery GUC +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------ + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t} +(1 row) + +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; +-- Test arenadata_toolkit.tracking_relstorages GUC +SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,"arenadata_toolkit.tracking_relstorages=f,a,x"} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); +ERROR: Invalid relstorage type: d +HINT: Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f' +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_relstorages = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relstorages function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relstorages function +-- Test arenadata_toolkit.tracking_relkinds GUC +SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o"} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); 
+ERROR: Invalid relkind: d +HINT: Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M' +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relkinds = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relkinds function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relkinds function +-- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('public'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o","arenadata_toolkit.tracking_schemas=arenadata_toolkit,public"} +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig 
+-----------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o",arenadata_toolkit.tracking_schemas=arenadata_toolkit} +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); +ERROR: schema pg_pg does not exist +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; +ERROR: cannot change tracking status outside the tracking_register_schema function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; +ERROR: cannot change tracking status outside the tracking_register_schema function +\c contrib_regression; +DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index 97e9c1ac4dd8..ef9b5a9c5b9c 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -112,31 +112,63 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_history_backup_YYYYMMDDtHHMMSS | table | a | db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_default_part | table | a | db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_pYYYYMM | table | a | + is_initial_snapshot_triggered | table | v | operation_exclude | table | a | -(24 rows) + tables_track | table | v | + tracking_get_track | proc | - | + tracking_get_track_main | proc | - | + tracking_is_initial_snapshot_triggered | proc | - | 
{owner=X/owner} + tracking_is_initial_snapshot_triggered_master | proc | - | + tracking_is_initial_snapshot_triggered_segments | proc | - | + tracking_is_segment_initialized | proc | - | {owner=X/owner} + tracking_register_db | proc | - | {owner=X/owner} + tracking_register_schema | proc | - | {owner=X/owner} + tracking_set_relkinds | proc | - | {owner=X/owner} + tracking_set_relstorages | proc | - | {owner=X/owner} + tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | + tracking_unregister_db | proc | - | {owner=X/owner} + tracking_unregister_schema | proc | - | {owner=X/owner} +(40 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN toolkit_objects_info objs ON d.objid = objs.objid JOIN pg_extension e ON d.refobjid = e.oid WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; - objname | objtype | extname | deptype ----------------------------------------+---------+-------------------+--------- - __db_files_current | table | arenadata_toolkit | e - __db_files_current_unmapped | table | arenadata_toolkit | e - __db_segment_files | table | arenadata_toolkit | e - adb_collect_table_stats | proc | arenadata_toolkit | e - adb_create_tables | proc | arenadata_toolkit | e - adb_get_relfilenodes | proc | arenadata_toolkit | e - adb_hba_file_rules | proc | arenadata_toolkit | e - adb_hba_file_rules_view | table | arenadata_toolkit | e - adb_relation_storage_size | proc | arenadata_toolkit | e - adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e - adb_skew_coefficients | table | arenadata_toolkit | e - adb_vacuum_strategy | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e -(14 rows) + objname | objtype | extname | deptype 
+-------------------------------------------------+---------+-------------------+--------- + __db_files_current | table | arenadata_toolkit | e + __db_files_current_unmapped | table | arenadata_toolkit | e + __db_segment_files | table | arenadata_toolkit | e + adb_collect_table_stats | proc | arenadata_toolkit | e + adb_create_tables | proc | arenadata_toolkit | e + adb_get_relfilenodes | proc | arenadata_toolkit | e + adb_hba_file_rules | proc | arenadata_toolkit | e + adb_hba_file_rules_view | table | arenadata_toolkit | e + adb_relation_storage_size | proc | arenadata_toolkit | e + adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e + adb_skew_coefficients | table | arenadata_toolkit | e + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e + is_initial_snapshot_triggered | table | arenadata_toolkit | e + tables_track | table | arenadata_toolkit | e + tracking_get_track | proc | arenadata_toolkit | e + tracking_get_track_main | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e + tracking_is_segment_initialized | proc | arenadata_toolkit | e + tracking_register_db | proc | arenadata_toolkit | e + tracking_register_schema | proc | arenadata_toolkit | e + tracking_set_relkinds | proc | arenadata_toolkit | e + tracking_set_relstorages | proc | arenadata_toolkit | e + tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e + tracking_unregister_db | proc | arenadata_toolkit | e + tracking_unregister_schema | proc | arenadata_toolkit | e +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; @@ -153,53 +185,85 @@ 
SELECT arenadata_toolkit.adb_create_tables(); -- show toolkit objects (and their grants) that belongs to arenadata_toolkit schema after creating -- extension and calling adb_create_tables SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY objname; - objname | objtype | objstorage | objacl ----------------------------------------+---------+------------+-------------------------------- - __db_files_current | table | v | {owner=arwdDxt/owner,=r/owner} - __db_files_current_unmapped | table | v | {owner=arwdDxt/owner,=r/owner} - __db_segment_files | table | v | {owner=arwdDxt/owner,=r/owner} - adb_collect_table_stats | proc | - | {owner=X/owner} - adb_create_tables | proc | - | {owner=X/owner} - adb_get_relfilenodes | proc | - | {=X/owner,owner=X/owner} - adb_hba_file_rules | proc | - | {owner=X/owner} - adb_hba_file_rules_view | table | v | {owner=arwdDxt/owner} - adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} - adb_relation_storage_size_on_segments | proc | - | {=X/owner,owner=X/owner} - adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} - adb_vacuum_strategy | proc | - | {owner=X/owner} - adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} - adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} - arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} - daily_operation | table | a | {owner=arwdDxt/owner} - db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} - db_files_history | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} - operation_exclude | table | a | {owner=arwdDxt/owner} -(21 rows) + objname | objtype | objstorage | objacl +-------------------------------------------------+---------+------------+-------------------------------- + __db_files_current | table | v | {owner=arwdDxt/owner,=r/owner} + __db_files_current_unmapped | table | v | 
{owner=arwdDxt/owner,=r/owner} + __db_segment_files | table | v | {owner=arwdDxt/owner,=r/owner} + adb_collect_table_stats | proc | - | {owner=X/owner} + adb_create_tables | proc | - | {owner=X/owner} + adb_get_relfilenodes | proc | - | {=X/owner,owner=X/owner} + adb_hba_file_rules | proc | - | {owner=X/owner} + adb_hba_file_rules_view | table | v | {owner=arwdDxt/owner} + adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} + adb_relation_storage_size_on_segments | proc | - | {=X/owner,owner=X/owner} + adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} + adb_vacuum_strategy | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} + arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} + daily_operation | table | a | {owner=arwdDxt/owner} + db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} + db_files_history | table | a | {owner=arwdDxt/owner} + db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} + db_files_history_1_prt_p202410 | table | a | {owner=arwdDxt/owner} + is_initial_snapshot_triggered | table | v | + operation_exclude | table | a | {owner=arwdDxt/owner} + tables_track | table | v | + tracking_get_track | proc | - | + tracking_get_track_main | proc | - | + tracking_is_initial_snapshot_triggered | proc | - | {owner=X/owner} + tracking_is_initial_snapshot_triggered_master | proc | - | + tracking_is_initial_snapshot_triggered_segments | proc | - | + tracking_is_segment_initialized | proc | - | {owner=X/owner} + tracking_register_db | proc | - | {owner=X/owner} + tracking_register_schema | proc | - | {owner=X/owner} + tracking_set_relkinds | proc | - | {owner=X/owner} + tracking_set_relstorages | proc | - | {owner=X/owner} + tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | + tracking_unregister_db | proc | - | {owner=X/owner} + 
tracking_unregister_schema | proc | - | {owner=X/owner} +(37 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN toolkit_objects_info objs ON d.objid = objs.objid JOIN pg_extension e ON d.refobjid = e.oid WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; - objname | objtype | extname | deptype ----------------------------------------+---------+-------------------+--------- - __db_files_current | table | arenadata_toolkit | e - __db_files_current_unmapped | table | arenadata_toolkit | e - __db_segment_files | table | arenadata_toolkit | e - adb_collect_table_stats | proc | arenadata_toolkit | e - adb_create_tables | proc | arenadata_toolkit | e - adb_get_relfilenodes | proc | arenadata_toolkit | e - adb_hba_file_rules | proc | arenadata_toolkit | e - adb_hba_file_rules_view | table | arenadata_toolkit | e - adb_relation_storage_size | proc | arenadata_toolkit | e - adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e - adb_skew_coefficients | table | arenadata_toolkit | e - adb_vacuum_strategy | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e -(14 rows) + objname | objtype | extname | deptype +-------------------------------------------------+---------+-------------------+--------- + __db_files_current | table | arenadata_toolkit | e + __db_files_current_unmapped | table | arenadata_toolkit | e + __db_segment_files | table | arenadata_toolkit | e + adb_collect_table_stats | proc | arenadata_toolkit | e + adb_create_tables | proc | arenadata_toolkit | e + adb_get_relfilenodes | proc | arenadata_toolkit | e + adb_hba_file_rules | proc | arenadata_toolkit | e + adb_hba_file_rules_view | table | arenadata_toolkit | e + adb_relation_storage_size | proc | arenadata_toolkit | e + adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e + 
adb_skew_coefficients | table | arenadata_toolkit | e + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e + is_initial_snapshot_triggered | table | arenadata_toolkit | e + tables_track | table | arenadata_toolkit | e + tracking_get_track | proc | arenadata_toolkit | e + tracking_get_track_main | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e + tracking_is_segment_initialized | proc | arenadata_toolkit | e + tracking_register_db | proc | arenadata_toolkit | e + tracking_register_schema | proc | arenadata_toolkit | e + tracking_set_relkinds | proc | arenadata_toolkit | e + tracking_set_relstorages | proc | arenadata_toolkit | e + tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e + tracking_unregister_db | proc | arenadata_toolkit | e + tracking_unregister_schema | proc | arenadata_toolkit | e +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out new file mode 100644 index 000000000000..3f2d7ef47e4e --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -0,0 +1,217 @@ +-- Tests for size tracking logic introduced in version 1.7 +-- start_matchsubs +-- m/ERROR: database \d+ is not tracked \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- end_matchsubs +CREATE DATABASE tracking_db1; +\c tracking_db1; +CREATE EXTENSION arenadata_toolkit; +-- 1. 
Test getting track on not registered database; +SELECT * FROM arenadata_toolkit.tracking_get_track(); +ERROR: database 44817 is not tracked (track_files.c:347) +CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +-- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to +-- setting up the bloom filter such that all relfilenodes are considered. +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; + is_triggered +-------------- + 1 +(1 row) + +-- 3. If user hasn't registered any schema, the default schemas are used. +-- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next +-- call of tracking_get_track() will return nothing if database is not modified in between. +SELECT count(*) FROM arenadata_toolkit.tracking_get_track(); + count +------- + 1056 +(1 row) + +-- 4. Create table in specific schema and register that schema. +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +-- Getting the track. Only created table with size 0 is expected; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 0 | a | 0 | r | c + tracking_t1 | 0 | a | 1 | r | c + tracking_t1 | 0 | a | 2 | r | c +(4 rows) + +-- 5. Test data extending event. Bloom should capture it. 
+INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(3 rows) + +-- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. +DROP TABLE arenadata_toolkit.tracking_t1; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | +(16 rows) + +-- 8. Test actions on commit and rollback +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +-- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(4 rows) + +ROLLBACK; +-- If commits, filter is cleared. 
+BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(4 rows) + +COMMIT; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ +(0 rows) + +-- 9. Test repetitive track call within the same transaction. In case of +-- rollback only first changes shoul be present. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 147576 | a | 0 | r | c + tracking_t1 | 147112 | a | 1 | r | c + tracking_t1 | 146096 | a | 2 | r | c +(3 rows) + +CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); +INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 161088 | a | 0 | r | c + tracking_t2 | 229376 | a | 0 | r | h + tracking_t1 | 160696 | a | 1 | r | c + tracking_t2 | 229376 | a | 1 | r | h + tracking_t1 | 159128 | a | 2 | r | c + tracking_t2 | 229376 | a | 2 | r | h +(6 rows) + +ROLLBACK; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); 
+ relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + | 0 | d | -1 | | + tracking_t1 | 161088 | a | 0 | r | c + | 0 | d | 0 | | + tracking_t1 | 160696 | a | 1 | r | c + | 0 | d | 1 | | + tracking_t1 | 159128 | a | 2 | r | c + | 0 | d | 2 | | +(7 rows) + +-- 10. Test relkind filtering. +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +ERROR: relation "tracking_t1" already exists +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +CREATE INDEX ON arenadata_toolkit.tracking_t1(i); +-- Want to see index and block dir relation. +SELECT arenadata_toolkit.tracking_register_schema('pg_aoseg'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + size | state | segid | relkind | relstorage +---------+-------+-------+---------+------------ + 32768 | a | -1 | i | h + 32768 | a | -1 | i | h + 1638400 | a | 0 | i | h + 65536 | a | 0 | i | h + 1638400 | a | 1 | i | h + 65536 | a | 1 | i | h + 1638400 | a | 2 | i | h + 65536 | a | 2 | i | h +(8 rows) + +DROP TABLE arenadata_toolkit.tracking_t1; +-- Clean up +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +\c contrib_regression; +DROP DATABASE tracking_db1; diff --git a/gpcontrib/arenadata_toolkit/expected/upgrade_test.out b/gpcontrib/arenadata_toolkit/expected/upgrade_test.out index 35cb2c00ccab..22915ab53bc9 100644 --- a/gpcontrib/arenadata_toolkit/expected/upgrade_test.out +++ b/gpcontrib/arenadata_toolkit/expected/upgrade_test.out @@ -175,7 +175,12 @@ ORDER BY 1; 1.5: column tablespace_location check 1.5: create the latest check 1.5: only alter check -(34 rows) + 1.6: 
alter and create_tables check + 1.6: alter, create_tables and collect_table_stats check + 1.6: column tablespace_location check + 1.6: create the latest check + 1.6: only alter check +(39 rows) -- Cleanup DROP FUNCTION do_upgrade_test_for_arenadata_toolkit(TEXT); diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql new file mode 100644 index 000000000000..9ecf2ef897c2 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -0,0 +1,144 @@ +-- start_matchsubs +-- +-- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking1; +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; +--end_ignore + +-- Test database registering GUC. +CREATE DATABASE tracking1; +\c tracking1; +CREATE EXTENSION arenadata_toolkit; + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +\c -; +\c tracking1; + +SELECT arenadata_toolkit.tracking_register_db(); + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_unregister_db(); + +\c -; +\c tracking1; + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_is_db_tracked = true; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_is_db_tracked = true; + +-- Test limit of tracking databases. 
+SHOW arenadata_toolkit.tracking_db_track_count; + +CREATE DATABASE tracking2; +CREATE DATABASE tracking3; +CREATE DATABASE tracking4; +CREATE DATABASE tracking5; +CREATE DATABASE tracking6; + +SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN +('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5'); + +SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN +('tracking6'); + +SELECT arenadata_toolkit.tracking_unregister_db(oid) FROM pg_database WHERE datname IN +('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5', 'tracking6'); + +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; + +-- Test arenadata_toolkit.tracking_snapshot_on_recovery GUC +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; + +-- Test arenadata_toolkit.tracking_relstorages GUC +SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); + +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_relstorages = "h, a, x"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; + +-- Test arenadata_toolkit.tracking_relkinds GUC +SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relkinds = "h, a, x"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; + +-- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + +SELECT arenadata_toolkit.tracking_register_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); + +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; + +\c contrib_regression; + +DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql new file mode 100644 index 000000000000..0465d606fecf --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -0,0 +1,103 @@ +-- Tests for size tracking logic introduced in version 1.7 +-- start_matchsubs +-- m/ERROR: database \d+ is not tracked \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking1; +--end_ignore +CREATE DATABASE tracking_db1; +\c tracking_db1; +CREATE EXTENSION arenadata_toolkit; + +-- 1. Test getting track on not registered database; +SELECT * FROM arenadata_toolkit.tracking_get_track(); + +SELECT arenadata_toolkit.tracking_register_db(); + +-- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to +-- setting up the bloom filter such that all relfilenodes are considered. +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); +SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; + +-- 3. If user hasn't registered any schema, the default schemas are used. +-- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next +-- call of tracking_get_track() will return nothing if database is not modified in between. +SELECT count(*) FROM arenadata_toolkit.tracking_get_track(); + +-- 4. Create table in specific schema and register that schema. +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + +-- Getting the track. 
Only created table with size 0 is expected; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +-- 5. Test data extending event. Bloom should capture it. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +-- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. +DROP TABLE arenadata_toolkit.tracking_t1; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +-- 8. Test actions on commit and rollback +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); + +-- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); +ROLLBACK; + +-- If commits, filter is cleared. +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); +COMMIT; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +-- 9. Test repetitive track call within the same transaction. In case of +-- rollback only first changes shoul be present. 
+INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); +INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); + +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); +ROLLBACK; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +-- 10. Test relkind filtering. +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +CREATE INDEX ON arenadata_toolkit.tracking_t1(i); + +-- Want to see index and block dir relation. +SELECT arenadata_toolkit.tracking_register_schema('pg_aoseg'); +SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tracking_get_track(); + +DROP TABLE arenadata_toolkit.tracking_t1; + +-- Clean up +SELECT arenadata_toolkit.tracking_unregister_db(); + +\c contrib_regression; +DROP DATABASE tracking_db1; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c new file mode 100644 index 000000000000..9a58b4c19f55 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c @@ -0,0 +1,43 @@ +#include "postgres.h" + +#include "access/xlog.h" +#include "cdb/cdbvars.h" +#include "miscadmin.h" +#include "postmaster/bgworker.h" +#include "storage/shmem.h" + +#include "arenadata_toolkit_guc.h" +#include "arenadata_toolkit_worker.h" +#include "drops_track.h" +#include "file_hook.h" +#include "tf_shmem.h" + +void _PG_init(void); +void _PG_fini(void); + +void +_PG_init(void) +{ + if 
(!process_shared_preload_libraries_in_progress) + return; + + tf_guc_define(); + tf_shmem_init(); + file_hook_init(); + + drops_track_init(); + + if (IS_QUERY_DISPATCHER()) + { + arenadata_toolkit_worker_register(); + } +} + +void +_PG_fini(void) +{ + /* Counterparts of the *_init() calls in _PG_init(); each runs exactly once. */ + tf_shmem_deinit(); + file_hook_deinit(); + drops_track_deinit(); +} diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c new file mode 100644 index 000000000000..ad4603984869 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -0,0 +1,295 @@ +#include "arenadata_toolkit_guc.h" + + +#include "cdb/cdbvars.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_db_role_setting.h" +#include <limits.h> +#include "utils/guc.h" +#include "tf_shmem.h" + +#define DEFAULT_BLOOM_SIZE 1000000 +#define DEFAULT_DB_TRACK_COUNT 5 +#define DEFAULT_IS_TRACKED false +#define DEFAULT_DROPS_COUNT 100000 +#define DEFAULT_TRACKED_SCHEMAS "public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema" +#define DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY true +#define DEFAULT_TRACKED_REL_STORAGES "h,a,c" +#define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" +#define DEFAULT_NAPTIME 60 + +#define MIN_BLOOM_SIZE 1 +#define MIN_DB_TRACK_COUNT 1 +#define MIN_DROPS_COUNT 1 +#define MIN_NAPTIME 1 + +#define MAX_BLOOM_SIZE 128000000 +#define MAX_DB_TRACK_COUNT 1000 +#define MAX_DROPS_COUNT 1000000 +#define MAX_NAPTIME (INT_MAX / 1000) /* the worker passes naptime * 1000 to WaitLatch(); keep that product within int */ + +int bloom_size = DEFAULT_BLOOM_SIZE; +int db_track_count = DEFAULT_DB_TRACK_COUNT; +bool is_tracked = DEFAULT_IS_TRACKED; +bool get_full_snapshot_on_recovery = DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY; +int drops_count = DEFAULT_DROPS_COUNT; +char *tracked_schemas = DEFAULT_TRACKED_SCHEMAS; +char *tracked_rel_storages = DEFAULT_TRACKED_REL_STORAGES; +char *tracked_rel_kinds = DEFAULT_TRACKED_REL_KINDS; +int tracking_worker_naptime_sec = DEFAULT_NAPTIME; + +static bool is_tracked_unlocked = false;
+static bool is_get_full_snapshot_on_recovery_unlocked = false; +static bool is_schemas_unlocked = false; +static bool is_relkinds_unlocked = false; +static bool is_relstorages_unlocked = false; + +void +tf_guc_unlock_tracked_once(void) +{ + if (!is_tracked_unlocked) + is_tracked_unlocked = true; +} + +void +tf_guc_unlock_full_snapshot_on_recovery_once(void) +{ + if (!is_get_full_snapshot_on_recovery_unlocked) + is_get_full_snapshot_on_recovery_unlocked = true; +} + +void +tf_guc_unlock_schemas_once(void) +{ + if (!is_schemas_unlocked) + is_schemas_unlocked = true; +} + +void +tf_guc_unlock_relkinds_once(void) +{ + if (!is_relkinds_unlocked) + is_relkinds_unlocked = true; +} + +void +tf_guc_unlock_relstorages_once(void) +{ + if (!is_relstorages_unlocked) + is_relstorages_unlocked = true; +} + +/* Prohibit changing the GUC value manually except several cases. + * This is not called for RESET, so RESET is not guarded + */ +static bool +check_tracked(bool *newval, void **extra, GucSource source) +{ + if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || + (Gp_role == GP_ROLE_DISPATCH && is_tracked_unlocked)) + { + if (is_tracked_unlocked) + is_tracked_unlocked = false; + + if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) + return false; + + return true; + } + + GUC_check_errmsg("cannot change tracking status outside the tracking_register_db function"); + return false; +} + +/* Prohibit changing the GUC value manually except several cases. 
+ * This is not called for RESET, so RESET is not guarded + */ +static bool +check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source) +{ + if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || + (Gp_role == GP_ROLE_DISPATCH && is_get_full_snapshot_on_recovery_unlocked)) + { + if (is_get_full_snapshot_on_recovery_unlocked) + is_get_full_snapshot_on_recovery_unlocked = false; + + if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) + return false; + + return true; + } + + GUC_check_errmsg("cannot change tracking status outside the tracking_set_snapshot_on_recovery function"); + return false; +} + +static bool +check_relkinds(char **newval, void **extra, GucSource source) +{ + if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || + (Gp_role == GP_ROLE_DISPATCH && is_relkinds_unlocked)) + { + if (is_relkinds_unlocked) + is_relkinds_unlocked = false; + + if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) + return false; + + return true; + } + + GUC_check_errmsg("cannot change tracking status outside the tracking_register_relkinds function"); + return false; +} + +static bool +check_schemas(char **newval, void **extra, GucSource source) +{ + if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || + (Gp_role == GP_ROLE_DISPATCH && is_schemas_unlocked)) + { + if (is_schemas_unlocked) + is_schemas_unlocked = false; + + if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) + return false; + + return true; + } + + GUC_check_errmsg("cannot change tracking status outside the tracking_register_schema function"); + return false; +} + +static bool +check_relstorages(char **newval, void **extra, GucSource source) +{ + if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || + (Gp_role == GP_ROLE_DISPATCH && is_relstorages_unlocked)) + { + if (is_relstorages_unlocked) + is_relstorages_unlocked = false; + + if (source != PGC_S_DATABASE && source != 
PGC_S_DEFAULT && source != PGC_S_TEST) + return false; + + return true; + } + + GUC_check_errmsg("cannot change tracking status outside the tracking_register_relstorages function"); + return false; +} + +void +tf_guc_define(void) +{ + DefineCustomIntVariable("arenadata_toolkit.tracking_bloom_size", + "Size of bloom filter in bytes for each tracked database", + NULL, + &bloom_size, + DEFAULT_BLOOM_SIZE, + MIN_BLOOM_SIZE, + MAX_BLOOM_SIZE, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomIntVariable("arenadata_toolkit.tracking_db_track_count", + "Count of tracked databases.", + NULL, + &db_track_count, + DEFAULT_DB_TRACK_COUNT, + MIN_DB_TRACK_COUNT, + MAX_DB_TRACK_COUNT, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL + ); + + DefineCustomBoolVariable("arenadata_toolkit.tracking_is_db_tracked", + "Is current database tracked.", + NULL, + &is_tracked, + DEFAULT_IS_TRACKED, + PGC_SUSET, + 0, + &check_tracked, + NULL, + NULL); + + DefineCustomBoolVariable("arenadata_toolkit.tracking_snapshot_on_recovery", + "Return full snapshot at startup/recovery.", + NULL, + &get_full_snapshot_on_recovery, + DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY, + PGC_SUSET, + 0, + &check_get_full_snapshot_on_recovery, + NULL, + NULL); + + DefineCustomIntVariable("arenadata_toolkit.tracking_drops_count", + "Count of max monitored drop events.", + NULL, + &drops_count, + DEFAULT_DROPS_COUNT, + MIN_DROPS_COUNT, + MAX_DROPS_COUNT, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + + DefineCustomStringVariable("arenadata_toolkit.tracking_schemas", + "Tracked schema names.", + NULL, + &tracked_schemas, + DEFAULT_TRACKED_SCHEMAS, + PGC_SUSET, + 0, + &check_schemas, + NULL, + NULL); + + DefineCustomStringVariable("arenadata_toolkit.tracking_relstorages", + "Tracked relation storage types.", + NULL, + &tracked_rel_storages, + DEFAULT_TRACKED_REL_STORAGES, + PGC_SUSET, + 0, + &check_relstorages, + NULL, + NULL); + + DefineCustomStringVariable("arenadata_toolkit.tracking_relkinds", + "Tracked 
relation kinds.", + NULL, + &tracked_rel_kinds, + DEFAULT_TRACKED_REL_KINDS, + PGC_SUSET, + 0, + &check_relkinds, + NULL, + NULL); + + + DefineCustomIntVariable("arenadata_toolkit.tracking_worker_naptime_sec", + "Toolkit background worker nap time", + NULL, + &tracking_worker_naptime_sec, + DEFAULT_NAPTIME, + 1, + MAX_NAPTIME, + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); +} diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c new file mode 100644 index 000000000000..46addc365c28 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -0,0 +1,450 @@ +#include "postgres.h" + +#include "access/xact.h" +#include "access/genam.h" +#include "access/heapam.h" +#include "catalog/pg_db_role_setting.h" +#include "catalog/pg_extension.h" +#include "catalog/indexing.h" +#include "cdb/cdbdisp_query.h" +#include "cdb/cdbdispatchresult.h" +#include "libpq-fe.h" +#include "postmaster/bgworker.h" +#include "storage/proc.h" +#include "storage/ipc.h" +#include "utils/snapmgr.h" +#include "utils/builtins.h" +#include "utils/fmgroids.h" + +#include "arenadata_toolkit_worker.h" +#include "arenadata_toolkit_guc.h" +#include "tf_shmem.h" + +#define EXTENSIONNAME "arenadata_toolkit" + +typedef struct +{ + Oid dbid; + bool get_full_snapshot_on_recovery; +} tracked_db_t; + +static BackgroundWorker worker; + +/* flags set by signal handlers */ +static volatile sig_atomic_t got_sighup = false; +static volatile sig_atomic_t got_sigterm = false; +static volatile sig_atomic_t got_sigusr1 = false; + +/* parse array of GUCs, find desired and analyze it */ +static bool +is_db_tracked(ArrayType *array) +{ + bool is_tracked = false; + Datum *elems; + bool *nulls; + int nelems; + + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, &nulls, &nelems); + for (int i = 0; i < nelems; i++) + { + char *s; + char *name; + char *value; + + if (nulls[i]) + continue; + + s = 
TextDatumGetCString(elems[i]); + ParseLongOption(s, &name, &value); + + if (value != NULL && + strcmp(name, "arenadata_toolkit.tracking_is_db_tracked") == 0 && + strcmp(value, "t") == 0) + is_tracked = true; + + /* ParseLongOption() hands back malloc'ed strings: release them on every path */ + free(name); + free(value); + pfree(s); + + if (is_tracked) + break; + } + + return is_tracked; +} + +/* + * Look up arenadata_toolkit.tracking_snapshot_on_recovery in a setconfig + * array; fall back to get_full_snapshot_on_recovery when it has no entry. + */ +static bool +full_snapshot_on_recovery(ArrayType *array) +{ + bool take_snapshot = false; + bool found = false; + Datum *elems; + bool *nulls; + int nelems; + + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, &nulls, &nelems); + + for (int i = 0; i < nelems; i++) + { + char *s; + char *name; + char *value; + + if (nulls[i]) + continue; + + s = TextDatumGetCString(elems[i]); + ParseLongOption(s, &name, &value); + + if (value != NULL && + strcmp(name, "arenadata_toolkit.tracking_snapshot_on_recovery") == 0) + { + found = true; + take_snapshot = (strcmp(value, "t") == 0); + } + + /* ParseLongOption() hands back malloc'ed strings: release them on every path */ + free(name); + free(value); + pfree(s); + + if (found) + break; + } + + if (!found) + take_snapshot = get_full_snapshot_on_recovery; + + return take_snapshot; +} + +/* Return the segindex of every segment that reports is_initialized = false. */ +static List * +get_uninitialized_segments() +{ + int i; + CdbPgResults cdb_pgresults = {NULL, 0}; + List *list = NIL; + + CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); + + for (i = 0; i < cdb_pgresults.numResults; i++) + { + struct pg_result *pgresult = cdb_pgresults.pg_results[i]; + + if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) + { + /* copy the message first: clearing the results frees pgresult */ + const char *status = PQresStatus(PQresultStatus(pgresult)); + char *detail = pstrdup(PQresultErrorMessage(pgresult)); + + cdbdisp_clearCdbPgResults(&cdb_pgresults); + elog(ERROR, "is_initialized: resultStatus not tuples_Ok: %s %s", + status, detail); + } + else + { + int32 segindex = 0; + bool is_initialized = false; + + segindex = atoi(PQgetvalue(pgresult, 0, 0)); + is_initialized = strcmp(PQgetvalue(pgresult, 0, 1), "t") == 0; + + elog(LOG, "get_uninitialized_segments, segindex: %d, is_initialized: %d",
segindex, is_initialized); + + if (!is_initialized) + list = lappend_int(list, segindex); + } + } + + cdbdisp_clearCdbPgResults(&cdb_pgresults); + + return list; +} + +/* + * Signal handler for SIGTERM + * Set a flag to let the main loop to terminate, and set our latch to wake + * it up. + */ +static void +tracking_sigterm(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_sigterm = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; +} + +/* + * Signal handler for SIGHUP + * Set a flag to tell the main loop to reread the config file, and set + * our latch to wake it up. + */ +static void +tracking_sighup(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_sighup = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; +} + +/* + * Signal handler for SIGUSR1 + * Set a flag to tell the launcher to handle extension ddl message + */ +static void +tracking_sigusr1(SIGNAL_ARGS) +{ + int save_errno = errno; + + got_sigusr1 = true; + if (MyProc) + SetLatch(&MyProc->procLatch); + + errno = save_errno; +} + +static bool +extension_created() +{ + bool exists = false; + Relation rel; + SysScanDesc scandesc; + HeapTuple tuple; + ScanKeyData entry[1]; + + rel = heap_open(ExtensionRelationId, AccessShareLock); + + ScanKeyInit(&entry[0], + Anum_pg_extension_extname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(EXTENSIONNAME)); + + scandesc = systable_beginscan(rel, ExtensionNameIndexId, true, + NULL, 1, entry); + + tuple = systable_getnext(scandesc); + + exists = HeapTupleIsValid(tuple); + + systable_endscan(scandesc); + heap_close(rel, AccessShareLock); + + return exists; +} + +static void +dispatch_register_to_master(List *dbids) +{ + ListCell *cell; + tracked_db_t *trackedDb; + + foreach(cell, dbids) + { + trackedDb = (tracked_db_t *) lfirst(cell); + + bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); + bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, + trackedDb->get_full_snapshot_on_recovery); 
+ } + + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + tf_shared_state->is_initialized = true; + LWLockRelease(tf_shared_state->bloom_set.lock); +} + +/* Dispatch tracking_register_db (and, when flagged, tracking_trigger_initial_snapshot) for every tracked db to the given segments; no-op for an empty segment list. */ +static void +dispatch_register_to_segments(List *dbids, List *uninitialized_segments) +{ + ListCell *cell; + tracked_db_t *trackedDb; + CdbPgResults cdb_pgresults = {NULL, 0}; + + if (uninitialized_segments == NIL) + return; + + foreach(cell, dbids) + { + trackedDb = (tracked_db_t *) lfirst(cell); + char *cmd = psprintf("select arenadata_toolkit.tracking_register_db(%u)", trackedDb->dbid); + + CdbDispatchCommandToSegments(cmd, 0, + uninitialized_segments, &cdb_pgresults); + /* The results are not inspected: release them right away so that + * repeated dispatches do not pile up result sets in the worker. */ + cdbdisp_clearCdbPgResults(&cdb_pgresults); + + if (trackedDb->get_full_snapshot_on_recovery) + { + cmd = psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%u)", trackedDb->dbid); + + CdbDispatchCommandToSegments(cmd, 0, + uninitialized_segments, &cdb_pgresults); + cdbdisp_clearCdbPgResults(&cdb_pgresults); + } + } +} + +static void +dispatch_register(bool dispatch_to_master, List *uninitialized_segments) +{ + Relation rel; + SysScanDesc scan; + HeapTuple tup; + List *dbids = NIL; + tracked_db_t *trackedDb; + + rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock); + scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + bool isnull; + Datum str_datum; + Datum oid_datum; + ArrayType *a; + + str_datum = heap_getattr(tup, Anum_pg_db_role_setting_setconfig, + RelationGetDescr(rel), &isnull); + if (isnull) + continue; + + oid_datum = heap_getattr(tup, Anum_pg_db_role_setting_setrole, + RelationGetDescr(rel), &isnull); + if (DatumGetObjectId(oid_datum) != InvalidOid) + continue; + + oid_datum = heap_getattr(tup, Anum_pg_db_role_setting_setdatabase, + RelationGetDescr(rel), &isnull); + if (DatumGetObjectId(oid_datum) == InvalidOid) + continue; + + a = DatumGetArrayTypeP(str_datum); + + if (is_db_tracked(a)) + { + trackedDb = (tracked_db_t *) palloc0(sizeof(tracked_db_t)); + + trackedDb->dbid = 
DatumGetObjectId(oid_datum); + trackedDb->get_full_snapshot_on_recovery = full_snapshot_on_recovery(a); + dbids = lappend(dbids, trackedDb); + } + } + + systable_endscan(scan); + heap_close(rel, RowExclusiveLock); + + if (dbids != NIL) + { + ListCell *cell; + + if (dispatch_to_master) + dispatch_register_to_master(dbids); + + dispatch_register_to_segments(dbids, uninitialized_segments); + + foreach(cell, dbids) + { + pfree(lfirst(cell)); + } + + list_free(dbids); + } + + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + tf_shared_state->bgworker_ready = true; + LWLockRelease(tf_shared_state->bloom_set.lock); +} + +/* scan pg_db_role_setting, find all databases, bind blooms if necessary */ +static void +arenadata_toolkit_worker(Datum main_arg) +{ + elog(LOG, "[arenadata toolkit] Starting background worker"); + + bool master_initialized = false; + + pqsignal(SIGHUP, tracking_sighup); + pqsignal(SIGTERM, tracking_sigterm); + pqsignal(SIGUSR1, tracking_sigusr1); + + BackgroundWorkerUnblockSignals(); + + BackgroundWorkerInitializeConnection(DB_FOR_COMMON_ACCESS, NULL); + + while (!got_sigterm) + { + int rc; + List *uninitialized_segments = NIL; + + CHECK_FOR_INTERRUPTS(); + + StartTransactionCommand(); + + if (extension_created()) + { + elog(LOG, "[arenadata toolkit] Getting uninitialized segments"); + uninitialized_segments = get_uninitialized_segments(uninitialized_segments); + + if (!master_initialized || list_length(uninitialized_segments) > 0) + { + elog(LOG, "Dispatching register to segments"); + dispatch_register(!master_initialized, uninitialized_segments); + list_free(uninitialized_segments); + uninitialized_segments = NIL; + master_initialized = true; + } + } + CommitTransactionCommand(); + + rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + tracking_worker_naptime_sec * 1000); + ResetLatch(&MyProc->procLatch); + + /* Emergency bailout if postmaster has died */ + if (rc & WL_POSTMASTER_DEATH) + { + ereport(LOG, 
(errmsg("[arenadata toolkit] bgworker is being terminated by postmaster death."))); + proc_exit(1); + } + + if (got_sighup) + { + got_sighup = false; + } + } + + if (got_sigterm) + ereport(LOG, (errmsg("[arenadata toolkit] stop worker process"))); + + proc_exit(0); +} + +void +arenadata_toolkit_worker_register(void) +{ + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; + worker.bgw_start_time = BgWorkerStart_ConsistentState; + worker.bgw_restart_time = BGW_DEFAULT_RESTART_INTERVAL; + worker.bgw_main = arenadata_toolkit_worker; + worker.bgw_notify_pid = 0; + worker.bgw_start_rule = NULL; + sprintf(worker.bgw_name, "arenadata_toolkit"); + + RegisterBackgroundWorker(&worker); +} diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c new file mode 100644 index 000000000000..b1e24acc3f92 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -0,0 +1,80 @@ +/* + * Simple bloom filter without using postgres primitives. 
+ */ +#include "bloom.h" + +#include <string.h> + +bloom_t * +bloom_init(const uint32_t bloom_size, void *mem) +{ + bloom_t *bloom = mem; + + bloom->size = bloom_size; + bloom_clear(bloom); + + return bloom; +} + +static uint32_t +calc_idx(bloom_t * bloom, uint64_t hash, uint8_t *bit_idx) +{ + uint64_t bloom_bit_idx = hash % ((uint64_t) 8 * bloom->size); /* widen first: 8 * size must not wrap in 32 bits */ + + *bit_idx = bloom_bit_idx % 8; + + return bloom_bit_idx / 8; +} + +int +bloom_isset(bloom_t * bloom, uint64_t hash) +{ + uint8_t bit_idx; + uint32_t byte_idx = calc_idx(bloom, hash, &bit_idx); + + return bloom->map[byte_idx] & (1 << bit_idx); +} + +void +bloom_set(bloom_t * bloom, uint64_t hash) +{ + uint8_t bit_idx; + uint32_t byte_idx = calc_idx(bloom, hash, &bit_idx); + + bloom->map[byte_idx] |= (1 << bit_idx); +} + +void +bloom_set_all(bloom_t * bloom) +{ + memset(bloom->map, 0xFF, bloom->size); + bloom->is_set_all = 1; +} + +void +bloom_clear(bloom_t * bloom) +{ + memset(bloom->map, 0, bloom->size); + bloom->is_set_all = 0; +} + +void +bloom_merge(bloom_t * dst, bloom_t * src) +{ + for (uint32_t i = 0; i < dst->size && i < src->size; i++) /* never index past the shorter map */ + dst->map[i] |= src->map[i]; + if (src->is_set_all) + dst->is_set_all = src->is_set_all; +} + +bloom_t * +bloom_copy(bloom_t * bloom, void *mem) +{ + bloom_t *copy; + + copy = bloom_init(bloom->size, mem); + memcpy(copy->map, bloom->map, bloom->size); + copy->is_set_all = bloom->is_set_all; + + return copy; +} diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c new file mode 100644 index 000000000000..66e7907ea6c7 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -0,0 +1,280 @@ +/* + * Set of blooms. Main entry point to find a bloom and work with it. + * Used to track create, extend, truncate events. 
+ */ + +#include "bloom_set.h" + +#include + +#define BLOOM_ENTRY_GET(set, i) ((void *) ((set)->bloom_entries + (i) * FULL_BLOOM_ENTRY_SIZE((set)->bloom_size))) + +static bloom_entry_t * bloom_entry_init(const uint32_t bloom_size, void *mem) +{ + bloom_entry_t *bloom_entry = mem; + + bloom_entry->lock = LWLockAssign(); + bloom_entry->dbid = InvalidOid; + (void)bloom_init(bloom_size, &bloom_entry->bloom); + + return bloom_entry; +} + +bloom_set_t * +bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, void *mem) +{ + bloom_set_t *bloom_set = mem; + + bloom_set->bloom_count = bloom_count; + bloom_set->bloom_size = bloom_size; + bloom_set->lock = LWLockAssign(); + + for (uint32_t i = 0; i < bloom_count; i++) + { + void *bloom_entry_mem = BLOOM_ENTRY_GET(bloom_set, i); + + (void)bloom_entry_init(bloom_size, bloom_entry_mem); + } + + return bloom_set; +} + +/* linear search: returns the entry bound to dbid (*found = true), else the first free entry (*found = false), else NULL */ +static bloom_entry_t * find_bloom_entry(bloom_set_t * bloom_set, Oid dbid, bool *found) +{ + bloom_entry_t *free_entry = NULL; + int i; + + *found = false; + + for (i = 0; i < bloom_set->bloom_count; i++) + { + bloom_entry_t *bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); + + if (bloom_entry->dbid == InvalidOid) + free_entry = free_entry ? free_entry : bloom_entry; /* keep scanning: dbid may be bound after a hole left by unbind */ + else if (bloom_entry->dbid == dbid) + { + *found = true; + return bloom_entry; + } + } + + return free_entry; +} + +/* bind not used filter to given dbid */ +bool +bloom_set_bind(bloom_set_t * bloom_set, Oid dbid) +{ + bloom_entry_t *bloom_entry; + bool found; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + LWLockRelease(bloom_set->lock); + + if (found) + return true; + else if (!bloom_entry) + { + return false; + } + + LWLockAcquire(bloom_set->lock, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + if (bloom_entry && !found) + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + 
LWLockRelease(bloom_set->lock); + + if (!bloom_entry) + { + elog(WARNING, "Our bloom filter was stolen :("); + return false; + } + + if (!found) + { + bloom_entry->dbid = dbid; + LWLockRelease(bloom_entry->lock); + elog(DEBUG1, "Bloom binded %d", dbid); + } + + + return true; +} + +bool +bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on) +{ + bloom_entry_t *bloom_entry; + bool found; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + { + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + } + LWLockRelease(bloom_set->lock); + + if (!found) + { + elog(LOG, "[arenadata toolkit] tracking_initial_snapshot Bloom filter not found"); + return false; + } + + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + { + if (on) + bloom_set_all(&bloom_entry->bloom); + else + bloom_clear(&bloom_entry->bloom); + } + LWLockRelease(bloom_entry->lock); + return true; +} + +/* unbind used filter by given dbid */ +void +bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid) +{ + bloom_entry_t *bloom_entry; + bool found; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + LWLockRelease(bloom_set->lock); + + if (!found) + return; + + LWLockAcquire(bloom_set->lock, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + if (bloom_entry && found) + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + LWLockRelease(bloom_set->lock); + + if (found) + { + bloom_entry->dbid = InvalidOid; + bloom_clear(&bloom_entry->bloom); + LWLockRelease(bloom_entry->lock); + elog(DEBUG1, "Bloom unbinded %d", dbid); + } +} + +uint64_t +bloom_set_calc_hash(const void *buf, size_t len) +{ + struct wide_hash + { + uint64_t i1; + uint64_t i2; + }; + struct wide_hash w_hash; + bool hash_res = pg_md5_binary(buf, len, &w_hash); + + Assert(hash_res); + + return w_hash.i1 ^ w_hash.i2; +} + +/* find bloom by dbid, set bit based on relNode hash */ +void +bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) +{ + bloom_entry_t 
*bloom_entry; + bool found; + uint64_t hash; + + LWLockAcquire(bloom_s->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_s, dbid, &found); + if (found) + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + LWLockRelease(bloom_s->lock); + + if (!found) + return; + + hash = bloom_set_calc_hash(&relNode, sizeof(relNode)); + bloom_set(&bloom_entry->bloom, hash); + LWLockRelease(bloom_entry->lock); + + elog(DEBUG1, "Bloom set %d %d", dbid, relNode); +} + +/* find bloom by dbid, copy all bytes to new filter, clear old (but keep it) */ +bloom_t * +bloom_set_move(bloom_set_t * bloom_set, Oid dbid, void *mem) +{ + bloom_entry_t *bloom_entry; + bool found; + bloom_t *copy; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + if (found) + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + LWLockRelease(bloom_set->lock); + + /* no bloom for the database */ + if (!found) + return NULL; + + copy = bloom_copy(&bloom_entry->bloom, mem); + bloom_clear(&bloom_entry->bloom); + LWLockRelease(bloom_entry->lock); + + elog(DEBUG1, "Bloom moved %d", dbid); + + return copy; +} + +/* find bloom by dbid, merge bytes from another bloom to it */ +bool +bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) +{ + bloom_entry_t *bloom_entry; + bool found; + + if (!m_bloom) + return false; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + if (found) + LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + LWLockRelease(bloom_set->lock); + + if (!found) + return false; + + bloom_merge(&bloom_entry->bloom, m_bloom); + LWLockRelease(bloom_entry->lock); + + elog(DEBUG1, "Bloom merged %d", dbid); + + return true; +} + +bool +bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) +{ + bloom_entry_t *bloom_entry; + bool found; + bool is_triggered; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid, &found); + 
LWLockRelease(bloom_set->lock); + + if (!found) + { + return false; + } + + LWLockAcquire(bloom_entry->lock, LW_SHARED); + is_triggered = bloom_entry->bloom.is_set_all; + LWLockRelease(bloom_entry->lock); + + return is_triggered; +} diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/dbsize.c similarity index 85% rename from gpcontrib/arenadata_toolkit/arenadata_toolkit.c rename to gpcontrib/arenadata_toolkit/src/dbsize.c index 11a5bd9868ff..9731f744f44d 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -1,30 +1,27 @@ -#include #include -#include -#include #include "postgres.h" #include "access/aomd.h" #include "access/heapam.h" +#include "catalog/pg_tablespace.h" #include "cdb/cdbvars.h" #include "common/relpath.h" #include "fmgr.h" #include "funcapi.h" +#include "libpq/hba.h" #include "miscadmin.h" -#include "storage/fd.h" #include "nodes/execnodes.h" -#include "cdb/cdbvars.h" -#include "libpq/hba.h" +#include "storage/fd.h" +#include "storage/lock.h" #include "utils/builtins.h" #include "utils/relfilenodemap.h" #include "utils/timestamp.h" #include "utils/elog.h" #include "utils/rel.h" #include "utils/relcache.h" -#include "catalog/pg_tablespace.h" -#include "storage/lock.h" +#include "dbsize.h" PG_MODULE_MAGIC; /* @@ -35,11 +32,11 @@ PG_MODULE_MAGIC; static int64 calculate_relation_size(Relation rel, ForkNumber forknum); static int64 get_heap_storage_total_bytes(Relation rel, - ForkNumber forknum, char *relpath); + ForkNumber forknum, char *relpath); static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, - const char *relpath, int segNo); + const char *relpath, int segNo); static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); /* @@ -162,8 +159,8 @@ calculate_ao_storage_perSegFile(const int 
segno, void *ctx) static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum) { - Relation toastRel = relation_open(toastrelid, AccessShareLock); - int64 size = calculate_relation_size(toastRel, forknum); + Relation toastRel = relation_open(toastrelid, AccessShareLock); + int64 size = calculate_relation_size(toastRel, forknum); relation_close(toastRel, AccessShareLock); return size; @@ -223,7 +220,7 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) * operations (for ex: CTAS) zero segment will store tuples). Thus * calculate segno=0 manually. */ - (void) calculate_ao_storage_perSegFile(0, &ctx); + (void)calculate_ao_storage_perSegFile(0, &ctx); ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); return ctx.total_size; @@ -231,37 +228,41 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) typedef struct { - char *datpath; - DIR *dirdesc; - TupleDesc tupdesc; -} user_fctx_data; + char *datpath; + DIR *dirdesc; + TupleDesc tupdesc; +} user_fctx_data; /* * Name of file must be "XXX.X" or "XXX" * where XXX is Oid. OID must be not more than OID_MAX. 
*/ -static Oid get_oid_from_filename(const char *filename) +static Oid +get_oid_from_filename(const char *filename) { - unsigned long int oid, segment; - char trailer; + unsigned long int oid, + segment; + char trailer; + + int count = sscanf(filename, "%lu.%lu%c", &oid, &segment, &trailer); - int count = sscanf(filename, "%lu.%lu%c", &oid, &segment, &trailer); if (count < 1 || count > 2) return InvalidOid; if (oid > OID_MAX) return InvalidOid; - return (Oid) oid; + return (Oid)oid; } PG_FUNCTION_INFO_V1(adb_get_relfilenodes); -Datum adb_get_relfilenodes(PG_FUNCTION_ARGS) +Datum +adb_get_relfilenodes(PG_FUNCTION_ARGS) { - Oid datoid = MyDatabaseId; - Oid tablespace_oid = PG_GETARG_OID(0); + Oid datoid = MyDatabaseId; + Oid tablespace_oid = PG_GETARG_OID(0); - struct dirent *direntry; - user_fctx_data *fctx_data; + struct dirent *direntry; + user_fctx_data *fctx_data; FuncCallContext *funcctx; if (tablespace_oid == GLOBALTABLESPACE_OID) @@ -280,13 +281,14 @@ Datum adb_get_relfilenodes(PG_FUNCTION_ARGS) if (!fctx_data->dirdesc) { - /* Nothing to do: empty tablespace (maybe it has been just created)*/ + /* Nothing to do: empty tablespace (maybe it has been just + * created) */ MemoryContextSwitchTo(oldcontext); SRF_RETURN_DONE(funcctx); } if (get_call_result_type(fcinfo, NULL, &fctx_data->tupdesc) - != TYPEFUNC_COMPOSITE) + != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("return type must be a row type"))); @@ -303,12 +305,12 @@ Datum adb_get_relfilenodes(PG_FUNCTION_ARGS) while ((direntry = ReadDir(fctx_data->dirdesc, fctx_data->datpath)) != NULL) { struct stat fst; - Datum values[10]; - bool nulls[10]; - char *filename; - Oid reloid; - Oid relfilenode_oid; - HeapTuple tuple; + Datum values[10]; + bool nulls[10]; + char *filename; + Oid reloid; + Oid relfilenode_oid; + HeapTuple tuple; CHECK_FOR_INTERRUPTS(); @@ -366,7 +368,33 @@ Datum adb_get_relfilenodes(PG_FUNCTION_ARGS) } PG_FUNCTION_INFO_V1(adb_hba_file_rules); -Datum 
adb_hba_file_rules(PG_FUNCTION_ARGS) +Datum +adb_hba_file_rules(PG_FUNCTION_ARGS) { return pg_hba_file_rules(fcinfo); } + +/* sum of the main, FSM, visibility-map and init fork sizes of relid; 0 if it cannot be opened or has no relfilenode */ +int64 +dbsize_calc_size(Oid relid) +{ + Relation rel; + int64 size = 0; + + rel = try_relation_open(relid, AccessShareLock, false); + + if (rel == NULL) + return size; + + if (rel->rd_node.relNode != 0) /* nothing to measure otherwise, but the relation must still be closed below */ + { + size += calculate_relation_size(rel, MAIN_FORKNUM); + size += calculate_relation_size(rel, FSM_FORKNUM); + size += calculate_relation_size(rel, VISIBILITYMAP_FORKNUM); + size += calculate_relation_size(rel, INIT_FORKNUM); + } + + relation_close(rel, AccessShareLock); + + return size; +} diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c new file mode 100644 index 000000000000..b54f7ce37e4f --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -0,0 +1,223 @@ +/* + * Track unlink hook events. + */ + +#include "drops_track.h" + +#include "lib/ilist.h" +#include "storage/lwlock.h" +#include "storage/ipc.h" +#include "storage/shmem.h" + +#include "arenadata_toolkit_guc.h" + +#define TRACK_NODE_GET(track, i) ((void *) ((track)->nodes + (i) * sizeof(drops_track_node_t))) + +typedef struct +{ + Oid relNode; + Oid dbNode; +} track_relfilenode_t; + +/* doubly linked list node of dropped file nodes */ +typedef struct +{ + dlist_node node; + uint32_t idx; /* idx in 'nodes' array; just for info */ + track_relfilenode_t relfileNode; +} drops_track_node_t; + + +typedef struct +{ + LWLock *lock; + dlist_head head; + uint32_t used_count; /* count of used nodes */ + int unused_idx; /* next unused idx or -1 if unknown; for + * faster search */ + char nodes[FLEXIBLE_ARRAY_MEMBER]; /* array of drops_track_node_t */ +} drops_track_t; + +static shmem_startup_hook_type next_shmem_startup_hook = NULL; +static drops_track_t * drops_track; + +static Size +drops_track_calc_size() +{ + Size size; + + size = offsetof(drops_track_t, nodes); + size = add_size(size, mul_size(drops_count, 
sizeof(drops_track_node_t))); + + return size; +} + +static void +drops_track_hook(void) +{ + bool found; + Size size = drops_track_calc_size(); + + drops_track = ShmemInitStruct("adb_track_files_drops", size, &found); + + if (!found) + { + drops_track->lock = LWLockAssign(); + drops_track->used_count = 0; + drops_track->unused_idx = 0; + dlist_init(&drops_track->head); + + for (uint32_t i = 0; i < drops_count; i++) + { + drops_track_node_t *track_node = TRACK_NODE_GET(drops_track, i); + + track_node->relfileNode.relNode = InvalidOid; + track_node->relfileNode.dbNode = InvalidOid; + track_node->idx = i; + } + } + + if (next_shmem_startup_hook) + next_shmem_startup_hook(); +} + +void +drops_track_init(void) +{ + RequestAddinLWLocks(1); + RequestAddinShmemSpace(drops_track_calc_size()); + + next_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = drops_track_hook; +} + +void +drops_track_deinit(void) +{ + shmem_startup_hook = next_shmem_startup_hook; +} + +/* find unused node; this should be heavily reworked or optimized */ +static drops_track_node_t * find_empty_node() +{ + drops_track_node_t *track_node = NULL; + + if (drops_track->unused_idx >= 0) + { + track_node = TRACK_NODE_GET(drops_track, drops_track->unused_idx); + drops_track->unused_idx++; + if (drops_track->unused_idx >= drops_count) + drops_track->unused_idx = -1; + else + { + drops_track_node_t *unused_node = TRACK_NODE_GET(drops_track, drops_track->unused_idx); + + if (unused_node->relfileNode.relNode != InvalidOid) + drops_track->unused_idx = -1; + } + } + else + { + for (uint32_t i = 0; i < drops_count; i++) + { + track_node = TRACK_NODE_GET(drops_track, i); + if (track_node->relfileNode.relNode == InvalidOid) + break; + } + } + return track_node; +} + +/* add relNode to track; old node is dropped if no space */ +void +drops_track_add(RelFileNode relfileNode) +{ + drops_track_node_t *track_node; + + LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + + if (drops_track->used_count >= 
drops_count) + { + track_node = (drops_track_node_t *) dlist_pop_head_node(&drops_track->head); + elog(DEBUG1, "No space for drop track. Oldest node removed (%d).", track_node->relfileNode.relNode); + } + else + { + track_node = find_empty_node(); + drops_track->used_count++; + Assert(track_node); + } + + track_node->relfileNode.relNode = relfileNode.relNode; + track_node->relfileNode.dbNode = relfileNode.dbNode; + dlist_push_tail(&drops_track->head, &track_node->node); + + LWLockRelease(drops_track->lock); +} + +/* move relfilenodes from track to list */ +List * +drops_track_move(Oid dbid) +{ + List *oids = NIL; + dlist_mutable_iter iter; + + LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + + if (drops_track->used_count == 0) + { + LWLockRelease(drops_track->lock); + return oids; + } + + dlist_foreach_modify(iter, &drops_track->head) + { + drops_track_node_t *track_node = (drops_track_node_t *) iter.cur; + + /* newest in head, oldest in tail */ + if (track_node->relfileNode.dbNode == dbid) + { + oids = lcons_oid(track_node->relfileNode.relNode, oids); + drops_track->used_count--; + track_node->relfileNode.relNode = InvalidOid; + track_node->relfileNode.dbNode = InvalidOid; + dlist_delete(&track_node->node); + } + } + + LWLockRelease(drops_track->lock); + + return oids; +} + +/* undo moving of relfilenodes; old nodes are dropped if no space */ +void +drops_track_move_undo(List *oids, Oid dbid) +{ + ListCell *cell; + + if (oids == NIL) + return; + + LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + + foreach(cell, oids) + { + Oid oid = lfirst_oid(cell); + drops_track_node_t *track_node; + + if (drops_track->used_count >= drops_count) + { + elog(DEBUG1, "No space for move back. 
Oldest node removed (%d).", oid); + continue; + } + + track_node = find_empty_node(); + drops_track->used_count++; + track_node->relfileNode.relNode = oid; + track_node->relfileNode.dbNode = dbid; + dlist_push_head(&drops_track->head, &track_node->node); + } + + LWLockRelease(drops_track->lock); +} diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c b/gpcontrib/arenadata_toolkit/src/file_hook.c new file mode 100644 index 000000000000..c29042169358 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -0,0 +1,147 @@ +/* + * File hooks to track events. + */ + +#include "file_hook.h" + +#include "postgres.h" +#include "storage/smgr.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "utils/lsyscache.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "catalog/objectaccess.h" + +#include "tf_shmem.h" +#include "arenadata_toolkit_guc.h" +#include "drops_track.h" + +static file_create_hook_type next_file_create_hook = NULL; +static file_extend_hook_type next_file_extend_hook = NULL; +static file_truncate_hook_type next_file_truncate_hook = NULL; +static file_unlink_hook_type next_file_unlink_hook = NULL; + +static bloom_t * non_committed_bloom = NULL; +static Oid non_committed_dbid = InvalidOid; + +static bool +is_file_node_trackable(RelFileNodeBackend * rnode) +{ + return !(rnode->node.dbNode == InvalidOid); +} + +static void +file_node_set(RelFileNodeBackend * rnode) +{ + if (!is_file_node_trackable(rnode)) + return; + + bloom_set_set(&tf_shared_state->bloom_set, rnode->node.dbNode, rnode->node.relNode); +} + +/* 'create' events stored in local bloom and merged only on commit, when changes are already in catalog */ +static void +xact_end_create_callback(XactEvent event, void *arg) +{ + if (event != XACT_EVENT_COMMIT && event != XACT_EVENT_ABORT) + return; + + elog(DEBUG1, "xact_end_create_callback"); + + if (event == XACT_EVENT_COMMIT) + bloom_set_merge(&tf_shared_state->bloom_set, non_committed_dbid, 
non_committed_bloom); + + pfree(non_committed_bloom); + non_committed_bloom = NULL; + non_committed_dbid = InvalidOid; +} + +static void +hook_create(RelFileNodeBackend rnode) +{ + uint64_t hash; + + if (next_file_create_hook) + next_file_create_hook(rnode); + + if (!is_file_node_trackable(&rnode)) + return; + + if (!non_committed_bloom) + { + non_committed_bloom = + MemoryContextAlloc(TopMemoryContext, FULL_BLOOM_SIZE(bloom_size)); + bloom_init(bloom_size, non_committed_bloom); + non_committed_dbid = rnode.node.dbNode; + RegisterXactCallbackOnce(xact_end_create_callback, NULL); + } + + elog(DEBUG1, "hook_create: %d %d %d %d", + rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + + hash = bloom_set_calc_hash(&rnode.node.relNode, sizeof(rnode.node.relNode)); + bloom_set(non_committed_bloom, hash); + +} + +static void +hook_extend(RelFileNodeBackend rnode) +{ + if (next_file_extend_hook) + next_file_extend_hook(rnode); + + elog(DEBUG1, "hook_extend: %d %d %d %d", + rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + + file_node_set(&rnode); +} + +static void +hook_truncate(RelFileNodeBackend rnode) +{ + if (next_file_truncate_hook) + next_file_truncate_hook(rnode); + + elog(DEBUG1, "hook_truncate: %d %d %d %d", + rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + + file_node_set(&rnode); +} + +static void +hook_unlink(RelFileNodeBackend rnode) +{ + if (next_file_unlink_hook) + next_file_unlink_hook(rnode); + + elog(DEBUG1, "hook_unlink: %d %d %d %d", + rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + + drops_track_add(rnode.node); +} + +void +file_hook_init() +{ + next_file_create_hook = file_create_hook; + file_create_hook = hook_create; + + next_file_extend_hook = file_extend_hook; + file_extend_hook = hook_extend; + + next_file_truncate_hook = file_truncate_hook; + file_truncate_hook = hook_truncate; + + next_file_unlink_hook = file_unlink_hook; + 
#ifndef BLOOM_H
#define BLOOM_H

#include <stddef.h>				/* offsetof */
#include <stdint.h>				/* uint32_t, uint64_t */

/*
 * Total number of bytes occupied by a bloom_t whose 'map' holds 'size' bytes.
 *
 * The argument is parenthesized so that an expression argument such as
 * FULL_BLOOM_SIZE(n << 1) or FULL_BLOOM_SIZE(cond ? a : b) expands with the
 * intended precedence.
 */
#define FULL_BLOOM_SIZE(size) (offsetof(bloom_t, map) + (size))

/* Bloom filter: fixed header followed by the bit map itself. */
typedef struct
{
	uint32_t	size;			/* size in bytes of 'map' */
	int			is_set_all;		/* set when all bits were set by bloom_set_all */
	char		map[];			/* filter itself, array of bytes */
} bloom_t;

/*
 * 'mem' arguments below are caller-provided buffers; callers in this module
 * size them with FULL_BLOOM_SIZE().
 */
bloom_t *bloom_init(const uint32_t bloom_size, void *mem);
int bloom_isset(bloom_t * bloom, uint64_t hash);
void bloom_set(bloom_t * bloom, uint64_t hash);
void bloom_set_all(bloom_t * bloom);
void bloom_clear(bloom_t * bloom);
void bloom_merge(bloom_t * dst, bloom_t * src);
bloom_t *bloom_copy(bloom_t * bloom, void *mem);

#endif							/* BLOOM_H */
*mem); +bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); +bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on); +bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid); + +#endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/dbsize.h b/gpcontrib/arenadata_toolkit/src/include/dbsize.h new file mode 100644 index 000000000000..228cea9806db --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/dbsize.h @@ -0,0 +1,6 @@ +#ifndef DBSIZE_H +#define DBSIZE_H + +int64 dbsize_calc_size(Oid relid); + +#endif /* DBSIZE_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/drops_track.h b/gpcontrib/arenadata_toolkit/src/include/drops_track.h new file mode 100644 index 000000000000..e30c65f05632 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/drops_track.h @@ -0,0 +1,16 @@ +#ifndef DROPS_TRACK_H +#define DROPS_TRACK_H + +#include "postgres.h" +#include "nodes/pg_list.h" +#include "storage/relfilenode.h" + +void drops_track_init(void); +void drops_track_deinit(void); + +void drops_track_add(RelFileNode relNode); +List *drops_track_move(Oid dbid); +void drops_track_move_undo(List *oids, Oid dbid); + + +#endif /* DROPS_TRACK_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/file_hook.h b/gpcontrib/arenadata_toolkit/src/include/file_hook.h new file mode 100644 index 000000000000..d5f4c3048b10 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/file_hook.h @@ -0,0 +1,7 @@ +#ifndef FILE_HOOK_H +#define FILE_HOOK_H + +void file_hook_init(void); +void file_hook_deinit(void); + +#endif /* FILE_HOOK_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h new file mode 100644 index 000000000000..98beae2abb68 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -0,0 +1,18 @@ +#ifndef TF_SHMEM_H +#define TF_SHMEM_H + +#include "bloom_set.h" + +typedef struct +{ + bool bgworker_ready; 
/* is bgworker complete with its job */ + bool is_initialized; + bloom_set_t bloom_set; +} tf_shared_state_t; + +extern tf_shared_state_t * tf_shared_state; + +void tf_shmem_init(void); +void tf_shmem_deinit(void); + +#endif /* TF_SHMEM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c new file mode 100644 index 000000000000..9615c60b8682 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -0,0 +1,56 @@ +#include "tf_shmem.h" + +#include "storage/ipc.h" +#include "storage/shmem.h" + +#include "arenadata_toolkit_guc.h" +#include "bloom_set.h" + +static shmem_startup_hook_type next_shmem_startup_hook = NULL; +tf_shared_state_t *tf_shared_state; + +static Size +tf_shmem_calc_size(void) +{ + Size size; + + size = offsetof(tf_shared_state_t, bloom_set); + size = add_size(size, FULL_BLOOM_SET_SIZE(bloom_size, db_track_count)); + + return size; +} + +static void +tf_shmem_hook(void) +{ + bool found; + Size size = tf_shmem_calc_size(); + + tf_shared_state = ShmemInitStruct("toolkit_track_files", size, &found); + + if (!found) + { + tf_shared_state->bgworker_ready = false; + bloom_set_init(db_track_count, bloom_size, &tf_shared_state->bloom_set); + } + + if (next_shmem_startup_hook) + next_shmem_startup_hook(); +} + +void +tf_shmem_init() +{ + /* don't forget to add additional locks */ + RequestAddinLWLocks(1 + db_track_count); + RequestAddinShmemSpace(tf_shmem_calc_size()); + + next_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = tf_shmem_hook; +} + +void +tf_shmem_deinit(void) +{ + shmem_startup_hook = next_shmem_startup_hook; +} diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c new file mode 100644 index 000000000000..3c34e4e7c58a --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -0,0 +1,1228 @@ +#include "postgres.h" + + +#include "access/genam.h" +#include "access/xact.h" +#include "catalog/indexing.h" 
/*
 * Per-call-series state of tracking_get_track_main(): the open pg_class
 * relation and the scan over it.  Both are reset to NULL once the scan has
 * been exhausted and closed.
 */
typedef struct
{
	Relation	pg_class_rel;	/* pg_class, opened with AccessShareLock */
	SysScanDesc scan;			/* scan over pg_class; NULL when finished */
} tf_main_func_state_t;

/*
 * Backend-local state shared by all tracking_get_track_main() calls of one
 * transaction.  It is released (and, on abort, handed back to shared state)
 * by xact_end_get_callback().
 */
typedef struct
{
	bloom_t    *bloom;			/* filter moved out of the shared bloom set
								 * for MyDatabaseId; NULL when nothing was
								 * fetched in this transaction */
	bloom_t    *rollback_bloom; /* copy of 'bloom' taken when the function is
								 * called again within the same transaction;
								 * merged back instead of 'bloom' on abort */
	List	   *drops;			/* list returned by drops_track_move();
								 * elements are read with lfirst_oid() */
	ListCell   *next_drop;		/* next element of 'drops' to be returned */
	List	   *relkinds;		/* tracked relkinds, list of C strings */
	List	   *relstorages;	/* tracked relstorages, list of C strings */
	List	   *schema_oids;	/* OIDs of tracked schemas */
} tf_get_global_state_t;

/*
 * Per-call-series state of tracking_get_track(): results dispatched to the
 * segments, rows fetched locally via SPI, and the input-function metadata
 * used to convert textual segment results into datums.
 */
typedef struct
{
	CdbPgResults cdb_results;	/* filled in by CdbDispatchCommand() */
	int			current_result; /* index into cdb_results.pg_results */
	int			current_row;	/* row within the current pg_result */

	SPITupleTable *entry_result;	/* SPI_tuptable of the local query */
	uint64		entry_processed;	/* SPI_processed of the local query */
	int			entry_current_row;	/* next row of entry_result to return */

	FmgrInfo   *inputFuncInfos; /* per-column type input functions */
	Oid		   *typIOParams;	/* per-column typioparam for the above */
} tf_get_func_state_t;

/* Field order: bloom, rollback_bloom, drops, next_drop, relkinds, relstorages, schema_oids */
tf_get_global_state_t tf_get_global_state = {NULL, NULL, NIL, NULL, NIL, NIL, NIL};
resources; if with abort, move bloom and drops back */ +static void +xact_end_get_callback(XactEvent event, void *arg) +{ + if (event != XACT_EVENT_COMMIT && event != XACT_EVENT_ABORT) + return; + + if (tf_get_global_state.bloom == NULL) + return; + + if (event == XACT_EVENT_ABORT) + { + if (tf_get_global_state.rollback_bloom) + bloom_set_merge(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.rollback_bloom); + else + bloom_set_merge(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom); + drops_track_move_undo(tf_get_global_state.drops, MyDatabaseId); + } + + if (tf_get_global_state.bloom) + { + pfree(tf_get_global_state.bloom); + tf_get_global_state.bloom = NULL; + } + + if (tf_get_global_state.rollback_bloom) + { + pfree(tf_get_global_state.rollback_bloom); + tf_get_global_state.rollback_bloom = NULL; + } + + if (tf_get_global_state.drops != NIL) + { + pfree(tf_get_global_state.drops); + tf_get_global_state.drops = NIL; + tf_get_global_state.next_drop = NULL; + } + + if (tf_get_global_state.relkinds != NIL) + { + pfree(tf_get_global_state.relkinds); + tf_get_global_state.relkinds = NIL; + } + + if (tf_get_global_state.relstorages != NIL) + { + pfree(tf_get_global_state.relstorages); + tf_get_global_state.relstorages = NIL; + } + + if (tf_get_global_state.schema_oids != NIL) + { + pfree(tf_get_global_state.schema_oids); + tf_get_global_state.schema_oids = NIL; + } + +} + +static List * +split_string_to_list(const char *input) +{ + List *result = NIL; + char *input_copy; + char *token; + + if (input == NULL) + return NIL; + + input_copy = pstrdup(input); + + token = strtok(input_copy, ","); + + while (token != NULL) + { + if (*token != '\0') + { + result = lappend(result, pstrdup(token)); + } + + token = strtok(NULL, ","); + } + + pfree(input_copy); + + return result; +} + +static void +get_filters_from_guc() +{ + Relation rel; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple tuple; + char *current_schemas = NULL; + char 
*current_relkinds = NULL; + char *current_relstorages = NULL; + List *schema_names = NIL; + ListCell *lc; + + rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock); + ScanKeyInit(&skey[0], + Anum_pg_db_role_setting_setdatabase, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(MyDatabaseId)); + + /* + * Lookup for not role specific configuration + */ + ScanKeyInit(&skey[1], + Anum_pg_db_role_setting_setrole, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + scan = systable_beginscan(rel, DbRoleSettingDatidRolidIndexId, true, NULL, 2, skey); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + bool isnull; + Datum str_datum; + + str_datum = heap_getattr(tuple, Anum_pg_db_role_setting_setconfig, + RelationGetDescr(rel), &isnull); + if (!isnull) + { + ArrayType *array; + Datum *elems; + bool *nulls; + int nelems; + + array = DatumGetArrayTypeP(str_datum); + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, &nulls, &nelems); + for (int i = 0; i < nelems; i++) + { + if (nulls[i]) + continue; + char *str = TextDatumGetCString(elems[i]); + + if (strncmp(str, "arenadata_toolkit.tracking_schemas=", 35) == 0) + current_schemas = pstrdup(str + 35); + else if (strncmp(str, "arenadata_toolkit.tracking_relstorages=", 39) == 0) + current_relstorages = pstrdup(str + 39); + else if (strncmp(str, "arenadata_toolkit.tracking_relkinds=", 36) == 0) + current_relkinds = pstrdup(str + 36); + pfree(str); + } + } + } + systable_endscan(scan); + heap_close(rel, RowExclusiveLock); + + if (current_schemas) + schema_names = split_string_to_list(current_schemas); + else + schema_names = split_string_to_list(tracked_schemas); + if (current_relstorages) + tf_get_global_state.relstorages = split_string_to_list(current_relstorages); + else + tf_get_global_state.relstorages = split_string_to_list(tracked_rel_storages); + if (current_relkinds) + tf_get_global_state.relkinds = split_string_to_list(current_relkinds); + else + 
tf_get_global_state.relkinds = split_string_to_list(tracked_rel_kinds); + + foreach(lc, schema_names) + { + Oid nspOid; + char *name = (char *)lfirst(lc); + + nspOid = GetSysCacheOid1(NAMESPACENAME, CStringGetDatum(name)); + + if (!OidIsValid(nspOid)) + { + elog(DEBUG1, "[tracking_get_track] schema \"%s\" does not exist", name); + continue; + } + + tf_get_global_state.schema_oids = lappend_oid(tf_get_global_state.schema_oids, nspOid); + } + + if (schema_names) + pfree(schema_names); +} + + +static bool +schema_is_tracked(Oid schema) +{ + ListCell *lc; + + if (tf_get_global_state.schema_oids == NIL) + return false; + + foreach(lc, tf_get_global_state.schema_oids) + { + Oid tracked_schema = lfirst_oid(lc); + + if (tracked_schema == schema) + return true; + } + + return false; +} + +static bool +relkind_is_tracked(char relkind) +{ + ListCell *lc; + + if (tf_get_global_state.relkinds == NIL) + return false; + + foreach(lc, tf_get_global_state.relkinds) + { + char *tracked_relkind = (char *)lfirst(lc); + + if (tracked_relkind != NULL && *tracked_relkind == relkind) + return true; + } + + return false; +} + +static bool +relstorage_is_tracked(char relstorage) +{ + ListCell *lc; + + if (tf_get_global_state.relstorages == NIL) + return false; + + foreach(lc, tf_get_global_state.relstorages) + { + char *tracked_relstorage = (char *)lfirst(lc); + + if (tracked_relstorage != NULL && *tracked_relstorage == relstorage) + return true; + } + + return false; +} + +/* + * Main logic for getting the size track. 
+ */ +Datum +tracking_get_track_main(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + tf_main_func_state_t *state; + HeapTuple result; + Datum datums[9]; + bool nulls[9] = {0}; + + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + if (!tf_shared_state->bgworker_ready && Gp_role == GP_ROLE_DISPATCH) + { + LWLockRelease(tf_shared_state->bloom_set.lock); + elog(ERROR, "Can't get track before bgworker updates the tracking status."); + } + LWLockRelease(tf_shared_state->bloom_set.lock); + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + + RegisterXactCallbackOnce(xact_end_get_callback, NULL); + + oldcontext = MemoryContextSwitchTo(CurTransactionContext); + + if (tf_get_global_state.bloom == NULL) + { + tf_get_global_state.bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); + if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) + elog(ERROR, "database %u is not tracked", MyDatabaseId); + } + else + { + /* + * This code is needed for the cases when there are several track + * requests within the same transaction. rollback_bloom stands for + * preserving initial filter state at the moment of the first + * function call within the transaction. 
+ */ + if (tf_get_global_state.rollback_bloom == NULL) + { + tf_get_global_state.rollback_bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); + bloom_copy(tf_get_global_state.bloom, tf_get_global_state.rollback_bloom); + } + bloom_clear(tf_get_global_state.bloom); + if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) + elog(ERROR, "database %u is not tracked", MyDatabaseId); + } + /* initial snapshot shouldn't return drops */ + if (tf_get_global_state.bloom && !tf_get_global_state.bloom->is_set_all) + { + tf_get_global_state.drops = drops_track_move(MyDatabaseId); + tf_get_global_state.next_drop = list_head(tf_get_global_state.drops); + } + + /* + * Let's retrieve tracking information only once for the transaction. + */ + if (tf_get_global_state.schema_oids == NIL) + get_filters_from_guc(); + + if (tf_get_global_state.relstorages == NIL || + tf_get_global_state.relkinds == NIL || + tf_get_global_state.schema_oids == NIL) + elog(ERROR, "cannot get tracking configuration (schemas, relkinds, reltorage) for database %u", MyDatabaseId); + + MemoryContextSwitchTo(oldcontext); + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + funcctx->tuple_desc = CreateTemplateTupleDesc(9, false); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)2, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)3, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)4, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)5, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)6, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)7, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)8, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)9, 
"relstorage", CHAROID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); + + state = (tf_main_func_state_t *) palloc0(sizeof(tf_main_func_state_t)); + funcctx->user_fctx = (void *)state; + + if (tf_get_global_state.bloom) + { + state->pg_class_rel = heap_open(RelationRelationId, AccessShareLock); + state->scan = systable_beginscan(state->pg_class_rel, InvalidOid, false, NULL, 0, NULL); + } + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + state = funcctx->user_fctx; + + while (true) + { + Oid filenode; + Oid relnamespace; + char relkind; + char relstorage; + HeapTuple pg_class_tuple; + uint64_t hash; + + if (!state->scan) + break; + + pg_class_tuple = systable_getnext(state->scan); + + if (!HeapTupleIsValid(pg_class_tuple)) + { + systable_endscan(state->scan); + heap_close(state->pg_class_rel, AccessShareLock); + state->scan = NULL; + state->pg_class_rel = NULL; + break; + } + + datums[6] = heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); + relnamespace = DatumGetObjectId(datums[6]); + + if (!schema_is_tracked(relnamespace)) + continue; + + datums[7] = heap_getattr(pg_class_tuple, Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); + relkind = CharGetDatum(datums[7]); + + if (!relkind_is_tracked(relkind)) + continue; + + datums[8] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); + relstorage = CharGetDatum(datums[8]); + + if (!relstorage_is_tracked(relstorage)) + continue; + + datums[0] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); + + datums[1] = heap_getattr(pg_class_tuple, Anum_pg_class_relname, RelationGetDescr(state->pg_class_rel), &nulls[1]); + + datums[2] = heap_getattr(pg_class_tuple, Anum_pg_class_relfilenode, RelationGetDescr(state->pg_class_rel), &nulls[2]); + filenode = DatumGetObjectId(datums[2]); + + if (nulls[2]) + continue; + + /* Bloom filter check */ + 
hash = bloom_set_calc_hash(&filenode, sizeof(filenode)); + if (!bloom_isset(tf_get_global_state.bloom, hash)) + continue; + + /* + * Taking a lock and calling relation_open in dbsize_calc_size is + * quite suboptimal. The size calculation strategy should be revised + * in future. + */ + datums[3] = Int64GetDatum(dbsize_calc_size(HeapTupleGetOid(pg_class_tuple))); + datums[4] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); + datums[5] = Int32GetDatum(GpIdentity.segindex); + + result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); + } + + while (true) + { + Oid filenode; + + if (!tf_get_global_state.next_drop) + break; + + filenode = lfirst_oid(tf_get_global_state.next_drop); + tf_get_global_state.next_drop = lnext(tf_get_global_state.next_drop); + + nulls[0] = true; + nulls[1] = true; + datums[2] = filenode; + datums[3] = Int64GetDatum(0); + datums[4] = CharGetDatum('d'); + datums[5] = Int32GetDatum(GpIdentity.segindex); + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + + result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Function used in "arenadata_toolkit.tables_track" view. In order to keep bloom filter + * in consistent state across segments this function dispatches main tracking logic to the + * segments in a distributed transaction. + */ +Datum +tracking_get_track(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + tf_get_func_state_t *state; + HeapTuple result; + Datum values[9]; + bool nulls[9] = {0}; + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext = CurrentMemoryContext; + + funcctx = SRF_FIRSTCALL_INIT(); + + /* + * If we use CdbDispatchCommandToSegments, we will face the problem + * that entry db slice won't be part of global transaction and + * immediately commits, killing the chance for bloom filter to + * restore. 
Therefore, the spi approach for retrieving track at -1 + * segment is chosen. + */ + if (SPI_connect() != SPI_OK_CONNECT) + ereport(ERROR, (errmsg("SPI_connect failed"))); + if (SPI_execute("SELECT * FROM arenadata_toolkit.tracking_get_track_main()", true, 0) != SPI_OK_SELECT) + ereport(ERROR, (errmsg("SPI_execute failed"))); + + MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + state = (tf_get_func_state_t *) palloc0(sizeof(tf_get_func_state_t)); + funcctx->user_fctx = (void *)state; + + state->entry_result = SPI_tuptable; + state->entry_processed = SPI_processed; + state->entry_current_row = 0; + + CdbDispatchCommand("SELECT * FROM arenadata_toolkit.tracking_get_track_main()", DF_NEED_TWO_PHASE | DF_CANCEL_ON_ERROR, + &state->cdb_results); + + state->current_result = 0; + state->current_row = 0; + + funcctx->tuple_desc = CreateTemplateTupleDesc(9, false); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)2, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)3, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)4, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)5, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)6, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)7, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)8, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)9, "relstorage", CHAROID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); + + if (state->cdb_results.numResults > 0) + { + int natts = funcctx->tuple_desc->natts; + + state->inputFuncInfos = (FmgrInfo *)palloc0(natts * sizeof(FmgrInfo)); + state->typIOParams = (Oid *)palloc0(natts * sizeof(Oid)); + for (int i = 0; i < natts; i++) + { + Oid type = 
TupleDescAttr(funcctx->tuple_desc, i)->atttypid; + + getTypeInputInfo(type, &state->inputFuncInfos[i].fn_oid, &state->typIOParams[i]); + fmgr_info(state->inputFuncInfos[i].fn_oid, &state->inputFuncInfos[i]); + } + } + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + state = funcctx->user_fctx; + + if (state->entry_current_row < state->entry_processed) + { + HeapTuple inputTuple = state->entry_result->vals[state->entry_current_row]; + TupleDesc inputTupleDesc = state->entry_result->tupdesc; + + for (int i = 0; i < funcctx->tuple_desc->natts; i++) + { + values[i] = SPI_getbinval(inputTuple, inputTupleDesc, i + 1, &nulls[i]); + } + HeapTuple resultTuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + + state->entry_current_row++; + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(resultTuple)); + } + + SPI_finish(); + + while (state->current_result < state->cdb_results.numResults) + { + struct pg_result *pgresult = state->cdb_results.pg_results[state->current_result]; + + if (pgresult) + { + int nrows = PQntuples(pgresult); + int ncols = PQnfields(pgresult); + + if (state->current_row < nrows) + { + for (int col = 0; col < ncols; col++) + { + if (PQgetisnull(pgresult, state->current_row, col)) + { + values[col] = (Datum)0; + nulls[col] = true; + } + else + { + char *value = PQgetvalue(pgresult, state->current_row, col); + + values[col] = InputFunctionCall(&state->inputFuncInfos[col], value, state->typIOParams[col], -1); + } + } + result = heap_form_tuple(funcctx->tuple_desc, values, nulls); + state->current_row++; + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); + } + else + { + state->current_row = 0; + state->current_result++; + } + } + else + state->current_result++; + } + + SRF_RETURN_DONE(funcctx); +} + +static void +track_db(Oid dbid, bool reg) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; + A_Const aconst = + {.type = T_A_Const,.val = {.type = T_String,.val.str = reg ? 
"t" : "f"}}; + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.kind = VAR_SET_VALUE; + v_stmt.name = "arenadata_toolkit.tracking_is_db_tracked"; + v_stmt.args = lappend(NIL, &aconst); + v_stmt.is_local = false; + + tf_guc_unlock_tracked_once(); + + AlterDatabaseSet(&stmt); + } + + if (!reg) + bloom_set_unbind(&tf_shared_state->bloom_set, dbid); + else if (!bloom_set_bind(&tf_shared_state->bloom_set, dbid)) + elog(ERROR, "[arenadata_toolkit] exceeded maximum number of tracked databases"); +} + +/* + * Registers current (if dbid is 0) or specific database as tracked by arenadata_toolkit tables tracking. + * Dispatches call to segments by itself. Binds a bloom filter to the registered database if possible. + */ +Datum +tracking_register_db(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); + + track_db(dbid, true); + + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + tf_shared_state->is_initialized = true; + LWLockRelease(tf_shared_state->bloom_set.lock); + + if (Gp_role == GP_ROLE_DISPATCH) + { + char *cmd = + psprintf("select arenadata_toolkit.tracking_register_db(%u)", dbid); + + CdbDispatchCommand(cmd, 0, NULL); + } + + PG_RETURN_BOOL(true); +} + +/* + * Stop tracking given database and unbind from bloom. + */ +Datum +tracking_unregister_db(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(0); + + dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; + elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); + + track_db(dbid, false); + + if (Gp_role == GP_ROLE_DISPATCH) + { + char *cmd = + psprintf("select arenadata_toolkit.tracking_unregister_db(%u)", dbid); + + CdbDispatchCommand(cmd, 0, NULL); + } + + PG_RETURN_BOOL(true); +} + +Datum +tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) +{ + bool set = PG_GETARG_OID(0); + Oid dbid = PG_GETARG_OID(1); + + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + + A_Const aconst = + {.type = T_A_Const,.val = {.type = T_String,.val.str = set ? "t" : "f"}}; + + if (Gp_role == GP_ROLE_DISPATCH) + { + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + stmt.setstmt = &v_stmt; + + if (stmt.dbname == NULL) + elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + + v_stmt.type = T_VariableSetStmt; + v_stmt.kind = VAR_SET_VALUE; + v_stmt.name = "arenadata_toolkit.tracking_snapshot_on_recovery"; + v_stmt.args = lappend(NIL, &aconst); + v_stmt.is_local = false; + + tf_guc_unlock_full_snapshot_on_recovery_once(); + + AlterDatabaseSet(&stmt); + } + + if (Gp_role == GP_ROLE_DISPATCH) + { + char *cmd = + psprintf("select arenadata_toolkit.tracking_set_snapshot_on_recovery(%s, %u)", + set ? 
"true" : "false", dbid); + + CdbDispatchCommand(cmd, 0, NULL); + } + + PG_RETURN_BOOL(true); +} + +/* Helper function to add or remove schema from configuration string */ +static char * +add_or_remove_schema(const char *schema_string, const char *schemaName, bool add) +{ + StringInfoData buf; + char *token; + char *str; + bool found = false; + + initStringInfo(&buf); + + if (schema_string && schema_string[0] != '\0') + { + str = pstrdup(schema_string); + token = strtok(str, ","); + while (token != NULL) + { + if (strcmp(token, schemaName) == 0) + { + found = true; + if (add) + { + appendStringInfo(&buf, "%s,", token); + } + } + else + { + appendStringInfo(&buf, "%s,", token); + } + token = strtok(NULL, ","); + } + pfree(str); + } + + if (add && !found) + { + appendStringInfo(&buf, "%s,", schemaName); + } + + if (buf.len > 0 && buf.data[buf.len - 1] == ',') + { + buf.data[buf.len - 1] = '\0'; + buf.len--; + } + + if (buf.len == 0) + { + pfree(buf.data); + return NULL; + } + + return buf.data; +} + +static void +track_schema(const char *schemaName, Oid dbid, bool reg) +{ + Relation rel; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple tuple; + char *current_schemas = NULL; + char *new_schemas = NULL; + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; + A_Const arg; + + rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock); + ScanKeyInit(&skey[0], + Anum_pg_db_role_setting_setdatabase, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(dbid)); + + /* + * Lookup for not role specific configuration + */ + ScanKeyInit(&skey[1], + Anum_pg_db_role_setting_setrole, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + scan = systable_beginscan(rel, DbRoleSettingDatidRolidIndexId, true, NULL, 2, skey); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + bool isnull; + Datum str_datum; + + str_datum = heap_getattr(tuple, Anum_pg_db_role_setting_setconfig, + RelationGetDescr(rel), &isnull); + if (!isnull) + { + ArrayType 
*array; + Datum *elems; + int nelems; + + array = DatumGetArrayTypeP(str_datum); + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, NULL, &nelems); + for (int i = 0; i < nelems; i++) + { + char *str = TextDatumGetCString(elems[i]); + + if (strncmp(str, "arenadata_toolkit.tracking_schemas=", 35) == 0) + { + current_schemas = pstrdup(str + 35); + break; + } + pfree(str); + } + } + } + systable_endscan(scan); + heap_close(rel, RowExclusiveLock); + + new_schemas = add_or_remove_schema(current_schemas, schemaName, reg); + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.name = "arenadata_toolkit.tracking_schemas"; + v_stmt.is_local = false; + + arg.type = T_A_Const; + arg.val.type = T_String; + arg.val.val.str = new_schemas; + arg.location = -1; + + if (new_schemas == NULL) + { + /* + * If new_schemas is NULL, we're removing the last schema, so let's + * just RESET the variable + */ + v_stmt.kind = VAR_RESET; + v_stmt.args = NIL; + } + else + { + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + } + + tf_guc_unlock_schemas_once(); + + AlterDatabaseSet(&stmt); + + if (current_schemas) + pfree(current_schemas); + if (new_schemas) + pfree(new_schemas); +} + +Datum +tracking_register_schema(PG_FUNCTION_ARGS) +{ + const char *schema_name = NameStr(*PG_GETARG_NAME(0)); + Oid dbid = PG_GETARG_OID(1); + + dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; + + if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema %s does not exist", schema_name))); + + elog(LOG, "[arenadata_toolkit] registering schema %s in database %u for tracking", schema_name, dbid); + + track_schema(schema_name, dbid, true); + + PG_RETURN_BOOL(true); +} + +Datum +tracking_unregister_schema(PG_FUNCTION_ARGS) +{ + const char *schema_name = NameStr(*PG_GETARG_NAME(0)); + Oid dbid = PG_GETARG_OID(1); + + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + + if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %s does not exist", schema_name))); + + elog(LOG, "[arenadata_toolkit] registering schema %s in database %u for tracking", schema_name, dbid); + + track_schema(schema_name, dbid, false); + + PG_RETURN_BOOL(true); +} + +static bool +is_valid_relkind(char relkind) +{ + return (relkind == 'r' || relkind == 'i' || relkind == 'S' || + relkind == 't' || relkind == 'v' || relkind == 'c' || + relkind == 'f' || relkind == 'u' || relkind == 'm' || + relkind == 'o' || relkind == 'b' || relkind == 'M'); +} + +Datum +tracking_set_relkinds(PG_FUNCTION_ARGS) +{ + char *relkinds_str = NameStr(*PG_GETARG_NAME(0)); + Oid dbid = PG_GETARG_OID(1); + char *token; + char *str_copy; + bool seen_relkinds[256] = {false}; + StringInfoData buf; + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; + A_Const arg; + + dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; + + initStringInfo(&buf); + str_copy = pstrdup(relkinds_str); /* strtok() modifies its input, so tokenize a private copy */ + token = strtok(str_copy, ","); + while (token != NULL) + { + if (strlen(token) != 1 || !is_valid_relkind(token[0])) /* every item must be exactly one accepted character */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Invalid relkind: %s", token), + errhint("Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M'"))); + + if (!seen_relkinds[(unsigned char)token[0]]) /* keep only the first occurrence of each code */ + { + appendStringInfoChar(&buf, token[0]); + appendStringInfoChar(&buf, ','); /* separator after every item; the last one is stripped below */ + seen_relkinds[(unsigned char)token[0]] = true; + } + token = strtok(NULL, ","); + } + pfree(str_copy); + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + stmt.setstmt = &v_stmt; + + if (stmt.dbname == NULL) + elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + + v_stmt.type = T_VariableSetStmt; + v_stmt.name = "arenadata_toolkit.tracking_relkinds"; + v_stmt.is_local = false; + + arg.type = T_A_Const; + arg.val.type = T_String; + arg.val.val.str = buf.data; /* points at buf's storage, which is trimmed in place just below */ + arg.location = -1; + + if (buf.len > 0 && buf.data[buf.len - 1] == ',') + { + buf.data[buf.len - 1] = '\0'; + buf.len--; + } + + if (buf.len == 0) /* nothing collected: RESET the setting instead of SET */ + { + v_stmt.kind = VAR_RESET; + v_stmt.args = NIL; + } + else + { + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + elog(LOG, "[arenadata_toolkit] setting relkinds %s in database %u for tracking", buf.data, dbid); + } + + tf_guc_unlock_relkinds_once(); /* NOTE(review): helper defined outside this view; presumably permits the following change of the GUC once - confirm */ + + AlterDatabaseSet(&stmt); + pfree(buf.data); + + PG_RETURN_BOOL(true); +} + +static bool +is_valid_relstorage(char relstorage) /* true when relstorage is one of the single-character codes accepted below */ +{ + return (relstorage == 'h' || relstorage == 'a' || relstorage == 'c' || + relstorage == 'x' || relstorage == 'v' || relstorage == 'f'); +} + +Datum +tracking_set_relstorages(PG_FUNCTION_ARGS) /* SQL-callable: validates the comma-separated single-character relstorage list in arg 0, drops duplicates and stores the result as arenadata_toolkit.tracking_relstorages for database arg 1 via AlterDatabaseSet(); an empty result resets the setting; returns true */ +{ + char *relstorages_str = NameStr(*PG_GETARG_NAME(0)); + Oid dbid = PG_GETARG_OID(1); + char *token; + char *str_copy; + bool seen_relstorages[256] = {false}; /* indexed by the code cast to unsigned char; marks codes already appended */ + StringInfoData buf; + AlterDatabaseSetStmt stmt; 
+ VariableSetStmt v_stmt; + A_Const arg; + + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + + initStringInfo(&buf); + str_copy = pstrdup(relstorages_str); /* strtok() modifies its input, so tokenize a private copy */ + token = strtok(str_copy, ","); + while (token != NULL) + { + if (strlen(token) != 1 || !is_valid_relstorage(token[0])) /* every item must be exactly one accepted character */ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Invalid relstorage type: %s", token), + errhint("Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f'"))); + + if (!seen_relstorages[(unsigned char)token[0]]) /* keep only the first occurrence of each code */ + { + appendStringInfoChar(&buf, token[0]); + appendStringInfoChar(&buf, ','); /* separator after every item; the last one is stripped below */ + seen_relstorages[(unsigned char)token[0]] = true; + } + token = strtok(NULL, ","); + } + pfree(str_copy); + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.name = "arenadata_toolkit.tracking_relstorages"; + v_stmt.is_local = false; + + arg.type = T_A_Const; + arg.val.type = T_String; + arg.val.val.str = buf.data; /* points at buf's storage, which is trimmed in place just below */ + arg.location = -1; + + if (buf.len > 0 && buf.data[buf.len - 1] == ',') + { + buf.data[buf.len - 1] = '\0'; + buf.len--; + } + + if (buf.len == 0) /* nothing collected: RESET the setting instead of SET */ + { + v_stmt.kind = VAR_RESET; + v_stmt.args = NIL; + } + else + { + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + elog(LOG, "[arenadata_toolkit] setting relstorages %s in database %u for tracking", buf.data, dbid); + } + + tf_guc_unlock_relstorages_once(); /* NOTE(review): helper defined outside this view; presumably permits the following change of the GUC once - confirm */ + + AlterDatabaseSet(&stmt); + + pfree(buf.data); + + PG_RETURN_BOOL(true); +} + +Datum +tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) /* SQL-callable: calls bloom_set_trigger_bits(..., true) for database arg 0 (InvalidOid selects MyDatabaseId), errors if that call reports failure, and in the dispatcher role re-issues the same call through CdbDispatchCommand(); returns true */ +{ + Oid dbid = PG_GETARG_OID(0); + + dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; + elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %d", dbid); + + if (!bloom_set_trigger_bits(&tf_shared_state->bloom_set, dbid, true)) /* false return is reported as a missing filter for this database */ + elog(ERROR, "Failed to find corresponding filter to database %u", dbid); + + if (Gp_role == GP_ROLE_DISPATCH) /* only in the dispatcher role: repeat the call through CdbDispatchCommand() */ + { + char *cmd = psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%u)", dbid); /* dbid is an Oid (unsigned), so it is formatted with %u */ + + CdbDispatchCommand(cmd, 0, NULL); + } + + PG_RETURN_BOOL(true); +} + +Datum +tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) /* SQL-callable: returns the result of bloom_set_is_all_bits_triggered() for database arg 0 (InvalidOid selects MyDatabaseId) and logs it */ +{ + Oid dbid = PG_GETARG_OID(0); + bool is_triggered = false; + + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + + is_triggered = bloom_set_is_all_bits_triggered(&tf_shared_state->bloom_set, dbid); + + elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %u", is_triggered, dbid); + + PG_RETURN_BOOL(is_triggered); +} + +Datum +tracking_is_segment_initialized(PG_FUNCTION_ARGS) /* SQL-callable: returns one composite value (GpIdentity.segindex, tf_shared_state->is_initialized) formed with the tuple descriptor taken from the caller's ReturnSetInfo */ +{ + ReturnSetInfo *rsi; + TupleDesc tupdesc; + HeapTuple tuple; + Datum values[2]; + bool nulls[2]; + Datum result; + + rsi = (ReturnSetInfo *)fcinfo->resultinfo; + tupdesc = rsi->expectedDesc; + + /* Populate an output tuple. 
*/ + values[0] = Int32GetDatum(GpIdentity.segindex); + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + values[1] = BoolGetDatum(tf_shared_state->is_initialized); + LWLockRelease(tf_shared_state->bloom_set.lock); + nulls[0] = nulls[1] = false; + tuple = heap_form_tuple(tupdesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +} From 87b02dc54ae42d92737860b430527029a7ab9c23 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 15 Oct 2024 12:27:09 +0300 Subject: [PATCH 02/69] fix tests --- gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index 9ecf2ef897c2..c0bcb2dafd9b 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -1,3 +1,8 @@ +--start_ignore +\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; +\! 
gpstop -raq -M fast; +\c +--end_ignore -- start_matchsubs -- -- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ From 3e876522f60fa5bdeed3a10cd24ea90c6d26f1c6 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 15 Oct 2024 16:32:08 +0300 Subject: [PATCH 03/69] trying to fix tests --- gpcontrib/arenadata_toolkit/Makefile | 2 +- .../expected/arenadata_toolkit_guc.out | 26 +++++++++++++++++-- .../expected/setup_shared.out | 5 ++++ .../sql/arenadata_toolkit_guc.sql | 5 ---- .../arenadata_toolkit/sql/setup_shared.sql | 5 ++++ 5 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 gpcontrib/arenadata_toolkit/expected/setup_shared.out create mode 100644 gpcontrib/arenadata_toolkit/sql/setup_shared.sql diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index 665c4ae401e5..d3c36358685d 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -34,7 +34,7 @@ PG_CFLAGS = -I$(libpq_srcdir) -I$(CURDIR)/src/include REGRESS = arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ adb_collect_table_stats_test adb_vacuum_strategy_test adb_relation_storage_size_test \ tablespace_location upgrade_test adb_hba_file_rules_view_test \ - arenadata_toolkit_guc arenadata_toolkit_tracking + setup_shared arenadata_toolkit_guc arenadata_toolkit_tracking REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index 017768eae89d..e6b567f81c11 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -4,6 +4,20 @@ -- s/\d+/XXX/g -- -- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking1; +NOTICE: database "tracking1" does not exist, skipping +DROP DATABASE IF 
EXISTS tracking2; +NOTICE: database "tracking2" does not exist, skipping +DROP DATABASE IF EXISTS tracking3; +NOTICE: database "tracking3" does not exist, skipping +DROP DATABASE IF EXISTS tracking4; +NOTICE: database "tracking4" does not exist, skipping +DROP DATABASE IF EXISTS tracking5; +NOTICE: database "tracking5" does not exist, skipping +DROP DATABASE IF EXISTS tracking6; +NOTICE: database "tracking6" does not exist, skipping +--end_ignore -- Test database registering GUC. CREATE DATABASE tracking1; \c tracking1; @@ -81,10 +95,18 @@ CREATE DATABASE tracking5; CREATE DATABASE tracking6; SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN ('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5'); -ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:XXX) + tracking_register_db +---------------------- + t + t + t + t + t +(5 rows) + SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN ('tracking6'); -ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:XXX) +ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:694) SELECT arenadata_toolkit.tracking_unregister_db(oid) FROM pg_database WHERE datname IN ('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5', 'tracking6'); tracking_unregister_db diff --git a/gpcontrib/arenadata_toolkit/expected/setup_shared.out b/gpcontrib/arenadata_toolkit/expected/setup_shared.out new file mode 100644 index 000000000000..13c336b038d4 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/setup_shared.out @@ -0,0 +1,5 @@ +-- Setup for tracking test +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +\! 
gpstop -air +-- end_ignore diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index c0bcb2dafd9b..9ecf2ef897c2 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -1,8 +1,3 @@ ---start_ignore -\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; -\! gpstop -raq -M fast; -\c ---end_ignore -- start_matchsubs -- -- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ diff --git a/gpcontrib/arenadata_toolkit/sql/setup_shared.sql b/gpcontrib/arenadata_toolkit/sql/setup_shared.sql new file mode 100644 index 000000000000..13c336b038d4 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/setup_shared.sql @@ -0,0 +1,5 @@ +-- Setup for tracking test +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +\! gpstop -air +-- end_ignore From 9e64dc8c3c185f4c3958ede51ff7cbccd7f4fd1a Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 15 Oct 2024 20:59:02 +0300 Subject: [PATCH 04/69] attempt to fix tests --- gpcontrib/arenadata_toolkit/Makefile | 2 +- .../expected/arenadata_toolkit_tracking.out | 6 ++++++ .../arenadata_toolkit/expected/setup_shared.out | 5 ----- .../arenadata_toolkit/sql/arenadata_toolkit_guc.sql | 9 +++++++++ .../sql/arenadata_toolkit_tracking.sql | 13 +++++++++++++ gpcontrib/arenadata_toolkit/sql/setup_shared.sql | 5 ----- 6 files changed, 29 insertions(+), 11 deletions(-) delete mode 100644 gpcontrib/arenadata_toolkit/expected/setup_shared.out delete mode 100644 gpcontrib/arenadata_toolkit/sql/setup_shared.sql diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index d3c36358685d..665c4ae401e5 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -34,7 +34,7 @@ PG_CFLAGS = -I$(libpq_srcdir) -I$(CURDIR)/src/include REGRESS = 
arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ adb_collect_table_stats_test adb_vacuum_strategy_test adb_relation_storage_size_test \ tablespace_location upgrade_test adb_hba_file_rules_view_test \ - setup_shared arenadata_toolkit_guc arenadata_toolkit_tracking + arenadata_toolkit_guc arenadata_toolkit_tracking REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 3f2d7ef47e4e..1c3c94b416c7 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -7,6 +7,12 @@ CREATE DATABASE tracking_db1; \c tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. Test getting track on not registered database; +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); + pg_sleep +---------- + +(1 row) + SELECT * FROM arenadata_toolkit.tracking_get_track(); ERROR: database 44817 is not tracked (track_files.c:347) CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" diff --git a/gpcontrib/arenadata_toolkit/expected/setup_shared.out b/gpcontrib/arenadata_toolkit/expected/setup_shared.out deleted file mode 100644 index 13c336b038d4..000000000000 --- a/gpcontrib/arenadata_toolkit/expected/setup_shared.out +++ /dev/null @@ -1,5 +0,0 @@ --- Setup for tracking test --- start_ignore -\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' -\! gpstop -air --- end_ignore diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index 9ecf2ef897c2..32033fc2f71f 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -1,3 +1,8 @@ +-- start_ignore +\! 
gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +\! gpstop -raq -M fast +\c +-- end_ignore -- start_matchsubs -- -- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ @@ -142,3 +147,7 @@ ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, \c contrib_regression; DROP DATABASE tracking1; +-- start_ignore +\! gpconfig -r shared_preload_libraries; +\! gpstop -raq -M fast +-- end_ignore diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 0465d606fecf..ba41e1a9de77 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -1,4 +1,11 @@ -- Tests for size tracking logic introduced in version 1.7 +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +\! gpstop -raq -M fast +\! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5' +\! gpstop -raq -M fast +\c +-- end_ignore -- start_matchsubs -- m/ERROR: database \d+ is not tracked \(track_files\.c:\d+\)/ -- s/\d+/XXX/g @@ -11,6 +18,7 @@ CREATE DATABASE tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. Test getting track on not registered database; +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); SELECT * FROM arenadata_toolkit.tracking_get_track(); SELECT arenadata_toolkit.tracking_register_db(); @@ -101,3 +109,8 @@ SELECT arenadata_toolkit.tracking_unregister_db(); \c contrib_regression; DROP DATABASE tracking_db1; +-- start_ignore +\! gpconfig -r shared_preload_libraries +\! gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec +\! 
gpstop -raq -M fast +-- end_ignore diff --git a/gpcontrib/arenadata_toolkit/sql/setup_shared.sql b/gpcontrib/arenadata_toolkit/sql/setup_shared.sql deleted file mode 100644 index 13c336b038d4..000000000000 --- a/gpcontrib/arenadata_toolkit/sql/setup_shared.sql +++ /dev/null @@ -1,5 +0,0 @@ --- Setup for tracking test --- start_ignore -\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' -\! gpstop -air --- end_ignore From c5f0e1192ae3b201855beefd2adda272c7bc864b Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 17 Oct 2024 03:30:34 +0300 Subject: [PATCH 05/69] Add shared memory check --- gpcontrib/arenadata_toolkit/src/track_files.c | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 3c34e4e7c58a..ea031aee1eb3 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -77,7 +77,19 @@ typedef struct tf_get_global_state_t tf_get_global_state = {NULL, NULL, NIL, NULL, NIL, NIL, NIL}; -/* if get function complete with commit, just free resources; if with abort, move bloom and drops back */ +static inline void +tf_check_shmem_error(void) /* guard called at the top of the SQL-callable functions: raises ERROR when tf_shared_state is NULL, before any of them dereferences it */ +{ + if (tf_shared_state == NULL) + ereport(ERROR, + (errmsg("Failed to access shared memory due to wrong extension initialization"), + errhint("Load extension's code through shared_preload_libraries mechanism"))); /* the setting is named shared_preload_libraries */ +} + +/* + * If get function complete with commit, just free resources; + * In case of abort bloom is merged back as well as drops track. 
+ */ static void xact_end_get_callback(XactEvent event, void *arg) { @@ -332,6 +344,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) Datum datums[9]; bool nulls[9] = {0}; + tf_check_shmem_error(); + LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); if (!tf_shared_state->bgworker_ready && Gp_role == GP_ROLE_DISPATCH) { @@ -534,6 +548,8 @@ tracking_get_track(PG_FUNCTION_ARGS) Datum values[9]; bool nulls[9] = {0}; + tf_check_shmem_error(); + if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext = CurrentMemoryContext; @@ -703,6 +719,8 @@ tracking_register_db(PG_FUNCTION_ARGS) { Oid dbid = PG_GETARG_OID(0); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); @@ -731,6 +749,8 @@ tracking_unregister_db(PG_FUNCTION_ARGS) { Oid dbid = PG_GETARG_OID(0); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); @@ -753,6 +773,8 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) bool set = PG_GETARG_OID(0); Oid dbid = PG_GETARG_OID(1); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; A_Const aconst = @@ -958,6 +980,8 @@ tracking_register_schema(PG_FUNCTION_ARGS) const char *schema_name = NameStr(*PG_GETARG_NAME(0)); Oid dbid = PG_GETARG_OID(1); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) @@ -978,6 +1002,8 @@ tracking_unregister_schema(PG_FUNCTION_ARGS) const char *schema_name = NameStr(*PG_GETARG_NAME(0)); Oid dbid = PG_GETARG_OID(1); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) @@ -1014,6 +1040,8 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) VariableSetStmt v_stmt; A_Const arg; + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; initStringInfo(&buf); @@ -1099,6 +1127,8 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) VariableSetStmt v_stmt; A_Const arg; + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; initStringInfo(&buf); @@ -1171,6 +1201,8 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) { Oid dbid = PG_GETARG_OID(0); + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %d", dbid); @@ -1193,6 +1225,8 @@ tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) Oid dbid = PG_GETARG_OID(0); bool is_triggered = false; + tf_check_shmem_error(); + dbid = dbid == InvalidOid ? MyDatabaseId : dbid; is_triggered = bloom_set_is_all_bits_triggered(&tf_shared_state->bloom_set, dbid); @@ -1212,6 +1246,8 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) bool nulls[2]; Datum result; + tf_check_shmem_error(); + rsi = (ReturnSetInfo *)fcinfo->resultinfo; tupdesc = rsi->expectedDesc; From 855b5aef920dade5e4d7343ff017493e07a94208 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 00:57:13 +0300 Subject: [PATCH 06/69] Make bgworker run on segments --- .../expected/arenadata_toolkit_tracking.out | 14 +- .../sql/arenadata_toolkit_tracking.sql | 8 +- .../arenadata_toolkit/src/arenadata_toolkit.c | 5 +- .../src/arenadata_toolkit_guc.c | 2 +- .../src/arenadata_toolkit_worker.c | 272 ++++++------------ gpcontrib/arenadata_toolkit/src/bloom_set.c | 19 ++ .../src/include/arenadata_toolkit_worker.h | 6 +- .../arenadata_toolkit/src/include/bloom_set.h | 1 + .../arenadata_toolkit/src/include/tf_shmem.h | 3 +- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 6 +- gpcontrib/arenadata_toolkit/src/track_files.c | 34 ++- 11 files changed, 146 insertions(+), 224 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 1c3c94b416c7..4bc4aff59ac6 
100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -7,14 +7,8 @@ CREATE DATABASE tracking_db1; \c tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. Test getting track on not registered database; -SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); - pg_sleep ----------- - -(1 row) - SELECT * FROM arenadata_toolkit.tracking_get_track(); -ERROR: database 44817 is not tracked (track_files.c:347) +ERROR: database 56212 is not tracked (track_files.c:368) CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" SELECT arenadata_toolkit.tracking_register_db(); tracking_register_db @@ -22,6 +16,12 @@ SELECT arenadata_toolkit.tracking_register_db(); t (1 row) +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); + pg_sleep +---------- + +(1 row) + -- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to -- setting up the bloom filter such that all relfilenodes are considered. SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index ba41e1a9de77..27099c747f0e 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -2,8 +2,8 @@ -- start_ignore \! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' \! gpstop -raq -M fast -\! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5' -\! gpstop -raq -M fast +\! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '1' +\! gpstop -u \c -- end_ignore -- start_matchsubs @@ -18,10 +18,10 @@ CREATE DATABASE tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. 
Test getting track on not registered database; -SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); SELECT * FROM arenadata_toolkit.tracking_get_track(); SELECT arenadata_toolkit.tracking_register_db(); +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); -- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to -- setting up the bloom filter such that all relfilenodes are considered. @@ -112,5 +112,5 @@ DROP DATABASE tracking_db1; -- start_ignore \! gpconfig -r shared_preload_libraries \! gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec -\! gpstop -raq -M fast +\! gpstop -u -- end_ignore diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c index 9a58b4c19f55..cc68858ea7c4 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c @@ -27,10 +27,7 @@ _PG_init(void) drops_track_init(); - if (IS_QUERY_DISPATCHER()) - { - arenadata_toolkit_worker_register(); - } + arenadata_toolkit_worker_register(); } void diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index ad4603984869..6800839cf9a8 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -287,7 +287,7 @@ tf_guc_define(void) DEFAULT_NAPTIME, 1, MAX_NAPTIME, - PGC_POSTMASTER, + PGC_SIGHUP, 0, NULL, NULL, diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index 46addc365c28..2a7d48649e97 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -8,6 +8,7 @@ #include "catalog/indexing.h" #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" +#include 
"cdb/cdbvars.h" #include "libpq-fe.h" #include "postmaster/bgworker.h" #include "storage/proc.h" @@ -18,22 +19,23 @@ #include "arenadata_toolkit_worker.h" #include "arenadata_toolkit_guc.h" +#include "bloom_set.h" #include "tf_shmem.h" -#define EXTENSIONNAME "arenadata_toolkit" +#define TOOLKIT_BINARY_NAME "arenadata_toolkit" typedef struct { Oid dbid; + Name dbname; bool get_full_snapshot_on_recovery; } tracked_db_t; -static BackgroundWorker worker; - /* flags set by signal handlers */ static volatile sig_atomic_t got_sighup = false; static volatile sig_atomic_t got_sigterm = false; -static volatile sig_atomic_t got_sigusr1 = false; + +void arenadata_toolkit_main(Datum); /* parse array of GUCs, find desired and analyze it */ static bool @@ -130,45 +132,6 @@ full_snapshot_on_recovery(ArrayType *array) return take_snapshot; } -static List * -get_uninitialized_segments() -{ - int i; - CdbPgResults cdb_pgresults = {NULL, 0}; - List *list = NIL; - - CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); - - for (i = 0; i < cdb_pgresults.numResults; i++) - { - struct pg_result *pgresult = cdb_pgresults.pg_results[i]; - - if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) - { - cdbdisp_clearCdbPgResults(&cdb_pgresults); - elog(ERROR, "is_initialized: resultStatus not tuples_Ok: %s %s", - PQresStatus(PQresultStatus(pgresult)), PQresultErrorMessage(pgresult)); - } - else - { - int32 segindex = 0; - bool is_initialized = false; - - segindex = atoi(PQgetvalue(pgresult, 0, 0)); - is_initialized = strcmp(PQgetvalue(pgresult, 0, 1), "t") == 0; - - elog(LOG, "get_uninitialized_segments, segindex: %d, is_initialized: %d", segindex, is_initialized); - - if (!is_initialized) - list = lappend_int(list, segindex); - } - } - - cdbdisp_clearCdbPgResults(&cdb_pgresults); - - return list; -} - /* * Signal handler for SIGTERM * Set a flag to let the main loop to terminate, and set our latch to wake @@ -203,114 +166,16 @@ 
tracking_sighup(SIGNAL_ARGS) errno = save_errno; } -/* - * Signal handler for SIGUSR1 - * Set a flag to tell the launcher to handle extension ddl message - */ -static void -tracking_sigusr1(SIGNAL_ARGS) -{ - int save_errno = errno; - - got_sigusr1 = true; - if (MyProc) - SetLatch(&MyProc->procLatch); - - errno = save_errno; -} - -static bool -extension_created() -{ - bool exists = false; - Relation rel; - SysScanDesc scandesc; - HeapTuple tuple; - ScanKeyData entry[1]; - - rel = heap_open(ExtensionRelationId, AccessShareLock); - - ScanKeyInit(&entry[0], - Anum_pg_extension_extname, - BTEqualStrategyNumber, F_NAMEEQ, - CStringGetDatum(EXTENSIONNAME)); - - scandesc = systable_beginscan(rel, ExtensionNameIndexId, true, - NULL, 1, entry); - - tuple = systable_getnext(scandesc); - - exists = HeapTupleIsValid(tuple); - - systable_endscan(scandesc); - heap_close(rel, AccessShareLock); - - return exists; -} - -static void -dispatch_register_to_master(List *dbids) -{ - ListCell *cell; - tracked_db_t *trackedDb; - - foreach(cell, dbids) - { - trackedDb = (tracked_db_t *) lfirst(cell); - - bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); - bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, - trackedDb->get_full_snapshot_on_recovery); - } - - LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); - tf_shared_state->is_initialized = true; - LWLockRelease(tf_shared_state->bloom_set.lock); -} - -static void -dispatch_register_to_segments(List *dbids, List *uninitialized_segments) -{ - ListCell *cell; - tracked_db_t *trackedDb; - CdbPgResults cdb_pgresults = {NULL, 0}; - - if (uninitialized_segments == NIL) - return; - - foreach(cell, dbids) - { - trackedDb = (tracked_db_t *) lfirst(cell); - - char *cmd = psprintf("select arenadata_toolkit.tracking_register_db(%u)", trackedDb->dbid); - - CdbDispatchCommandToSegments(cmd, - 0, - uninitialized_segments, - &cdb_pgresults); - - if (trackedDb->get_full_snapshot_on_recovery) - { - cmd = 
psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%u)", trackedDb->dbid); - - CdbDispatchCommandToSegments(cmd, - 0, - uninitialized_segments, - &cdb_pgresults); - } - } -} - -static void -dispatch_register(bool dispatch_to_master, List *uninitialized_segments) +static List* +get_tracked_dbs() { Relation rel; SysScanDesc scan; HeapTuple tup; - List *dbids = NIL; + List *tracked_dbs = NIL; tracked_db_t *trackedDb; - rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock); + rel = heap_open(DbRoleSettingRelationId, AccessShareLock); scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); while (HeapTupleIsValid(tup = systable_getnext(scan))) { @@ -342,46 +207,58 @@ dispatch_register(bool dispatch_to_master, List *uninitialized_segments) trackedDb->dbid = DatumGetObjectId(oid_datum); trackedDb->get_full_snapshot_on_recovery = full_snapshot_on_recovery(a); - dbids = lappend(dbids, trackedDb); + tracked_dbs = lappend(tracked_dbs, trackedDb); } } systable_endscan(scan); - heap_close(rel, RowExclusiveLock); - - if (dbids != NIL) - { - ListCell *cell; + heap_close(rel, AccessShareLock); - if (dispatch_to_master) - dispatch_register_to_master(dbids); + return tracked_dbs; +} - dispatch_register_to_segments(dbids, uninitialized_segments); +static void +track_dbs(List *tracked_dbs) +{ + ListCell *cell; + tracked_db_t *trackedDb; - foreach(cell, dbids) - { - pfree(lfirst(cell)); - } + foreach(cell, tracked_dbs) + { + trackedDb = (tracked_db_t *) lfirst(cell); - list_free(dbids); + bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); + bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, + trackedDb->get_full_snapshot_on_recovery); } - - LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); - tf_shared_state->bgworker_ready = true; - LWLockRelease(tf_shared_state->bloom_set.lock); } /* scan pg_db_role_setting, find all databases, bind blooms if necessary */ -static void -arenadata_toolkit_worker(Datum main_arg) 
+void +arenadata_toolkit_main(Datum main_arg) { elog(LOG, "[arenadata toolkit] Starting background worker"); - bool master_initialized = false; + /* + * The worker shouldn't exist when the master boots in utility mode. + * Otherwise BackgroundWorkerInitializeConnection will explode with FATAL. + */ + if(IS_QUERY_DISPATCHER() && Gp_role != GP_ROLE_DISPATCH) + { + proc_exit(0); + } + + /* + * Kludge for scanning pg_db_role_setting on segments. + */ + if (!IS_QUERY_DISPATCHER() && Gp_role == GP_ROLE_DISPATCH) + { + Gp_role = GP_ROLE_UTILITY; + Gp_session_role = GP_ROLE_UTILITY; + } pqsignal(SIGHUP, tracking_sighup); pqsignal(SIGTERM, tracking_sigterm); - pqsignal(SIGUSR1, tracking_sigusr1); BackgroundWorkerUnblockSignals(); @@ -390,31 +267,43 @@ arenadata_toolkit_worker(Datum main_arg) while (!got_sigterm) { int rc; - List *uninitialized_segments = NIL; - - CHECK_FOR_INTERRUPTS(); + List *tracked_dbs = NIL; StartTransactionCommand(); + tracked_dbs = get_tracked_dbs(); - if (extension_created()) + if (!tf_shared_state->is_initialized && list_length(tracked_dbs) > 0) { - elog(LOG, "[arenadata toolkit] Getting uninitialized segments"); - uninitialized_segments = get_uninitialized_segments(uninitialized_segments); - - if (!master_initialized || list_length(uninitialized_segments) > 0) - { - elog(LOG, "Dispatching register to segments"); - dispatch_register(!master_initialized, uninitialized_segments); - list_free(uninitialized_segments); - uninitialized_segments = NIL; - master_initialized = true; - } + track_dbs(tracked_dbs); + LWLockAcquire(tf_shared_state->state_lock, LW_EXCLUSIVE); + tf_shared_state->is_initialized = true; + LWLockRelease(tf_shared_state->state_lock); } + + /* + * Here is quite a dump check, which imitates consistency validation. + * Written as an example of segment erroneous tracking status. 
+ */ + if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) + { + LWLockAcquire(tf_shared_state->state_lock, LW_EXCLUSIVE); + tf_shared_state->has_error = true; + LWLockRelease(tf_shared_state->state_lock); + } + + if (tracked_dbs) + list_free_deep(tracked_dbs); + CommitTransactionCommand(); rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, tracking_worker_naptime_sec * 1000); - ResetLatch(&MyProc->procLatch); + + if (rc & WL_LATCH_SET) + { + ResetLatch(&MyProc->procLatch); + CHECK_FOR_INTERRUPTS(); + } /* Emergency bailout if postmaster has died */ if (rc & WL_POSTMASTER_DEATH) @@ -425,8 +314,11 @@ arenadata_toolkit_worker(Datum main_arg) if (got_sighup) { + elog(DEBUG1, "[arenadata_tookit] got sighup"); got_sighup = false; + ProcessConfigFile(PGC_SIGHUP); } + } if (got_sigterm) @@ -436,15 +328,19 @@ arenadata_toolkit_worker(Datum main_arg) } void -arenadata_toolkit_worker_register(void) +arenadata_toolkit_worker_register() { + BackgroundWorker worker; + + memset(&worker, 0, sizeof(BackgroundWorker)); + worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; - worker.bgw_start_time = BgWorkerStart_ConsistentState; + worker.bgw_start_time = BgWorkerStart_RecoveryFinished; worker.bgw_restart_time = BGW_DEFAULT_RESTART_INTERVAL; - worker.bgw_main = arenadata_toolkit_worker; + snprintf(worker.bgw_library_name, BGW_MAXLEN, TOOLKIT_BINARY_NAME); + snprintf(worker.bgw_function_name, BGW_MAXLEN, "arenadata_toolkit_main"); worker.bgw_notify_pid = 0; - worker.bgw_start_rule = NULL; - sprintf(worker.bgw_name, "arenadata_toolkit"); + snprintf(worker.bgw_name, BGW_MAXLEN, "arenadata_toolkit"); RegisterBackgroundWorker(&worker); } diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 66e7907ea6c7..93953a0015b0 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -278,3 +278,22 @@ 
bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) return is_triggered; } + +int +bloom_set_count(bloom_set_t * bloom_set) +{ + int count = 0; + bloom_entry_t *bloom_entry; + + LWLockAcquire(bloom_set->lock, LW_SHARED); + for (int i = 0; i < bloom_set->bloom_count; ++i) + { + bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); + LWLockAcquire(bloom_entry->lock, LW_SHARED); + if (bloom_entry->dbid != InvalidOid) + ++count; + LWLockRelease(bloom_entry->lock); + } + LWLockRelease(bloom_set->lock); + return count; +} diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h index a07ba0ab75e3..7481463c391d 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h +++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h @@ -1,6 +1,6 @@ -#ifndef BLOOM_WORKER_H -#define BLOOM_WORKER_H +#ifndef ARENADATA_TOOLKIT_WORKER_H +#define ARENADATA_TOOLKIT_WORKER_H void arenadata_toolkit_worker_register(void); -#endif /* BLOOM_WORKER_H */ +#endif /* ARENADATA_TOOLKIT_WORKER_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index e2409d4b579e..83ef901e1dc3 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -37,5 +37,6 @@ bloom_t *bloom_set_move(bloom_set_t * bloom_set, Oid dbid, void *mem); bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on); bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid); +int bloom_set_count(bloom_set_t * bloom_set); #endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 98beae2abb68..defaefea1dab 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ 
b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -5,7 +5,8 @@ typedef struct { - bool bgworker_ready; /* is bgworker complete with its job */ + LWLock *state_lock; + bool has_error; bool is_initialized; bloom_set_t bloom_set; } tf_shared_state_t; diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 9615c60b8682..a1996a687dd9 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -30,7 +30,9 @@ tf_shmem_hook(void) if (!found) { - tf_shared_state->bgworker_ready = false; + tf_shared_state->is_initialized = false; + tf_shared_state->has_error = false; + tf_shared_state->state_lock = LWLockAssign(); bloom_set_init(db_track_count, bloom_size, &tf_shared_state->bloom_set); } @@ -42,7 +44,7 @@ void tf_shmem_init() { /* don't forget to add additional locks */ - RequestAddinLWLocks(1 + db_track_count); + RequestAddinLWLocks(2 + db_track_count); RequestAddinShmemSpace(tf_shmem_calc_size()); next_shmem_startup_hook = shmem_startup_hook; diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index ea031aee1eb3..ace596d07bc8 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -83,7 +83,7 @@ tf_check_shmem_error(void) if (tf_shared_state == NULL) ereport(ERROR, (errmsg("Failed to access shared memory due to wrong extension initialization"), - errhint("Load extension's code through shared_preload_library mechanism"))); + errhint("Load extension's code through shared_preload_library configuration"))); } /* @@ -346,13 +346,10 @@ tracking_get_track_main(PG_FUNCTION_ARGS) tf_check_shmem_error(); - LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); - if (!tf_shared_state->bgworker_ready && Gp_role == GP_ROLE_DISPATCH) - { - LWLockRelease(tf_shared_state->bloom_set.lock); - elog(ERROR, "Can't get track before bgworker updates the tracking status."); - 
} - LWLockRelease(tf_shared_state->bloom_set.lock); + LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); + if (tf_shared_state->has_error) + elog(ERROR, "Can't perform tracking properly due to internal error"); + LWLockRelease(tf_shared_state->state_lock); if (SRF_IS_FIRSTCALL()) { @@ -436,6 +433,19 @@ tracking_get_track_main(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); state = funcctx->user_fctx; + LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); + if (!tf_shared_state->is_initialized) + { + LWLockRelease(tf_shared_state->state_lock); + systable_endscan(state->scan); + heap_close(state->pg_class_rel, AccessShareLock); + state->scan = NULL; + state->pg_class_rel = NULL; + elog(WARNING, "Nothing to return from segment %d due to uninitialized status of Bloom filter", GpIdentity.segindex); + SRF_RETURN_DONE(funcctx); + } + LWLockRelease(tf_shared_state->state_lock); + while (true) { Oid filenode; @@ -726,10 +736,6 @@ tracking_register_db(PG_FUNCTION_ARGS) track_db(dbid, true); - LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); - tf_shared_state->is_initialized = true; - LWLockRelease(tf_shared_state->bloom_set.lock); - if (Gp_role == GP_ROLE_DISPATCH) { char *cmd = @@ -1253,9 +1259,9 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) /* Populate an output tuple. 
*/ values[0] = Int32GetDatum(GpIdentity.segindex); - LWLockAcquire(tf_shared_state->bloom_set.lock, LW_EXCLUSIVE); + LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); values[1] = BoolGetDatum(tf_shared_state->is_initialized); - LWLockRelease(tf_shared_state->bloom_set.lock); + LWLockRelease(tf_shared_state->state_lock); nulls[0] = nulls[1] = false; tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); From 7c9a6c97686c7607c8a7b45e82bd25df96bec365 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 01:31:17 +0300 Subject: [PATCH 07/69] elog to ereport and minor changes --- .../expected/arenadata_toolkit_tracking.out | 7 +-- .../sql/arenadata_toolkit_tracking.sql | 4 +- .../src/arenadata_toolkit_guc.c | 12 ++--- gpcontrib/arenadata_toolkit/src/dbsize.c | 6 ++- gpcontrib/arenadata_toolkit/src/track_files.c | 49 +++++++++++++------ 5 files changed, 49 insertions(+), 29 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 4bc4aff59ac6..0096928c62d6 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -1,6 +1,6 @@ -- Tests for size tracking logic introduced in version 1.7 -- start_matchsubs --- m/ERROR: database \d+ is not tracked \(track_files\.c:\d+\)/ +-- m/ERROR: database \d+ is not tracked/ -- s/\d+/XXX/g -- end_matchsubs CREATE DATABASE tracking_db1; @@ -8,7 +8,8 @@ CREATE DATABASE tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. 
Test getting track on not registered database; SELECT * FROM arenadata_toolkit.tracking_get_track(); -ERROR: database 56212 is not tracked (track_files.c:368) +ERROR: database 102559 is not tracked +HINT: Call 'arenadata_toolkit.tracking_register_db()'to enable tracking CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" SELECT arenadata_toolkit.tracking_register_db(); tracking_register_db @@ -137,7 +138,7 @@ FROM arenadata_toolkit.tracking_get_track(); (0 rows) -- 9. Test repetitive track call within the same transaction. In case of --- rollback only first changes shoul be present. +-- rollback only first changes should be present. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); BEGIN; SELECT relname, size, state, segid, relkind, relstorage diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 27099c747f0e..a592c5a08a0d 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -7,7 +7,7 @@ \c -- end_ignore -- start_matchsubs --- m/ERROR: database \d+ is not tracked \(track_files\.c:\d+\)/ +-- m/ERROR: database \d+ is not tracked/ -- s/\d+/XXX/g -- end_matchsubs --start_ignore @@ -73,7 +73,7 @@ SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tracking_get_track(); -- 9. Test repetitive track call within the same transaction. In case of --- rollback only first changes shoul be present. +-- rollback only first changes should be present. 
INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); BEGIN; SELECT relname, size, state, segid, relkind, relstorage diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index 6800839cf9a8..60c5865d9c90 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -8,7 +8,7 @@ #include "utils/guc.h" #include "tf_shmem.h" -#define DEFAULT_BLOOM_SIZE 1000000 +#define DEFAULT_BLOOM_SIZE_BYTES 1000000 #define DEFAULT_DB_TRACK_COUNT 5 #define DEFAULT_IS_TRACKED false #define DEFAULT_DROPS_COUNT 100000 @@ -16,7 +16,7 @@ #define DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY true #define DEFAULT_TRACKED_REL_STORAGES "h,a,c" #define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" -#define DEFAULT_NAPTIME 60 +#define DEFAULT_NAPTIME_SEC 60 #define MIN_BLOOM_SIZE 1 #define MIN_DB_TRACK_COUNT 1 @@ -28,7 +28,7 @@ #define MAX_DROPS_COUNT 1000000 #define MAX_NAPTIME OID_MAX & 0x7FFFFFFF -int bloom_size = DEFAULT_BLOOM_SIZE; +int bloom_size = DEFAULT_BLOOM_SIZE_BYTES; int db_track_count = DEFAULT_DB_TRACK_COUNT; bool is_tracked = DEFAULT_IS_TRACKED; bool get_full_snapshot_on_recovery = DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY; @@ -36,7 +36,7 @@ int drops_count = DEFAULT_DROPS_COUNT; char *tracked_schemas = DEFAULT_TRACKED_SCHEMAS; char *tracked_rel_storages = DEFAULT_TRACKED_REL_STORAGES; char *tracked_rel_kinds = DEFAULT_TRACKED_REL_KINDS; -int tracking_worker_naptime_sec = DEFAULT_NAPTIME; +int tracking_worker_naptime_sec = DEFAULT_NAPTIME_SEC; static bool is_tracked_unlocked = false; static bool is_get_full_snapshot_on_recovery_unlocked = false; @@ -187,7 +187,7 @@ tf_guc_define(void) "Size of bloom filter in bytes for each tracked database", NULL, &bloom_size, - DEFAULT_BLOOM_SIZE, + DEFAULT_BLOOM_SIZE_BYTES, MIN_BLOOM_SIZE, MAX_BLOOM_SIZE, PGC_POSTMASTER, @@ -284,7 +284,7 @@ tf_guc_define(void) "Toolkit background worker nap 
time", NULL, &tracking_worker_naptime_sec, - DEFAULT_NAPTIME, + DEFAULT_NAPTIME_SEC, 1, MAX_NAPTIME, PGC_SIGHUP, diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 9731f744f44d..620ab276bb8b 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -32,7 +32,7 @@ PG_MODULE_MAGIC; static int64 calculate_relation_size(Relation rel, ForkNumber forknum); static int64 get_heap_storage_total_bytes(Relation rel, - ForkNumber forknum, char *relpath); + ForkNumber forknum, char *relpath); static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, @@ -374,7 +374,9 @@ adb_hba_file_rules(PG_FUNCTION_ARGS) return pg_hba_file_rules(fcinfo); } -/* */ +/* + * Calculates relation size among all the forks. + */ int64 dbsize_calc_size(Oid relid) { diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index ace596d07bc8..e31b353f5bff 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -348,7 +348,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); if (tf_shared_state->has_error) - elog(ERROR, "Can't perform tracking properly due to internal error"); + ereport(ERROR, + (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); LWLockRelease(tf_shared_state->state_lock); if (SRF_IS_FIRSTCALL()) @@ -365,7 +366,11 @@ tracking_get_track_main(PG_FUNCTION_ARGS) { tf_get_global_state.bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) - elog(ERROR, "database %u is not tracked", MyDatabaseId); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not 
tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); } else { @@ -382,7 +387,11 @@ tracking_get_track_main(PG_FUNCTION_ARGS) } bloom_clear(tf_get_global_state.bloom); if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) - elog(ERROR, "database %u is not tracked", MyDatabaseId); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); } /* initial snapshot shouldn't return drops */ if (tf_get_global_state.bloom && !tf_get_global_state.bloom->is_set_all) @@ -400,7 +409,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) if (tf_get_global_state.relstorages == NIL || tf_get_global_state.relkinds == NIL || tf_get_global_state.schema_oids == NIL) - elog(ERROR, "cannot get tracking configuration (schemas, relkinds, reltorage) for database %u", MyDatabaseId); + ereport(ERROR, + (errmsg("Cannot get tracking configuration (schemas, relkinds, reltorage) for database %u", MyDatabaseId))); MemoryContextSwitchTo(oldcontext); @@ -476,13 +486,13 @@ tracking_get_track_main(PG_FUNCTION_ARGS) continue; datums[7] = heap_getattr(pg_class_tuple, Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); - relkind = CharGetDatum(datums[7]); + relkind = DatumGetChar(datums[7]); if (!relkind_is_tracked(relkind)) continue; datums[8] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); - relstorage = CharGetDatum(datums[8]); + relstorage = DatumGetChar(datums[8]); if (!relstorage_is_tracked(relstorage)) continue; @@ -699,7 +709,8 @@ track_db(Oid dbid, bool reg) stmt.dbname = get_database_name(dbid); if (stmt.dbname == NULL) - elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = 
&v_stmt; @@ -717,7 +728,8 @@ track_db(Oid dbid, bool reg) if (!reg) bloom_set_unbind(&tf_shared_state->bloom_set, dbid); else if (!bloom_set_bind(&tf_shared_state->bloom_set, dbid)) - elog(ERROR, "[arenadata_toolkit] exceeded maximum number of tracked databases"); + ereport(ERROR, + (errmsg("[arenadata_toolkit] exceeded maximum number of tracked databases"))); } /* @@ -796,7 +808,8 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) stmt.setstmt = &v_stmt; if (stmt.dbname == NULL) - elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); v_stmt.type = T_VariableSetStmt; v_stmt.kind = VAR_SET_VALUE; @@ -942,7 +955,8 @@ track_schema(const char *schemaName, Oid dbid, bool reg) stmt.dbname = get_database_name(dbid); if (stmt.dbname == NULL) - elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = &v_stmt; @@ -1076,7 +1090,8 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) stmt.setstmt = &v_stmt; if (stmt.dbname == NULL) - elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); v_stmt.type = T_VariableSetStmt; v_stmt.name = "arenadata_toolkit.tracking_relkinds"; @@ -1162,7 +1177,8 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) stmt.dbname = get_database_name(dbid); if (stmt.dbname == NULL) - elog(ERROR, "[arenadata_toolkit] database %u does not exist", dbid); + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = &v_stmt; @@ -1210,14 +1226,15 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) tf_check_shmem_error(); dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; - elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %d", dbid); + elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); if (!bloom_set_trigger_bits(&tf_shared_state->bloom_set, dbid, true)) - elog(ERROR, "Failed to find corresponding filter to database %u", dbid); + ereport(ERROR, + (errmsg("Failed to find corresponding filter to database %u", dbid))); if (Gp_role == GP_ROLE_DISPATCH) { - char *cmd = psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%d)", dbid); + char *cmd = psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%u)", dbid); CdbDispatchCommand(cmd, 0, NULL); } @@ -1237,7 +1254,7 @@ tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) is_triggered = bloom_set_is_all_bits_triggered(&tf_shared_state->bloom_set, dbid); - elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %d", is_triggered, dbid); + elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %u", is_triggered, dbid); PG_RETURN_BOOL(is_triggered); } From 37cd981f5fd7014e572b4bd9df620a5aa8b3f4e0 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 01:33:35 +0300 Subject: [PATCH 08/69] Change order in PG_FINI --- gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c index cc68858ea7c4..5fbc2e5f3772 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c @@ -33,8 +33,7 @@ _PG_init(void) void _PG_fini(void) { - tf_shmem_deinit(); - file_hook_deinit(); - file_hook_deinit(); drops_track_deinit(); + file_hook_deinit(); + tf_shmem_deinit(); } From 6023a6313257fcb06cbc07601a41903f0fac660c Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 01:51:10 +0300 Subject: [PATCH 09/69] GUC 
refactoring --- .../src/arenadata_toolkit_guc.c | 113 ++++++++---------- 1 file changed, 52 insertions(+), 61 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index 60c5865d9c90..5e9cb6a46b20 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -47,49 +47,44 @@ static bool is_relstorages_unlocked = false; void tf_guc_unlock_tracked_once(void) { - if (!is_tracked_unlocked) - is_tracked_unlocked = true; + is_tracked_unlocked = true; } void tf_guc_unlock_full_snapshot_on_recovery_once(void) { - if (!is_get_full_snapshot_on_recovery_unlocked) - is_get_full_snapshot_on_recovery_unlocked = true; + is_get_full_snapshot_on_recovery_unlocked = true; } void tf_guc_unlock_schemas_once(void) { - if (!is_schemas_unlocked) - is_schemas_unlocked = true; + is_schemas_unlocked = true; } void tf_guc_unlock_relkinds_once(void) { - if (!is_relkinds_unlocked) - is_relkinds_unlocked = true; + is_relkinds_unlocked = true; } void tf_guc_unlock_relstorages_once(void) { - if (!is_relstorages_unlocked) - is_relstorages_unlocked = true; + is_relstorages_unlocked = true; } -/* Prohibit changing the GUC value manually except several cases. +/* + * Prohibit changing the GUC value manually except several cases. 
* This is not called for RESET, so RESET is not guarded */ static bool -check_tracked(bool *newval, void **extra, GucSource source) +check_guc(bool *toolkit_guc, GucSource source, bool *manual) { if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && is_tracked_unlocked)) + (Gp_role == GP_ROLE_DISPATCH && *toolkit_guc)) { - if (is_tracked_unlocked) - is_tracked_unlocked = false; + *toolkit_guc = false; if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) return false; @@ -97,86 +92,82 @@ check_tracked(bool *newval, void **extra, GucSource source) return true; } - GUC_check_errmsg("cannot change tracking status outside the tracking_register_db function"); + *manual = true; return false; } -/* Prohibit changing the GUC value manually except several cases. - * This is not called for RESET, so RESET is not guarded +/* + * Prohibit changing the arenadata_toolkit.tracking_is_db_tracked value manually */ static bool -check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source) +check_tracked(bool *newval, void **extra, GucSource source) { - if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && is_get_full_snapshot_on_recovery_unlocked)) - { - if (is_get_full_snapshot_on_recovery_unlocked) - is_get_full_snapshot_on_recovery_unlocked = false; + bool manual = false; + if (check_guc(&is_tracked_unlocked, source, &manual)) + return true; - if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) - return false; + if (manual) + GUC_check_errmsg("cannot change tracking status outside the tracking_register_db function"); + return false; +} +/* + * Prohibit changing the arenadata_toolkit.tracking_snapshot_on_recovery value manually + */ +static bool +check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source) +{ + bool manual = false; + if (check_guc(&is_get_full_snapshot_on_recovery_unlocked, source, 
&manual)) return true; - } - GUC_check_errmsg("cannot change tracking status outside the tracking_set_snapshot_on_recovery function"); + if (manual) + GUC_check_errmsg("cannot change tracking status outside the tracking_set_snapshot_on_recovery function"); return false; } +/* + * Prohibit changing the arenadata_toolkit.tracking_relkinds value manually + */ static bool check_relkinds(char **newval, void **extra, GucSource source) { - if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && is_relkinds_unlocked)) - { - if (is_relkinds_unlocked) - is_relkinds_unlocked = false; - - if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) - return false; - + bool manual = false; + if (check_guc(&is_relkinds_unlocked, source, &manual)) return true; - } - GUC_check_errmsg("cannot change tracking status outside the tracking_register_relkinds function"); + if (manual) + GUC_check_errmsg("cannot change tracking status outside the tracking_register_relkinds function"); return false; } +/* + * Prohibit changing the arenadata_toolkit.tracking_schemas value manually + */ static bool check_schemas(char **newval, void **extra, GucSource source) { - if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && is_schemas_unlocked)) - { - if (is_schemas_unlocked) - is_schemas_unlocked = false; - - if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) - return false; - + bool manual = false; + if (check_guc(&is_schemas_unlocked, source, &manual)) return true; - } - GUC_check_errmsg("cannot change tracking status outside the tracking_register_schema function"); + if (manual) + GUC_check_errmsg("cannot change tracking status outside the tracking_register_schema function"); return false; } +/* + * Prohibit changing the arenadata_toolkit.tracking_relstorages value manually + */ static bool check_relstorages(char **newval, void **extra, GucSource source) { - if 
(IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && is_relstorages_unlocked)) - { - if (is_relstorages_unlocked) - is_relstorages_unlocked = false; - - if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) - return false; - + bool manual = false; + if (check_guc(&is_relstorages_unlocked, source, &manual)) return true; - } - GUC_check_errmsg("cannot change tracking status outside the tracking_register_relstorages function"); + if (manual) + GUC_check_errmsg("cannot change tracking status outside the tracking_register_relstorages function"); return false; } From d25e299f33a0354c53f8e7983b83ab06998637fd Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 01:56:06 +0300 Subject: [PATCH 10/69] Restore dbsize file to original indentation --- gpcontrib/arenadata_toolkit/src/dbsize.c | 74 ++++++++++++------------ 1 file changed, 36 insertions(+), 38 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 620ab276bb8b..004117659904 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -1,27 +1,32 @@ +#include #include +#include +#include #include "postgres.h" #include "access/aomd.h" #include "access/heapam.h" -#include "catalog/pg_tablespace.h" #include "cdb/cdbvars.h" #include "common/relpath.h" #include "fmgr.h" #include "funcapi.h" -#include "libpq/hba.h" #include "miscadmin.h" -#include "nodes/execnodes.h" #include "storage/fd.h" -#include "storage/lock.h" +#include "nodes/execnodes.h" +#include "cdb/cdbvars.h" +#include "libpq/hba.h" #include "utils/builtins.h" #include "utils/relfilenodemap.h" #include "utils/timestamp.h" #include "utils/elog.h" #include "utils/rel.h" #include "utils/relcache.h" +#include "catalog/pg_tablespace.h" +#include "storage/lock.h" #include "dbsize.h" + PG_MODULE_MAGIC; /* @@ -36,7 +41,7 @@ static int64 get_heap_storage_total_bytes(Relation rel,
static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, - const char *relpath, int segNo); + const char *relpath, int segNo); static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); /* @@ -159,8 +164,8 @@ calculate_ao_storage_perSegFile(const int segno, void *ctx) static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum) { - Relation toastRel = relation_open(toastrelid, AccessShareLock); - int64 size = calculate_relation_size(toastRel, forknum); + Relation toastRel = relation_open(toastrelid, AccessShareLock); + int64 size = calculate_relation_size(toastRel, forknum); relation_close(toastRel, AccessShareLock); return size; @@ -220,7 +225,7 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) * operations (for ex: CTAS) zero segment will store tuples). Thus * calculate segno=0 manually. */ - (void)calculate_ao_storage_perSegFile(0, &ctx); + (void) calculate_ao_storage_perSegFile(0, &ctx); ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); return ctx.total_size; @@ -228,41 +233,37 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) typedef struct { - char *datpath; - DIR *dirdesc; - TupleDesc tupdesc; -} user_fctx_data; + char *datpath; + DIR *dirdesc; + TupleDesc tupdesc; +} user_fctx_data; /* * Name of file must be "XXX.X" or "XXX" * where XXX is Oid. OID must be not more than OID_MAX. 
*/ -static Oid -get_oid_from_filename(const char *filename) +static Oid get_oid_from_filename(const char *filename) { - unsigned long int oid, - segment; - char trailer; - - int count = sscanf(filename, "%lu.%lu%c", &oid, &segment, &trailer); + unsigned long int oid, segment; + char trailer; + int count = sscanf(filename, "%lu.%lu%c", &oid, &segment, &trailer); if (count < 1 || count > 2) return InvalidOid; if (oid > OID_MAX) return InvalidOid; - return (Oid)oid; + return (Oid) oid; } PG_FUNCTION_INFO_V1(adb_get_relfilenodes); -Datum -adb_get_relfilenodes(PG_FUNCTION_ARGS) +Datum adb_get_relfilenodes(PG_FUNCTION_ARGS) { - Oid datoid = MyDatabaseId; - Oid tablespace_oid = PG_GETARG_OID(0); + Oid datoid = MyDatabaseId; + Oid tablespace_oid = PG_GETARG_OID(0); - struct dirent *direntry; - user_fctx_data *fctx_data; + struct dirent *direntry; + user_fctx_data *fctx_data; FuncCallContext *funcctx; if (tablespace_oid == GLOBALTABLESPACE_OID) @@ -281,14 +282,13 @@ adb_get_relfilenodes(PG_FUNCTION_ARGS) if (!fctx_data->dirdesc) { - /* Nothing to do: empty tablespace (maybe it has been just - * created) */ + /* Nothing to do: empty tablespace (maybe it has been just created)*/ MemoryContextSwitchTo(oldcontext); SRF_RETURN_DONE(funcctx); } if (get_call_result_type(fcinfo, NULL, &fctx_data->tupdesc) - != TYPEFUNC_COMPOSITE) + != TYPEFUNC_COMPOSITE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("return type must be a row type"))); @@ -305,12 +305,12 @@ adb_get_relfilenodes(PG_FUNCTION_ARGS) while ((direntry = ReadDir(fctx_data->dirdesc, fctx_data->datpath)) != NULL) { struct stat fst; - Datum values[10]; - bool nulls[10]; - char *filename; - Oid reloid; - Oid relfilenode_oid; - HeapTuple tuple; + Datum values[10]; + bool nulls[10]; + char *filename; + Oid reloid; + Oid relfilenode_oid; + HeapTuple tuple; CHECK_FOR_INTERRUPTS(); @@ -368,12 +368,10 @@ adb_get_relfilenodes(PG_FUNCTION_ARGS) } PG_FUNCTION_INFO_V1(adb_hba_file_rules); -Datum 
-adb_hba_file_rules(PG_FUNCTION_ARGS) +Datum adb_hba_file_rules(PG_FUNCTION_ARGS) { return pg_hba_file_rules(fcinfo); } - /* * Calculates relation size among all the forks. */ From 6b01018d2b3474659ac11a2795dd97fccbebb585 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 01:58:11 +0300 Subject: [PATCH 11/69] ternary operator --- gpcontrib/arenadata_toolkit/src/track_files.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index e31b353f5bff..32d95799405b 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -743,7 +743,7 @@ tracking_register_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); track_db(dbid, true); @@ -769,7 +769,7 @@ tracking_unregister_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); track_db(dbid, false); @@ -793,7 +793,7 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; A_Const aconst = {.type = T_A_Const,.val = {.type = T_String,.val.str = set ? "t" : "f"}}; @@ -1002,7 +1002,7 @@ tracking_register_schema(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, @@ -1024,7 +1024,7 @@ tracking_unregister_schema(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? 
MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, @@ -1062,7 +1062,7 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; initStringInfo(&buf); str_copy = pstrdup(relkinds_str); @@ -1150,7 +1150,7 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; initStringInfo(&buf); str_copy = pstrdup(relstorages_str); @@ -1225,7 +1225,7 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); if (!bloom_set_trigger_bits(&tf_shared_state->bloom_set, dbid, true)) @@ -1250,7 +1250,7 @@ tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = dbid == InvalidOid ? MyDatabaseId : dbid; + dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; is_triggered = bloom_set_is_all_bits_triggered(&tf_shared_state->bloom_set, dbid); From 58406f346f37b3da82c3483d9920171377632020 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 02:00:01 +0300 Subject: [PATCH 12/69] fix assignment expression --- gpcontrib/arenadata_toolkit/src/track_files.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 32d95799405b..89c1f96a8901 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -1266,7 +1266,7 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) TupleDesc tupdesc; HeapTuple tuple; Datum values[2]; - bool nulls[2]; + bool nulls[2] = {false, false}; Datum result; tf_check_shmem_error(); @@ -1279,7 +1279,7 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); values[1] = BoolGetDatum(tf_shared_state->is_initialized); LWLockRelease(tf_shared_state->state_lock); - nulls[0] = nulls[1] = false; + tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); From f308583f3c6870c50c54095e6b3832cf11609e40 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 21 Oct 2024 21:01:25 +0300 Subject: [PATCH 13/69] Locks logic refactoring + light refactoring of bloom_set and bloom --- .../src/arenadata_toolkit_guc.c | 20 -- .../src/arenadata_toolkit_worker.c | 8 +- gpcontrib/arenadata_toolkit/src/bloom.c | 22 +- gpcontrib/arenadata_toolkit/src/bloom_set.c | 276 ++++++++---------- .../src/include/arenadata_toolkit_guc.h | 20 ++ .../arenadata_toolkit/src/include/bloom.h | 4 +- .../arenadata_toolkit/src/include/bloom_set.h | 8 +- .../arenadata_toolkit/src/include/tf_shmem.h | 15 +- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 83 +++++- gpcontrib/arenadata_toolkit/src/track_files.c | 14 +- 10 files changed, 255 insertions(+), 215 deletions(-) 
diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index 5e9cb6a46b20..b0df6e8fcd3b 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -8,26 +8,6 @@ #include "utils/guc.h" #include "tf_shmem.h" -#define DEFAULT_BLOOM_SIZE_BYTES 1000000 -#define DEFAULT_DB_TRACK_COUNT 5 -#define DEFAULT_IS_TRACKED false -#define DEFAULT_DROPS_COUNT 100000 -#define DEFAULT_TRACKED_SCHEMAS "public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema" -#define DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY true -#define DEFAULT_TRACKED_REL_STORAGES "h,a,c" -#define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" -#define DEFAULT_NAPTIME_SEC 60 - -#define MIN_BLOOM_SIZE 1 -#define MIN_DB_TRACK_COUNT 1 -#define MIN_DROPS_COUNT 1 -#define MIN_NAPTIME 1 - -#define MAX_BLOOM_SIZE 128000000 -#define MAX_DB_TRACK_COUNT 1000 -#define MAX_DROPS_COUNT 1000000 -#define MAX_NAPTIME OID_MAX & 0x7FFFFFFF - int bloom_size = DEFAULT_BLOOM_SIZE_BYTES; int db_track_count = DEFAULT_DB_TRACK_COUNT; bool is_tracked = DEFAULT_IS_TRACKED; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index 2a7d48649e97..b573057053a2 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -275,9 +275,9 @@ arenadata_toolkit_main(Datum main_arg) if (!tf_shared_state->is_initialized && list_length(tracked_dbs) > 0) { track_dbs(tracked_dbs); - LWLockAcquire(tf_shared_state->state_lock, LW_EXCLUSIVE); + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); tf_shared_state->is_initialized = true; - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); } /* @@ -286,9 +286,9 @@ arenadata_toolkit_main(Datum main_arg) */ if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) { - 
LWLockAcquire(tf_shared_state->state_lock, LW_EXCLUSIVE); + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); tf_shared_state->has_error = true; - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); } if (tracked_dbs) diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index b1e24acc3f92..53d96bda2e13 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -5,15 +5,11 @@ #include -bloom_t * -bloom_init(const uint32_t bloom_size, void *mem) +void +bloom_init(const uint32_t bloom_size, bloom_t *bloom) { - bloom_t *bloom = mem; - bloom->size = bloom_size; bloom_clear(bloom); - - return bloom; } static uint32_t @@ -67,14 +63,10 @@ bloom_merge(bloom_t * dst, bloom_t * src) dst->is_set_all = src->is_set_all; } -bloom_t * -bloom_copy(bloom_t * bloom, void *mem) +void +bloom_copy(bloom_t * src, bloom_t *dest) { - bloom_t *copy; - - copy = bloom_init(bloom->size, mem); - memcpy(copy->map, bloom->map, bloom->size); - copy->is_set_all = bloom->is_set_all; - - return copy; + dest->size = src->size; + memcpy(dest->map, src->map, src->size); + dest->is_set_all = src->is_set_all; } diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 93953a0015b0..51b52fe0a3f4 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -4,102 +4,78 @@ */ #include "bloom_set.h" +#include "tf_shmem.h" #include #define BLOOM_ENTRY_GET(set, i) (void *)(set->bloom_entries + i * FULL_BLOOM_ENTRY_SIZE(set->bloom_size)); -static bloom_entry_t * bloom_entry_init(const uint32_t bloom_size, void *mem) +static void +bloom_entry_init(const uint32_t bloom_size, bloom_entry_t *bloom_entry) { - bloom_entry_t *bloom_entry = mem; - - bloom_entry->lock = LWLockAssign(); bloom_entry->dbid = InvalidOid; - (void)bloom_init(bloom_size, &bloom_entry->bloom); - - return bloom_entry; + bloom_init(bloom_size, 
&bloom_entry->bloom); } -bloom_set_t * -bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, void *mem) +void +bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t *bloom_set) { - bloom_set_t *bloom_set = mem; - bloom_set->bloom_count = bloom_count; bloom_set->bloom_size = bloom_size; - bloom_set->lock = LWLockAssign(); for (uint32_t i = 0; i < bloom_count; i++) { - void *bloom_entry_mem = BLOOM_ENTRY_GET(bloom_set, i); + bloom_entry_t *bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); - (void)bloom_entry_init(bloom_size, bloom_entry_mem); + bloom_entry_init(bloom_size, bloom_entry); } - - return bloom_set; } -/* simple linear search, probably should be reworked (depends on target dbs count) */ -static bloom_entry_t * find_bloom_entry(bloom_set_t * bloom_set, Oid dbid, bool *found) +/* + * Finds the entry in bloom_set by given dbid. + * That's a simple linear search, probably should be reworked (depends on target dbs count). + */ +static bloom_entry_t * +find_bloom_entry(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; - int i; - - *found = false; + int i = 0; for (i = 0; i < bloom_set->bloom_count; i++) { bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); - if (bloom_entry->dbid == dbid || bloom_entry->dbid == InvalidOid) + if (bloom_entry->dbid == dbid) break; } if (i == bloom_set->bloom_count) return NULL; - if (bloom_entry->dbid != InvalidOid) - *found = true; - return bloom_entry; } -/* bind not used filter to given dbid */ +/* Bind available filter to given dbid */ bool bloom_set_bind(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; - bool found; - - LWLockAcquire(bloom_set->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - LWLockRelease(bloom_set->lock); - if (found) - return true; - else if (!bloom_entry) + LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry) { - return false; + 
LWLockRelease(bloom_set_lock); + return true; } - - LWLockAcquire(bloom_set->lock, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - if (bloom_entry && !found) - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); - LWLockRelease(bloom_set->lock); - - if (!bloom_entry) + bloom_entry = find_bloom_entry(bloom_set, InvalidOid); + if (bloom_entry == NULL) { - elog(WARNING, "Our bloom filter was stolen :("); + LWLockRelease(bloom_set_lock); return false; } - - if (!found) - { - bloom_entry->dbid = dbid; - LWLockRelease(bloom_entry->lock); - elog(DEBUG1, "Bloom binded %d", dbid); - } - + bloom_entry->dbid = dbid; + LWLockBindEntry(dbid); + LWLockRelease(bloom_set_lock); return true; } @@ -108,58 +84,49 @@ bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on) { bloom_entry_t *bloom_entry; - bool found; + LWLock *entry_lock; - LWLockAcquire(bloom_set->lock, LW_SHARED); - { - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - } - LWLockRelease(bloom_set->lock); - - if (!found) - { - elog(LOG, "[arenadata toolkit] tracking_initial_snapshot Bloom filter not found"); - return false; - } - - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); + LWLockAcquire(bloom_set_lock, LW_SHARED); + entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry) { if (on) bloom_set_all(&bloom_entry->bloom); else bloom_clear(&bloom_entry->bloom); + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); + return true; } - LWLockRelease(bloom_entry->lock); - return true; + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); + + if (bloom_entry == NULL) + elog(LOG, "[arenadata toolkit] tracking_initial_snapshot Bloom filter not found"); + + return false; } -/* unbind used filter by given dbid */ +/* Unbind used filter by given dbid */ void bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; - bool found; - - 
LWLockAcquire(bloom_set->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - LWLockRelease(bloom_set->lock); - if (!found) - return; - - LWLockAcquire(bloom_set->lock, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - if (bloom_entry && found) - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); - LWLockRelease(bloom_set->lock); - - if (found) + LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry == NULL) { - bloom_entry->dbid = InvalidOid; - bloom_clear(&bloom_entry->bloom); - LWLockRelease(bloom_entry->lock); - elog(DEBUG1, "Bloom unbinded %d", dbid); + LWLockRelease(bloom_set_lock); + return; } + bloom_entry->dbid = InvalidOid; + bloom_clear(&bloom_entry->bloom); + LWLockUnbindEntry(dbid); + LWLockRelease(bloom_set_lock); } uint64_t @@ -178,103 +145,98 @@ bloom_set_calc_hash(const void *buf, size_t len) return w_hash.i1 ^ w_hash.i2; } -/* find bloom by dbid, set bit based on relNode hash */ +/* Find bloom by dbid, set bit based on relNode hash */ void bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) { bloom_entry_t *bloom_entry; - bool found; uint64_t hash; + LWLock *entry_lock; - LWLockAcquire(bloom_s->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_s, dbid, &found); - if (found) - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); - LWLockRelease(bloom_s->lock); - - if (!found) - return; - - hash = bloom_set_calc_hash(&relNode, sizeof(relNode)); - bloom_set(&bloom_entry->bloom, hash); - LWLockRelease(bloom_entry->lock); - - elog(DEBUG1, "Bloom set %d %d", dbid, relNode); + LWLockAcquire(bloom_set_lock, LW_SHARED); + entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_s, dbid); + if (bloom_entry) + { + hash = bloom_set_calc_hash(&relNode, sizeof(relNode)); + bloom_set(&bloom_entry->bloom, hash); + } + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); } -/* find bloom 
by dbid, copy all bytes to new filter, clear old (but keep it) */ -bloom_t * -bloom_set_move(bloom_set_t * bloom_set, Oid dbid, void *mem) +/* Find bloom by dbid, copy all bytes to new filter, clear old (but keep it) */ +bool +bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t *dest) { bloom_entry_t *bloom_entry; - bool found; - bloom_t *copy; - - LWLockAcquire(bloom_set->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - if (found) - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); - LWLockRelease(bloom_set->lock); - - /* no bloom for the database */ - if (!found) - return NULL; + LWLock *entry_lock; - copy = bloom_copy(&bloom_entry->bloom, mem); - bloom_clear(&bloom_entry->bloom); - LWLockRelease(bloom_entry->lock); - - elog(DEBUG1, "Bloom moved %d", dbid); + LWLockAcquire(bloom_set_lock, LW_SHARED); + entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry) + { + bloom_copy(&bloom_entry->bloom, dest); + bloom_clear(&bloom_entry->bloom); + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); + return true; + } + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); - return copy; + return false; } -/* find bloom by dbid, merge bytes from another bloom to it */ +/* Find bloom by dbid, merge bytes from another bloom to it */ bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) { bloom_entry_t *bloom_entry; - bool found; - - if (!m_bloom) - return false; - - LWLockAcquire(bloom_set->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - if (found) - LWLockAcquire(bloom_entry->lock, LW_EXCLUSIVE); - LWLockRelease(bloom_set->lock); + LWLock *entry_lock; - if (!found) + if (!m_bloom || !bloom_set) return false; - bloom_merge(&bloom_entry->bloom, m_bloom); - LWLockRelease(bloom_entry->lock); - - elog(DEBUG1, "Bloom merged %d", dbid); + LWLockAcquire(bloom_set_lock, LW_SHARED); + 
entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry) + { + bloom_merge(&bloom_entry->bloom, m_bloom); + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); + return true; + } + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); - return true; + return false; } bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; - bool found; - bool is_triggered; - - LWLockAcquire(bloom_set->lock, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid, &found); - LWLockRelease(bloom_set->lock); + bool is_triggered = false; + LWLock *entry_lock; - if (!found) + LWLockAcquire(bloom_set_lock, LW_SHARED); + entry_lock = LWLockAcquireEntry(dbid, LW_SHARED); + bloom_entry = find_bloom_entry(bloom_set, dbid); + if (bloom_entry) { - return false; + is_triggered = bloom_entry->bloom.is_set_all; } - - LWLockAcquire(bloom_entry->lock, LW_SHARED); - is_triggered = bloom_entry->bloom.is_set_all; - LWLockRelease(bloom_entry->lock); + if (entry_lock) + LWLockRelease(entry_lock); + LWLockRelease(bloom_set_lock); return is_triggered; } @@ -285,15 +247,13 @@ bloom_set_count(bloom_set_t * bloom_set) int count = 0; bloom_entry_t *bloom_entry; - LWLockAcquire(bloom_set->lock, LW_SHARED); + LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); for (int i = 0; i < bloom_set->bloom_count; ++i) { bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); - LWLockAcquire(bloom_entry->lock, LW_SHARED); if (bloom_entry->dbid != InvalidOid) ++count; - LWLockRelease(bloom_entry->lock); } - LWLockRelease(bloom_set->lock); + LWLockRelease(bloom_set_lock); return count; } diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h index 155efe645fb5..dc443b79b412 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h +++ 
b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h @@ -3,6 +3,26 @@ #include "postgres.h" +#define DEFAULT_BLOOM_SIZE_BYTES 1000000 +#define DEFAULT_DB_TRACK_COUNT 5 +#define DEFAULT_IS_TRACKED false +#define DEFAULT_DROPS_COUNT 100000 +#define DEFAULT_TRACKED_SCHEMAS "public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema" +#define DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY true +#define DEFAULT_TRACKED_REL_STORAGES "h,a,c" +#define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" +#define DEFAULT_NAPTIME_SEC 60 + +#define MIN_BLOOM_SIZE 1 +#define MIN_DB_TRACK_COUNT 1 +#define MIN_DROPS_COUNT 1 +#define MIN_NAPTIME 1 + +#define MAX_BLOOM_SIZE 128000000 +#define MAX_DB_TRACK_COUNT 1000 +#define MAX_DROPS_COUNT 1000000 +#define MAX_NAPTIME OID_MAX & 0x7FFFFFFF + extern int bloom_size; extern int db_track_count; extern int drops_count; diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 598d044ad2ce..34d800784b41 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -12,12 +12,12 @@ typedef struct char map[] /* filter itself, array of bytes */ ; } bloom_t; -bloom_t *bloom_init(const uint32_t bloom_size, void *mem); +void bloom_init(const uint32_t bloom_size, bloom_t *bloom); int bloom_isset(bloom_t * bloom, uint64_t hash); void bloom_set(bloom_t * bloom, uint64_t hash); void bloom_set_all(bloom_t * bloom); void bloom_clear(bloom_t * bloom); void bloom_merge(bloom_t * dst, bloom_t * src); -bloom_t *bloom_copy(bloom_t * bloom, void *mem); +void bloom_copy(bloom_t * src, bloom_t *dest); #endif /* BLOOM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index 83ef901e1dc3..500a547b2221 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -3,8 +3,6 @@ #include "postgres.h" 
-#include "storage/lwlock.h" - #include "bloom.h" #define FULL_BLOOM_ENTRY_SIZE(size) (offsetof(bloom_entry_t, bloom) + FULL_BLOOM_SIZE(size)) @@ -13,7 +11,6 @@ /* bloom filter extended by dbid */ typedef struct { - LWLock *lock; Oid dbid; /* dbid if binded, InvalidOid if unbinded */ bloom_t bloom; } bloom_entry_t; @@ -21,19 +18,18 @@ typedef struct /* static set of all bloom filters */ typedef struct { - LWLock *lock; uint8_t bloom_count; /* count of bloom_entry_t in bloom_entries */ uint32_t bloom_size; /* size of bloom filter */ char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of * bloom_entry_t */ } bloom_set_t; -bloom_set_t *bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, void *mem); +void bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t *bloom_set); bool bloom_set_bind(bloom_set_t * bloom_set, Oid dbid); void bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid); uint64_t bloom_set_calc_hash(const void *buf, size_t len); void bloom_set_set(bloom_set_t * bloom_set, Oid dbid, Oid relNode); -bloom_t *bloom_set_move(bloom_set_t * bloom_set, Oid dbid, void *mem); +bool bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t *dest); bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on); bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid); diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index defaefea1dab..5039eb4baa58 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -1,19 +1,32 @@ #ifndef TF_SHMEM_H #define TF_SHMEM_H +#include "storage/lwlock.h" + #include "bloom_set.h" typedef struct { - LWLock *state_lock; bool has_error; bool is_initialized; bloom_set_t bloom_set; } tf_shared_state_t; +typedef struct +{ + Oid dbid; + LWLock *lock; +} tf_entry_lock_t; 
+ extern tf_shared_state_t * tf_shared_state; +extern LWLock *tf_state_lock; +extern LWLock *bloom_set_lock; +extern tf_entry_lock_t bloom_locks[]; void tf_shmem_init(void); void tf_shmem_deinit(void); +LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode); +void LWLockBindEntry(Oid dbid); +void LWLockUnbindEntry(Oid dbid); #endif /* TF_SHMEM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index a1996a687dd9..aff5226389f9 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -1,13 +1,29 @@ +#include "bloom_set.h" #include "tf_shmem.h" #include "storage/ipc.h" #include "storage/shmem.h" #include "arenadata_toolkit_guc.h" -#include "bloom_set.h" static shmem_startup_hook_type next_shmem_startup_hook = NULL; tf_shared_state_t *tf_shared_state; +LWLock *tf_state_lock; +LWLock *bloom_set_lock; +tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; + +static void +init_lwlocks(void) +{ + tf_state_lock = LWLockAssign(); + bloom_set_lock = LWLockAssign(); + + for (int i = 0; i < db_track_count; ++i) + { + bloom_locks[i].lock = LWLockAssign(); + bloom_locks[i].dbid = InvalidOid; + } +} static Size tf_shmem_calc_size(void) @@ -26,16 +42,21 @@ tf_shmem_hook(void) bool found; Size size = tf_shmem_calc_size(); + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + tf_shared_state = ShmemInitStruct("toolkit_track_files", size, &found); if (!found) { tf_shared_state->is_initialized = false; tf_shared_state->has_error = false; - tf_shared_state->state_lock = LWLockAssign(); bloom_set_init(db_track_count, bloom_size, &tf_shared_state->bloom_set); } + init_lwlocks(); + + LWLockRelease(AddinShmemInitLock); + if (next_shmem_startup_hook) next_shmem_startup_hook(); } @@ -56,3 +77,61 @@ tf_shmem_deinit(void) { shmem_startup_hook = next_shmem_startup_hook; } + +LWLock * +LWLockAcquireEntry(Oid dbid, LWLockMode mode) +{ + LWLockAcquire(tf_state_lock, LW_SHARED); + for (int i = 0; i < 
db_track_count; ++i) + { + if (bloom_locks[i].dbid == dbid) + { + LWLockAcquire(bloom_locks[i].lock, mode); + LWLockRelease(tf_state_lock); + return bloom_locks[i].lock; + } + } + LWLockRelease(tf_state_lock); + + return NULL; +} + + +void +LWLockBindEntry(Oid dbid) +{ + int i; + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + for (i = 0; i < db_track_count; ++i) + { + if (bloom_locks[i].dbid == InvalidOid) + { + bloom_locks[i].dbid = dbid; + break; + } + } + + if (i == db_track_count) + tf_shared_state->has_error = true; + LWLockRelease(tf_state_lock); +} + +void +LWLockUnbindEntry(Oid dbid) +{ + int i; + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + for (i = 0; i < db_track_count; ++i) + { + if (bloom_locks[i].dbid == dbid) + { + bloom_locks[i].dbid = InvalidOid; + break; + } + } + + if (i == db_track_count) + tf_shared_state->has_error = true; + + LWLockRelease(tf_state_lock); +} diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 89c1f96a8901..a93aaa58d843 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -346,11 +346,11 @@ tracking_get_track_main(PG_FUNCTION_ARGS) tf_check_shmem_error(); - LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); + LWLockAcquire(tf_state_lock, LW_SHARED); if (tf_shared_state->has_error) ereport(ERROR, (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); if (SRF_IS_FIRSTCALL()) { @@ -443,10 +443,10 @@ tracking_get_track_main(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); state = funcctx->user_fctx; - LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); + LWLockAcquire(tf_state_lock, LW_SHARED); if (!tf_shared_state->is_initialized) { - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); systable_endscan(state->scan); heap_close(state->pg_class_rel, 
AccessShareLock); state->scan = NULL; @@ -454,7 +454,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) elog(WARNING, "Nothing to return from segment %d due to uninitialized status of Bloom filter", GpIdentity.segindex); SRF_RETURN_DONE(funcctx); } - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); while (true) { @@ -1276,9 +1276,9 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) /* Populate an output tuple. */ values[0] = Int32GetDatum(GpIdentity.segindex); - LWLockAcquire(tf_shared_state->state_lock, LW_SHARED); + LWLockAcquire(tf_state_lock, LW_SHARED); values[1] = BoolGetDatum(tf_shared_state->is_initialized); - LWLockRelease(tf_shared_state->state_lock); + LWLockRelease(tf_state_lock); tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); From d5fdfc4454e1414503f430ecff3d81a884d48f80 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 22 Oct 2024 09:56:57 +0300 Subject: [PATCH 14/69] Calculate factual timeout in bgworker loop --- .../src/arenadata_toolkit_worker.c | 73 ++++++++++++------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index b573057053a2..f481536293ae 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -233,10 +233,47 @@ track_dbs(List *tracked_dbs) } } +static void +worker_tracking_status_check() +{ + List *tracked_dbs = NIL; + + StartTransactionCommand(); + tracked_dbs = get_tracked_dbs(); + + if (!tf_shared_state->is_initialized && list_length(tracked_dbs) > 0) + { + track_dbs(tracked_dbs); + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + tf_shared_state->is_initialized = true; + LWLockRelease(tf_state_lock); + } + + /* + * Here is quite a dump check, which imitates consistency validation. + * Written as an example of segment erroneous tracking status. 
+ */ + if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) + { + LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + tf_shared_state->has_error = true; + LWLockRelease(tf_state_lock); + } + + if (tracked_dbs) + list_free_deep(tracked_dbs); + + CommitTransactionCommand(); +} + /* scan pg_db_role_setting, find all databases, bind blooms if necessary */ void arenadata_toolkit_main(Datum main_arg) { + instr_time current_time_timeout; + instr_time start_time_timeout; + long current_timeout = -1; + elog(LOG, "[arenadata toolkit] Starting background worker"); /* @@ -267,37 +304,13 @@ arenadata_toolkit_main(Datum main_arg) while (!got_sigterm) { int rc; - List *tracked_dbs = NIL; - - StartTransactionCommand(); - tracked_dbs = get_tracked_dbs(); - - if (!tf_shared_state->is_initialized && list_length(tracked_dbs) > 0) - { - track_dbs(tracked_dbs); - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); - tf_shared_state->is_initialized = true; - LWLockRelease(tf_state_lock); - } - - /* - * Here is quite a dump check, which imitates consistency validation. - * Written as an example of segment erroneous tracking status. 
- */ - if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) - { - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); - tf_shared_state->has_error = true; - LWLockRelease(tf_state_lock); - } - - if (tracked_dbs) - list_free_deep(tracked_dbs); + long timeout = tracking_worker_naptime_sec * 1000; - CommitTransactionCommand(); + if (current_timeout <= 0) + INSTR_TIME_SET_CURRENT(start_time_timeout); rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - tracking_worker_naptime_sec * 1000); + timeout); if (rc & WL_LATCH_SET) { @@ -319,6 +332,10 @@ arenadata_toolkit_main(Datum main_arg) ProcessConfigFile(PGC_SIGHUP); } + INSTR_TIME_SET_CURRENT(current_time_timeout); + INSTR_TIME_SUBTRACT(current_time_timeout, start_time_timeout); + current_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(current_time_timeout); + if (current_timeout <= 0) worker_tracking_status_check(); } if (got_sigterm) From 76849ef710bfc7657062639d19753cd5b04817da Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 23 Oct 2024 22:00:59 +0300 Subject: [PATCH 15/69] add atomics --- .../sql/arenadata_toolkit_tracking.sql | 2 +- .../src/arenadata_toolkit_worker.c | 12 +++++------- .../arenadata_toolkit/src/include/tf_shmem.h | 7 +++++-- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 12 ++++++------ gpcontrib/arenadata_toolkit/src/track_files.c | 15 ++++----------- 5 files changed, 21 insertions(+), 27 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index a592c5a08a0d..71f2193a2f2e 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -2,7 +2,7 @@ -- start_ignore \! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' \! gpstop -raq -M fast -\! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '1' +\! 
gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5' \! gpstop -u \c -- end_ignore diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index f481536293ae..1a8e2b69c387 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -241,12 +241,11 @@ worker_tracking_status_check() StartTransactionCommand(); tracked_dbs = get_tracked_dbs(); - if (!tf_shared_state->is_initialized && list_length(tracked_dbs) > 0) + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized) && list_length(tracked_dbs) > 0) { track_dbs(tracked_dbs); - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); - tf_shared_state->is_initialized = true; - LWLockRelease(tf_state_lock); + + pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized); } /* @@ -255,9 +254,8 @@ worker_tracking_status_check() */ if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) { - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); - tf_shared_state->has_error = true; - LWLockRelease(tf_state_lock); + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) + pg_atomic_test_set_flag(&tf_shared_state->tracking_error); } if (tracked_dbs) diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 5039eb4baa58..ac3aa95b0d13 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -2,13 +2,14 @@ #define TF_SHMEM_H #include "storage/lwlock.h" +#include "port/atomics.h" #include "bloom_set.h" typedef struct { - bool has_error; - bool is_initialized; + pg_atomic_flag tracking_is_initialized; + pg_atomic_flag tracking_error; bloom_set_t bloom_set; } tf_shared_state_t; @@ -22,6 +23,8 @@ extern tf_shared_state_t * tf_shared_state; extern LWLock *tf_state_lock; extern LWLock 
*bloom_set_lock; extern tf_entry_lock_t bloom_locks[]; +extern volatile pg_atomic_flag tracking_is_initialized; +extern volatile pg_atomic_flag tracking_error; void tf_shmem_init(void); void tf_shmem_deinit(void); diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index aff5226389f9..de10f1b87f07 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -48,8 +48,8 @@ tf_shmem_hook(void) if (!found) { - tf_shared_state->is_initialized = false; - tf_shared_state->has_error = false; + pg_atomic_init_flag(&tf_shared_state->tracking_is_initialized); + pg_atomic_init_flag(&tf_shared_state->tracking_error); bloom_set_init(db_track_count, bloom_size, &tf_shared_state->bloom_set); } @@ -111,8 +111,8 @@ LWLockBindEntry(Oid dbid) } } - if (i == db_track_count) - tf_shared_state->has_error = true; + if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) + pg_atomic_test_set_flag(&tf_shared_state->tracking_error); LWLockRelease(tf_state_lock); } @@ -130,8 +130,8 @@ LWLockUnbindEntry(Oid dbid) } } - if (i == db_track_count) - tf_shared_state->has_error = true; + if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) + pg_atomic_test_set_flag(&tf_shared_state->tracking_error); LWLockRelease(tf_state_lock); } diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index a93aaa58d843..30ca0349c310 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -346,11 +346,9 @@ tracking_get_track_main(PG_FUNCTION_ARGS) tf_check_shmem_error(); - LWLockAcquire(tf_state_lock, LW_SHARED); - if (tf_shared_state->has_error) + if (!pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) ereport(ERROR, (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); - 
LWLockRelease(tf_state_lock); if (SRF_IS_FIRSTCALL()) { @@ -443,18 +441,15 @@ tracking_get_track_main(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); state = funcctx->user_fctx; - LWLockAcquire(tf_state_lock, LW_SHARED); - if (!tf_shared_state->is_initialized) + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) { - LWLockRelease(tf_state_lock); systable_endscan(state->scan); heap_close(state->pg_class_rel, AccessShareLock); state->scan = NULL; state->pg_class_rel = NULL; - elog(WARNING, "Nothing to return from segment %d due to uninitialized status of Bloom filter", GpIdentity.segindex); + elog(LOG, "Nothing to return from segment %d due to uninitialized status of Bloom filter", GpIdentity.segindex); SRF_RETURN_DONE(funcctx); } - LWLockRelease(tf_state_lock); while (true) { @@ -1276,9 +1271,7 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) /* Populate an output tuple. */ values[0] = Int32GetDatum(GpIdentity.segindex); - LWLockAcquire(tf_state_lock, LW_SHARED); - values[1] = BoolGetDatum(tf_shared_state->is_initialized); - LWLockRelease(tf_state_lock); + values[1] = BoolGetDatum(pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized) == false); tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); From 96ac88032a5497841791538bdd7eb4d6b229b3c2 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 24 Oct 2024 15:12:20 +0300 Subject: [PATCH 16/69] Soft size calculation --- gpcontrib/arenadata_toolkit/src/dbsize.c | 169 +++++++++++++++--- .../arenadata_toolkit/src/include/dbsize.h | 4 +- .../arenadata_toolkit/src/include/tf_shmem.h | 2 - gpcontrib/arenadata_toolkit/src/track_files.c | 4 +- 4 files changed, 153 insertions(+), 26 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 004117659904..200db9b8c208 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -9,6 +9,7 @@ 
#include "access/heapam.h" #include "cdb/cdbvars.h" #include "common/relpath.h" +#include "catalog/namespace.h" #include "fmgr.h" #include "funcapi.h" #include "miscadmin.h" @@ -35,14 +36,16 @@ PG_MODULE_MAGIC; */ #define MAXPATHLEN_WITHSEGNO (MAXPGPATH + 12) -static int64 calculate_relation_size(Relation rel, ForkNumber forknum); +static int64 calculate_relation_size(Relation rel, ForkNumber forknum, bool softCalc); static int64 get_heap_storage_total_bytes(Relation rel, ForkNumber forknum, char *relpath); -static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); +static int64 get_ao_storage_total_bytes(Relation rel, char *relpath, bool softCalc); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, const char *relpath, int segNo); static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); +static int64 get_heap_storage_total_bytes_soft(Relation rel, ForkNumber forknum, char *relpath); +static bool calculate_ao_storage_perSegFile_soft(const int segno, void *ctx); /* * Structure used to accumulate the size of AO/CO relation from callback. @@ -77,7 +80,7 @@ adb_relation_storage_size(PG_FUNCTION_ARGS) if (relOid == 0 || rel->rd_node.relNode == 0) size = 0; else - size = calculate_relation_size(rel, forkNumber); + size = calculate_relation_size(rel, forkNumber, false); if (Gp_role == GP_ROLE_DISPATCH) { @@ -104,9 +107,14 @@ adb_relation_storage_size(PG_FUNCTION_ARGS) * dbsize.c. Thus calculation of size for heap/AO/CO relations is supported * (AO/CO relations don't have any extra forks, so only main fork is supported) * In other cases zero value is returned. + * + * softCalc parameter stands for indicator whether one can ignore stat() call + * errors. In this mode rel is not fully initialized and the lock on relation + * is not acquired. Therefore, the size calculation will be fuzzy since + * anything could happen with segment files. 
*/ static int64 -calculate_relation_size(Relation rel, ForkNumber forknum) +calculate_relation_size(Relation rel, ForkNumber forknum, bool softCalc) { bool isAOMainFork = RelationIsAppendOptimized(rel) && forknum == MAIN_FORKNUM; @@ -116,9 +124,9 @@ calculate_relation_size(Relation rel, ForkNumber forknum) char *relpath = relpathbackend(rel->rd_node, rel->rd_backend, forknum); if (RelationIsHeap(rel)) - return get_heap_storage_total_bytes(rel, forknum, relpath); + return softCalc ? get_heap_storage_total_bytes_soft(rel, forknum, relpath) : get_heap_storage_total_bytes(rel, forknum, relpath); - return get_ao_storage_total_bytes(rel, relpath); + return get_ao_storage_total_bytes(rel, relpath, softCalc); } static void @@ -165,7 +173,7 @@ static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum) { Relation toastRel = relation_open(toastrelid, AccessShareLock); - int64 size = calculate_relation_size(toastRel, forknum); + int64 size = calculate_relation_size(toastRel, forknum, false); relation_close(toastRel, AccessShareLock); return size; @@ -209,9 +217,14 @@ get_heap_storage_total_bytes(Relation rel, ForkNumber forknum, char *relpath) /* * Function calculates the size of AO/CO tables. + * + * softCalc parameter stands for indicator whether one can ignore stat() call + * errors. In this mode rel is not fully initialized and the lock on relation + * is not acquired. Therefore, the size calculation will be fuzzy since + * anything could happen with segment files. */ static int64 -get_ao_storage_total_bytes(Relation rel, char *relpath) +get_ao_storage_total_bytes(Relation rel, char *relpath, bool softCalc) { struct calculate_ao_storage_callback_ctx ctx = { .relfilenode_path = relpath, @@ -225,9 +238,17 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) * operations (for ex: CTAS) zero segment will store tuples). Thus * calculate segno=0 manually. 
*/ - (void) calculate_ao_storage_perSegFile(0, &ctx); + if (softCalc) + { + (void) calculate_ao_storage_perSegFile_soft(0, &ctx); + ao_foreach_extent_file(calculate_ao_storage_perSegFile_soft, &ctx); + } + else + { + (void) calculate_ao_storage_perSegFile(0, &ctx); + ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); + } - ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); return ctx.total_size; } @@ -373,28 +394,132 @@ Datum adb_hba_file_rules(PG_FUNCTION_ARGS) return pg_hba_file_rules(fcinfo); } /* - * Calculates relation size among all the forks. + * Calculates relation size among all the forks. No lock is acquired on table. + * RelationData is partially initialized. Only necessary fields are taken from + * pg_class tuple to determine segment file location. */ int64 -dbsize_calc_size(Oid relid) +dbsize_calc_size(Form_pg_class pg_class_data) { - Relation rel; + RelationData rel; int64 size = 0; - rel = try_relation_open(relid, AccessShareLock, false); + memset(&rel, 0, sizeof(RelationData)); + /* + * Initialize Relfilenode field of RelationData. + */ + if (pg_class_data->reltablespace) + rel.rd_node.spcNode = pg_class_data->reltablespace; + else + rel.rd_node.spcNode = MyDatabaseTableSpace; + if (rel.rd_node.spcNode == GLOBALTABLESPACE_OID) + rel.rd_node.dbNode = InvalidOid; + else + rel.rd_node.dbNode = MyDatabaseId; - if (rel == NULL) - return size; + if (pg_class_data->relfilenode) + rel.rd_node.relNode = pg_class_data->relfilenode; - if (rel->rd_node.relNode == 0) + if (rel.rd_node.relNode == 0) return size; - size += calculate_relation_size(rel, MAIN_FORKNUM); - size += calculate_relation_size(rel, FSM_FORKNUM); - size += calculate_relation_size(rel, VISIBILITYMAP_FORKNUM); - size += calculate_relation_size(rel, INIT_FORKNUM); + rel.rd_rel = pg_class_data; - relation_close(rel, AccessShareLock); + /* + * Initialize BackendIf field of RelationData. 
+ */ + switch (rel.rd_rel->relpersistence) + { + case RELPERSISTENCE_UNLOGGED: + case RELPERSISTENCE_PERMANENT: + rel.rd_backend = InvalidBackendId; + rel.rd_islocaltemp = false; + break; + case RELPERSISTENCE_TEMP: + if (isTempOrToastNamespace(rel.rd_rel->relnamespace)) + { + rel.rd_backend = TempRelBackendId; + rel.rd_islocaltemp = true; + } + else + { + rel.rd_backend = TempRelBackendId; + rel.rd_islocaltemp = false; + } + break; + default: + elog(ERROR, "invalid relpersistence: %c", + rel.rd_rel->relpersistence); + break; + } + + size += calculate_relation_size(&rel, MAIN_FORKNUM, true); + size += calculate_relation_size(&rel, FSM_FORKNUM, true); + size += calculate_relation_size(&rel, VISIBILITYMAP_FORKNUM, true); + size += calculate_relation_size(&rel, INIT_FORKNUM, true); return size; } + +/* + * Function calculates the size of heap tables. + * + * The errors of stat() call are ignored + */ +static int64 +get_heap_storage_total_bytes_soft(Relation rel, ForkNumber forknum, char *relpath) +{ + int64 totalsize = 0; + char segPath[MAXPATHLEN_WITHSEGNO]; + + /* + * Ordinary relation, including heap and index. They take form of + * relationpath, or relationpath.%d There will be no holes, therefore, we + * can stop when we reach the first non-existing file. + */ + for (int segno = 0;; segno++) + { + struct stat fst; + + CHECK_FOR_INTERRUPTS(); + + fill_relation_seg_path(segPath, MAXPATHLEN_WITHSEGNO, relpath, segno); + if (stat(segPath, &fst) < 0) + { + ereport(LOG, (errcode_for_file_access(), + errmsg("[arenadata_toolkit] could not stat file %s: %m", segPath))); + break; + } + totalsize += fst.st_size; + } + + return totalsize; +} + +/* + * Function calculates the size of ao segment files. + * + * The errors of stat() call are ignored. 
+ */ +static bool +calculate_ao_storage_perSegFile_soft(const int segno, void *ctx) +{ + struct stat fst; + char segPath[MAXPATHLEN_WITHSEGNO]; + struct calculate_ao_storage_callback_ctx *calcCtx = ctx; + + CHECK_FOR_INTERRUPTS(); + + fill_relation_seg_path(segPath, MAXPATHLEN_WITHSEGNO, + calcCtx->relfilenode_path, segno); + + if (stat(segPath, &fst) < 0) + { + ereport(LOG, (errcode_for_file_access(), + errmsg("[arenadata_toolkit] could not access file %s: %m", segPath))); + } + else + calcCtx->total_size += fst.st_size; + + return true; +} diff --git a/gpcontrib/arenadata_toolkit/src/include/dbsize.h b/gpcontrib/arenadata_toolkit/src/include/dbsize.h index 228cea9806db..bd6cdcebce76 100644 --- a/gpcontrib/arenadata_toolkit/src/include/dbsize.h +++ b/gpcontrib/arenadata_toolkit/src/include/dbsize.h @@ -1,6 +1,8 @@ #ifndef DBSIZE_H #define DBSIZE_H -int64 dbsize_calc_size(Oid relid); +#include "catalog/pg_class.h" + +int64 dbsize_calc_size(Form_pg_class relInfo); #endif /* DBSIZE_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index ac3aa95b0d13..5d4f8f48a44b 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -23,8 +23,6 @@ extern tf_shared_state_t * tf_shared_state; extern LWLock *tf_state_lock; extern LWLock *bloom_set_lock; extern tf_entry_lock_t bloom_locks[]; -extern volatile pg_atomic_flag tracking_is_initialized; -extern volatile pg_atomic_flag tracking_error; void tf_shmem_init(void); void tf_shmem_deinit(void); diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 30ca0349c310..5d76b09a359d 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -512,7 +512,9 @@ tracking_get_track_main(PG_FUNCTION_ARGS) * quite suboptimal. The size calculation strategy should be revised * in future. 
*/ - datums[3] = Int64GetDatum(dbsize_calc_size(HeapTupleGetOid(pg_class_tuple))); + Form_pg_class relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); + int64 size = dbsize_calc_size(relp); + datums[3] = Int64GetDatum(size); datums[4] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); datums[5] = Int32GetDatum(GpIdentity.segindex); From 1ab4187e44b0634b58201623bde3e4c36d75b2cf Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 24 Oct 2024 17:12:22 +0300 Subject: [PATCH 17/69] remove &s --- .../src/arenadata_toolkit_guc.c | 19 +++++++++---------- .../src/include/arenadata_toolkit_guc.h | 8 ++++---- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index b0df6e8fcd3b..002b0ef317a3 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -1,6 +1,5 @@ #include "arenadata_toolkit_guc.h" - #include "cdb/cdbvars.h" #include "catalog/objectaccess.h" #include "catalog/pg_db_role_setting.h" @@ -159,8 +158,8 @@ tf_guc_define(void) NULL, &bloom_size, DEFAULT_BLOOM_SIZE_BYTES, - MIN_BLOOM_SIZE, - MAX_BLOOM_SIZE, + MIN_BLOOM_SIZE_BYTES, + MAX_BLOOM_SIZE_BYTES, PGC_POSTMASTER, 0, NULL, @@ -189,7 +188,7 @@ tf_guc_define(void) DEFAULT_IS_TRACKED, PGC_SUSET, 0, - &check_tracked, + check_tracked, NULL, NULL); @@ -200,7 +199,7 @@ tf_guc_define(void) DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY, PGC_SUSET, 0, - &check_get_full_snapshot_on_recovery, + check_get_full_snapshot_on_recovery, NULL, NULL); @@ -224,7 +223,7 @@ tf_guc_define(void) DEFAULT_TRACKED_SCHEMAS, PGC_SUSET, 0, - &check_schemas, + check_schemas, NULL, NULL); @@ -235,7 +234,7 @@ tf_guc_define(void) DEFAULT_TRACKED_REL_STORAGES, PGC_SUSET, 0, - &check_relstorages, + check_relstorages, NULL, NULL); @@ -246,7 +245,7 @@ tf_guc_define(void) DEFAULT_TRACKED_REL_KINDS, PGC_SUSET, 0, - &check_relkinds, + 
check_relkinds, NULL, NULL); @@ -256,8 +255,8 @@ tf_guc_define(void) NULL, &tracking_worker_naptime_sec, DEFAULT_NAPTIME_SEC, - 1, - MAX_NAPTIME, + MIN_NAPTIME_SEC, + MAX_NAPTIME_SEC, PGC_SIGHUP, 0, NULL, diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h index dc443b79b412..139599b847ab 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h +++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h @@ -13,15 +13,15 @@ #define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" #define DEFAULT_NAPTIME_SEC 60 -#define MIN_BLOOM_SIZE 1 +#define MIN_BLOOM_SIZE_BYTES 1 #define MIN_DB_TRACK_COUNT 1 #define MIN_DROPS_COUNT 1 -#define MIN_NAPTIME 1 +#define MIN_NAPTIME_SEC 1 -#define MAX_BLOOM_SIZE 128000000 +#define MAX_BLOOM_SIZE_BYTES 128000000 #define MAX_DB_TRACK_COUNT 1000 #define MAX_DROPS_COUNT 1000000 -#define MAX_NAPTIME OID_MAX & 0x7FFFFFFF +#define MAX_NAPTIME_SEC OID_MAX & 0x7FFFFFFF extern int bloom_size; extern int db_track_count; From 36cfd5a57ef1c1c87d8c5e237f21747bd1f24ed2 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 24 Oct 2024 21:46:44 +0300 Subject: [PATCH 18/69] formatting --- gpcontrib/arenadata_toolkit/src/track_files.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 5d76b09a359d..e2cb40f167f9 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -459,6 +459,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) char relstorage; HeapTuple pg_class_tuple; uint64_t hash; + Form_pg_class relp; + int64 size; if (!state->scan) break; @@ -507,13 +509,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) if (!bloom_isset(tf_get_global_state.bloom, hash)) continue; - /* - * Taking a lock and calling relation_open in dbsize_calc_size is - * quite 
suboptimal. The size calculation strategy should be revised - * in future. - */ - Form_pg_class relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); - int64 size = dbsize_calc_size(relp); + relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); + size = dbsize_calc_size(relp); datums[3] = Int64GetDatum(size); datums[4] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); datums[5] = Int32GetDatum(GpIdentity.segindex); From b32426c4cff7a4d718b180c21abfdd423f702f35 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Sun, 27 Oct 2024 23:39:50 +0300 Subject: [PATCH 19/69] Rework worker --- .../src/arenadata_toolkit_worker.c | 175 +++++------------- 1 file changed, 44 insertions(+), 131 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index 1a8e2b69c387..b59b881f6b4e 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -9,6 +9,7 @@ #include "cdb/cdbdisp_query.h" #include "cdb/cdbdispatchresult.h" #include "cdb/cdbvars.h" +#include "executor/spi.h" #include "libpq-fe.h" #include "postmaster/bgworker.h" #include "storage/proc.h" @@ -23,11 +24,11 @@ #include "tf_shmem.h" #define TOOLKIT_BINARY_NAME "arenadata_toolkit" +#define SQL(...) 
#__VA_ARGS__ typedef struct { Oid dbid; - Name dbname; bool get_full_snapshot_on_recovery; } tracked_db_t; @@ -37,101 +38,6 @@ static volatile sig_atomic_t got_sigterm = false; void arenadata_toolkit_main(Datum); -/* parse array of GUCs, find desired and analyze it */ -static bool -is_db_tracked(ArrayType *array) -{ - bool is_tracked = false; - Datum *elems; - bool *nulls; - int nelems; - - deconstruct_array(array, TEXTOID, -1, false, 'i', - &elems, &nulls, &nelems); - for (int i = 0; i < nelems; i++) - { - char *s; - char *name; - char *value; - - if (nulls[i]) - continue; - - s = TextDatumGetCString(elems[i]); - ParseLongOption(s, &name, &value); - - if (!value) - { - free(name); - continue; - } - - if (strcmp(name, "arenadata_toolkit.tracking_is_db_tracked") == 0 && - strcmp(value, "t") == 0) - { - is_tracked = true; - break; - } - - free(name); - if (value) - free(value); - pfree(s); - } - - return is_tracked; -} - -static bool -full_snapshot_on_recovery(ArrayType *array) -{ - bool take_snapshot = false; - bool found = false; - Datum *elems; - bool *nulls; - int nelems; - - deconstruct_array(array, TEXTOID, -1, false, 'i', - &elems, &nulls, &nelems); - - for (int i = 0; i < nelems; i++) - { - char *s; - char *name; - char *value; - - if (nulls[i]) - continue; - - s = TextDatumGetCString(elems[i]); - ParseLongOption(s, &name, &value); - - if (!value) - { - free(name); - continue; - } - - if (strcmp(name, "arenadata_toolkit.tracking_snapshot_on_recovery") == 0) - { - found = true; - if (strcmp(value, "t") == 0) - take_snapshot = true; - break; - } - - free(name); - if (value) - free(value); - pfree(s); - } - - if (!found) - take_snapshot = get_full_snapshot_on_recovery; - - return take_snapshot; -} - /* * Signal handler for SIGTERM * Set a flag to let the main loop to terminate, and set our latch to wake @@ -169,50 +75,54 @@ tracking_sighup(SIGNAL_ARGS) static List* get_tracked_dbs() { - Relation rel; - SysScanDesc scan; - HeapTuple tup; + StringInfoData query; 
List *tracked_dbs = NIL; tracked_db_t *trackedDb; + MemoryContext topcontext = CurrentMemoryContext; - rel = heap_open(DbRoleSettingRelationId, AccessShareLock); - scan = systable_beginscan(rel, InvalidOid, false, NULL, 0, NULL); - while (HeapTupleIsValid(tup = systable_getnext(scan))) - { - bool isnull; - Datum str_datum; - Datum oid_datum; - ArrayType *a; + initStringInfo(&query); + appendStringInfo(&query, SQL( + WITH _ AS ( + WITH _ AS ( + SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig" FROM "pg_db_role_setting" WHERE "setrole"=0 + ) SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig" FROM _ GROUP BY 1 + ) select "setdatabase", + ("setconfig"->>'arenadata_toolkit.tracking_snapshot_on_recovery')::bool as "snapshot" FROM _ WHERE + ("setconfig"->>'arenadata_toolkit.tracking_is_db_tracked')::bool IS TRUE + )); - str_datum = heap_getattr(tup, Anum_pg_db_role_setting_setconfig, - RelationGetDescr(rel), &isnull); - if (isnull) - continue; + if (SPI_connect() != SPI_OK_CONNECT) + ereport(ERROR, (errmsg("SPI_connect failed"))); - oid_datum = heap_getattr(tup, Anum_pg_db_role_setting_setrole, - RelationGetDescr(rel), &isnull); - if (DatumGetObjectId(oid_datum) != InvalidOid) - continue; + PushActiveSnapshot(GetTransactionSnapshot()); - oid_datum = heap_getattr(tup, Anum_pg_db_role_setting_setdatabase, - RelationGetDescr(rel), &isnull); - if (DatumGetObjectId(oid_datum) == InvalidOid) - continue; + if (SPI_execute(query.data, true, 0) != SPI_OK_SELECT) + ereport(ERROR, (errmsg("SPI_execute failed"))); - a = DatumGetArrayTypeP(str_datum); + for (uint64 row = 0; row < SPI_processed; row++) + { + HeapTuple val = SPI_tuptable->vals[row]; + TupleDesc tupdesc = SPI_tuptable->tupdesc; + bool isnull = false; + Oid dbid = DatumGetObjectId(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "setdatabase"), &isnull)); + bool get_snapshot_on_recovery = DatumGetBool(SPI_getbinval(val, tupdesc, 
SPI_fnumber(tupdesc, "snapshot"), &isnull)); - if (is_db_tracked(a)) - { - trackedDb = (tracked_db_t *) palloc0(sizeof(tracked_db_t)); + if (isnull) + get_snapshot_on_recovery = get_full_snapshot_on_recovery; - trackedDb->dbid = DatumGetObjectId(oid_datum); - trackedDb->get_full_snapshot_on_recovery = full_snapshot_on_recovery(a); - tracked_dbs = lappend(tracked_dbs, trackedDb); - } + MemoryContext oldcontext = MemoryContextSwitchTo(topcontext); + + trackedDb = (tracked_db_t *) palloc0(sizeof(tracked_db_t)); + trackedDb->dbid = dbid; + trackedDb->get_full_snapshot_on_recovery = get_snapshot_on_recovery; + tracked_dbs = lappend(tracked_dbs, trackedDb); + + MemoryContextSwitchTo(oldcontext); } + SPI_finish(); + PopActiveSnapshot(); - systable_endscan(scan); - heap_close(rel, AccessShareLock); + pfree(query.data); return tracked_dbs; } @@ -239,6 +149,7 @@ worker_tracking_status_check() List *tracked_dbs = NIL; StartTransactionCommand(); + tracked_dbs = get_tracked_dbs(); if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized) && list_length(tracked_dbs) > 0) @@ -260,7 +171,6 @@ worker_tracking_status_check() if (tracked_dbs) list_free_deep(tracked_dbs); - CommitTransactionCommand(); } @@ -305,10 +215,13 @@ arenadata_toolkit_main(Datum main_arg) long timeout = tracking_worker_naptime_sec * 1000; if (current_timeout <= 0) + { INSTR_TIME_SET_CURRENT(start_time_timeout); + current_timeout = timeout; + } rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - timeout); + current_timeout); if (rc & WL_LATCH_SET) { From f62b60911bc6f7d4e28a64c2596403d08db93ba3 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Sun, 27 Oct 2024 23:41:12 +0300 Subject: [PATCH 20/69] More smart hashing strategy --- gpcontrib/arenadata_toolkit/src/bloom.c | 77 ++++++++++++++----- gpcontrib/arenadata_toolkit/src/bloom_set.c | 22 +----- gpcontrib/arenadata_toolkit/src/dbsize.c | 4 +- gpcontrib/arenadata_toolkit/src/file_hook.c | 3 +- 
.../src/include/arenadata_toolkit_guc.h | 4 +- .../arenadata_toolkit/src/include/bloom.h | 12 ++- .../arenadata_toolkit/src/include/bloom_set.h | 7 +- .../arenadata_toolkit/src/include/hashimpl.h | 49 ++++++++++++ .../arenadata_toolkit/src/include/tf_shmem.h | 4 + gpcontrib/arenadata_toolkit/src/tf_shmem.c | 37 ++++++++- gpcontrib/arenadata_toolkit/src/track_files.c | 4 +- 11 files changed, 164 insertions(+), 59 deletions(-) create mode 100644 gpcontrib/arenadata_toolkit/src/include/hashimpl.h diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index 53d96bda2e13..6cd1d57976cd 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -2,42 +2,79 @@ * Simple bloom filter without using postgres primitives. */ #include "bloom.h" +#include "hashimpl.h" +#include "tf_shmem.h" -#include - -void -bloom_init(const uint32_t bloom_size, bloom_t *bloom) +static inline uint32 +mod_m(uint32 val, uint64 m) { - bloom->size = bloom_size; - bloom_clear(bloom); + Assert(m <= PG_UINT32_MAX + UINT64CONST(1)); + Assert(((m - 1) & m) == 0); + + return val & (m - 1); } -static uint32_t -calc_idx(bloom_t * bloom, uint64_t hash, uint8_t *bit_idx) +static void +tracking_hashes(Oid node, uint32 bloom_size, uint32 *out_hashes) { - uint64_t bloom_bit_idx = hash % (8 * bloom->size); + uint64 hash; + uint32 x, + y; + uint64 m; + int i; + + /* Use 64-bit hashing to get two independent 32-bit hashes */ + hash = wyhash(node, bloom_hash_seed); + x = (uint32) hash; + y = (uint32) (hash >> 32); + m = bloom_size * 8; + + x = mod_m(x, m); + y = mod_m(y, m); - *bit_idx = bloom_bit_idx % 8; + /* Accumulate hashes */ + out_hashes[0] = x; + for (i = 1; i < bloom_hash_num; i++) + { + x = mod_m(x + y, m); + y = mod_m(y + i, m); - return bloom_bit_idx / 8; + out_hashes[i] = x; + } } -int -bloom_isset(bloom_t * bloom, uint64_t hash) +bool +bloom_isset(bloom_t * bloom, Oid relnode) { - uint8_t bit_idx; - uint32_t byte_idx = 
calc_idx(bloom, hash, &bit_idx); + uint32 hashes[MAX_BLOOM_HASH_FUNCS]; - return bloom->map[byte_idx] & (1 << bit_idx); + tracking_hashes(relnode, bloom->size, hashes); + + for (int i = 0; i < bloom_hash_num; ++i) + { + if (!(bloom->map[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) + return false; + } + return true; } void -bloom_set(bloom_t * bloom, uint64_t hash) +bloom_set(bloom_t * bloom, Oid relnode) { - uint8_t bit_idx; - uint32_t byte_idx = calc_idx(bloom, hash, &bit_idx); + uint32 hashes[MAX_BLOOM_HASH_FUNCS]; + + tracking_hashes(relnode, bloom->size, hashes); + for (int i = 0; i < bloom_hash_num; ++i) + { + bloom->map[hashes[i] >> 3] |= 1 << (hashes[i] & 7); + } +} - bloom->map[byte_idx] |= (1 << bit_idx); +void +bloom_init(const uint32 bloom_size, bloom_t *bloom) +{ + bloom->size = bloom_size; + bloom_clear(bloom); } void diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 51b52fe0a3f4..7893084007fc 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -6,8 +6,6 @@ #include "bloom_set.h" #include "tf_shmem.h" -#include - #define BLOOM_ENTRY_GET(set, i) (void *)(set->bloom_entries + i * FULL_BLOOM_ENTRY_SIZE(set->bloom_size)); static void @@ -129,28 +127,11 @@ bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid) LWLockRelease(bloom_set_lock); } -uint64_t -bloom_set_calc_hash(const void *buf, size_t len) -{ - struct wide_hash - { - uint64_t i1; - uint64_t i2; - }; - struct wide_hash w_hash; - bool hash_res = pg_md5_binary(buf, len, &w_hash); - - Assert(hash_res); - - return w_hash.i1 ^ w_hash.i2; -} - /* Find bloom by dbid, set bit based on relNode hash */ void bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) { bloom_entry_t *bloom_entry; - uint64_t hash; LWLock *entry_lock; LWLockAcquire(bloom_set_lock, LW_SHARED); @@ -158,8 +139,7 @@ bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) bloom_entry = find_bloom_entry(bloom_s, 
dbid); if (bloom_entry) { - hash = bloom_set_calc_hash(&relNode, sizeof(relNode)); - bloom_set(&bloom_entry->bloom, hash); + bloom_set(&bloom_entry->bloom, relNode); } if (entry_lock) LWLockRelease(entry_lock); diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 200db9b8c208..215df71613a7 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -486,7 +486,7 @@ get_heap_storage_total_bytes_soft(Relation rel, ForkNumber forknum, char *relpat fill_relation_seg_path(segPath, MAXPATHLEN_WITHSEGNO, relpath, segno); if (stat(segPath, &fst) < 0) { - ereport(LOG, (errcode_for_file_access(), + ereport(DEBUG1, (errcode_for_file_access(), errmsg("[arenadata_toolkit] could not stat file %s: %m", segPath))); break; } @@ -515,7 +515,7 @@ calculate_ao_storage_perSegFile_soft(const int segno, void *ctx) if (stat(segPath, &fst) < 0) { - ereport(LOG, (errcode_for_file_access(), + ereport(DEBUG1, (errcode_for_file_access(), errmsg("[arenadata_toolkit] could not access file %s: %m", segPath))); } else diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c b/gpcontrib/arenadata_toolkit/src/file_hook.c index c29042169358..0923f16a53cf 100644 --- a/gpcontrib/arenadata_toolkit/src/file_hook.c +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -80,8 +80,7 @@ hook_create(RelFileNodeBackend rnode) elog(DEBUG1, "hook_create: %d %d %d %d", rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); - hash = bloom_set_calc_hash(&rnode.node.relNode, sizeof(rnode.node.relNode)); - bloom_set(non_committed_bloom, hash); + bloom_set(non_committed_bloom, rnode.node.relNode); } diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h index 139599b847ab..91d4307a887c 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h +++ 
b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h @@ -3,7 +3,7 @@ #include "postgres.h" -#define DEFAULT_BLOOM_SIZE_BYTES 1000000 +#define DEFAULT_BLOOM_SIZE_BYTES 1048576 #define DEFAULT_DB_TRACK_COUNT 5 #define DEFAULT_IS_TRACKED false #define DEFAULT_DROPS_COUNT 100000 @@ -13,7 +13,7 @@ #define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M" #define DEFAULT_NAPTIME_SEC 60 -#define MIN_BLOOM_SIZE_BYTES 1 +#define MIN_BLOOM_SIZE_BYTES 64 #define MIN_DB_TRACK_COUNT 1 #define MIN_DROPS_COUNT 1 #define MIN_NAPTIME_SEC 1 diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 34d800784b41..3e11ccd6ed8d 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -1,20 +1,24 @@ #ifndef BLOOM_H #define BLOOM_H +#include "postgres.h" + #include +#define MAX_BLOOM_HASH_FUNCS 6 +#define TOTAL_ELEMENTS 100000000UL #define FULL_BLOOM_SIZE(size) (offsetof(bloom_t, map) + size) typedef struct { uint32_t size; /* size in bytes of 'map' */ int is_set_all; /* is all bits sets by bloom_set_all */ - char map[] /* filter itself, array of bytes */ ; + char map[FLEXIBLE_ARRAY_MEMBER]; /* filter itself, array of bytes */ ; } bloom_t; -void bloom_init(const uint32_t bloom_size, bloom_t *bloom); -int bloom_isset(bloom_t * bloom, uint64_t hash); -void bloom_set(bloom_t * bloom, uint64_t hash); +void bloom_init(const uint32 bloom_size, bloom_t* bloom); +bool bloom_isset(bloom_t * bloom, Oid relnode); +void bloom_set(bloom_t * bloom, Oid relnode); void bloom_set_all(bloom_t * bloom); void bloom_clear(bloom_t * bloom); void bloom_merge(bloom_t * dst, bloom_t * src); diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index 500a547b2221..c2585ef41ee6 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -18,16 +18,15 @@ typedef 
struct /* static set of all bloom filters */ typedef struct { - uint8_t bloom_count; /* count of bloom_entry_t in bloom_entries */ - uint32_t bloom_size; /* size of bloom filter */ + uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ + uint32 bloom_size; /* size of bloom filter */ char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of * bloom_entry_t */ } bloom_set_t; -void bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t *bloom_set); +void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size, bloom_set_t *bloom_set); bool bloom_set_bind(bloom_set_t * bloom_set, Oid dbid); void bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid); -uint64_t bloom_set_calc_hash(const void *buf, size_t len); void bloom_set_set(bloom_set_t * bloom_set, Oid dbid, Oid relNode); bool bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t *dest); bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); diff --git a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h new file mode 100644 index 000000000000..f862398b61fe --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h @@ -0,0 +1,49 @@ +#ifndef HASHIMPL_H +#define HASHIMPL_H + +#include "c.h" + +#define _wyrot(x) (((x)>>32)|((x)<<32)) + +static inline void _wymum(uint64 *A, uint64 *B){ +#if(SIZEOF_VOID_P < 8) + uint64 hh=(*A>>32)*(*B>>32), + hl=(*A>>32)*(uint32)*B, + lh=(uint32)*A*(*B>>32), + ll=(uint64)(uint32)*A*(uint32)*B; + *A=_wyrot(hl)^hh; + *B=_wyrot(lh)^ll; +#elif defined(HAVE_INT128) + uint128 r=*A; + r*=*B; + *A=(uint64)r; + *B=(uint64)(r>>64); +#else + uint64 ha=*A>>32, hb=*B>>32, la=(uint32)*A, lb=(uint32)*B, hi, lo; + uint64 rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; + *A=lo; *B=hi; +#endif +} + +//multiply and xor mix function, aka MUM +static inline uint64 _wymix(uint64 A, uint64 B){ _wymum(&A,&B); return A^B; } + +static inline uint64 
wyhash(uint32 key, uint64 seed){ + seed^=_wymix(seed^0x2d358dccaa6c78a5ull,0x8bb84b93962eacc9ull); +#if (WORDS_BIGENDIAN) +#if defined(HAVE__BUILTIN_BSWAP32) + key = __builtin_bswap32(key); +#else + key = (((key >> 24) & 0xff)| ((key >> 8) & 0xff00)| ((key << 8) & 0xff0000)| ((key << 24) & 0xff000000)); +#endif +#endif + uint64 a = ((uint64) key << 32) | key; + uint64 b = 0; + a^=0x8bb84b93962eacc9ull; + b^=seed; + _wymum(&a,&b); + return _wymix(a^0x2d358dccaa6c78a5ull^4,b^0x8bb84b93962eacc9ull); +} + +#endif /* HASHIMPL_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 5d4f8f48a44b..b528a35dfc1e 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -6,6 +6,8 @@ #include "bloom_set.h" +#define MAX_BLOOM_HASH_FUNCS 6 + typedef struct { pg_atomic_flag tracking_is_initialized; @@ -23,6 +25,8 @@ extern tf_shared_state_t * tf_shared_state; extern LWLock *tf_state_lock; extern LWLock *bloom_set_lock; extern tf_entry_lock_t bloom_locks[]; +extern uint64 bloom_hash_seed; +extern int bloom_hash_num; void tf_shmem_init(void); void tf_shmem_deinit(void); diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index de10f1b87f07..ece5a4b5e495 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -6,11 +6,15 @@ #include "arenadata_toolkit_guc.h" +#include + static shmem_startup_hook_type next_shmem_startup_hook = NULL; tf_shared_state_t *tf_shared_state; LWLock *tf_state_lock; LWLock *bloom_set_lock; tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; +uint64 bloom_hash_seed; +int bloom_hash_num; static void init_lwlocks(void) @@ -25,6 +29,34 @@ init_lwlocks(void) } } +static int +my_bloom_power(uint64 target_bitset_bits) +{ + int bloom_power = -1; + + while (target_bitset_bits > 0 && bloom_power < 32) + { + bloom_power++; + 
target_bitset_bits >>= 1; + } + + return bloom_power; +} + +static void +init_bloom_invariants() +{ + uint64 total_bits; + int k; + + total_bits = UINT64CONST(1) << my_bloom_power(bloom_size * 8); + bloom_size = total_bits / 8; + + k = rint(log(2.0) * total_bits / TOTAL_ELEMENTS); + bloom_hash_num = Max(1, Min(k, MAX_BLOOM_HASH_FUNCS)); + bloom_hash_seed = (uint64) random(); +} + static Size tf_shmem_calc_size(void) { @@ -40,7 +72,10 @@ static void tf_shmem_hook(void) { bool found; - Size size = tf_shmem_calc_size(); + Size size; + + init_bloom_invariants(); + size = tf_shmem_calc_size(); LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index e2cb40f167f9..d7c04367a81f 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -458,7 +458,6 @@ tracking_get_track_main(PG_FUNCTION_ARGS) char relkind; char relstorage; HeapTuple pg_class_tuple; - uint64_t hash; Form_pg_class relp; int64 size; @@ -505,8 +504,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) continue; /* Bloom filter check */ - hash = bloom_set_calc_hash(&filenode, sizeof(filenode)); - if (!bloom_isset(tf_get_global_state.bloom, hash)) + if (!bloom_isset(tf_get_global_state.bloom, filenode)) continue; relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); From d13acdc28cce1e41d8460a06c36b4eae39076da6 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Sun, 27 Oct 2024 23:57:41 +0300 Subject: [PATCH 21/69] use uint8 in bloom --- gpcontrib/arenadata_toolkit/src/bloom.c | 3 +++ gpcontrib/arenadata_toolkit/src/include/bloom.h | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index 6cd1d57976cd..ddcf2c787e1e 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -48,6 +48,9 @@ bloom_isset(bloom_t * bloom, 
Oid relnode) { uint32 hashes[MAX_BLOOM_HASH_FUNCS]; + if (bloom->is_set_all) + return true; + tracking_hashes(relnode, bloom->size, hashes); for (int i = 0; i < bloom_hash_num; ++i) diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 3e11ccd6ed8d..fc097d65c481 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -12,8 +12,8 @@ typedef struct { uint32_t size; /* size in bytes of 'map' */ - int is_set_all; /* is all bits sets by bloom_set_all */ - char map[FLEXIBLE_ARRAY_MEMBER]; /* filter itself, array of bytes */ ; + uint8 is_set_all; /* is all bits sets by bloom_set_all */ + uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* filter itself, array of bytes */ ; } bloom_t; void bloom_init(const uint32 bloom_size, bloom_t* bloom); From 7847bab58caa2845a9b6576c48ef5661217e6baf Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 00:09:59 +0300 Subject: [PATCH 22/69] Rework drops track --- gpcontrib/arenadata_toolkit/src/drops_track.c | 25 +++++++++++-------- gpcontrib/arenadata_toolkit/src/file_hook.c | 2 -- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index b54f7ce37e4f..bfd6dd68e172 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -23,14 +23,12 @@ typedef struct typedef struct { dlist_node node; - uint32_t idx; /* idx in 'nodes' array; just for info */ track_relfilenode_t relfileNode; } drops_track_node_t; typedef struct { - LWLock *lock; dlist_head head; uint32_t used_count; /* count of used nodes */ int unused_idx; /* next unused idx or -1 if unknown; for @@ -40,6 +38,7 @@ typedef struct static shmem_startup_hook_type next_shmem_startup_hook = NULL; static drops_track_t * drops_track; +LWLock *drops_track_lock; static Size drops_track_calc_size() @@ -58,11 
+57,12 @@ drops_track_hook(void) bool found; Size size = drops_track_calc_size(); + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + drops_track = ShmemInitStruct("adb_track_files_drops", size, &found); if (!found) { - drops_track->lock = LWLockAssign(); drops_track->used_count = 0; drops_track->unused_idx = 0; dlist_init(&drops_track->head); @@ -73,10 +73,13 @@ drops_track_hook(void) track_node->relfileNode.relNode = InvalidOid; track_node->relfileNode.dbNode = InvalidOid; - track_node->idx = i; } } + drops_track_lock = LWLockAssign(); + + LWLockRelease(AddinShmemInitLock); + if (next_shmem_startup_hook) next_shmem_startup_hook(); } @@ -134,7 +137,7 @@ drops_track_add(RelFileNode relfileNode) { drops_track_node_t *track_node; - LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + LWLockAcquire(drops_track_lock, LW_EXCLUSIVE); if (drops_track->used_count >= drops_count) { @@ -152,7 +155,7 @@ drops_track_add(RelFileNode relfileNode) track_node->relfileNode.dbNode = relfileNode.dbNode; dlist_push_tail(&drops_track->head, &track_node->node); - LWLockRelease(drops_track->lock); + LWLockRelease(drops_track_lock); } /* move relfilenodes from track to list */ @@ -162,11 +165,11 @@ drops_track_move(Oid dbid) List *oids = NIL; dlist_mutable_iter iter; - LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + LWLockAcquire(drops_track_lock, LW_EXCLUSIVE); if (drops_track->used_count == 0) { - LWLockRelease(drops_track->lock); + LWLockRelease(drops_track_lock); return oids; } @@ -185,7 +188,7 @@ drops_track_move(Oid dbid) } } - LWLockRelease(drops_track->lock); + LWLockRelease(drops_track_lock); return oids; } @@ -199,7 +202,7 @@ drops_track_move_undo(List *oids, Oid dbid) if (oids == NIL) return; - LWLockAcquire(drops_track->lock, LW_EXCLUSIVE); + LWLockAcquire(drops_track_lock, LW_EXCLUSIVE); foreach(cell, oids) { @@ -219,5 +222,5 @@ drops_track_move_undo(List *oids, Oid dbid) dlist_push_head(&drops_track->head, &track_node->node); } - LWLockRelease(drops_track->lock); + 
LWLockRelease(drops_track_lock); } diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c b/gpcontrib/arenadata_toolkit/src/file_hook.c index 0923f16a53cf..dad745e74289 100644 --- a/gpcontrib/arenadata_toolkit/src/file_hook.c +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -60,8 +60,6 @@ xact_end_create_callback(XactEvent event, void *arg) static void hook_create(RelFileNodeBackend rnode) { - uint64_t hash; - if (next_file_create_hook) next_file_create_hook(rnode); From ed9e3008ad1183f91cc44cd16cace8d8b040747d Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 03:25:14 +0300 Subject: [PATCH 23/69] Rework initialization approach --- .../arenadata_toolkit--1.6--1.7.sql | 18 ++-- .../expected/arenadata_toolkit_guc.out | 57 ++++++----- .../expected/arenadata_toolkit_test.out | 12 ++- .../sql/arenadata_toolkit_guc.sql | 38 ++++++-- .../src/arenadata_toolkit_worker.c | 10 +- gpcontrib/arenadata_toolkit/src/track_files.c | 95 ++++++++++++++++--- 6 files changed, 167 insertions(+), 63 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql index 9038b32c8ef9..9bc6cf57b3ef 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql @@ -1,24 +1,30 @@ /* gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql */ -CREATE FUNCTION arenadata_toolkit.tracking_register_db(dbid OID default 0) +CREATE FUNCTION arenadata_toolkit.tracking_register_db(dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', -'tracking_register_db' LANGUAGE C; +'tracking_register_db' LANGUAGE C EXECUTE ON MASTER; REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db(dbid OID) FROM public; -CREATE FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID default 0) +CREATE FUNCTION arenadata_toolkit.tracking_register_db_main(reg BOOL, dbid OID DEFAULT 0) returns BOOL AS 
'$libdir/arenadata_toolkit', -'tracking_unregister_db' LANGUAGE C; +'tracking_register_db_main' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db_main(reg BOOL, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_unregister_db' LANGUAGE C EXECUTE ON MASTER; REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID) FROM public; -CREATE FUNCTION arenadata_toolkit.tracking_register_schema(schemaname NAME, dbid OID default 0) +CREATE FUNCTION arenadata_toolkit.tracking_register_schema(schemaname NAME, dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_register_schema' LANGUAGE C EXECUTE ON master; REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_schema(schema NAME, dbid OID) FROM public; -CREATE FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID default 0) +CREATE FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_unregister_schema' LANGUAGE C EXECUTE ON master; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index e6b567f81c11..0800a8e7147d 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -93,32 +93,37 @@ CREATE DATABASE tracking3; CREATE DATABASE tracking4; CREATE DATABASE tracking5; CREATE DATABASE tracking6; -SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN -('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5'); - tracking_register_db ----------------------- - t - t - t - t - t -(5 rows) - -SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN -('tracking6'); -ERROR: [arenadata_toolkit] exceeded maximum 
number of tracked databases (track_files.c:694) -SELECT arenadata_toolkit.tracking_unregister_db(oid) FROM pg_database WHERE datname IN -('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5', 'tracking6'); - tracking_unregister_db ------------------------- - t - t - t - t - t - t -(6 rows) - +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_register_db(db_oid); + END LOOP; +END; +$$; +ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:714) +CONTEXT: SQL statement "SELECT arenadata_toolkit.tracking_register_db(db_oid)" +PL/pgSQL function inline_code_block line 11 at PERFORM +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_unregister_db(db_oid); + END LOOP; +END; +$$; DROP DATABASE IF EXISTS tracking2; DROP DATABASE IF EXISTS tracking3; DROP DATABASE IF EXISTS tracking4; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index ef9b5a9c5b9c..8ca1ffd02bf0 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -122,6 +122,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_is_initial_snapshot_triggered_segments | proc | - | tracking_is_segment_initialized | proc | - | {owner=X/owner} tracking_register_db | proc | - | {owner=X/owner} + tracking_register_db_main | proc | - | {owner=X/owner} tracking_register_schema | proc | - | {owner=X/owner} tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} 
@@ -129,7 +130,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_trigger_initial_snapshot | proc | - | tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | proc | - | {owner=X/owner} -(40 rows) +(41 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -161,6 +162,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e tracking_is_segment_initialized | proc | arenadata_toolkit | e tracking_register_db | proc | arenadata_toolkit | e + tracking_register_db_main | proc | arenadata_toolkit | e tracking_register_schema | proc | arenadata_toolkit | e tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e @@ -168,7 +170,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(30 rows) +(31 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; @@ -217,6 +219,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_is_initial_snapshot_triggered_segments | proc | - | tracking_is_segment_initialized | proc | - | {owner=X/owner} tracking_register_db | proc | - | {owner=X/owner} + tracking_register_db_main | proc | - | {owner=X/owner} tracking_register_schema | proc | - | {owner=X/owner} tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} @@ -224,7 +227,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_trigger_initial_snapshot | proc | - | tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | 
proc | - | {owner=X/owner} -(37 rows) +(38 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -256,6 +259,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e tracking_is_segment_initialized | proc | arenadata_toolkit | e tracking_register_db | proc | arenadata_toolkit | e + tracking_register_db_main | proc | arenadata_toolkit | e tracking_register_schema | proc | arenadata_toolkit | e tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e @@ -263,7 +267,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(30 rows) +(31 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index 32033fc2f71f..bcd357659bba 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -62,14 +62,36 @@ CREATE DATABASE tracking4; CREATE DATABASE tracking5; CREATE DATABASE tracking6; -SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN -('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5'); - -SELECT arenadata_toolkit.tracking_register_db(oid) FROM pg_database WHERE datname IN -('tracking6'); - -SELECT arenadata_toolkit.tracking_unregister_db(oid) FROM pg_database WHERE datname IN -('tracking1', 'tracking2', 'tracking3', 'tracking4', 'tracking5', 'tracking6'); +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 
'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_register_db(db_oid); + END LOOP; +END; +$$; + + +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_unregister_db(db_oid); + END LOOP; +END; +$$; DROP DATABASE IF EXISTS tracking2; DROP DATABASE IF EXISTS tracking3; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index b59b881f6b4e..863c19325142 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -139,7 +139,7 @@ track_dbs(List *tracked_dbs) bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, - trackedDb->get_full_snapshot_on_recovery); + trackedDb->get_full_snapshot_on_recovery); } } @@ -152,9 +152,10 @@ worker_tracking_status_check() tracked_dbs = get_tracked_dbs(); - if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized) && list_length(tracked_dbs) > 0) + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) { - track_dbs(tracked_dbs); + if (list_length(tracked_dbs) > 0) + track_dbs(tracked_dbs); pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized); } @@ -216,6 +217,8 @@ arenadata_toolkit_main(Datum main_arg) if (current_timeout <= 0) { + worker_tracking_status_check(); + INSTR_TIME_SET_CURRENT(start_time_timeout); current_timeout = timeout; } @@ -246,7 +249,6 @@ arenadata_toolkit_main(Datum main_arg) INSTR_TIME_SET_CURRENT(current_time_timeout); INSTR_TIME_SUBTRACT(current_time_timeout, start_time_timeout); current_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(current_time_timeout); - if (current_timeout <= 0) 
worker_tracking_status_check(); } if (got_sigterm) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index d7c04367a81f..f43bce288670 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -32,6 +32,7 @@ #include "tf_shmem.h" PG_FUNCTION_INFO_V1(tracking_register_db); +PG_FUNCTION_INFO_V1(tracking_register_db_main); PG_FUNCTION_INFO_V1(tracking_unregister_db); PG_FUNCTION_INFO_V1(tracking_set_snapshot_on_recovery); PG_FUNCTION_INFO_V1(tracking_register_schema); @@ -441,16 +442,6 @@ tracking_get_track_main(PG_FUNCTION_ARGS) funcctx = SRF_PERCALL_SETUP(); state = funcctx->user_fctx; - if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) - { - systable_endscan(state->scan); - heap_close(state->pg_class_rel, AccessShareLock); - state->scan = NULL; - state->pg_class_rel = NULL; - elog(LOG, "Nothing to return from segment %d due to uninitialized status of Bloom filter", GpIdentity.segindex); - SRF_RETURN_DONE(funcctx); - } - while (true) { Oid filenode; @@ -724,6 +715,66 @@ track_db(Oid dbid, bool reg) (errmsg("[arenadata_toolkit] exceeded maximum number of tracked databases"))); } +Datum +tracking_register_db_main(PG_FUNCTION_ARGS) +{ + Oid dbid = PG_GETARG_OID(1); + bool reg = PG_GETARG_BOOL(0); + + tf_check_shmem_error(); + + dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; + + elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); + + track_db(dbid, reg); + + PG_RETURN_BOOL(true); +} + +static bool +is_initialized() +{ + CdbPgResults cdb_pgresults = {NULL, 0}; + bool all_inited = true; + + CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); + + for (int i = 0; i < cdb_pgresults.numResults; i++) + { + struct pg_result *pgresult = cdb_pgresults.pg_results[i]; + + if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) + { + cdbdisp_clearCdbPgResults(&cdb_pgresults); + ereport(ERROR, + (errmsg("Failed to check segments status"))); + } + else + { + int32 segindex = 0; + bool is_initialized = false; + + segindex = atoi(PQgetvalue(pgresult, 0, 0)); + is_initialized = strcmp(PQgetvalue(pgresult, 0, 1), "t") == 0; + + elog(LOG, "[arenadata_toolkit] tracking_register_db initialization check" + " segindex: %d, is_initialized: %d", segindex, is_initialized); + + if (!is_initialized) + { + all_inited = false; + break; + } + } + } + + if (cdb_pgresults.numResults > 0) + cdbdisp_clearCdbPgResults(&cdb_pgresults); + + return all_inited; +} + /* * Registers current (if dbid is 0) or specific database as tracked by arenadata_toolkit tables tracking. * Dispatches call to segments by itself. Binds a bloom filter to the registered database if possible. @@ -735,6 +786,11 @@ tracking_register_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); + if (Gp_role == GP_ROLE_DISPATCH && !is_initialized()) + ereport(ERROR, + (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), + errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again"))); + dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); @@ -742,10 +798,12 @@ tracking_register_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { - char *cmd = - psprintf("select arenadata_toolkit.tracking_register_db(%u)", dbid); + char *cmd = + psprintf("select arenadata_toolkit.tracking_register_db_main(true, %u)", dbid); CdbDispatchCommand(cmd, 0, NULL); + + pfree(cmd); } PG_RETURN_BOOL(true); @@ -761,6 +819,11 @@ tracking_unregister_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); + if (Gp_role == GP_ROLE_DISPATCH && !is_initialized()) + ereport(ERROR, + (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), + errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again"))); + dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); @@ -768,10 +831,12 @@ tracking_unregister_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { - char *cmd = - psprintf("select arenadata_toolkit.tracking_unregister_db(%u)", dbid); + char *cmd = + psprintf("select arenadata_toolkit.tracking_register_db_main(false, %u)", dbid); CdbDispatchCommand(cmd, 0, NULL); + + pfree(cmd); } PG_RETURN_BOOL(true); @@ -780,7 +845,7 @@ tracking_unregister_db(PG_FUNCTION_ARGS) Datum tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) { - bool set = PG_GETARG_OID(0); + bool set = PG_GETARG_BOOL(0); Oid dbid = PG_GETARG_OID(1); tf_check_shmem_error(); From 0cdd4313081c6012ea1461745c18162baf1616d4 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 04:26:36 +0300 Subject: [PATCH 24/69] fix segfault --- gpcontrib/arenadata_toolkit/src/track_files.c | 45 ++++--------------- 1 file changed, 8 insertions(+), 37 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index f43bce288670..632919efe7bb 100644 --- 
a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -76,7 +76,7 @@ typedef struct Oid *typIOParams; } tf_get_func_state_t; -tf_get_global_state_t tf_get_global_state = {NULL, NULL, NIL, NULL, NIL, NIL, NIL}; +tf_get_global_state_t tf_get_global_state = {0}; static inline void tf_check_shmem_error(void) @@ -109,42 +109,13 @@ xact_end_get_callback(XactEvent event, void *arg) drops_track_move_undo(tf_get_global_state.drops, MyDatabaseId); } - if (tf_get_global_state.bloom) - { - pfree(tf_get_global_state.bloom); - tf_get_global_state.bloom = NULL; - } - - if (tf_get_global_state.rollback_bloom) - { - pfree(tf_get_global_state.rollback_bloom); - tf_get_global_state.rollback_bloom = NULL; - } - - if (tf_get_global_state.drops != NIL) - { - pfree(tf_get_global_state.drops); - tf_get_global_state.drops = NIL; - tf_get_global_state.next_drop = NULL; - } - - if (tf_get_global_state.relkinds != NIL) - { - pfree(tf_get_global_state.relkinds); - tf_get_global_state.relkinds = NIL; - } - - if (tf_get_global_state.relstorages != NIL) - { - pfree(tf_get_global_state.relstorages); - tf_get_global_state.relstorages = NIL; - } - - if (tf_get_global_state.schema_oids != NIL) - { - pfree(tf_get_global_state.schema_oids); - tf_get_global_state.schema_oids = NIL; - } + tf_get_global_state.bloom = NULL; + tf_get_global_state.rollback_bloom = NULL; + tf_get_global_state.drops = NIL; + tf_get_global_state.next_drop = NULL; + tf_get_global_state.relkinds = NIL; + tf_get_global_state.relstorages = NIL; + tf_get_global_state.schema_oids = NIL; } From d10812b98d2917441fdde479427b6519541a0127 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 05:24:24 +0300 Subject: [PATCH 25/69] Move to bits --- gpcontrib/arenadata_toolkit/src/track_files.c | 146 +++++++++++------- 1 file changed, 88 insertions(+), 58 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c 
index 632919efe7bb..2c7fb467f085 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -57,8 +57,8 @@ typedef struct bloom_t *rollback_bloom; List *drops; ListCell *next_drop; - List *relkinds; - List *relstorages; + uint64 relkinds; + uint64 relstorages; List *schema_oids; } tf_get_global_state_t; @@ -113,8 +113,8 @@ xact_end_get_callback(XactEvent event, void *arg) tf_get_global_state.rollback_bloom = NULL; tf_get_global_state.drops = NIL; tf_get_global_state.next_drop = NULL; - tf_get_global_state.relkinds = NIL; - tf_get_global_state.relstorages = NIL; + tf_get_global_state.relkinds = 0; + tf_get_global_state.relstorages = 0; tf_get_global_state.schema_oids = NIL; } @@ -148,6 +148,41 @@ split_string_to_list(const char *input) return result; } +static uint64 +list_to_bits(const char *input) +{ + char *input_copy; + char *token; + uint64 bits = 0; + + if (input == NULL) + return 0; + + input_copy = pstrdup(input); + + token = strtok(input_copy, ","); + + while (token != NULL) + { + if (*token != '\0') + { + char c = *token; + + if (c >= 'a' && c <= 'z') + bits |= (1UL << (c - 'a')); + else + bits |= (1UL << (26 + (c - 'A'))); + } + + token = strtok(NULL, ","); + } + + pfree(input_copy); + + return bits; +} + + static void get_filters_from_guc() { @@ -218,13 +253,13 @@ get_filters_from_guc() else schema_names = split_string_to_list(tracked_schemas); if (current_relstorages) - tf_get_global_state.relstorages = split_string_to_list(current_relstorages); + tf_get_global_state.relstorages = list_to_bits(current_relstorages); else - tf_get_global_state.relstorages = split_string_to_list(tracked_rel_storages); + tf_get_global_state.relstorages = list_to_bits(tracked_rel_storages); if (current_relkinds) - tf_get_global_state.relkinds = split_string_to_list(current_relkinds); + tf_get_global_state.relkinds = list_to_bits(current_relkinds); else - tf_get_global_state.relkinds = 
split_string_to_list(tracked_rel_kinds); + tf_get_global_state.relkinds = list_to_bits(tracked_rel_kinds); foreach(lc, schema_names) { @@ -267,41 +302,12 @@ schema_is_tracked(Oid schema) } static bool -relkind_is_tracked(char relkind) +kind_is_tracked(char type, uint64 allowed_kinds) { - ListCell *lc; - - if (tf_get_global_state.relkinds == NIL) - return false; - - foreach(lc, tf_get_global_state.relkinds) - { - char *tracked_relkind = (char *)lfirst(lc); - - if (tracked_relkind != NULL && *tracked_relkind == relkind) - return true; - } - - return false; -} - -static bool -relstorage_is_tracked(char relstorage) -{ - ListCell *lc; - - if (tf_get_global_state.relstorages == NIL) - return false; - - foreach(lc, tf_get_global_state.relstorages) - { - char *tracked_relstorage = (char *)lfirst(lc); - - if (tracked_relstorage != NULL && *tracked_relstorage == relstorage) - return true; - } - - return false; + if (type >= 'a' && type <= 'z') + return (allowed_kinds & (1UL << (type - 'a'))) != 0; + else + return (allowed_kinds & (1UL << (26 + (type - 'A')))) != 0; } /* @@ -376,8 +382,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) if (tf_get_global_state.schema_oids == NIL) get_filters_from_guc(); - if (tf_get_global_state.relstorages == NIL || - tf_get_global_state.relkinds == NIL || + if (tf_get_global_state.relstorages == 0 || + tf_get_global_state.relkinds == 0 || tf_get_global_state.schema_oids == NIL) ereport(ERROR, (errmsg("Cannot get tracking configuration (schemas, relkinds, reltorage) for database %u", MyDatabaseId))); @@ -437,22 +443,22 @@ tracking_get_track_main(PG_FUNCTION_ARGS) break; } - datums[6] = heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); - relnamespace = DatumGetObjectId(datums[6]); - - if (!schema_is_tracked(relnamespace)) - continue; - datums[7] = heap_getattr(pg_class_tuple, Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); relkind = DatumGetChar(datums[7]); - if 
(!relkind_is_tracked(relkind)) + if (!kind_is_tracked(relkind, tf_get_global_state.relkinds)) continue; datums[8] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); relstorage = DatumGetChar(datums[8]); - if (!relstorage_is_tracked(relstorage)) + if (!kind_is_tracked(relstorage, tf_get_global_state.relstorages)) + continue; + + datums[6] = heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); + relnamespace = DatumGetObjectId(datums[6]); + + if (!schema_is_tracked(relnamespace)) continue; datums[0] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); @@ -1069,10 +1075,24 @@ tracking_unregister_schema(PG_FUNCTION_ARGS) static bool is_valid_relkind(char relkind) { - return (relkind == 'r' || relkind == 'i' || relkind == 'S' || - relkind == 't' || relkind == 'v' || relkind == 'c' || - relkind == 'f' || relkind == 'u' || relkind == 'm' || - relkind == 'o' || relkind == 'b' || relkind == 'M'); + switch (relkind) + { + case 'r': + case 'i': + case 'S': + case 't': + case 'v': + case 'c': + case 'f': + case 'u': + case 'm': + case 'o': + case 'b': + case 'M': + return true; + default: + return false; + } } Datum @@ -1159,8 +1179,18 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) static bool is_valid_relstorage(char relstorage) { - return (relstorage == 'h' || relstorage == 'a' || relstorage == 'c' || - relstorage == 'x' || relstorage == 'v' || relstorage == 'f'); + switch (relstorage) + { + case 'h': + case 'a': + case 'c': + case 'x': + case 'v': + case 'f': + return true; + default: + return false; + } } Datum From f7c8c6d7c85c75dd5d3dd9f8d43e1f48e7618182 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 06:22:08 +0300 Subject: [PATCH 26/69] A bunch of comments --- gpcontrib/arenadata_toolkit/src/bloom.c | 9 ++++++ gpcontrib/arenadata_toolkit/src/dbsize.c | 2 +- .../arenadata_toolkit/src/include/bloom_set.h | 2 +- 
.../arenadata_toolkit/src/include/hashimpl.h | 11 ++++++- .../arenadata_toolkit/src/include/tf_shmem.h | 2 +- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 31 +++++++++++++++++-- gpcontrib/arenadata_toolkit/src/track_files.c | 21 ++++++------- 7 files changed, 61 insertions(+), 17 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index ddcf2c787e1e..6c285c0579d5 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -14,6 +14,15 @@ mod_m(uint32 val, uint64 m) return val & (m - 1); } +/* + * Generate k positions in bloom filter for relfilenode oid. + * + * The enhanced double hashing is used (Dillinger P, Manolios P. Bloom Filters + * in Probabilistic Verification. 2004.) to produce k positions from 2 independent + * hashes. + * + * out_hashes is out parameter which is filled with k bit indices. + */ static void tracking_hashes(Oid node, uint32 bloom_size, uint32 *out_hashes) { diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 215df71613a7..ba9e6dc2b491 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -426,7 +426,7 @@ dbsize_calc_size(Form_pg_class pg_class_data) rel.rd_rel = pg_class_data; /* - * Initialize BackendIf field of RelationData. + * Initialize BackendId field of RelationData. 
*/ switch (rel.rd_rel->relpersistence) { diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index c2585ef41ee6..e2fde9d7bbad 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -20,7 +20,7 @@ typedef struct { uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ uint32 bloom_size; /* size of bloom filter */ - char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of + char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of * bloom_entry_t */ } bloom_set_t; diff --git a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h index f862398b61fe..f154edf7815d 100644 --- a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h +++ b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h @@ -5,6 +5,16 @@ #define _wyrot(x) (((x)>>32)|((x)<<32)) +/* + * This is an implementation of the wyhash algorithm - a modern, + * fast non-cryptographic hash function designed by Wang Yi. The code + * is unlicensed, therefore, we can use it for the Bloom filter without doubts. + * _wymum: Multiplication-based mixing function. + * _wymix: Combines multiplication mixing with XOR operations. + * wyhash: Main hash function that processes a 32-bit key with a seed. + * This algorithm has better properties than Postgres's Jenkins hash and + * provides good avalanche effect. 
+ */ static inline void _wymum(uint64 *A, uint64 *B){ #if(SIZEOF_VOID_P < 8) uint64 hh=(*A>>32)*(*B>>32), @@ -26,7 +36,6 @@ static inline void _wymum(uint64 *A, uint64 *B){ #endif } -//multiply and xor mix function, aka MUM static inline uint64 _wymix(uint64 A, uint64 B){ _wymum(&A,&B); return A^B; } static inline uint64 wyhash(uint32 key, uint64 seed){ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index b528a35dfc1e..b357cda5cb6b 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -30,7 +30,7 @@ extern int bloom_hash_num; void tf_shmem_init(void); void tf_shmem_deinit(void); -LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode); +LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode); void LWLockBindEntry(Oid dbid); void LWLockUnbindEntry(Oid dbid); diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index ece5a4b5e495..43c7228f791c 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -16,6 +16,9 @@ tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; uint64 bloom_hash_seed; int bloom_hash_num; +/* + * Separate initialization of LWLocks; + */ static void init_lwlocks(void) { @@ -29,6 +32,10 @@ init_lwlocks(void) } } +/* + * Calculate the closes power of 2 for given + * length. + */ static int my_bloom_power(uint64 target_bitset_bits) { @@ -43,6 +50,15 @@ my_bloom_power(uint64 target_bitset_bits) return bloom_power; } +/* + * In order to decrease false positive ratio and make + * Bloom filter close to theoretical form the calculation + * of k hashes is suggested. The value k is estimated + * as optimal value minimizing fp ratio. The bloom_size + * is also adjusted to power of 2. + * Additionally, the seed for hash calculation is initialized + * here. 
+ */ static void init_bloom_invariants() { @@ -99,7 +115,10 @@ tf_shmem_hook(void) void tf_shmem_init() { - /* don't forget to add additional locks */ + /* + * tf_state_lock and bloom_set_lock locks + * plus one lock for each db entry. + */ RequestAddinLWLocks(2 + db_track_count); RequestAddinShmemSpace(tf_shmem_calc_size()); @@ -113,6 +132,9 @@ tf_shmem_deinit(void) shmem_startup_hook = next_shmem_startup_hook; } +/* + * Acquire lock corresponding to dbid in bloom_set. + */ LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode) { @@ -131,7 +153,9 @@ LWLockAcquireEntry(Oid dbid, LWLockMode mode) return NULL; } - +/* + * Bind LWLock to tracked dbid. + */ void LWLockBindEntry(Oid dbid) { @@ -151,6 +175,9 @@ LWLockBindEntry(Oid dbid) LWLockRelease(tf_state_lock); } +/* + * Unbind LWLock from tracked dbid. + */ void LWLockUnbindEntry(Oid dbid) { diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 2c7fb467f085..eaad6a6a32e9 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -47,32 +47,32 @@ PG_FUNCTION_INFO_V1(tracking_get_track_main); typedef struct { - Relation pg_class_rel; + Relation pg_class_rel; /*pg_class relation*/ SysScanDesc scan; } tf_main_func_state_t; typedef struct { - bloom_t *bloom; - bloom_t *rollback_bloom; - List *drops; + bloom_t *bloom; /* local copy of shared bloom */ + bloom_t *rollback_bloom; /* bloom for rollback in case of sequential track acquisition*/ + List *drops; /* drop list for current db */ ListCell *next_drop; - uint64 relkinds; - uint64 relstorages; - List *schema_oids; + uint64 relkinds; /* tracking relkinds */ + uint64 relstorages; /* tracking relstorages */ + List *schema_oids; /*tracking schemas */ } tf_get_global_state_t; typedef struct { - CdbPgResults cdb_results; + CdbPgResults cdb_results; /*results of CdbDispatch*/ int current_result; int current_row; - SPITupleTable *entry_result; + SPITupleTable 
*entry_result; /*results from SPI queries */ uint64 entry_processed; int entry_current_row; - FmgrInfo *inputFuncInfos; + FmgrInfo *inputFuncInfos; /* FuncInfos for string to Datum values transformation */ Oid *typIOParams; } tf_get_func_state_t; @@ -182,7 +182,6 @@ list_to_bits(const char *input) return bits; } - static void get_filters_from_guc() { From e827cd6dbdae8630530418cf39fb0be8b8178822 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 08:50:44 +0300 Subject: [PATCH 27/69] Remove hard code --- gpcontrib/arenadata_toolkit/src/track_files.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index eaad6a6a32e9..219048e962e9 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -45,6 +45,8 @@ PG_FUNCTION_INFO_V1(tracking_is_initial_snapshot_triggered); PG_FUNCTION_INFO_V1(tracking_get_track); PG_FUNCTION_INFO_V1(tracking_get_track_main); +#define GET_TRACK_TUPDESC_LEN 9 + typedef struct { Relation pg_class_rel; /*pg_class relation*/ @@ -318,8 +320,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) FuncCallContext *funcctx; tf_main_func_state_t *state; HeapTuple result; - Datum datums[9]; - bool nulls[9] = {0}; + Datum datums[GET_TRACK_TUPDESC_LEN]; + bool nulls[GET_TRACK_TUPDESC_LEN] = {0}; tf_check_shmem_error(); @@ -391,7 +393,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - funcctx->tuple_desc = CreateTemplateTupleDesc(9, false); + funcctx->tuple_desc = CreateTemplateTupleDesc(GET_TRACK_TUPDESC_LEN, false); TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)1, "relid", OIDOID, -1, 0); TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)2, "name", NAMEOID, -1, 0); TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)3, "relfilenode", OIDOID, -1, 0); @@ -524,8 +526,8 @@ 
tracking_get_track(PG_FUNCTION_ARGS) FuncCallContext *funcctx; tf_get_func_state_t *state; HeapTuple result; - Datum values[9]; - bool nulls[9] = {0}; + Datum values[GET_TRACK_TUPDESC_LEN]; + bool nulls[GET_TRACK_TUPDESC_LEN] = {0}; tf_check_shmem_error(); From 42e1b2d3b186e67179b3c19afd16ec24cac50e41 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 20:02:04 +0300 Subject: [PATCH 28/69] Comments and pgindent --- .../src/arenadata_toolkit_guc.c | 19 ++- .../src/arenadata_toolkit_worker.c | 63 ++++----- gpcontrib/arenadata_toolkit/src/bloom.c | 46 ++++++- gpcontrib/arenadata_toolkit/src/bloom_set.c | 30 +++-- gpcontrib/arenadata_toolkit/src/drops_track.c | 18 ++- .../arenadata_toolkit/src/include/bloom.h | 8 +- .../arenadata_toolkit/src/include/bloom_set.h | 18 +-- .../arenadata_toolkit/src/include/hashimpl.h | 101 +++++++++----- .../arenadata_toolkit/src/include/tf_shmem.h | 26 +++- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 30 ++++- gpcontrib/arenadata_toolkit/src/track_files.c | 127 ++++++++++-------- 11 files changed, 301 insertions(+), 185 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index 002b0ef317a3..edaa770e8fe2 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -17,6 +17,10 @@ char *tracked_rel_storages = DEFAULT_TRACKED_REL_STORAGES; char *tracked_rel_kinds = DEFAULT_TRACKED_REL_KINDS; int tracking_worker_naptime_sec = DEFAULT_NAPTIME_SEC; +/* + * Variables controlling GUC setting. Only extension functions are allowed + * to set GUC during NormalProcessing mode. 
+ */ static bool is_tracked_unlocked = false; static bool is_get_full_snapshot_on_recovery_unlocked = false; static bool is_schemas_unlocked = false; @@ -81,7 +85,8 @@ check_guc(bool *toolkit_guc, GucSource source, bool *manual) static bool check_tracked(bool *newval, void **extra, GucSource source) { - bool manual = false; + bool manual = false; + if (check_guc(&is_tracked_unlocked, source, &manual)) return true; @@ -96,7 +101,8 @@ check_tracked(bool *newval, void **extra, GucSource source) static bool check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source) { - bool manual = false; + bool manual = false; + if (check_guc(&is_get_full_snapshot_on_recovery_unlocked, source, &manual)) return true; @@ -111,7 +117,8 @@ check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source static bool check_relkinds(char **newval, void **extra, GucSource source) { - bool manual = false; + bool manual = false; + if (check_guc(&is_relkinds_unlocked, source, &manual)) return true; @@ -126,7 +133,8 @@ check_relkinds(char **newval, void **extra, GucSource source) static bool check_schemas(char **newval, void **extra, GucSource source) { - bool manual = false; + bool manual = false; + if (check_guc(&is_schemas_unlocked, source, &manual)) return true; @@ -141,7 +149,8 @@ check_schemas(char **newval, void **extra, GucSource source) static bool check_relstorages(char **newval, void **extra, GucSource source) { - bool manual = false; + bool manual = false; + if (check_guc(&is_relstorages_unlocked, source, &manual)) return true; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index 863c19325142..f6ab8cb70a92 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -36,7 +36,7 @@ typedef struct static volatile sig_atomic_t got_sighup = false; static volatile sig_atomic_t got_sigterm 
= false; -void arenadata_toolkit_main(Datum); +void arenadata_toolkit_main(Datum); /* * Signal handler for SIGTERM @@ -72,24 +72,24 @@ tracking_sighup(SIGNAL_ARGS) errno = save_errno; } -static List* +static List * get_tracked_dbs() { StringInfoData query; - List *tracked_dbs = NIL; + List *tracked_dbs = NIL; tracked_db_t *trackedDb; MemoryContext topcontext = CurrentMemoryContext; initStringInfo(&query); appendStringInfo(&query, SQL( - WITH _ AS ( - WITH _ AS ( - SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig" FROM "pg_db_role_setting" WHERE "setrole"=0 - ) SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig" FROM _ GROUP BY 1 - ) select "setdatabase", - ("setconfig"->>'arenadata_toolkit.tracking_snapshot_on_recovery')::bool as "snapshot" FROM _ WHERE - ("setconfig"->>'arenadata_toolkit.tracking_is_db_tracked')::bool IS TRUE - )); + WITH _ AS( + WITH _ AS( + SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig" FROM "pg_db_role_setting" WHERE "setrole" = 0 + ) SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig" FROM _ GROUP BY 1 + ) select "setdatabase", +("setconfig"->> 'arenadata_toolkit.tracking_snapshot_on_recovery'): :bool as "snapshot" FROM _ WHERE +("setconfig"->> 'arenadata_toolkit.tracking_is_db_tracked'): :bool IS TRUE + )); if (SPI_connect() != SPI_OK_CONNECT) ereport(ERROR, (errmsg("SPI_connect failed"))); @@ -101,11 +101,11 @@ get_tracked_dbs() for (uint64 row = 0; row < SPI_processed; row++) { - HeapTuple val = SPI_tuptable->vals[row]; - TupleDesc tupdesc = SPI_tuptable->tupdesc; - bool isnull = false; - Oid dbid = DatumGetObjectId(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "setdatabase"), &isnull)); - bool get_snapshot_on_recovery = DatumGetBool(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "snapshot"), &isnull)); + HeapTuple val = SPI_tuptable->vals[row]; + TupleDesc 
tupdesc = SPI_tuptable->tupdesc; + bool isnull = false; + Oid dbid = DatumGetObjectId(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "setdatabase"), &isnull)); + bool get_snapshot_on_recovery = DatumGetBool(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "snapshot"), &isnull)); if (isnull) get_snapshot_on_recovery = get_full_snapshot_on_recovery; @@ -139,7 +139,7 @@ track_dbs(List *tracked_dbs) bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, - trackedDb->get_full_snapshot_on_recovery); + trackedDb->get_full_snapshot_on_recovery); } } @@ -160,28 +160,19 @@ worker_tracking_status_check() pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized); } - /* - * Here is quite a dump check, which imitates consistency validation. - * Written as an example of segment erroneous tracking status. - */ - if (list_length(tracked_dbs) != bloom_set_count(&tf_shared_state->bloom_set)) - { - if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - pg_atomic_test_set_flag(&tf_shared_state->tracking_error); - } - if (tracked_dbs) list_free_deep(tracked_dbs); CommitTransactionCommand(); } -/* scan pg_db_role_setting, find all databases, bind blooms if necessary */ +/* Main worker cycle. Scans pg_db_role_setting and binds tracked dbids to + * corresponding Bloom filter. Lives on segments. */ void arenadata_toolkit_main(Datum main_arg) { - instr_time current_time_timeout; - instr_time start_time_timeout; - long current_timeout = -1; + instr_time current_time_timeout; + instr_time start_time_timeout; + long current_timeout = -1; elog(LOG, "[arenadata toolkit] Starting background worker"); @@ -189,7 +180,7 @@ arenadata_toolkit_main(Datum main_arg) * The worker shouldn't exist when the master boots in utility mode. * Otherwise BackgroundWorkerInitializeConnection will explode with FATAL. 
*/ - if(IS_QUERY_DISPATCHER() && Gp_role != GP_ROLE_DISPATCH) + if (IS_QUERY_DISPATCHER() && Gp_role != GP_ROLE_DISPATCH) { proc_exit(0); } @@ -213,7 +204,7 @@ arenadata_toolkit_main(Datum main_arg) while (!got_sigterm) { int rc; - long timeout = tracking_worker_naptime_sec * 1000; + long timeout = tracking_worker_naptime_sec * 1000; if (current_timeout <= 0) { @@ -224,7 +215,7 @@ arenadata_toolkit_main(Datum main_arg) } rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - current_timeout); + current_timeout); if (rc & WL_LATCH_SET) { @@ -246,6 +237,10 @@ arenadata_toolkit_main(Datum main_arg) ProcessConfigFile(PGC_SIGHUP); } + /* + * We can wake up during WaitLatch very often, thus, timeout is + * calculated manually. + */ INSTR_TIME_SET_CURRENT(current_time_timeout); INSTR_TIME_SUBTRACT(current_time_timeout, start_time_timeout); current_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(current_time_timeout); diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index 6c285c0579d5..ad025bdbb7a1 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -15,13 +15,25 @@ mod_m(uint32 val, uint64 m) } /* - * Generate k positions in bloom filter for relfilenode oid. + * Generate k independent bit positions in a Bloom filter. * - * The enhanced double hashing is used (Dillinger P, Manolios P. Bloom Filters - * in Probabilistic Verification. 2004.) to produce k positions from 2 independent - * hashes. + * Implements Enhanced Double Hashing technique (Dillinger & Manolios, 2004) which + * generates k hash values using only 2 independent hash functions. This approach + * provides comparable performance to using k independent hash functions while + * being more computationally efficient. * - * out_hashes is out parameter which is filled with k bit indices. + * Algorithm: + * 1. Generate two independent 32-bit hashes (x, y) from a 64-bit wyhash + * 2. 
Apply modulo operation to fit within filter size + * 3. Generate subsequent indices using linear combination: x = (x + y) mod m + * y = (y + i) mod m + * + * Parameters: + * node - relation file node OID to hash + * bloom_size - size of Bloom filter in bytes + * out_hashes - output array to store k bit positions + * + * Reference: GPDB7 codebase. */ static void tracking_hashes(Oid node, uint32 bloom_size, uint32 *out_hashes) @@ -52,6 +64,15 @@ tracking_hashes(Oid node, uint32 bloom_size, uint32 *out_hashes) } } +/* +* Test membership of an element in Bloom filter +* +* Implements standard Bloom filter membership test by checking k different bit +* positions. The function provides probabilistic set membership with controllable +* false positive rate. +* +* Returns true if element might be in set, false if definitely not in set. +*/ bool bloom_isset(bloom_t * bloom, Oid relnode) { @@ -70,6 +91,17 @@ bloom_isset(bloom_t * bloom, Oid relnode) return true; } +/* + * Insert an element into Bloom filter + * + * Sets k bits in the Bloom filter's bit array corresponding to the k hash + * values generated for the input element. This operation is irreversible - + * elements cannot be removed without rebuilding the entire filter. 
+ * + * Parameters: + * bloom - pointer to Bloom filter structure + * relnode - relation file node OID to insert + */ void bloom_set(bloom_t * bloom, Oid relnode) { @@ -83,7 +115,7 @@ bloom_set(bloom_t * bloom, Oid relnode) } void -bloom_init(const uint32 bloom_size, bloom_t *bloom) +bloom_init(const uint32 bloom_size, bloom_t * bloom) { bloom->size = bloom_size; bloom_clear(bloom); @@ -113,7 +145,7 @@ bloom_merge(bloom_t * dst, bloom_t * src) } void -bloom_copy(bloom_t * src, bloom_t *dest) +bloom_copy(bloom_t * src, bloom_t * dest) { dest->size = src->size; memcpy(dest->map, src->map, src->size); diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 7893084007fc..19158913228b 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -9,21 +9,21 @@ #define BLOOM_ENTRY_GET(set, i) (void *)(set->bloom_entries + i * FULL_BLOOM_ENTRY_SIZE(set->bloom_size)); static void -bloom_entry_init(const uint32_t bloom_size, bloom_entry_t *bloom_entry) +bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) { bloom_entry->dbid = InvalidOid; bloom_init(bloom_size, &bloom_entry->bloom); } void -bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t *bloom_set) +bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t * bloom_set) { bloom_set->bloom_count = bloom_count; bloom_set->bloom_size = bloom_size; for (uint32_t i = 0; i < bloom_count; i++) { - bloom_entry_t *bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); + bloom_entry_t *bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); bloom_entry_init(bloom_size, bloom_entry); } @@ -31,13 +31,13 @@ bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_ /* * Finds the entry in bloom_set by given dbid. - * That's a simple linear search, probably should be reworked (depends on target dbs count). 
+ * That's a simple linear search, should be reworked (depends on target dbs count). */ static bloom_entry_t * find_bloom_entry(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; - int i = 0; + int i = 0; for (i = 0; i < bloom_set->bloom_count; i++) { @@ -78,11 +78,15 @@ bloom_set_bind(bloom_set_t * bloom_set, Oid dbid) return true; } +/* + * Fill the Bloom filter with 0 or 1. Used for setting + * full snapshots. + */ bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on) { bloom_entry_t *bloom_entry; - LWLock *entry_lock; + LWLock *entry_lock; LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); @@ -132,7 +136,7 @@ void bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) { bloom_entry_t *bloom_entry; - LWLock *entry_lock; + LWLock *entry_lock; LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); @@ -148,10 +152,10 @@ bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) /* Find bloom by dbid, copy all bytes to new filter, clear old (but keep it) */ bool -bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t *dest) +bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t * dest) { bloom_entry_t *bloom_entry; - LWLock *entry_lock; + LWLock *entry_lock; LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); @@ -177,7 +181,7 @@ bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) { bloom_entry_t *bloom_entry; - LWLock *entry_lock; + LWLock *entry_lock; if (!m_bloom || !bloom_set) return false; @@ -194,7 +198,7 @@ bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) return true; } if (entry_lock) - LWLockRelease(entry_lock); + LWLockRelease(entry_lock); LWLockRelease(bloom_set_lock); return false; @@ -205,7 +209,7 @@ bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) { bloom_entry_t *bloom_entry; bool is_triggered = false; - LWLock 
*entry_lock; + LWLock *entry_lock; LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_SHARED); @@ -224,7 +228,7 @@ bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) int bloom_set_count(bloom_set_t * bloom_set) { - int count = 0; + int count = 0; bloom_entry_t *bloom_entry; LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index bfd6dd68e172..bc38a812e110 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -13,20 +13,24 @@ #define TRACK_NODE_GET(track, i) (void *)(track->nodes + i * sizeof(drops_track_node_t)); +/* + * Drop track element. Stores just relfilenode + * and dbid. + */ typedef struct { Oid relNode; Oid dbNode; } track_relfilenode_t; -/* doubly linked list node of dropped file nodes */ +/* Doubly linked list node of dropped file nodes */ typedef struct { dlist_node node; track_relfilenode_t relfileNode; } drops_track_node_t; - +/* Drops track */ typedef struct { dlist_head head; @@ -100,7 +104,7 @@ drops_track_deinit(void) shmem_startup_hook = next_shmem_startup_hook; } -/* find unused node; this should be heavily reworked or optimized */ +/* Find unused node i linked list. */ static drops_track_node_t * find_empty_node() { drops_track_node_t *track_node = NULL; @@ -131,7 +135,7 @@ static drops_track_node_t * find_empty_node() return track_node; } -/* add relNode to track; old node is dropped if no space */ +/* Add relNode to track. 
Old node is dropped if no space */ void drops_track_add(RelFileNode relfileNode) { @@ -158,7 +162,7 @@ drops_track_add(RelFileNode relfileNode) LWLockRelease(drops_track_lock); } -/* move relfilenodes from track to list */ +/* Extract relfilenodes corresponding to specific db into separeate list */ List * drops_track_move(Oid dbid) { @@ -193,7 +197,9 @@ drops_track_move(Oid dbid) return oids; } -/* undo moving of relfilenodes; old nodes are dropped if no space */ +/* Return extracted dropped relfilenodes. + * Old nodes are removed if no space. + */ void drops_track_move_undo(List *oids, Oid dbid) { diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index fc097d65c481..17778985e275 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -12,16 +12,16 @@ typedef struct { uint32_t size; /* size in bytes of 'map' */ - uint8 is_set_all; /* is all bits sets by bloom_set_all */ - uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* filter itself, array of bytes */ ; + uint8 is_set_all; /* indicates all bits are set */ + uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* core bit array */ ; } bloom_t; -void bloom_init(const uint32 bloom_size, bloom_t* bloom); +void bloom_init(const uint32 bloom_size, bloom_t * bloom); bool bloom_isset(bloom_t * bloom, Oid relnode); void bloom_set(bloom_t * bloom, Oid relnode); void bloom_set_all(bloom_t * bloom); void bloom_clear(bloom_t * bloom); void bloom_merge(bloom_t * dst, bloom_t * src); -void bloom_copy(bloom_t * src, bloom_t *dest); +void bloom_copy(bloom_t * src, bloom_t * dest); #endif /* BLOOM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index e2fde9d7bbad..3761f256f49b 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -8,27 +8,27 @@ #define FULL_BLOOM_ENTRY_SIZE(size) 
(offsetof(bloom_entry_t, bloom) + FULL_BLOOM_SIZE(size)) #define FULL_BLOOM_SET_SIZE(size, count) (offsetof(bloom_set_t, bloom_entries) + FULL_BLOOM_ENTRY_SIZE(size) * count) -/* bloom filter extended by dbid */ +/* Bloom set entry. */ typedef struct { - Oid dbid; /* dbid if binded, InvalidOid if unbinded */ - bloom_t bloom; + Oid dbid; /* dbid of tracked database or InvalidOid */ + bloom_t bloom; /* bloom filter itself */ } bloom_entry_t; -/* static set of all bloom filters */ +/* Set of all allocated bloom filters*/ typedef struct { - uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ - uint32 bloom_size; /* size of bloom filter */ - char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of + uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ + uint32 bloom_size; /* size of bloom filter */ + char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of * bloom_entry_t */ } bloom_set_t; -void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size, bloom_set_t *bloom_set); +void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size, bloom_set_t * bloom_set); bool bloom_set_bind(bloom_set_t * bloom_set, Oid dbid); void bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid); void bloom_set_set(bloom_set_t * bloom_set, Oid dbid, Oid relNode); -bool bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t *dest); +bool bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t * dest); bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on); bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid); diff --git a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h index f154edf7815d..0f3a08b98af7 100644 --- a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h +++ b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h @@ -6,53 +6,84 @@ #define _wyrot(x) (((x)>>32)|((x)<<32)) /* - * 
This is an implementation of the wyhash algorithm - a modern, - * fast non-cryptographic hash function designed by Wang Yi. The code - * is unlicensed, therefore, we can use it for the Bloom filter without doubts. - * _wymum: Multiplication-based mixing function. - * _wymix: Combines multiplication mixing with XOR operations. - * wyhash: Main hash function that processes a 32-bit key with a seed. - * This algorithm has better properties than Postgres's Jenkins hash and - * provides good avalanche effect. + * This is an adaptation of WyHash - a fast, modern non-cryptographic hash function. + * + * Originally designed by Wang Yi, whose hash implementation is published + * without license. + * + * Core components: + * _wymum: Implements multiplication-based mixing + * _wymix: Two-step mixing function + * - Combines _wymum multiplication with XOR operations + * - Enhances bit diffusion and avalanche properties + * wyhash: Main hashing function + * - Processes 32-bit keys with a seed value + * For Bloom filter implementation this hash is considered having + * better statistical properties than Postgres's default Jenkins hash. 
*/ -static inline void _wymum(uint64 *A, uint64 *B){ +static inline void +_wymum(uint64 *A, uint64 *B) +{ #if(SIZEOF_VOID_P < 8) - uint64 hh=(*A>>32)*(*B>>32), - hl=(*A>>32)*(uint32)*B, - lh=(uint32)*A*(*B>>32), - ll=(uint64)(uint32)*A*(uint32)*B; - *A=_wyrot(hl)^hh; - *B=_wyrot(lh)^ll; + uint64 hh = (*A >> 32) * (*B >> 32), + hl = (*A >> 32) * (uint32) *B, + lh = (uint32) *A * (*B >> 32), + ll = (uint64) (uint32) *A * (uint32) *B; + + *A = _wyrot(hl) ^ hh; + *B = _wyrot(lh) ^ ll; #elif defined(HAVE_INT128) - uint128 r=*A; - r*=*B; - *A=(uint64)r; - *B=(uint64)(r>>64); + uint128 r = *A; + + r *= *B; + *A = (uint64) r; + *B = (uint64) (r >> 64); #else - uint64 ha=*A>>32, hb=*B>>32, la=(uint32)*A, lb=(uint32)*B, hi, lo; - uint64 rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t>32)+(rm1>>32)+c; - *A=lo; *B=hi; + uint64 ha = *A >> 32, + hb = *B >> 32, + la = (uint32) *A, + lb = (uint32) *B, + hi, + lo; + uint64 rh = ha * hb, + rm0 = ha * lb, + rm1 = hb * la, + rl = la * lb, + t = rl + (rm0 << 32), + c = t < rl; + + lo = t + (rm1 << 32); + c += lo < t; + hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + *A = lo; + *B = hi; #endif } -static inline uint64 _wymix(uint64 A, uint64 B){ _wymum(&A,&B); return A^B; } +static inline uint64 _wymix(uint64 A, uint64 B) +{ + _wymum(&A, &B); + return A ^ B; +} -static inline uint64 wyhash(uint32 key, uint64 seed){ - seed^=_wymix(seed^0x2d358dccaa6c78a5ull,0x8bb84b93962eacc9ull); +static inline uint64 +wyhash(uint32 key, uint64 seed) +{ + seed ^= _wymix(seed ^ 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull); #if (WORDS_BIGENDIAN) #if defined(HAVE__BUILTIN_BSWAP32) - key = __builtin_bswap32(key); + key = __builtin_bswap32(key); #else - key = (((key >> 24) & 0xff)| ((key >> 8) & 0xff00)| ((key << 8) & 0xff0000)| ((key << 24) & 0xff000000)); + key = (((key >> 24) & 0xff) | ((key >> 8) & 0xff00) | ((key << 8) & 0xff0000) | ((key << 24) & 0xff000000)); #endif #endif - uint64 a = ((uint64) key << 32) | key; - uint64 b = 0; - 
a^=0x8bb84b93962eacc9ull; - b^=seed; - _wymum(&a,&b); - return _wymix(a^0x2d358dccaa6c78a5ull^4,b^0x8bb84b93962eacc9ull); + uint64 a = ((uint64) key << 32) | key; + uint64 b = 0; + + a ^= 0x8bb84b93962eacc9ull; + b ^= seed; + _wymum(&a, &b); + return _wymix(a ^ 0x2d358dccaa6c78a5ull ^ 4, b ^ 0x8bb84b93962eacc9ull); } -#endif /* HASHIMPL_H */ +#endif /* HASHIMPL_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index b357cda5cb6b..4e7ee841a3a2 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -6,8 +6,19 @@ #include "bloom_set.h" +/* maximum number of hash functions that can be used in Bloom filter */ #define MAX_BLOOM_HASH_FUNCS 6 +/* + * Stores the Bloom filter in shared memory. + * tracking_is_initialized - a flag indicating + * bgworker binded dbids at startup/recovery. + * + * tracking_error - sign of any internal error. If set, + * blocks the work of track_getting procedure until cluster restart. + * + * bloom_set - set of db_track_count Bloom filters. + */ typedef struct { pg_atomic_flag tracking_is_initialized; @@ -15,22 +26,25 @@ typedef struct bloom_set_t bloom_set; } tf_shared_state_t; +/* + * Locks on each bloom_entry_t in bloom_set. 
+ */ typedef struct { - Oid dbid; - LWLock *lock; -} tf_entry_lock_t; + Oid dbid; + LWLock *lock; +} tf_entry_lock_t; -extern tf_shared_state_t * tf_shared_state; +extern tf_shared_state_t *tf_shared_state; extern LWLock *tf_state_lock; extern LWLock *bloom_set_lock; extern tf_entry_lock_t bloom_locks[]; extern uint64 bloom_hash_seed; -extern int bloom_hash_num; +extern int bloom_hash_num; void tf_shmem_init(void); void tf_shmem_deinit(void); -LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode); +LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); void LWLockBindEntry(Oid dbid); void LWLockUnbindEntry(Oid dbid); diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 43c7228f791c..36785f698a4a 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -51,13 +51,29 @@ my_bloom_power(uint64 target_bitset_bits) } /* - * In order to decrease false positive ratio and make - * Bloom filter close to theoretical form the calculation - * of k hashes is suggested. The value k is estimated - * as optimal value minimizing fp ratio. The bloom_size - * is also adjusted to power of 2. - * Additionally, the seed for hash calculation is initialized - * here. + * Initialize optimal Bloom filter parameters + * + * This function calculates and sets optimal parameters for the Bloom filter + * based on established widespread principles. + * + * Adjusts the filter size to the nearest power of 2 to optimize memory alignment + * and access patterns and simplify modulo operations in hash calculations. + * + * Calculates the optimal number of hash functions using the formula: + * k = (m/n)ln(2), which minimizes the false positive probability + * p = (1 - e^(-kn/m))^k. 
+ * where: + * - m = total_bits (size of bit array) + * - n = TOTAL_ELEMENTS (expected number of insertions) + * + * Initializes bloom_hash_seed with a random value to prevent deterministic hash collisions + * and ensure independent hash distributions across runs + * + * Note: The actual false positive rate might slightly deviate from theoretical + * optimum due to: + * - Rounding of k to integer values + * - Size adjustment to power of 2 + * - Non-perfect independence of double hashing (see bloom.c) */ static void init_bloom_invariants() diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 219048e962e9..b05767f31970 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -49,32 +49,38 @@ PG_FUNCTION_INFO_V1(tracking_get_track_main); typedef struct { - Relation pg_class_rel; /*pg_class relation*/ + Relation pg_class_rel; /* pg_class relation */ SysScanDesc scan; } tf_main_func_state_t; +/* + * Main state during tracking_get_track call. Stores + * copy of shared Bloom and tracking filtering parameters. 
+ */ typedef struct { - bloom_t *bloom; /* local copy of shared bloom */ - bloom_t *rollback_bloom; /* bloom for rollback in case of sequential track acquisition*/ - List *drops; /* drop list for current db */ + bloom_t *bloom; /* local copy of shared bloom */ + bloom_t *rollback_bloom; /* bloom for rollback in case of sequential + * track acquisition */ + List *drops; /* drop list for current db */ ListCell *next_drop; - uint64 relkinds; /* tracking relkinds */ - uint64 relstorages; /* tracking relstorages */ - List *schema_oids; /*tracking schemas */ + uint64 relkinds; /* tracking relkinds */ + uint64 relstorages; /* tracking relstorages */ + List *schema_oids; /* tracking schemas */ } tf_get_global_state_t; typedef struct { - CdbPgResults cdb_results; /*results of CdbDispatch*/ + CdbPgResults cdb_results; /* results of CdbDispatch */ int current_result; int current_row; - SPITupleTable *entry_result; /*results from SPI queries */ + SPITupleTable *entry_result; /* results from SPI queries */ uint64 entry_processed; int entry_current_row; - FmgrInfo *inputFuncInfos; /* FuncInfos for string to Datum values transformation */ + FmgrInfo *inputFuncInfos; /* FuncInfos for parse string to Datum values + * transformation */ Oid *typIOParams; } tf_get_func_state_t; @@ -90,7 +96,6 @@ tf_check_shmem_error(void) } /* - * If get function complete with commit, just free resources; * In case of abort bloom is merged back as well as drops track. */ static void @@ -150,12 +155,16 @@ split_string_to_list(const char *input) return result; } +/* + * Tracked relkinds and relstorage types + * are coded into 64 bits via ascii offtests. 
+ */ static uint64 list_to_bits(const char *input) { char *input_copy; char *token; - uint64 bits = 0; + uint64 bits = 0; if (input == NULL) return 0; @@ -168,7 +177,7 @@ list_to_bits(const char *input) { if (*token != '\0') { - char c = *token; + char c = *token; if (c >= 'a' && c <= 'z') bits |= (1UL << (c - 'a')); @@ -265,7 +274,7 @@ get_filters_from_guc() foreach(lc, schema_names) { Oid nspOid; - char *name = (char *)lfirst(lc); + char *name = (char *) lfirst(lc); nspOid = GetSysCacheOid1(NAMESPACENAME, CStringGetDatum(name)); @@ -346,8 +355,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), errmsg("database %u is not tracked", MyDatabaseId), - errhint("Call 'arenadata_toolkit.tracking_register_db()'" - "to enable tracking"))); + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); } else { @@ -367,8 +376,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), errmsg("database %u is not tracked", MyDatabaseId), - errhint("Call 'arenadata_toolkit.tracking_register_db()'" - "to enable tracking"))); + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); } /* initial snapshot shouldn't return drops */ if (tf_get_global_state.bloom && !tf_get_global_state.bloom->is_set_all) @@ -394,19 +403,19 @@ tracking_get_track_main(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); funcctx->tuple_desc = CreateTemplateTupleDesc(GET_TRACK_TUPDESC_LEN, false); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)1, "relid", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)2, "name", NAMEOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)3, "relfilenode", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)4, "size", INT8OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)5, "state", CHAROID, -1, 0); - 
TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)6, "gp_segment_id", INT4OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)7, "relnamespace", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)8, "relkind", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)9, "relstorage", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 2, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 4, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 5, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 6, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 7, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 8, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 9, "relstorage", CHAROID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); state = (tf_main_func_state_t *) palloc0(sizeof(tf_main_func_state_t)); - funcctx->user_fctx = (void *)state; + funcctx->user_fctx = (void *) state; if (tf_get_global_state.bloom) { @@ -428,7 +437,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) char relstorage; HeapTuple pg_class_tuple; Form_pg_class relp; - int64 size; + int64 size; if (!state->scan) break; @@ -552,7 +561,7 @@ tracking_get_track(PG_FUNCTION_ARGS) MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); state = (tf_get_func_state_t *) palloc0(sizeof(tf_get_func_state_t)); - funcctx->user_fctx = (void *)state; + funcctx->user_fctx = (void *) state; state->entry_result = SPI_tuptable; state->entry_processed = SPI_processed; @@ -565,23 +574,23 @@ tracking_get_track(PG_FUNCTION_ARGS) state->current_row = 0; funcctx->tuple_desc = 
CreateTemplateTupleDesc(9, false); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)1, "relid", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)2, "name", NAMEOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)3, "relfilenode", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)4, "size", INT8OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)5, "state", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)6, "gp_segment_id", INT4OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)7, "relnamespace", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)8, "relkind", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber)9, "relstorage", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 2, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 4, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 5, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 6, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 7, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 8, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 9, "relstorage", CHAROID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); if (state->cdb_results.numResults > 0) { int natts = funcctx->tuple_desc->natts; - state->inputFuncInfos = (FmgrInfo *)palloc0(natts * sizeof(FmgrInfo)); - state->typIOParams = (Oid *)palloc0(natts * sizeof(Oid)); + state->inputFuncInfos = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo)); + state->typIOParams = (Oid *) palloc0(natts * sizeof(Oid)); for 
(int i = 0; i < natts; i++) { Oid type = TupleDescAttr(funcctx->tuple_desc, i)->atttypid; @@ -629,7 +638,7 @@ tracking_get_track(PG_FUNCTION_ARGS) { if (PQgetisnull(pgresult, state->current_row, col)) { - values[col] = (Datum)0; + values[col] = (Datum) 0; nulls[col] = true; } else @@ -671,7 +680,7 @@ track_db(Oid dbid, bool reg) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = &v_stmt; @@ -713,8 +722,8 @@ tracking_register_db_main(PG_FUNCTION_ARGS) static bool is_initialized() { - CdbPgResults cdb_pgresults = {NULL, 0}; - bool all_inited = true; + CdbPgResults cdb_pgresults = {NULL, 0}; + bool all_inited = true; CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); @@ -737,7 +746,7 @@ is_initialized() is_initialized = strcmp(PQgetvalue(pgresult, 0, 1), "t") == 0; elog(LOG, "[arenadata_toolkit] tracking_register_db initialization check" - " segindex: %d, is_initialized: %d", segindex, is_initialized); + " segindex: %d, is_initialized: %d", segindex, is_initialized); if (!is_initialized) { @@ -776,7 +785,7 @@ tracking_register_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { - char *cmd = + char *cmd = psprintf("select arenadata_toolkit.tracking_register_db_main(true, %u)", dbid); CdbDispatchCommand(cmd, 0, NULL); @@ -809,7 +818,7 @@ tracking_unregister_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { - char *cmd = + char *cmd = psprintf("select arenadata_toolkit.tracking_register_db_main(false, %u)", dbid); CdbDispatchCommand(cmd, 0, NULL); @@ -844,7 +853,7 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); v_stmt.type = T_VariableSetStmt; v_stmt.kind = VAR_SET_VALUE; @@ -991,7 
+1000,7 @@ track_schema(const char *schemaName, Oid dbid, bool reg) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = &v_stmt; @@ -1124,11 +1133,11 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) errmsg("Invalid relkind: %s", token), errhint("Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M'"))); - if (!seen_relkinds[(unsigned char)token[0]]) + if (!seen_relkinds[(unsigned char) token[0]]) { appendStringInfoChar(&buf, token[0]); appendStringInfoChar(&buf, ','); - seen_relkinds[(unsigned char)token[0]] = true; + seen_relkinds[(unsigned char) token[0]] = true; } token = strtok(NULL, ","); } @@ -1140,7 +1149,7 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); v_stmt.type = T_VariableSetStmt; v_stmt.name = "arenadata_toolkit.tracking_relkinds"; @@ -1222,11 +1231,11 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) errmsg("Invalid relstorage type: %s", token), errhint("Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f'"))); - if (!seen_relstorages[(unsigned char)token[0]]) + if (!seen_relstorages[(unsigned char) token[0]]) { appendStringInfoChar(&buf, token[0]); appendStringInfoChar(&buf, ','); - seen_relstorages[(unsigned char)token[0]] = true; + seen_relstorages[(unsigned char) token[0]] = true; } token = strtok(NULL, ","); } @@ -1237,7 +1246,7 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); stmt.setstmt = &v_stmt; @@ -1289,7 +1298,7 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) if (!bloom_set_trigger_bits(&tf_shared_state->bloom_set, dbid, true)) 
ereport(ERROR, - (errmsg("Failed to find corresponding filter to database %u", dbid))); + (errmsg("Failed to find corresponding filter to database %u", dbid))); if (Gp_role == GP_ROLE_DISPATCH) { @@ -1325,12 +1334,12 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) TupleDesc tupdesc; HeapTuple tuple; Datum values[2]; - bool nulls[2] = {false, false}; + bool nulls[2] = {false, false}; Datum result; tf_check_shmem_error(); - rsi = (ReturnSetInfo *)fcinfo->resultinfo; + rsi = (ReturnSetInfo *) fcinfo->resultinfo; tupdesc = rsi->expectedDesc; /* Populate an output tuple. */ From bea4ca9affbb5ce2f7d4a3550fa79490fa2cc976 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 21:10:28 +0300 Subject: [PATCH 29/69] Add Readme --- gpcontrib/arenadata_toolkit/README.md | 59 +++++++++++++++++++ gpcontrib/arenadata_toolkit/src/drops_track.c | 2 +- .../arenadata_toolkit/src/include/bloom.h | 2 +- 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 gpcontrib/arenadata_toolkit/README.md diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md new file mode 100644 index 000000000000..276621117487 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/README.md @@ -0,0 +1,59 @@ +## arenadata_toolkit - database objects tracking extension for GPDB + +`arenadata_toolkit` starting from version 1.7 represents a GPDB extension that efficiently tracks file system changes (extend, truncate, create, unlink operations) using space-efficient Bloom filters stored in shared memory. This extension is particularly useful for monitoring and maintaining database files sizes across a distributed environment. + +The main purpose of this code is achieving fast database size calculation and tracking file changes at relation +level. The extension implements a probabilistic tracking system using Bloom filters to monitor file changes across Greenplum segments. 
It utilizes shared memory for state management and employs a background workers to maintain consistency. + +#### Configuring GPDB and extension usage +Since extension uses shared memory, configuration on all GPDB segments must be changed by setting +```shell script +gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +``` +Extension may track restricted number of databases. The maximum number of them is defined by GUC +| arenadata_toolkit.tracking_db_track_count | Need restart |Possible values [1, 1000]; Default 5| +|--|--|--| +For each tracked database there allocated a Bloom filter in shared memory. The size of each filter is controlled via +| arenadata_toolkit.tracking_bloom_size | Need restart |Possible values (bytes) [64, 128000000] Default 1048576| +|--|--|--| +The specific database can be binded to unoccupied filter with function +```shell script +psql -d my_db -c select arenadata_toolkit.tracking_register_db() +or +psql -c select arenadata_toolkit.tracking_register_db(12345) +``` +After registering each relation file change withing the database will be noted in Bloom filter. +Using Bloom filter allows us to calculate the sizes of only relations whose relfilenode is present in the filter. +The current size snapshot can be taken via view: +``` +select * from arenadata_toolkit.tables_track; +``` +In order to get the snapshot of all database relations you should call in the database of interest +``` +arenadata_toolkit.tracking_trigger_initial_snapshot(); +``` + +#### Choosing optimal Bloom size + +Choosing the optimal Bloom filter size is crucial for balancing memory usage and accuracy. +First of all, when choosing the filter size, you should take into account your system resources, because bloom filters are allocated in shared memory for each segment, and too wide structures (tracking_db_track_count * tracking_bloom_size) could decrease overall perfomance. 
+ +Next, choose the filter size satisfying your performance goals: +- Define false positive tolerance, p. Since Bloom filter is probabilistic data structure there is a probability to calculate the size of relation, which has not been modified. And the smaller filter is, the more often this occurs. +- Memory constraints +- Query patterns, if queries are mostly reading then huge sizes are unecessary. + +If you will estimate number of objects in your database, you can calculate theoretical size: +$$m = -\frac{n \ln p}{(\ln 2)^2}$$ +- n = estimated number of elements +- p = target false positive rate +- m = filter size in bits + +Quick Reference Table + + Deployment Size | Files | Target FPR | Recommended Size | +|----------------|------------|------------|------------------| +| Small | < 100K | 1% | 1 MB | +| Medium | 100K - 1M | 1% | 8 MB | +| Large | > 1M | 1% | 32 MB | +| Enterprise | > 10M | 0.1% | 128 MB | \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index bc38a812e110..dc7001669313 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -174,7 +174,7 @@ drops_track_move(Oid dbid) if (drops_track->used_count == 0) { LWLockRelease(drops_track_lock); - return oids; + return NIL; } dlist_foreach_modify(iter, &drops_track->head) diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 17778985e275..5a06b08f2d2d 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -6,7 +6,7 @@ #include #define MAX_BLOOM_HASH_FUNCS 6 -#define TOTAL_ELEMENTS 100000000UL +#define TOTAL_ELEMENTS 10000000UL #define FULL_BLOOM_SIZE(size) (offsetof(bloom_t, map) + size) typedef struct From d7793a516bcce8a1e2af93d4070307ffad2db31c Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 28 Oct 2024 21:27:26 +0300 
Subject: [PATCH 30/69] Fix tests --- .../expected/arenadata_toolkit_tracking.out | 12 ++++++------ .../sql/arenadata_toolkit_guc.sql | 4 ---- .../sql/arenadata_toolkit_tracking.sql | 4 +--- .../src/arenadata_toolkit_worker.c | 17 +++++++++-------- 4 files changed, 16 insertions(+), 21 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 0096928c62d6..bc0f70875420 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -11,18 +11,18 @@ SELECT * FROM arenadata_toolkit.tracking_get_track(); ERROR: database 102559 is not tracked HINT: Call 'arenadata_toolkit.tracking_register_db()'to enable tracking CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" -SELECT arenadata_toolkit.tracking_register_db(); - tracking_register_db ----------------------- - t -(1 row) - SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); pg_sleep ---------- (1 row) +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + -- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to -- setting up the bloom filter such that all relfilenodes are considered. SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index bcd357659bba..30618b2b2ee8 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -169,7 +169,3 @@ ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, \c contrib_regression; DROP DATABASE tracking1; --- start_ignore -\! gpconfig -r shared_preload_libraries; -\! 
gpstop -raq -M fast --- end_ignore diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 71f2193a2f2e..8f6953a47e7b 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -1,7 +1,5 @@ -- Tests for size tracking logic introduced in version 1.7 -- start_ignore -\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' -\! gpstop -raq -M fast \! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5' \! gpstop -u \c @@ -20,8 +18,8 @@ CREATE EXTENSION arenadata_toolkit; -- 1. Test getting track on not registered database; SELECT * FROM arenadata_toolkit.tracking_get_track(); -SELECT arenadata_toolkit.tracking_register_db(); SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); +SELECT arenadata_toolkit.tracking_register_db(); -- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to -- setting up the bloom filter such that all relfilenodes are considered. 
diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index f6ab8cb70a92..c5318f94e16b 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -82,14 +82,15 @@ get_tracked_dbs() initStringInfo(&query); appendStringInfo(&query, SQL( - WITH _ AS( - WITH _ AS( - SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig" FROM "pg_db_role_setting" WHERE "setrole" = 0 - ) SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig" FROM _ GROUP BY 1 - ) select "setdatabase", -("setconfig"->> 'arenadata_toolkit.tracking_snapshot_on_recovery'): :bool as "snapshot" FROM _ WHERE -("setconfig"->> 'arenadata_toolkit.tracking_is_db_tracked'): :bool IS TRUE - )); + WITH _ AS ( + WITH _ AS ( + SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig" + FROM "pg_db_role_setting" WHERE "setrole"=0) + SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig" + FROM _ GROUP BY 1) + SELECT "setdatabase", + ("setconfig"->>'arenadata_toolkit.tracking_snapshot_on_recovery')::bool as "snapshot" FROM _ WHERE + ("setconfig"->>'arenadata_toolkit.tracking_is_db_tracked')::bool IS TRUE)); if (SPI_connect() != SPI_OK_CONNECT) ereport(ERROR, (errmsg("SPI_connect failed"))); From b9e94f089f3577c0f39c45c45bb2e5b21dfbbf21 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 29 Oct 2024 21:58:16 +0300 Subject: [PATCH 31/69] Remove size adjustment --- gpcontrib/arenadata_toolkit/README.md | 22 ++++++------ gpcontrib/arenadata_toolkit/src/tf_shmem.c | 40 +++------------------- 2 files changed, 15 insertions(+), 47 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index 276621117487..776508132e76 100644 --- 
a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -3,7 +3,7 @@ `arenadata_toolkit` starting from version 1.7 represents a GPDB extension that efficiently tracks file system changes (extend, truncate, create, unlink operations) using space-efficient Bloom filters stored in shared memory. This extension is particularly useful for monitoring and maintaining database files sizes across a distributed environment. The main purpose of this code is achieving fast database size calculation and tracking file changes at relation -level. The extension implements a probabilistic tracking system using Bloom filters to monitor file changes across Greenplum segments. It utilizes shared memory for state management and employs a background workers to maintain consistency. +level. The extension implements a probabilistic tracking system using Bloom filters to monitor file changes across Greenplum segments. It utilizes shared memory for state management and employs background workers to maintain consistency. #### Configuring GPDB and extension usage Since extension uses shared memory, configuration on all GPDB segments must be changed by setting @@ -16,13 +16,13 @@ Extension may track restricted number of databases. The maximum number of them i For each tracked database there allocated a Bloom filter in shared memory. The size of each filter is controlled via | arenadata_toolkit.tracking_bloom_size | Need restart |Possible values (bytes) [64, 128000000] Default 1048576| |--|--|--| -The specific database can be binded to unoccupied filter with function +The specific database can be bound to unoccupied filter with function ```shell script psql -d my_db -c select arenadata_toolkit.tracking_register_db() or psql -c select arenadata_toolkit.tracking_register_db(12345) ``` -After registering each relation file change withing the database will be noted in Bloom filter. 
+After registering, each relation file change within the database will be noted in the Bloom filter. Using Bloom filter allows us to calculate the sizes of only relations whose relfilenode is present in the filter. The current size snapshot can be taken via view: ``` @@ -36,12 +36,12 @@ arenadata_toolkit.tracking_trigger_initial_snapshot(); #### Choosing optimal Bloom size Choosing the optimal Bloom filter size is crucial for balancing memory usage and accuracy. -First of all, when choosing the filter size, you should take into account your system resources, because bloom filters are allocated in shared memory for each segment, and too wide structures (tracking_db_track_count * tracking_bloom_size) could decrease overall perfomance. +First of all, when choosing the filter size, you should take into account your system resources, because bloom filters are allocated in shared memory for each segment, and overly large structures (tracking_db_track_count * tracking_bloom_size) could decrease overall performance. Next, choose the filter size satisfying your performance goals: - Define false positive tolerance, p. Since Bloom filter is probabilistic data structure there is a probability to calculate the size of relation, which has not been modified. And the smaller filter is, the more often this occurs. - Memory constraints -- Query patterns, if queries are mostly reading then huge sizes are unecessary. +- Query patterns: if queries are mostly reads, then huge filter sizes are unnecessary.
If you will estimate number of objects in your database, you can calculate theoretical size: $$m = -\frac{n \ln p}{(\ln 2)^2}$$ @@ -51,9 +51,9 @@ $$m = -\frac{n \ln p}{(\ln 2)^2}$$ Quick Reference Table - Deployment Size | Files | Target FPR | Recommended Size | -|----------------|------------|------------|------------------| -| Small | < 100K | 1% | 1 MB | -| Medium | 100K - 1M | 1% | 8 MB | -| Large | > 1M | 1% | 32 MB | -| Enterprise | > 10M | 0.1% | 128 MB | \ No newline at end of file + Deployment Size | Files | Target FPR | Recommended Size, bytes| +|----------------|------------|------------|------------------------| +| Small | < 100K | 1% | 1048576 | +| Medium | 100K - 1M | 1% | 8388608 | +| Large | > 1M | 1% | 33554432 | +| Enterprise | > 10M | 1% | 134217728 | \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 36785f698a4a..fe2530ac4601 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -9,7 +9,7 @@ #include static shmem_startup_hook_type next_shmem_startup_hook = NULL; -tf_shared_state_t *tf_shared_state; +tf_shared_state_t *tf_shared_state = NULL; LWLock *tf_state_lock; LWLock *bloom_set_lock; tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; @@ -32,33 +32,12 @@ init_lwlocks(void) } } -/* - * Calculate the closes power of 2 for given - * length. - */ -static int -my_bloom_power(uint64 target_bitset_bits) -{ - int bloom_power = -1; - - while (target_bitset_bits > 0 && bloom_power < 32) - { - bloom_power++; - target_bitset_bits >>= 1; - } - - return bloom_power; -} - /* * Initialize optimal Bloom filter parameters * * This function calculates and sets optimal parameters for the Bloom filter * based on established widespread principles. * - * Adjusts the filter size to the nearest power of 2 to optimize memory alignment - * and access patterns and simplify modulo operations in hash calculations. 
- * * Calculates the optimal number of hash functions using the formula: * k = (m/n)ln(2), which minimizes the false positive probability * p = (1 - e^(-kn/m))^k. @@ -66,25 +45,14 @@ my_bloom_power(uint64 target_bitset_bits) * - m = total_bits (size of bit array) * - n = TOTAL_ELEMENTS (expected number of insertions) * - * Initializes bloom_hash_seed with a random value to prevent deterministic hash collisions - * and ensure independent hash distributions across runs - * - * Note: The actual false positive rate might slightly deviate from theoretical - * optimum due to: - * - Rounding of k to integer values - * - Size adjustment to power of 2 - * - Non-perfect independence of double hashing (see bloom.c) + * Initializes bloom_hash_seed with a random value to prevent deterministic + * hash collisions and ensure independent hash distributions across runs. */ static void init_bloom_invariants() { - uint64 total_bits; - int k; - - total_bits = UINT64CONST(1) << my_bloom_power(bloom_size * 8); - bloom_size = total_bits / 8; + int k = rint(log(2.0) * (bloom_size * 8) / TOTAL_ELEMENTS); - k = rint(log(2.0) * total_bits / TOTAL_ELEMENTS); bloom_hash_num = Max(1, Min(k, MAX_BLOOM_HASH_FUNCS)); bloom_hash_seed = (uint64) random(); } From 72d60717e911fc8a91c156131cef5cd1b6c69e71 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 29 Oct 2024 23:35:11 +0300 Subject: [PATCH 32/69] Set gucs --- .../expected/arenadata_toolkit_guc.out | 117 +++++++++++++++++- .../sql/arenadata_toolkit_guc.sql | 28 +++++ .../src/arenadata_toolkit_guc.c | 4 +- gpcontrib/arenadata_toolkit/src/track_files.c | 68 ++++++---- 4 files changed, 189 insertions(+), 28 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index 0800a8e7147d..9ad0562d65c9 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ 
-45,7 +45,7 @@ SELECT arenadata_toolkit.tracking_register_db(); SHOW arenadata_toolkit.tracking_is_db_tracked; arenadata_toolkit.tracking_is_db_tracked ------------------------------------------ - off + on (1 row) SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON @@ -256,5 +256,120 @@ SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ERROR: cannot change tracking status outside the tracking_register_schema function ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ERROR: cannot change tracking status outside the tracking_register_schema function +-- Test GUCs are set in the caller's session. +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + on +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + arenadata_toolkit.tracking_snapshot_on_recovery +------------------------------------------------- + on +(1 row) + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(false); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + arenadata_toolkit.tracking_snapshot_on_recovery +------------------------------------------------- + off +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + arenadata_toolkit.tracking_schemas +------------------------------------ + arenadata_toolkit 
+(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + arenadata_toolkit.tracking_schemas +------------------------------------ + arenadata_toolkit +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + arenadata_toolkit.tracking_schemas +-------------------------------------------------------------------------- + public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema +(1 row) + +SHOW arenadata_toolkit.tracking_relkinds; + arenadata_toolkit.tracking_relkinds +------------------------------------- + m,M,o +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('r,t'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_relkinds; + arenadata_toolkit.tracking_relkinds +------------------------------------- + r,t +(1 row) + +SHOW arenadata_toolkit.tracking_relstorages; + arenadata_toolkit.tracking_relstorages +---------------------------------------- + v +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('a'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_relstorages; + arenadata_toolkit.tracking_relstorages +---------------------------------------- + a +(1 row) + \c contrib_regression; DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index 30618b2b2ee8..c2790f84be30 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -166,6 +166,34 @@ SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas 
= "pg_catalog, mychema"; +-- Test GUCs are set in the caller's session. +SELECT arenadata_toolkit.tracking_register_db(); +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT arenadata_toolkit.tracking_unregister_db(); +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(false); +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + +SHOW arenadata_toolkit.tracking_schemas; +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); +SHOW arenadata_toolkit.tracking_schemas; + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); +SHOW arenadata_toolkit.tracking_schemas; + +SHOW arenadata_toolkit.tracking_relkinds; +SELECT arenadata_toolkit.tracking_set_relkinds('r,t'); +SHOW arenadata_toolkit.tracking_relkinds; + +SHOW arenadata_toolkit.tracking_relstorages; +SELECT arenadata_toolkit.tracking_set_relstorages('a'); +SHOW arenadata_toolkit.tracking_relstorages; + \c contrib_regression; DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index edaa770e8fe2..f5bb32033fb9 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -69,7 +69,9 @@ check_guc(bool *toolkit_guc, GucSource source, bool *manual) { *toolkit_guc = false; - if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && source != PGC_S_TEST) + if (source != PGC_S_DATABASE && + source != PGC_S_DEFAULT && + source != PGC_S_TEST) return false; return true; diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index b05767f31970..42718988d7c4 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -695,6 
+695,10 @@ track_db(Oid dbid, bool reg) AlterDatabaseSet(&stmt); } + tf_guc_unlock_tracked_once(); + SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? "t" : "f", + PGC_S_DATABASE, PGC_S_DATABASE); + if (!reg) bloom_set_unbind(&tf_shared_state->bloom_set, dbid); else if (!bloom_set_bind(&tf_shared_state->bloom_set, dbid)) @@ -842,38 +846,31 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) A_Const aconst = {.type = T_A_Const,.val = {.type = T_String,.val.str = set ? "t" : "f"}}; - if (Gp_role == GP_ROLE_DISPATCH) - { - AlterDatabaseSetStmt stmt; - VariableSetStmt v_stmt; - - stmt.type = T_AlterDatabaseSetStmt; - stmt.dbname = get_database_name(dbid); - stmt.setstmt = &v_stmt; + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; - if (stmt.dbname == NULL) - ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + stmt.setstmt = &v_stmt; - v_stmt.type = T_VariableSetStmt; - v_stmt.kind = VAR_SET_VALUE; - v_stmt.name = "arenadata_toolkit.tracking_snapshot_on_recovery"; - v_stmt.args = lappend(NIL, &aconst); - v_stmt.is_local = false; + if (stmt.dbname == NULL) + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); - tf_guc_unlock_full_snapshot_on_recovery_once(); + v_stmt.type = T_VariableSetStmt; + v_stmt.kind = VAR_SET_VALUE; + v_stmt.name = "arenadata_toolkit.tracking_snapshot_on_recovery"; + v_stmt.args = lappend(NIL, &aconst); + v_stmt.is_local = false; - AlterDatabaseSet(&stmt); - } + tf_guc_unlock_full_snapshot_on_recovery_once(); - if (Gp_role == GP_ROLE_DISPATCH) - { - char *cmd = - psprintf("select arenadata_toolkit.tracking_set_snapshot_on_recovery(%s, %u)", - set ? 
"true" : "false", dbid); + AlterDatabaseSet(&stmt); - CdbDispatchCommand(cmd, 0, NULL); - } + /* Will set the GUC in caller session only on coordinator */ + tf_guc_unlock_full_snapshot_on_recovery_once(); + SetConfigOption("arenadata_toolkit.tracking_snapshot_on_recovery", set ? "t" : "f", + PGC_S_DATABASE, PGC_S_DATABASE); PG_RETURN_BOOL(true); } @@ -1032,6 +1029,12 @@ track_schema(const char *schemaName, Oid dbid, bool reg) AlterDatabaseSet(&stmt); + /* Will set the GUC in caller session only on coordinator */ + tf_guc_unlock_schemas_once(); + SetConfigOption("arenadata_toolkit.tracking_schemas", + new_schemas ? new_schemas : DEFAULT_TRACKED_SCHEMAS, + PGC_S_DATABASE, PGC_S_DATABASE); + if (current_schemas) pfree(current_schemas); if (new_schemas) @@ -1181,6 +1184,13 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) tf_guc_unlock_relkinds_once(); AlterDatabaseSet(&stmt); + + /* Will set the GUC in caller session only on coordinator */ + tf_guc_unlock_relkinds_once(); + SetConfigOption("arenadata_toolkit.tracking_relkinds", + buf.len ? buf.data : DEFAULT_TRACKED_REL_KINDS, + PGC_S_DATABASE, PGC_S_DATABASE); + pfree(buf.data); PG_RETURN_BOOL(true); @@ -1281,6 +1291,12 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) AlterDatabaseSet(&stmt); + /* Will set the GUC in caller session only on coordinator */ + tf_guc_unlock_relstorages_once(); + SetConfigOption("arenadata_toolkit.tracking_relstorages", + buf.len ? 
buf.data : DEFAULT_TRACKED_REL_STORAGES, + PGC_S_DATABASE, PGC_S_DATABASE); + pfree(buf.data); PG_RETURN_BOOL(true); From 1305ba5154fef6dc2384fe90d6fa1d7980223dc8 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 29 Oct 2024 23:54:09 +0300 Subject: [PATCH 33/69] Remove shared_state_lock --- gpcontrib/arenadata_toolkit/src/file_hook.c | 27 +++++++++++++------ .../arenadata_toolkit/src/include/tf_shmem.h | 1 - gpcontrib/arenadata_toolkit/src/tf_shmem.c | 12 ++------- gpcontrib/arenadata_toolkit/src/track_files.c | 25 ++++++----------- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c b/gpcontrib/arenadata_toolkit/src/file_hook.c index dad745e74289..caaf5a1a9ef6 100644 --- a/gpcontrib/arenadata_toolkit/src/file_hook.c +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -22,17 +22,17 @@ static file_extend_hook_type next_file_extend_hook = NULL; static file_truncate_hook_type next_file_truncate_hook = NULL; static file_unlink_hook_type next_file_unlink_hook = NULL; -static bloom_t * non_committed_bloom = NULL; +static bloom_t *non_committed_bloom = NULL; static Oid non_committed_dbid = InvalidOid; static bool -is_file_node_trackable(RelFileNodeBackend * rnode) +is_file_node_trackable(RelFileNodeBackend *rnode) { return !(rnode->node.dbNode == InvalidOid); } static void -file_node_set(RelFileNodeBackend * rnode) +file_node_set(RelFileNodeBackend *rnode) { if (!is_file_node_trackable(rnode)) return; @@ -40,7 +40,10 @@ file_node_set(RelFileNodeBackend * rnode) bloom_set_set(&tf_shared_state->bloom_set, rnode->node.dbNode, rnode->node.relNode); } -/* 'create' events stored in local bloom and merged only on commit, when changes are already in catalog */ +/* + * 'create' events stored in local bloom and merged only on commit, when + * changes are already in catalog. 
+ */ static void xact_end_create_callback(XactEvent event, void *arg) { @@ -57,6 +60,10 @@ xact_end_create_callback(XactEvent event, void *arg) non_committed_dbid = InvalidOid; } +/* + * Since we can't be sure that created rnode will be committed, the create events + * are stored in a separate bloom filter. + */ static void hook_create(RelFileNodeBackend rnode) { @@ -76,7 +83,8 @@ hook_create(RelFileNodeBackend rnode) } elog(DEBUG1, "hook_create: %d %d %d %d", - rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + rnode.backend, rnode.node.dbNode, + rnode.node.spcNode, rnode.node.relNode); bloom_set(non_committed_bloom, rnode.node.relNode); @@ -89,7 +97,8 @@ hook_extend(RelFileNodeBackend rnode) next_file_extend_hook(rnode); elog(DEBUG1, "hook_extend: %d %d %d %d", - rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + rnode.backend, rnode.node.dbNode, + rnode.node.spcNode, rnode.node.relNode); file_node_set(&rnode); } @@ -101,7 +110,8 @@ hook_truncate(RelFileNodeBackend rnode) next_file_truncate_hook(rnode); elog(DEBUG1, "hook_truncate: %d %d %d %d", - rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + rnode.backend, rnode.node.dbNode, + rnode.node.spcNode, rnode.node.relNode); file_node_set(&rnode); } @@ -113,7 +123,8 @@ hook_unlink(RelFileNodeBackend rnode) next_file_unlink_hook(rnode); elog(DEBUG1, "hook_unlink: %d %d %d %d", - rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); + rnode.backend, rnode.node.dbNode, + rnode.node.spcNode, rnode.node.relNode); drops_track_add(rnode.node); } diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 4e7ee841a3a2..73f8ad42c78c 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -36,7 +36,6 @@ typedef struct } tf_entry_lock_t; extern tf_shared_state_t *tf_shared_state; -extern LWLock
*tf_state_lock; extern LWLock *bloom_set_lock; extern tf_entry_lock_t bloom_locks[]; extern uint64 bloom_hash_seed; diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index fe2530ac4601..1304209c3c61 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -10,7 +10,6 @@ static shmem_startup_hook_type next_shmem_startup_hook = NULL; tf_shared_state_t *tf_shared_state = NULL; -LWLock *tf_state_lock; LWLock *bloom_set_lock; tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; uint64 bloom_hash_seed; @@ -22,7 +21,6 @@ int bloom_hash_num; static void init_lwlocks(void) { - tf_state_lock = LWLockAssign(); bloom_set_lock = LWLockAssign(); for (int i = 0; i < db_track_count; ++i) @@ -122,17 +120,14 @@ tf_shmem_deinit(void) LWLock * LWLockAcquireEntry(Oid dbid, LWLockMode mode) { - LWLockAcquire(tf_state_lock, LW_SHARED); for (int i = 0; i < db_track_count; ++i) { if (bloom_locks[i].dbid == dbid) { LWLockAcquire(bloom_locks[i].lock, mode); - LWLockRelease(tf_state_lock); return bloom_locks[i].lock; } } - LWLockRelease(tf_state_lock); return NULL; } @@ -144,7 +139,7 @@ void LWLockBindEntry(Oid dbid) { int i; - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + for (i = 0; i < db_track_count; ++i) { if (bloom_locks[i].dbid == InvalidOid) @@ -156,7 +151,6 @@ LWLockBindEntry(Oid dbid) if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) pg_atomic_test_set_flag(&tf_shared_state->tracking_error); - LWLockRelease(tf_state_lock); } /* @@ -166,7 +160,7 @@ void LWLockUnbindEntry(Oid dbid) { int i; - LWLockAcquire(tf_state_lock, LW_EXCLUSIVE); + for (i = 0; i < db_track_count; ++i) { if (bloom_locks[i].dbid == dbid) @@ -178,6 +172,4 @@ LWLockUnbindEntry(Oid dbid) if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) pg_atomic_test_set_flag(&tf_shared_state->tracking_error); - - LWLockRelease(tf_state_lock); } diff --git 
a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 42718988d7c4..e1dbe2363295 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -47,14 +47,15 @@ PG_FUNCTION_INFO_V1(tracking_get_track_main); #define GET_TRACK_TUPDESC_LEN 9 +/* Preserved state among the calls of tracking_get_track_main */ typedef struct { Relation pg_class_rel; /* pg_class relation */ - SysScanDesc scan; + SysScanDesc scan; /* for scans of system table */ } tf_main_func_state_t; /* - * Main state during tracking_get_track call. Stores + * Main state during tracking_get_track_main call. Stores * copy of shared Bloom and tracking filtering parameters. */ typedef struct @@ -75,12 +76,12 @@ typedef struct int current_result; int current_row; - SPITupleTable *entry_result; /* results from SPI queries */ + SPITupleTable *entry_result; /* results from SPI queries*/ uint64 entry_processed; int entry_current_row; FmgrInfo *inputFuncInfos; /* FuncInfos for parse string to Datum values - * transformation */ + * transformation when using CdbDispatch* */ Oid *typIOParams; } tf_get_func_state_t; @@ -157,7 +158,7 @@ split_string_to_list(const char *input) /* * Tracked relkinds and relstorage types - * are coded into 64 bits via ascii offtests. + * are coded into 64 bits via ascii offsets. 
*/ static uint64 list_to_bits(const char *input) @@ -176,14 +177,7 @@ list_to_bits(const char *input) while (token != NULL) { if (*token != '\0') - { - char c = *token; - - if (c >= 'a' && c <= 'z') - bits |= (1UL << (c - 'a')); - else - bits |= (1UL << (26 + (c - 'A'))); - } + bits |= (1UL << (*token - 'A')); token = strtok(NULL, ","); } @@ -314,10 +308,7 @@ schema_is_tracked(Oid schema) static bool kind_is_tracked(char type, uint64 allowed_kinds) { - if (type >= 'a' && type <= 'z') - return (allowed_kinds & (1UL << (type - 'a'))) != 0; - else - return (allowed_kinds & (1UL << (26 + (type - 'A')))) != 0; + return (allowed_kinds & (1UL << (type - 'A'))) != 0; } /* From 919f7cdd0f6327516164d37ed004784680d74039 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 30 Oct 2024 11:00:48 +0300 Subject: [PATCH 34/69] Make bloom_set singleton --- .../src/arenadata_toolkit_worker.c | 4 +- gpcontrib/arenadata_toolkit/src/bloom.c | 2 +- gpcontrib/arenadata_toolkit/src/bloom_set.c | 89 +++++++++++-------- gpcontrib/arenadata_toolkit/src/file_hook.c | 6 +- .../arenadata_toolkit/src/include/bloom.h | 2 +- .../arenadata_toolkit/src/include/bloom_set.h | 17 ++-- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 2 +- gpcontrib/arenadata_toolkit/src/track_files.c | 16 ++-- 8 files changed, 75 insertions(+), 63 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index c5318f94e16b..a3dbacf3d862 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -138,8 +138,8 @@ track_dbs(List *tracked_dbs) { trackedDb = (tracked_db_t *) lfirst(cell); - bloom_set_bind(&tf_shared_state->bloom_set, trackedDb->dbid); - bloom_set_trigger_bits(&tf_shared_state->bloom_set, trackedDb->dbid, + bloom_set_bind(trackedDb->dbid); + bloom_set_trigger_bits(trackedDb->dbid, trackedDb->get_full_snapshot_on_recovery); } } diff --git 
a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index ad025bdbb7a1..509f0e6de04a 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -103,7 +103,7 @@ bloom_isset(bloom_t * bloom, Oid relnode) * relnode - relation file node OID to insert */ void -bloom_set(bloom_t * bloom, Oid relnode) +bloom_set_bits(bloom_t * bloom, Oid relnode) { uint32 hashes[MAX_BLOOM_HASH_FUNCS]; diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 19158913228b..a3e83483ea61 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -2,12 +2,26 @@ * Set of blooms. Main entry point to find a bloom and work with it. * Used to track create, extend, truncate events. */ - #include "bloom_set.h" #include "tf_shmem.h" #define BLOOM_ENTRY_GET(set, i) (void *)(set->bloom_entries + i * FULL_BLOOM_ENTRY_SIZE(set->bloom_size)); +/* + * bloom_set api assumes that we are working with the single bloom set. + * This object is considered as singleton. 
+ */ +bloom_set_t *bloom_set = NULL; + +static inline void +bloom_set_check_state(void) +{ + if (tf_shared_state == NULL || bloom_set == NULL) + ereport(ERROR, + (errmsg("Failed to access shared memory due to wrong extension initialization"), + errhint("Load extension's code through shared_preload_library configuration"))); +} + static void bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) { @@ -16,8 +30,10 @@ bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) } void -bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_t * bloom_set) +bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size) { + bloom_set = &tf_shared_state->bloom_set; + bloom_set->bloom_count = bloom_count; bloom_set->bloom_size = bloom_size; @@ -34,7 +50,7 @@ bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size, bloom_set_ * That's a simple linear search, should be reworked (depends on target dbs count). */ static bloom_entry_t * -find_bloom_entry(bloom_set_t * bloom_set, Oid dbid) +find_bloom_entry(Oid dbid) { bloom_entry_t *bloom_entry; int i = 0; @@ -54,18 +70,20 @@ find_bloom_entry(bloom_set_t * bloom_set, Oid dbid) /* Bind available filter to given dbid */ bool -bloom_set_bind(bloom_set_t * bloom_set, Oid dbid) +bloom_set_bind(Oid dbid) { bloom_entry_t *bloom_entry; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { LWLockRelease(bloom_set_lock); return true; } - bloom_entry = find_bloom_entry(bloom_set, InvalidOid); + bloom_entry = find_bloom_entry(InvalidOid); if (bloom_entry == NULL) { LWLockRelease(bloom_set_lock); @@ -83,14 +101,16 @@ bloom_set_bind(bloom_set_t * bloom_set, Oid dbid) * full snapshots. 
*/ bool -bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on) +bloom_set_trigger_bits(Oid dbid, bool on) { bloom_entry_t *bloom_entry; LWLock *entry_lock; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { if (on) @@ -114,12 +134,14 @@ bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on) /* Unbind used filter by given dbid */ void -bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid) +bloom_set_unbind(Oid dbid) { bloom_entry_t *bloom_entry; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry == NULL) { LWLockRelease(bloom_set_lock); @@ -133,17 +155,19 @@ bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid) /* Find bloom by dbid, set bit based on relNode hash */ void -bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) +bloom_set_set(Oid dbid, Oid relNode) { bloom_entry_t *bloom_entry; LWLock *entry_lock; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_s, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { - bloom_set(&bloom_entry->bloom, relNode); + bloom_set_bits(&bloom_entry->bloom, relNode); } if (entry_lock) LWLockRelease(entry_lock); @@ -152,14 +176,16 @@ bloom_set_set(bloom_set_t * bloom_s, Oid dbid, Oid relNode) /* Find bloom by dbid, copy all bytes to new filter, clear old (but keep it) */ bool -bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t * dest) +bloom_set_move(Oid dbid, bloom_t * dest) { bloom_entry_t *bloom_entry; LWLock *entry_lock; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = 
find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { bloom_copy(&bloom_entry->bloom, dest); @@ -178,20 +204,22 @@ bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t * dest) /* Find bloom by dbid, merge bytes from another bloom to it */ bool -bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) +bloom_set_merge(Oid dbid, bloom_t * from) { bloom_entry_t *bloom_entry; LWLock *entry_lock; - if (!m_bloom || !bloom_set) + bloom_set_check_state(); + + if (!from) return false; LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { - bloom_merge(&bloom_entry->bloom, m_bloom); + bloom_merge(&bloom_entry->bloom, from); if (entry_lock) LWLockRelease(entry_lock); LWLockRelease(bloom_set_lock); @@ -205,15 +233,17 @@ bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom) } bool -bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) +bloom_set_is_all_bits_triggered(Oid dbid) { bloom_entry_t *bloom_entry; bool is_triggered = false; LWLock *entry_lock; + bloom_set_check_state(); + LWLockAcquire(bloom_set_lock, LW_SHARED); entry_lock = LWLockAcquireEntry(dbid, LW_SHARED); - bloom_entry = find_bloom_entry(bloom_set, dbid); + bloom_entry = find_bloom_entry(dbid); if (bloom_entry) { is_triggered = bloom_entry->bloom.is_set_all; @@ -224,20 +254,3 @@ bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid) return is_triggered; } - -int -bloom_set_count(bloom_set_t * bloom_set) -{ - int count = 0; - bloom_entry_t *bloom_entry; - - LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE); - for (int i = 0; i < bloom_set->bloom_count; ++i) - { - bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); - if (bloom_entry->dbid != InvalidOid) - ++count; - } - LWLockRelease(bloom_set_lock); - return count; -} diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c 
b/gpcontrib/arenadata_toolkit/src/file_hook.c index caaf5a1a9ef6..a26181c1f07f 100644 --- a/gpcontrib/arenadata_toolkit/src/file_hook.c +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -37,7 +37,7 @@ file_node_set(RelFileNodeBackend *rnode) if (!is_file_node_trackable(rnode)) return; - bloom_set_set(&tf_shared_state->bloom_set, rnode->node.dbNode, rnode->node.relNode); + bloom_set_set(rnode->node.dbNode, rnode->node.relNode); } /* @@ -53,7 +53,7 @@ xact_end_create_callback(XactEvent event, void *arg) elog(DEBUG1, "xact_end_create_callback"); if (event == XACT_EVENT_COMMIT) - bloom_set_merge(&tf_shared_state->bloom_set, non_committed_dbid, non_committed_bloom); + bloom_set_merge(non_committed_dbid, non_committed_bloom); pfree(non_committed_bloom); non_committed_bloom = NULL; @@ -86,7 +86,7 @@ hook_create(RelFileNodeBackend rnode) rnode.backend, rnode.node.dbNode, rnode.node.spcNode, rnode.node.relNode); - bloom_set(non_committed_bloom, rnode.node.relNode); + bloom_set_bits(non_committed_bloom, rnode.node.relNode); } diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 5a06b08f2d2d..24cf2da5386d 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -18,7 +18,7 @@ typedef struct void bloom_init(const uint32 bloom_size, bloom_t * bloom); bool bloom_isset(bloom_t * bloom, Oid relnode); -void bloom_set(bloom_t * bloom, Oid relnode); +void bloom_set_bits(bloom_t * bloom, Oid relnode); void bloom_set_all(bloom_t * bloom); void bloom_clear(bloom_t * bloom); void bloom_merge(bloom_t * dst, bloom_t * src); diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index 3761f256f49b..d5ee800a353f 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -24,14 +24,13 @@ typedef struct * bloom_entry_t */ } bloom_set_t; 
-void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size, bloom_set_t * bloom_set); -bool bloom_set_bind(bloom_set_t * bloom_set, Oid dbid); -void bloom_set_unbind(bloom_set_t * bloom_set, Oid dbid); -void bloom_set_set(bloom_set_t * bloom_set, Oid dbid, Oid relNode); -bool bloom_set_move(bloom_set_t * bloom_set, Oid dbid, bloom_t * dest); -bool bloom_set_merge(bloom_set_t * bloom_set, Oid dbid, bloom_t * m_bloom); -bool bloom_set_trigger_bits(bloom_set_t * bloom_set, Oid dbid, bool on); -bool bloom_set_is_all_bits_triggered(bloom_set_t * bloom_set, Oid dbid); -int bloom_set_count(bloom_set_t * bloom_set); +void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size); +bool bloom_set_bind(Oid dbid); +void bloom_set_unbind(Oid dbid); +void bloom_set_set(Oid dbid, Oid relNode); +bool bloom_set_move(Oid dbid, bloom_t * dest); +bool bloom_set_merge(Oid dbid, bloom_t * from); +bool bloom_set_trigger_bits(Oid dbid, bool on); +bool bloom_set_is_all_bits_triggered(Oid dbid); #endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 1304209c3c61..17ddfc6e0beb 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -83,7 +83,7 @@ tf_shmem_hook(void) { pg_atomic_init_flag(&tf_shared_state->tracking_is_initialized); pg_atomic_init_flag(&tf_shared_state->tracking_error); - bloom_set_init(db_track_count, bloom_size, &tf_shared_state->bloom_set); + bloom_set_init(db_track_count, bloom_size); } init_lwlocks(); diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index e1dbe2363295..3b576e12dffa 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -111,9 +111,9 @@ xact_end_get_callback(XactEvent event, void *arg) if (event == XACT_EVENT_ABORT) { if (tf_get_global_state.rollback_bloom) - 
bloom_set_merge(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.rollback_bloom); + bloom_set_merge(MyDatabaseId, tf_get_global_state.rollback_bloom); else - bloom_set_merge(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom); + bloom_set_merge(MyDatabaseId, tf_get_global_state.bloom); drops_track_move_undo(tf_get_global_state.drops, MyDatabaseId); } @@ -342,7 +342,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) if (tf_get_global_state.bloom == NULL) { tf_get_global_state.bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); - if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) + if (!bloom_set_move(MyDatabaseId, tf_get_global_state.bloom)) ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), errmsg("database %u is not tracked", MyDatabaseId), @@ -363,7 +363,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) bloom_copy(tf_get_global_state.bloom, tf_get_global_state.rollback_bloom); } bloom_clear(tf_get_global_state.bloom); - if (!bloom_set_move(&tf_shared_state->bloom_set, MyDatabaseId, tf_get_global_state.bloom)) + if (!bloom_set_move(MyDatabaseId, tf_get_global_state.bloom)) ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), errmsg("database %u is not tracked", MyDatabaseId), @@ -691,8 +691,8 @@ track_db(Oid dbid, bool reg) PGC_S_DATABASE, PGC_S_DATABASE); if (!reg) - bloom_set_unbind(&tf_shared_state->bloom_set, dbid); - else if (!bloom_set_bind(&tf_shared_state->bloom_set, dbid)) + bloom_set_unbind(dbid); + else if (!bloom_set_bind(dbid)) ereport(ERROR, (errmsg("[arenadata_toolkit] exceeded maximum number of tracked databases"))); } @@ -1303,7 +1303,7 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); - if (!bloom_set_trigger_bits(&tf_shared_state->bloom_set, dbid, true)) + if (!bloom_set_trigger_bits(dbid, true)) ereport(ERROR, (errmsg("Failed to find corresponding filter to database %u", dbid))); @@ -1327,7 +1327,7 @@ tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - is_triggered = bloom_set_is_all_bits_triggered(&tf_shared_state->bloom_set, dbid); + is_triggered = bloom_set_is_all_bits_triggered(dbid); elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %u", is_triggered, dbid); From af8c73b2c87efe27bffd6572c76903f68dd21aba Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 30 Oct 2024 16:46:45 +0300 Subject: [PATCH 35/69] dbid inline function --- gpcontrib/arenadata_toolkit/src/track_files.c | 52 +++++++------------ 1 file changed, 19 insertions(+), 33 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 3b576e12dffa..038f56e4f3b1 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -96,6 +96,12 @@ tf_check_shmem_error(void) errhint("Load extension's code through shared_preload_library configuration"))); } +static inline Oid +get_dbid(Oid dbid) +{ + return (dbid == InvalidOid) ? MyDatabaseId : dbid; +} + /* * In case of abort bloom is merged back as well as drops track. 
*/ @@ -371,7 +377,7 @@ tracking_get_track_main(PG_FUNCTION_ARGS) "to enable tracking"))); } /* initial snapshot shouldn't return drops */ - if (tf_get_global_state.bloom && !tf_get_global_state.bloom->is_set_all) + if (!tf_get_global_state.bloom->is_set_all) { tf_get_global_state.drops = drops_track_move(MyDatabaseId); tf_get_global_state.next_drop = list_head(tf_get_global_state.drops); @@ -408,11 +414,8 @@ tracking_get_track_main(PG_FUNCTION_ARGS) state = (tf_main_func_state_t *) palloc0(sizeof(tf_main_func_state_t)); funcctx->user_fctx = (void *) state; - if (tf_get_global_state.bloom) - { - state->pg_class_rel = heap_open(RelationRelationId, AccessShareLock); - state->scan = systable_beginscan(state->pg_class_rel, InvalidOid, false, NULL, 0, NULL); - } + state->pg_class_rel = heap_open(RelationRelationId, AccessShareLock); + state->scan = systable_beginscan(state->pg_class_rel, InvalidOid, false, NULL, 0, NULL); MemoryContextSwitchTo(oldcontext); } @@ -700,13 +703,11 @@ track_db(Oid dbid, bool reg) Datum tracking_register_db_main(PG_FUNCTION_ARGS) { - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); bool reg = PG_GETARG_BOOL(0); tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); track_db(dbid, reg); @@ -764,7 +765,7 @@ is_initialized() Datum tracking_register_db(PG_FUNCTION_ARGS) { - Oid dbid = PG_GETARG_OID(0); + Oid dbid = get_dbid(PG_GETARG_OID(0)); tf_check_shmem_error(); @@ -773,7 +774,6 @@ tracking_register_db(PG_FUNCTION_ARGS) (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again"))); - dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); track_db(dbid, true); @@ -797,7 +797,7 @@ tracking_register_db(PG_FUNCTION_ARGS) Datum tracking_unregister_db(PG_FUNCTION_ARGS) { - Oid dbid = PG_GETARG_OID(0); + Oid dbid = get_dbid(PG_GETARG_OID(0)); tf_check_shmem_error(); @@ -806,7 +806,6 @@ tracking_unregister_db(PG_FUNCTION_ARGS) (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again"))); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); track_db(dbid, false); @@ -828,12 +827,10 @@ Datum tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) { bool set = PG_GETARG_BOOL(0); - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - A_Const aconst = {.type = T_A_Const,.val = {.type = T_String,.val.str = set ? "t" : "f"}}; @@ -1036,12 +1033,10 @@ Datum tracking_register_schema(PG_FUNCTION_ARGS) { const char *schema_name = NameStr(*PG_GETARG_NAME(0)); - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), @@ -1058,12 +1053,10 @@ Datum tracking_unregister_schema(PG_FUNCTION_ARGS) { const char *schema_name = NameStr(*PG_GETARG_NAME(0)); - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; - if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), @@ -1103,7 +1096,7 @@ Datum tracking_set_relkinds(PG_FUNCTION_ARGS) { char *relkinds_str = NameStr(*PG_GETARG_NAME(0)); - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); char *token; char *str_copy; bool seen_relkinds[256] = {false}; @@ -1114,8 +1107,6 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - initStringInfo(&buf); str_copy = pstrdup(relkinds_str); token = strtok(str_copy, ","); @@ -1208,7 +1199,7 @@ Datum tracking_set_relstorages(PG_FUNCTION_ARGS) { char *relstorages_str = NameStr(*PG_GETARG_NAME(0)); - Oid dbid = PG_GETARG_OID(1); + Oid dbid = get_dbid(PG_GETARG_OID(1)); char *token; char *str_copy; bool seen_relstorages[256] = {false}; @@ -1219,8 +1210,6 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; - initStringInfo(&buf); str_copy = pstrdup(relstorages_str); token = strtok(str_copy, ","); @@ -1296,11 +1285,10 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) Datum tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) { - Oid dbid = PG_GETARG_OID(0); + Oid dbid = get_dbid(PG_GETARG_OID(0)); tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? MyDatabaseId : dbid; elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); if (!bloom_set_trigger_bits(dbid, true)) @@ -1320,13 +1308,11 @@ tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) Datum tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) { - Oid dbid = PG_GETARG_OID(0); + Oid dbid = get_dbid(PG_GETARG_OID(0)); bool is_triggered = false; tf_check_shmem_error(); - dbid = (dbid == InvalidOid) ? 
MyDatabaseId : dbid; - is_triggered = bloom_set_is_all_bits_triggered(dbid); elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %u", is_triggered, dbid); From f4e252cb314a2f020f814fcbf7b7b91ca16b8a61 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Sun, 3 Nov 2024 11:26:31 +0300 Subject: [PATCH 36/69] Move init_locks under if --- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 17ddfc6e0beb..3281b8fec1f0 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -72,7 +72,6 @@ tf_shmem_hook(void) bool found; Size size; - init_bloom_invariants(); size = tf_shmem_calc_size(); LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); @@ -83,10 +82,12 @@ tf_shmem_hook(void) { pg_atomic_init_flag(&tf_shared_state->tracking_is_initialized); pg_atomic_init_flag(&tf_shared_state->tracking_error); + bloom_set_init(db_track_count, bloom_size); - } - init_lwlocks(); + init_lwlocks(); + init_bloom_invariants(); + } LWLockRelease(AddinShmemInitLock); From b694b96b167fa886c748f38eb0a7b188a4e38787 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 8 Nov 2024 00:10:35 +0300 Subject: [PATCH 37/69] Use versioning --- gpcontrib/arenadata_toolkit/README.md | 2 +- .../arenadata_toolkit--1.6--1.7.sql | 33 +- .../expected/arenadata_toolkit_test.out | 28 +- .../expected/arenadata_toolkit_tracking.out | 71 ++- .../sql/arenadata_toolkit_tracking.sql | 30 +- gpcontrib/arenadata_toolkit/src/bloom.c | 93 +++- gpcontrib/arenadata_toolkit/src/bloom_set.c | 235 ++++++--- gpcontrib/arenadata_toolkit/src/drops_track.c | 9 +- gpcontrib/arenadata_toolkit/src/file_hook.c | 2 +- .../arenadata_toolkit/src/include/bloom.h | 17 +- .../arenadata_toolkit/src/include/bloom_set.h | 40 +- .../arenadata_toolkit/src/include/tf_shmem.h | 21 +- 
gpcontrib/arenadata_toolkit/src/tf_shmem.c | 114 +--- gpcontrib/arenadata_toolkit/src/track_files.c | 497 +++++++++--------- 14 files changed, 654 insertions(+), 538 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index 776508132e76..ae80160a5ee4 100644 --- a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -1,4 +1,4 @@ -## arenadata_toolkit - database objects tracking extenstion for GPDB +## arenadata_toolkit - database objects tracking extension for GPDB `arenadata_toolkit` starting from version 1.7 represents a GPDB extension that efficiently tracks file system changes (extend, truncate, create, unlink operations) using space-efficient Bloom filters stored in shared memory. This extension is particularly useful for monitoring and maintaining database files sizes across a distributed environment. diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql index 9bc6cf57b3ef..a786f29c2c19 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql @@ -72,19 +72,42 @@ returns TABLE(segindex INT, is_initialized BOOL) AS '$libdir/arenadata_toolkit', REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_is_segment_initialized() FROM public; -CREATE FUNCTION arenadata_toolkit.tracking_get_track_main() +CREATE FUNCTION arenadata_toolkit.tracking_track_version() +returns BIGINT AS '$libdir/arenadata_toolkit', +'tracking_track_version' LANGUAGE C STABLE EXECUTE ON MASTER; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_track_version() FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', -'tracking_get_track_main' 
LANGUAGE C; +'tracking_get_track' LANGUAGE C EXECUTE ON MASTER; -CREATE FUNCTION arenadata_toolkit.tracking_get_track() +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', -'tracking_get_track' LANGUAGE C EXECUTE ON master; +'tracking_get_track' LANGUAGE C EXECUTE ON ALL SEGMENTS; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) FROM public; CREATE VIEW arenadata_toolkit.tables_track AS SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid -FROM arenadata_toolkit.tracking_get_track() AS t +FROM arenadata_toolkit.tracking_get_track_master(arenadata_toolkit.tracking_track_version()) AS t +LEFT JOIN pg_class AS c + ON c.reltoastrelid = t.relid AND t.relkind = 't' +LEFT JOIN pg_index AS i + ON i.indexrelid = t.relid AND t.relkind = 'i' +LEFT JOIN pg_catalog.pg_appendonly AS vm + ON vm.visimaprelid = t.relid AND t.relkind = 'M' +LEFT JOIN pg_catalog.pg_appendonly AS blk + ON blk.blkdirrelid = t.relid AND t.relkind = 'b' +LEFT JOIN pg_catalog.pg_appendonly AS seg + ON seg.segrelid = t.relid AND t.relkind = 'o' +UNION ALL +SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid +FROM arenadata_toolkit.tracking_get_track_segments(arenadata_toolkit.tracking_track_version()) AS t LEFT JOIN pg_class AS c ON c.reltoastrelid = t.relid AND t.relkind = 't' LEFT JOIN pg_index AS i diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index 8ca1ffd02bf0..69bc19edb28d 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ 
b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -115,8 +115,8 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o is_initial_snapshot_triggered | table | v | operation_exclude | table | a | tables_track | table | v | - tracking_get_track | proc | - | - tracking_get_track_main | proc | - | + tracking_get_track_master | proc | - | {owner=X/owner} + tracking_get_track_segments | proc | - | {owner=X/owner} tracking_is_initial_snapshot_triggered | proc | - | {owner=X/owner} tracking_is_initial_snapshot_triggered_master | proc | - | tracking_is_initial_snapshot_triggered_segments | proc | - | @@ -127,10 +127,11 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + tracking_track_version | proc | - | {owner=X/owner} tracking_trigger_initial_snapshot | proc | - | tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | proc | - | {owner=X/owner} -(41 rows) +(42 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -155,8 +156,8 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e is_initial_snapshot_triggered | table | arenadata_toolkit | e tables_track | table | arenadata_toolkit | e - tracking_get_track | proc | arenadata_toolkit | e - tracking_get_track_main | proc | arenadata_toolkit | e + tracking_get_track_master | proc | arenadata_toolkit | e + tracking_get_track_segments | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e @@ -167,10 
+168,11 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_track_version | proc | arenadata_toolkit | e tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(31 rows) +(32 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; @@ -212,8 +214,8 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o is_initial_snapshot_triggered | table | v | operation_exclude | table | a | {owner=arwdDxt/owner} tables_track | table | v | - tracking_get_track | proc | - | - tracking_get_track_main | proc | - | + tracking_get_track_master | proc | - | {owner=X/owner} + tracking_get_track_segments | proc | - | {owner=X/owner} tracking_is_initial_snapshot_triggered | proc | - | {owner=X/owner} tracking_is_initial_snapshot_triggered_master | proc | - | tracking_is_initial_snapshot_triggered_segments | proc | - | @@ -224,10 +226,11 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + tracking_track_version | proc | - | {owner=X/owner} tracking_trigger_initial_snapshot | proc | - | tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | proc | - | {owner=X/owner} -(38 rows) +(39 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -252,8 +255,8 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e 
is_initial_snapshot_triggered | table | arenadata_toolkit | e tables_track | table | arenadata_toolkit | e - tracking_get_track | proc | arenadata_toolkit | e - tracking_get_track_main | proc | arenadata_toolkit | e + tracking_get_track_master | proc | arenadata_toolkit | e + tracking_get_track_segments | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e @@ -264,10 +267,11 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_track_version | proc | arenadata_toolkit | e tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(31 rows) +(32 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index bc0f70875420..faeadc37efab 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -7,10 +7,9 @@ CREATE DATABASE tracking_db1; \c tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. 
Test getting track on not registered database; -SELECT * FROM arenadata_toolkit.tracking_get_track(); -ERROR: database 102559 is not tracked +SELECT * FROM arenadata_toolkit.tables_track; +ERROR: database 167165 is not tracked HINT: Call 'arenadata_toolkit.tracking_register_db()'to enable tracking -CONTEXT: SQL statement "SELECT * FROM arenadata_toolkit.tracking_get_track_main()" SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); pg_sleep ---------- @@ -39,8 +38,8 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- 3. If user hasn't registered any schema, the default schemas are used. -- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next --- call of tracking_get_track() will return nothing if database is not modified in between. -SELECT count(*) FROM arenadata_toolkit.tracking_get_track(); +-- track acquisition will return nothing if database is not modified in between. +SELECT count(*) FROM arenadata_toolkit.tables_track; count ------- 1056 @@ -57,19 +56,19 @@ SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); -- Getting the track. Only created table with size 0 is expected; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+------+-------+-------+---------+------------ tracking_t1 | 0 | a | -1 | r | c - tracking_t1 | 0 | a | 0 | r | c tracking_t1 | 0 | a | 1 | r | c tracking_t1 | 0 | a | 2 | r | c + tracking_t1 | 0 | a | 0 | r | c (4 rows) -- 5. Test data extending event. Bloom should capture it. 
INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ tracking_t1 | 134064 | a | 0 | r | c @@ -80,13 +79,17 @@ FROM arenadata_toolkit.tracking_get_track(); -- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. DROP TABLE arenadata_toolkit.tracking_t1; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage ---------+------+-------+-------+---------+------------ | 0 | d | -1 | | | 0 | d | -1 | | | 0 | d | -1 | | | 0 | d | -1 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | | 0 | d | 0 | | | 0 | d | 0 | | | 0 | d | 0 | | @@ -95,10 +98,6 @@ FROM arenadata_toolkit.tracking_get_track(); | 0 | d | 1 | | | 0 | d | 1 | | | 0 | d | 1 | | - | 0 | d | 2 | | - | 0 | d | 2 | | - | 0 | d | 2 | | - | 0 | d | 2 | | (16 rows) -- 8. Test actions on commit and rollback @@ -108,12 +107,12 @@ INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); -- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ tracking_t1 | 0 | a | -1 | r | c - tracking_t1 | 134064 | a | 0 | r | c tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 134064 | a | 0 | r | c tracking_t1 | 133064 | a | 2 | r | c (4 rows) @@ -121,7 +120,7 @@ ROLLBACK; -- If commits, filter is cleared. 
BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ tracking_t1 | 0 | a | -1 | r | c @@ -132,51 +131,51 @@ FROM arenadata_toolkit.tracking_get_track(); COMMIT; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage ---------+------+-------+-------+---------+------------ (0 rows) --- 9. Test repetitive track call within the same transaction. In case of --- rollback only first changes should be present. +-- 9. Test repetitive track call within the same transaction. All the +-- calls should return the same relation set. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ + tracking_t1 | 146096 | a | 2 | r | c tracking_t1 | 147576 | a | 0 | r | c tracking_t1 | 147112 | a | 1 | r | c - tracking_t1 | 146096 | a | 2 | r | c (3 rows) CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ - tracking_t1 | 161088 | a | 0 | r | c - tracking_t2 | 229376 | a | 0 | r | h - tracking_t1 | 160696 | a | 1 | r | c tracking_t2 | 229376 | a | 1 | 
r | h - tracking_t1 | 159128 | a | 2 | r | c + tracking_t1 | 160696 | a | 1 | r | c + tracking_t2 | 229376 | a | 0 | r | h + tracking_t1 | 161088 | a | 0 | r | c tracking_t2 | 229376 | a | 2 | r | h + tracking_t1 | 159128 | a | 2 | r | c (6 rows) ROLLBACK; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ | 0 | d | -1 | | - tracking_t1 | 161088 | a | 0 | r | c | 0 | d | 0 | | - tracking_t1 | 160696 | a | 1 | r | c - | 0 | d | 1 | | - tracking_t1 | 159128 | a | 2 | r | c + tracking_t1 | 161088 | a | 0 | r | c | 0 | d | 2 | | + tracking_t1 | 159128 | a | 2 | r | c + | 0 | d | 1 | | + tracking_t1 | 160696 | a | 1 | r | c (7 rows) -- 10. Test relkind filtering. @@ -199,17 +198,17 @@ SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); (1 row) SELECT size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; size | state | segid | relkind | relstorage ---------+-------+-------+---------+------------ 32768 | a | -1 | i | h 32768 | a | -1 | i | h - 1638400 | a | 0 | i | h - 65536 | a | 0 | i | h - 1638400 | a | 1 | i | h 65536 | a | 1 | i | h - 1638400 | a | 2 | i | h + 1638400 | a | 1 | i | h 65536 | a | 2 | i | h + 1638400 | a | 2 | i | h + 65536 | a | 0 | i | h + 1638400 | a | 0 | i | h (8 rows) DROP TABLE arenadata_toolkit.tracking_t1; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 8f6953a47e7b..b332bc41cb7b 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -16,7 +16,7 @@ CREATE DATABASE tracking_db1; CREATE EXTENSION arenadata_toolkit; -- 1. 
Test getting track on not registered database; -SELECT * FROM arenadata_toolkit.tracking_get_track(); +SELECT * FROM arenadata_toolkit.tables_track; SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); SELECT arenadata_toolkit.tracking_register_db(); @@ -28,8 +28,8 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- 3. If user hasn't registered any schema, the default schemas are used. -- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next --- call of tracking_get_track() will return nothing if database is not modified in between. -SELECT count(*) FROM arenadata_toolkit.tracking_get_track(); +-- track acquisition will return nothing if database is not modified in between. +SELECT count(*) FROM arenadata_toolkit.tables_track; -- 4. Create table in specific schema and register that schema. CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) @@ -39,17 +39,17 @@ SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); -- Getting the track. Only created table with size 0 is expected; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; -- 5. Test data extending event. Bloom should capture it. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; -- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. DROP TABLE arenadata_toolkit.tracking_t1; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; -- 8. 
Test actions on commit and rollback CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) @@ -59,33 +59,33 @@ INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); -- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; ROLLBACK; -- If commits, filter is cleared. BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; COMMIT; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; --- 9. Test repetitive track call within the same transaction. In case of --- rollback only first changes should be present. +-- 9. Test repetitive track call within the same transaction. All the +-- calls should return the same relation set. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); BEGIN; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; ROLLBACK; SELECT relname, size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; -- 10. Test relkind filtering. 
CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) @@ -98,7 +98,7 @@ SELECT arenadata_toolkit.tracking_register_schema('pg_aoseg'); SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); SELECT size, state, segid, relkind, relstorage -FROM arenadata_toolkit.tracking_get_track(); +FROM arenadata_toolkit.tables_track; DROP TABLE arenadata_toolkit.tracking_t1; diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index 509f0e6de04a..38c2f8ea9cc0 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -1,17 +1,24 @@ /* * Simple bloom filter without using postgres primitives. */ + +#include + +#include "arenadata_toolkit_guc.h" #include "bloom.h" #include "hashimpl.h" #include "tf_shmem.h" +uint64 bloom_hash_seed; +int bloom_hash_num; + static inline uint32 mod_m(uint32 val, uint64 m) { - Assert(m <= PG_UINT32_MAX + UINT64CONST(1)); - Assert(((m - 1) & m) == 0); - - return val & (m - 1); + if (((m - 1) & m) == 0) + return val & (m - 1); + else + return val % m; } /* @@ -85,7 +92,7 @@ bloom_isset(bloom_t * bloom, Oid relnode) for (int i = 0; i < bloom_hash_num; ++i) { - if (!(bloom->map[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) + if (!(bloom->current_bloom[hashes[i] >> 3] & (1 << (hashes[i] & 7)))) return false; } return true; @@ -110,7 +117,7 @@ bloom_set_bits(bloom_t * bloom, Oid relnode) tracking_hashes(relnode, bloom->size, hashes); for (int i = 0; i < bloom_hash_num; ++i) { - bloom->map[hashes[i] >> 3] |= 1 << (hashes[i] & 7); + bloom->current_bloom[hashes[i] >> 3] |= 1 << (hashes[i] & 7); } } @@ -118,36 +125,98 @@ void bloom_init(const uint32 bloom_size, bloom_t * bloom) { bloom->size = bloom_size; + bloom->current_bloom = bloom->map; bloom_clear(bloom); } +/* + * Initialize optimal Bloom filter parameters + * + * This function calculates and sets optimal parameters for the Bloom filter + * based on established widespread principles. 
+ * + * Calculates the optimal number of hash functions using the formula: + * k = (m/n)ln(2), which minimizes the false positive probability + * p = (1 - e^(-kn/m))^k. + * where: + * - m = total_bits (size of bit array) + * - n = TOTAL_ELEMENTS (expected number of insertions) + * + * Initializes bloom_hash_seed with a random value to prevent deterministic + * hash collisions and ensure independent hash distributions across runs. + */ +void +init_bloom_invariants(void) +{ + int k = rint(log(2.0) * (bloom_size * 8) / TOTAL_ELEMENTS); + + bloom_hash_num = Max(1, Min(k, MAX_BLOOM_HASH_FUNCS)); + bloom_hash_seed = (uint64) random(); +} + void bloom_set_all(bloom_t * bloom) { - memset(bloom->map, 0xFF, bloom->size); + memset(bloom->current_bloom, 0xFF, bloom->size); bloom->is_set_all = 1; } void bloom_clear(bloom_t * bloom) { - memset(bloom->map, 0, bloom->size); + memset(bloom->current_bloom, 0, bloom->size); bloom->is_set_all = 0; } void bloom_merge(bloom_t * dst, bloom_t * src) { - for (uint32_t i = 0; i < dst->size; i++) - dst->map[i] |= src->map[i]; if (src->is_set_all) + { + memset(dst->current_bloom, 0xFF, dst->size); dst->is_set_all = src->is_set_all; + return; + } + + for (uint32 i = 0; i < dst->size; i++) + dst->current_bloom[i] |= src->current_bloom[i]; } void -bloom_copy(bloom_t * src, bloom_t * dest) +bloom_copy(bloom_t * dest, bloom_t * src) { dest->size = src->size; - memcpy(dest->map, src->map, src->size); + memcpy(dest->current_bloom, src->current_bloom, src->size); dest->is_set_all = src->is_set_all; } + +void +bloom_switch_current(bloom_t * bloom) +{ + uint8 *map_base = bloom->map; + uint8 *map_off = bloom->map + bloom->size; + + bloom->current_bloom = (bloom->current_bloom == map_base) ? map_off : map_base; + bloom->is_set_all = false; +} + +uint8 * +bloom_get_other(bloom_t * bloom) +{ + uint8 *map_base = bloom->map; + uint8 *map_off = bloom->map + bloom->size; + + return (bloom->current_bloom == map_base) ? 
map_off : map_base; +} + +void +bloom_merge_internal(bloom_t * bloom) +{ + if (bloom->is_set_all) + return; + + uint8 *bloom_other = bloom_get_other(bloom); + + for (uint32 i = 0; i < bloom->size; i++) + bloom->current_bloom[i] |= bloom_other[i]; +} diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index a3e83483ea61..2580277657c6 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -2,10 +2,24 @@ * Set of blooms. Main entry point to find a bloom and work with it. * Used to track create, extend, truncate events. */ +#include "arenadata_toolkit_guc.h" #include "bloom_set.h" #include "tf_shmem.h" -#define BLOOM_ENTRY_GET(set, i) (void *)(set->bloom_entries + i * FULL_BLOOM_ENTRY_SIZE(set->bloom_size)); +LWLock *bloom_set_lock; +tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; + +static inline Size +bloom_entry_size(uint32 size) +{ + return (offsetof(bloom_entry_t, bloom) + full_bloom_size(2 * size)); +} + +static inline void * +bloom_entry_get(bloom_set_t * set, int idx) +{ + return (void *) ((uint8 *) set->bloom_entries + idx * bloom_entry_size(set->bloom_size)); +} /* * bloom_set api assumes that we are working with the single bloom set. 
@@ -26,9 +40,26 @@ static void bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) { bloom_entry->dbid = InvalidOid; + bloom_entry->master_version = InvalidVersion; + bloom_entry->work_version = InvalidVersion; bloom_init(bloom_size, &bloom_entry->bloom); } +/* + * Separate initialization of LWLocks; + */ +static void +init_lwlocks(void) +{ + bloom_set_lock = LWLockAssign(); + + for (int i = 0; i < db_track_count; ++i) + { + bloom_locks[i].lock = LWLockAssign(); + bloom_locks[i].dbid = InvalidOid; + } +} + void bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size) { @@ -39,10 +70,19 @@ bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size) for (uint32_t i = 0; i < bloom_count; i++) { - bloom_entry_t *bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); + bloom_entry_t *bloom_entry = bloom_entry_get(bloom_set, i); bloom_entry_init(bloom_size, bloom_entry); } + + init_lwlocks(); + init_bloom_invariants(); +} + +Size +bloom_set_required_size(uint32 size, int count) +{ + return (offsetof(bloom_set_t, bloom_entries) + count * bloom_entry_size(size)); } /* @@ -57,7 +97,7 @@ find_bloom_entry(Oid dbid) for (i = 0; i < bloom_set->bloom_count; i++) { - bloom_entry = BLOOM_ENTRY_GET(bloom_set, i); + bloom_entry = bloom_entry_get(bloom_set, i); if (bloom_entry->dbid == dbid) break; } @@ -90,6 +130,9 @@ bloom_set_bind(Oid dbid) return false; } bloom_entry->dbid = dbid; + bloom_entry->master_version = StartVersion; + bloom_entry->work_version = StartVersion; + pg_atomic_init_flag(&bloom_entry->capture_in_progress); LWLockBindEntry(dbid); LWLockRelease(bloom_set_lock); @@ -103,31 +146,20 @@ bloom_set_bind(Oid dbid) bool bloom_set_trigger_bits(Oid dbid, bool on) { - bloom_entry_t *bloom_entry; - LWLock *entry_lock; - - bloom_set_check_state(); + bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE); - LWLockAcquire(bloom_set_lock, LW_SHARED); - entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = 
find_bloom_entry(dbid); - if (bloom_entry) + if (ctx.entry) { if (on) - bloom_set_all(&bloom_entry->bloom); + bloom_set_all(&ctx.entry->bloom); else - bloom_clear(&bloom_entry->bloom); - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + bloom_clear(&ctx.entry->bloom); + + bloom_set_release(&ctx); return true; } - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); - if (bloom_entry == NULL) - elog(LOG, "[arenadata toolkit] tracking_initial_snapshot Bloom filter not found"); + bloom_set_release(&ctx); return false; } @@ -157,47 +189,31 @@ bloom_set_unbind(Oid dbid) void bloom_set_set(Oid dbid, Oid relNode) { - bloom_entry_t *bloom_entry; - LWLock *entry_lock; - - bloom_set_check_state(); + bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE); - LWLockAcquire(bloom_set_lock, LW_SHARED); - entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(dbid); - if (bloom_entry) + if (ctx.entry) { - bloom_set_bits(&bloom_entry->bloom, relNode); + bloom_set_bits(&ctx.entry->bloom, relNode); } - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + bloom_set_release(&ctx); + } -/* Find bloom by dbid, copy all bytes to new filter, clear old (but keep it) */ +/* Find bloom by dbid, copy all bytes to new filter */ bool bloom_set_move(Oid dbid, bloom_t * dest) { - bloom_entry_t *bloom_entry; - LWLock *entry_lock; - - bloom_set_check_state(); + bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE); - LWLockAcquire(bloom_set_lock, LW_SHARED); - entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(dbid); - if (bloom_entry) + if (ctx.entry) { - bloom_copy(&bloom_entry->bloom, dest); - bloom_clear(&bloom_entry->bloom); - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + bloom_copy(dest, &ctx.entry->bloom); + bloom_clear(&ctx.entry->bloom); + bloom_set_release(&ctx); return true; 
} - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + + bloom_set_release(&ctx); return false; } @@ -206,28 +222,18 @@ bloom_set_move(Oid dbid, bloom_t * dest) bool bloom_set_merge(Oid dbid, bloom_t * from) { - bloom_entry_t *bloom_entry; - LWLock *entry_lock; - - bloom_set_check_state(); - if (!from) return false; - LWLockAcquire(bloom_set_lock, LW_SHARED); - entry_lock = LWLockAcquireEntry(dbid, LW_EXCLUSIVE); - bloom_entry = find_bloom_entry(dbid); - if (bloom_entry) + bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE); + + if (ctx.entry) { - bloom_merge(&bloom_entry->bloom, from); - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + bloom_merge(&ctx.entry->bloom, from); + bloom_set_release(&ctx); return true; } - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); + bloom_set_release(&ctx); return false; } @@ -235,22 +241,97 @@ bloom_set_merge(Oid dbid, bloom_t * from) bool bloom_set_is_all_bits_triggered(Oid dbid) { - bloom_entry_t *bloom_entry; bool is_triggered = false; - LWLock *entry_lock; + bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_SHARED); + + if (ctx.entry) + { + is_triggered = ctx.entry->bloom.is_set_all; + } + + bloom_set_release(&ctx); + + return is_triggered; +} + +bloom_op_ctx_t +bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode) +{ + bloom_op_ctx_t ctx = {0}; bloom_set_check_state(); - LWLockAcquire(bloom_set_lock, LW_SHARED); - entry_lock = LWLockAcquireEntry(dbid, LW_SHARED); - bloom_entry = find_bloom_entry(dbid); - if (bloom_entry) + LWLockAcquire(bloom_set_lock, s_mode); + ctx.entry_lock = LWLockAcquireEntry(dbid, e_mode); + ctx.entry = find_bloom_entry(dbid); + ctx.set_lock = bloom_set_lock; + + return ctx; +} +void +bloom_set_release(bloom_op_ctx_t * ctx) +{ + if (ctx->entry_lock) + LWLockRelease(ctx->entry_lock); + LWLockRelease(ctx->set_lock); +} + +/* + * Acquire lock corresponding to dbid in 
bloom_set. + */ +LWLock * +LWLockAcquireEntry(Oid dbid, LWLockMode mode) +{ + for (int i = 0; i < db_track_count; ++i) { - is_triggered = bloom_entry->bloom.is_set_all; + if (bloom_locks[i].dbid == dbid) + { + LWLockAcquire(bloom_locks[i].lock, mode); + return bloom_locks[i].lock; + } } - if (entry_lock) - LWLockRelease(entry_lock); - LWLockRelease(bloom_set_lock); - return is_triggered; + return NULL; +} + +/* + * Bind LWLock to tracked dbid. + */ +void +LWLockBindEntry(Oid dbid) +{ + int i; + + for (i = 0; i < db_track_count; ++i) + { + if (bloom_locks[i].dbid == InvalidOid) + { + bloom_locks[i].dbid = dbid; + break; + } + } + + if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) + pg_atomic_test_set_flag(&tf_shared_state->tracking_error); +} + +/* + * Unbind LWLock from tracked dbid. + */ +void +LWLockUnbindEntry(Oid dbid) +{ + int i; + + for (i = 0; i < db_track_count; ++i) + { + if (bloom_locks[i].dbid == dbid) + { + bloom_locks[i].dbid = InvalidOid; + break; + } + } + + if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) + pg_atomic_test_set_flag(&tf_shared_state->tracking_error); } diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index dc7001669313..1a4ebc732a90 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -41,8 +41,8 @@ typedef struct } drops_track_t; static shmem_startup_hook_type next_shmem_startup_hook = NULL; -static drops_track_t * drops_track; -LWLock *drops_track_lock; +static drops_track_t *drops_track; +LWLock *drops_track_lock; static Size drops_track_calc_size() @@ -104,8 +104,9 @@ drops_track_deinit(void) shmem_startup_hook = next_shmem_startup_hook; } -/* Find unused node i linked list. */ -static drops_track_node_t * find_empty_node() +/* Find unused node in linked list. 
*/ +static drops_track_node_t * +find_empty_node() { drops_track_node_t *track_node = NULL; diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c b/gpcontrib/arenadata_toolkit/src/file_hook.c index a26181c1f07f..344678a7cfb8 100644 --- a/gpcontrib/arenadata_toolkit/src/file_hook.c +++ b/gpcontrib/arenadata_toolkit/src/file_hook.c @@ -76,7 +76,7 @@ hook_create(RelFileNodeBackend rnode) if (!non_committed_bloom) { non_committed_bloom = - MemoryContextAlloc(TopMemoryContext, FULL_BLOOM_SIZE(bloom_size)); + MemoryContextAlloc(TopMemoryContext, full_bloom_size(bloom_size)); bloom_init(bloom_size, non_committed_bloom); non_committed_dbid = rnode.node.dbNode; RegisterXactCallbackOnce(xact_end_create_callback, NULL); diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 24cf2da5386d..986e2c9f37ad 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -7,21 +7,34 @@ #define MAX_BLOOM_HASH_FUNCS 6 #define TOTAL_ELEMENTS 10000000UL -#define FULL_BLOOM_SIZE(size) (offsetof(bloom_t, map) + size) typedef struct { - uint32_t size; /* size in bytes of 'map' */ + uint8 *current_bloom; + uint32 size; /* equal to bloom_size, half a map size */ uint8 is_set_all; /* indicates all bits are set */ uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* core bit array */ ; } bloom_t; +static inline Size +full_bloom_size(uint32 size) +{ + return (offsetof(bloom_t, map) + size); +} + +extern uint64 bloom_hash_seed; +extern int bloom_hash_num; + void bloom_init(const uint32 bloom_size, bloom_t * bloom); +void init_bloom_invariants(void); bool bloom_isset(bloom_t * bloom, Oid relnode); void bloom_set_bits(bloom_t * bloom, Oid relnode); void bloom_set_all(bloom_t * bloom); void bloom_clear(bloom_t * bloom); void bloom_merge(bloom_t * dst, bloom_t * src); void bloom_copy(bloom_t * src, bloom_t * dest); +void bloom_switch_current(bloom_t * bloom); +uint8 *bloom_get_other(bloom_t * 
bloom); +void bloom_merge_internal(bloom_t * bloom); #endif /* BLOOM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index d5ee800a353f..1e99c3228082 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -2,16 +2,25 @@ #define BLOOM_SET_H #include "postgres.h" +#include "storage/lwlock.h" +#include "port/atomics.h" #include "bloom.h" -#define FULL_BLOOM_ENTRY_SIZE(size) (offsetof(bloom_entry_t, bloom) + FULL_BLOOM_SIZE(size)) -#define FULL_BLOOM_SET_SIZE(size, count) (offsetof(bloom_set_t, bloom_entries) + FULL_BLOOM_ENTRY_SIZE(size) * count) +#define InvalidVersion ((uint32) 0) +#define ControlVersion ((uint32) 1) +#define StartVersion ((uint32) 2) /* Bloom set entry. */ typedef struct { Oid dbid; /* dbid of tracked database or InvalidOid */ + uint32 master_version; /* Auxiliary counter, which is sent from + * master to support transaction semantics */ + uint32 work_version; /* Auxiliary counter which represents current + * state of bloom filter */ + pg_atomic_flag capture_in_progress; /* indicates whether tracking + * acquisition in progress */ bloom_t bloom; /* bloom filter itself */ } bloom_entry_t; @@ -20,10 +29,30 @@ typedef struct { uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ uint32 bloom_size; /* size of bloom filter */ - char bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of + bloom_entry_t bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of * bloom_entry_t */ } bloom_set_t; +typedef struct +{ + LWLock *set_lock; + LWLock *entry_lock; + bloom_entry_t *entry; +} bloom_op_ctx_t; + +/* + * Locks on each bloom_entry_t in bloom_set. 
+ */ +typedef struct +{ + Oid dbid; + LWLock *lock; +} tf_entry_lock_t; + +extern LWLock *bloom_set_lock; +extern tf_entry_lock_t bloom_locks[]; + +Size bloom_set_required_size(uint32 size, int count); void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size); bool bloom_set_bind(Oid dbid); void bloom_set_unbind(Oid dbid); @@ -32,5 +61,10 @@ bool bloom_set_move(Oid dbid, bloom_t * dest); bool bloom_set_merge(Oid dbid, bloom_t * from); bool bloom_set_trigger_bits(Oid dbid, bool on); bool bloom_set_is_all_bits_triggered(Oid dbid); +bloom_op_ctx_t bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode); +void bloom_set_release(bloom_op_ctx_t * ctx); +LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); +void LWLockBindEntry(Oid dbid); +void LWLockUnbindEntry(Oid dbid); #endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 73f8ad42c78c..603fd7d4505e 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -6,13 +6,10 @@ #include "bloom_set.h" -/* maximum number of hash functions that can be used in Bloom filter */ -#define MAX_BLOOM_HASH_FUNCS 6 - /* * Stores the Bloom filter in shared memory. * tracking_is_initialized - a flag indicating - * bgworker binded dbids at startup/recovery. + * bgworker bound dbids at startup/recovery. * * tracking_error - sign of any internal error. If set, * blocks the work of track_getting procedure until cluster restart. @@ -26,25 +23,9 @@ typedef struct bloom_set_t bloom_set; } tf_shared_state_t; -/* - * Locks on each bloom_entry_t in bloom_set. 
- */ -typedef struct -{ - Oid dbid; - LWLock *lock; -} tf_entry_lock_t; - extern tf_shared_state_t *tf_shared_state; -extern LWLock *bloom_set_lock; -extern tf_entry_lock_t bloom_locks[]; -extern uint64 bloom_hash_seed; -extern int bloom_hash_num; void tf_shmem_init(void); void tf_shmem_deinit(void); -LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); -void LWLockBindEntry(Oid dbid); -void LWLockUnbindEntry(Oid dbid); #endif /* TF_SHMEM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index 3281b8fec1f0..b5fd78214263 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -6,54 +6,8 @@ #include "arenadata_toolkit_guc.h" -#include - static shmem_startup_hook_type next_shmem_startup_hook = NULL; tf_shared_state_t *tf_shared_state = NULL; -LWLock *bloom_set_lock; -tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT]; -uint64 bloom_hash_seed; -int bloom_hash_num; - -/* - * Separate initialization of LWLocks; - */ -static void -init_lwlocks(void) -{ - bloom_set_lock = LWLockAssign(); - - for (int i = 0; i < db_track_count; ++i) - { - bloom_locks[i].lock = LWLockAssign(); - bloom_locks[i].dbid = InvalidOid; - } -} - -/* - * Initialize optimal Bloom filter parameters - * - * This function calculates and sets optimal parameters for the Bloom filter - * based on established widespread principles. - * - * Calculates the optimal number of hash functions using the formula: - * k = (m/n)ln(2), which minimizes the false positive probability - * p = (1 - e^(-kn/m))^k. - * where: - * - m = total_bits (size of bit array) - * - n = TOTAL_ELEMENTS (expected number of insertions) - * - * Initializes bloom_hash_seed with a random value to prevent deterministic - * hash collisions and ensure independent hash distributions across runs. 
- */ -static void -init_bloom_invariants() -{ - int k = rint(log(2.0) * (bloom_size * 8) / TOTAL_ELEMENTS); - - bloom_hash_num = Max(1, Min(k, MAX_BLOOM_HASH_FUNCS)); - bloom_hash_seed = (uint64) random(); -} static Size tf_shmem_calc_size(void) @@ -61,7 +15,7 @@ tf_shmem_calc_size(void) Size size; size = offsetof(tf_shared_state_t, bloom_set); - size = add_size(size, FULL_BLOOM_SET_SIZE(bloom_size, db_track_count)); + size = add_size(size, bloom_set_required_size(bloom_size, db_track_count)); return size; } @@ -84,9 +38,6 @@ tf_shmem_hook(void) pg_atomic_init_flag(&tf_shared_state->tracking_error); bloom_set_init(db_track_count, bloom_size); - - init_lwlocks(); - init_bloom_invariants(); } LWLockRelease(AddinShmemInitLock); @@ -99,8 +50,7 @@ void tf_shmem_init() { /* - * tf_state_lock and bloom_set_lock locks - * plus one lock for each db entry. + * tf_state_lock and bloom_set_lock locks plus one lock for each db entry. */ RequestAddinLWLocks(2 + db_track_count); RequestAddinShmemSpace(tf_shmem_calc_size()); @@ -114,63 +64,3 @@ tf_shmem_deinit(void) { shmem_startup_hook = next_shmem_startup_hook; } - -/* - * Acquire lock corresponding to dbid in bloom_set. - */ -LWLock * -LWLockAcquireEntry(Oid dbid, LWLockMode mode) -{ - for (int i = 0; i < db_track_count; ++i) - { - if (bloom_locks[i].dbid == dbid) - { - LWLockAcquire(bloom_locks[i].lock, mode); - return bloom_locks[i].lock; - } - } - - return NULL; -} - -/* - * Bind LWLock to tracked dbid. - */ -void -LWLockBindEntry(Oid dbid) -{ - int i; - - for (i = 0; i < db_track_count; ++i) - { - if (bloom_locks[i].dbid == InvalidOid) - { - bloom_locks[i].dbid = dbid; - break; - } - } - - if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - pg_atomic_test_set_flag(&tf_shared_state->tracking_error); -} - -/* - * Unbind LWLock from tracked dbid. 
- */ -void -LWLockUnbindEntry(Oid dbid) -{ - int i; - - for (i = 0; i < db_track_count; ++i) - { - if (bloom_locks[i].dbid == dbid) - { - bloom_locks[i].dbid = InvalidOid; - break; - } - } - - if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - pg_atomic_test_set_flag(&tf_shared_state->tracking_error); -} diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 038f56e4f3b1..155ad8d49652 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -43,15 +43,24 @@ PG_FUNCTION_INFO_V1(tracking_is_segment_initialized); PG_FUNCTION_INFO_V1(tracking_trigger_initial_snapshot); PG_FUNCTION_INFO_V1(tracking_is_initial_snapshot_triggered); PG_FUNCTION_INFO_V1(tracking_get_track); -PG_FUNCTION_INFO_V1(tracking_get_track_main); +PG_FUNCTION_INFO_V1(tracking_track_version); #define GET_TRACK_TUPDESC_LEN 9 +#define Anum_track_relid ((AttrNumber) 0) +#define Anum_track_name ((AttrNumber) 1) +#define Anum_track_relfilenode ((AttrNumber) 2) +#define Anum_track_size ((AttrNumber) 3) +#define Anum_track_state ((AttrNumber) 4) +#define Anum_track_gp_segment_id ((AttrNumber) 5) +#define Anum_track_gp_segment_relnamespace ((AttrNumber) 6) +#define Anum_track_gp_segment_relkind ((AttrNumber) 7) +#define Anum_track_gp_segment_relstorage ((AttrNumber) 8) /* Preserved state among the calls of tracking_get_track_main */ typedef struct { Relation pg_class_rel; /* pg_class relation */ - SysScanDesc scan; /* for scans of system table */ + SysScanDesc scan; /* for scans of system table */ } tf_main_func_state_t; /* @@ -61,8 +70,7 @@ typedef struct typedef struct { bloom_t *bloom; /* local copy of shared bloom */ - bloom_t *rollback_bloom; /* bloom for rollback in case of sequential - * track acquisition */ + List *drops; /* drop list for current db */ ListCell *next_drop; uint64 relkinds; /* tracking relkinds */ @@ -70,22 +78,11 @@ typedef struct 
List *schema_oids; /* tracking schemas */ } tf_get_global_state_t; -typedef struct -{ - CdbPgResults cdb_results; /* results of CdbDispatch */ - int current_result; - int current_row; - - SPITupleTable *entry_result; /* results from SPI queries*/ - uint64 entry_processed; - int entry_current_row; - - FmgrInfo *inputFuncInfos; /* FuncInfos for parse string to Datum values - * transformation when using CdbDispatch* */ - Oid *typIOParams; -} tf_get_func_state_t; +static tf_get_global_state_t tf_get_global_state = {0}; -tf_get_global_state_t tf_get_global_state = {0}; +static bool callbackRegistered = false; +static bool controlVersionUsed = false; +static TransactionId local_xid = InvalidTransactionId; static inline void tf_check_shmem_error(void) @@ -103,34 +100,40 @@ get_dbid(Oid dbid) } /* - * In case of abort bloom is merged back as well as drops track. + * If the transaction that called tracking_track_version commits, we + * can bump the track version, which leads to consistency with + * state on segments. In case of abort, the version on master differs from + * the segments' and during track acquisition the previous + * filter is used on segments. 
*/ static void -xact_end_get_callback(XactEvent event, void *arg) +xact_end_version_callback(XactEvent event, void *arg) { - if (event != XACT_EVENT_COMMIT && event != XACT_EVENT_ABORT) - return; - - if (tf_get_global_state.bloom == NULL) - return; + bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); - if (event == XACT_EVENT_ABORT) + if (ctx.entry) { - if (tf_get_global_state.rollback_bloom) - bloom_set_merge(MyDatabaseId, tf_get_global_state.rollback_bloom); - else - bloom_set_merge(MyDatabaseId, tf_get_global_state.bloom); - drops_track_move_undo(tf_get_global_state.drops, MyDatabaseId); + if (event == XACT_EVENT_COMMIT) + ctx.entry->master_version++; + pg_atomic_clear_flag(&ctx.entry->capture_in_progress); } + bloom_set_release(&ctx); + + local_xid = InvalidTransactionId; + callbackRegistered = false; + controlVersionUsed = false; +} + +static void +xact_end_track_callback(XactEvent event, void *arg) +{ tf_get_global_state.bloom = NULL; - tf_get_global_state.rollback_bloom = NULL; tf_get_global_state.drops = NIL; tf_get_global_state.next_drop = NULL; tf_get_global_state.relkinds = 0; tf_get_global_state.relstorages = 0; tf_get_global_state.schema_oids = NIL; - } static List * @@ -183,7 +186,7 @@ list_to_bits(const char *input) while (token != NULL) { if (*token != '\0') - bits |= (1UL << (*token - 'A')); + bits |= (1ULL << (*token - 'A')); token = strtok(NULL, ","); } @@ -314,20 +317,21 @@ schema_is_tracked(Oid schema) static bool kind_is_tracked(char type, uint64 allowed_kinds) { - return (allowed_kinds & (1UL << (type - 'A'))) != 0; + return (allowed_kinds & (1ULL << (type - 'A'))) != 0; } /* - * Main logic for getting the size track. + * Main function for relation size track acquisition. 
*/ Datum -tracking_get_track_main(PG_FUNCTION_ARGS) +tracking_get_track(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; tf_main_func_state_t *state; HeapTuple result; Datum datums[GET_TRACK_TUPDESC_LEN]; bool nulls[GET_TRACK_TUPDESC_LEN] = {0}; + uint32 version = PG_GETARG_INT64(0); tf_check_shmem_error(); @@ -341,41 +345,72 @@ tracking_get_track_main(PG_FUNCTION_ARGS) funcctx = SRF_FIRSTCALL_INIT(); - RegisterXactCallbackOnce(xact_end_get_callback, NULL); + RegisterXactCallbackOnce(xact_end_track_callback, NULL); oldcontext = MemoryContextSwitchTo(CurTransactionContext); - if (tf_get_global_state.bloom == NULL) + bloom_op_ctx_t bloom_ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (bloom_ctx.entry == NULL) { - tf_get_global_state.bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); - if (!bloom_set_move(MyDatabaseId, tf_get_global_state.bloom)) - ereport(ERROR, - (errcode(ERRCODE_GP_COMMAND_ERROR), - errmsg("database %u is not tracked", MyDatabaseId), - errhint("Call 'arenadata_toolkit.tracking_register_db()'" - "to enable tracking"))); + bloom_set_release(&bloom_ctx); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); } - else + + /* + * If current bloom's version differs from incoming, we suppose that + * the rollback of previous track acquisition has occurred. In this + * situation we merge previous filter to current active filter. + * + * If the ControlVersion comes, it means that track is acquired + * several times in the same transaction. And the same filter is used + * in this situation. + */ + if (version != ControlVersion && version != bloom_ctx.entry->work_version) { - /* - * This code is needed for the cases when there are several track - * requests within the same transaction. 
rollback_bloom stands for - * preserving initial filter state at the moment of the first - * function call within the transaction. - */ - if (tf_get_global_state.rollback_bloom == NULL) + bloom_merge_internal(&bloom_ctx.entry->bloom); + } + + /* + * This block handles 2 scenarios: + * 1. First track acquisition in transaction: + * - Copy current active bloom filter to local array. + * - Switch active bloom filter to preserve the state, which has just + * been copied. + * - Clear active filter. + * - Increment current version. + * 2. Subsequent track acquisition in same transaction (ControlVersion) + * - Temporarily switch to previous filter state + * - Copy switched bloom filter to local array + * - Switch back to active filter + * - Keep existing current version + */ + if (tf_get_global_state.bloom == NULL) + { + tf_get_global_state.bloom = palloc0(full_bloom_size(bloom_size)); + bloom_init(bloom_size, tf_get_global_state.bloom); + + if (version == ControlVersion) + { + bloom_switch_current(&bloom_ctx.entry->bloom); + } + + bloom_copy(tf_get_global_state.bloom, &bloom_ctx.entry->bloom); + bloom_switch_current(&bloom_ctx.entry->bloom); + + if (version != ControlVersion) { - tf_get_global_state.rollback_bloom = palloc(FULL_BLOOM_SIZE(bloom_size)); - bloom_copy(tf_get_global_state.bloom, tf_get_global_state.rollback_bloom); + bloom_clear(&bloom_ctx.entry->bloom); + bloom_ctx.entry->work_version = version + 1; } - bloom_clear(tf_get_global_state.bloom); - if (!bloom_set_move(MyDatabaseId, tf_get_global_state.bloom)) - ereport(ERROR, - (errcode(ERRCODE_GP_COMMAND_ERROR), - errmsg("database %u is not tracked", MyDatabaseId), - errhint("Call 'arenadata_toolkit.tracking_register_db()'" - "to enable tracking"))); } + + bloom_set_release(&bloom_ctx); + /* initial snapshot shouldn't return drops */ if (!tf_get_global_state.bloom->is_set_all) { @@ -400,15 +435,15 @@ tracking_get_track_main(PG_FUNCTION_ARGS) oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); 
funcctx->tuple_desc = CreateTemplateTupleDesc(GET_TRACK_TUPDESC_LEN, false); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 1, "relid", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 2, "name", NAMEOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 4, "size", INT8OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 5, "state", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 6, "gp_segment_id", INT4OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 7, "relnamespace", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 8, "relkind", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 9, "relstorage", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_relid + 1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_name + 1, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_relfilenode + 1, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_size + 1, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_state + 1, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_id + 1, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relnamespace + 1, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relkind + 1, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relstorage + 1, "relstorage", CHAROID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); state = (tf_main_func_state_t *) palloc0(sizeof(tf_main_func_state_t)); @@ -447,32 +482,32 @@ tracking_get_track_main(PG_FUNCTION_ARGS) break; } - datums[7] = heap_getattr(pg_class_tuple, 
Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); - relkind = DatumGetChar(datums[7]); + datums[Anum_track_gp_segment_relkind] = heap_getattr(pg_class_tuple, Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); + relkind = DatumGetChar(datums[Anum_track_gp_segment_relkind]); if (!kind_is_tracked(relkind, tf_get_global_state.relkinds)) continue; - datums[8] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); - relstorage = DatumGetChar(datums[8]); + datums[Anum_track_gp_segment_relstorage] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); + relstorage = DatumGetChar(datums[Anum_track_gp_segment_relstorage]); if (!kind_is_tracked(relstorage, tf_get_global_state.relstorages)) continue; - datums[6] = heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); - relnamespace = DatumGetObjectId(datums[6]); + datums[Anum_track_gp_segment_relnamespace] = heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); + relnamespace = DatumGetObjectId(datums[Anum_track_gp_segment_relnamespace]); if (!schema_is_tracked(relnamespace)) continue; - datums[0] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); + datums[Anum_track_relid] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); - datums[1] = heap_getattr(pg_class_tuple, Anum_pg_class_relname, RelationGetDescr(state->pg_class_rel), &nulls[1]); + datums[Anum_track_name] = heap_getattr(pg_class_tuple, Anum_pg_class_relname, RelationGetDescr(state->pg_class_rel), &nulls[1]); - datums[2] = heap_getattr(pg_class_tuple, Anum_pg_class_relfilenode, RelationGetDescr(state->pg_class_rel), &nulls[2]); - filenode = DatumGetObjectId(datums[2]); + datums[Anum_track_relfilenode] = heap_getattr(pg_class_tuple, Anum_pg_class_relfilenode, RelationGetDescr(state->pg_class_rel), &nulls[2]); 
+ filenode = DatumGetObjectId(datums[Anum_track_relfilenode]); - if (nulls[2]) + if (nulls[Anum_track_relfilenode]) continue; /* Bloom filter check */ @@ -481,9 +516,9 @@ tracking_get_track_main(PG_FUNCTION_ARGS) relp = (Form_pg_class) GETSTRUCT(pg_class_tuple); size = dbsize_calc_size(relp); - datums[3] = Int64GetDatum(size); - datums[4] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); - datums[5] = Int32GetDatum(GpIdentity.segindex); + datums[Anum_track_size] = Int64GetDatum(size); + datums[Anum_track_state] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); + datums[Anum_track_gp_segment_id] = Int32GetDatum(GpIdentity.segindex); result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); @@ -500,160 +535,31 @@ tracking_get_track_main(PG_FUNCTION_ARGS) filenode = lfirst_oid(tf_get_global_state.next_drop); tf_get_global_state.next_drop = lnext(tf_get_global_state.next_drop); - nulls[0] = true; - nulls[1] = true; - datums[2] = filenode; - datums[3] = Int64GetDatum(0); - datums[4] = CharGetDatum('d'); - datums[5] = Int32GetDatum(GpIdentity.segindex); - nulls[6] = true; - nulls[7] = true; - nulls[8] = true; + nulls[Anum_track_relid] = true; + nulls[Anum_track_name] = true; + datums[Anum_track_relfilenode] = filenode; + datums[Anum_track_size] = Int64GetDatum(0); + datums[Anum_track_state] = CharGetDatum('d'); + datums[Anum_track_gp_segment_id] = Int32GetDatum(GpIdentity.segindex); + nulls[Anum_track_gp_segment_relnamespace] = true; + nulls[Anum_track_gp_segment_relkind] = true; + nulls[Anum_track_gp_segment_relstorage] = true; result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); } - SRF_RETURN_DONE(funcctx); -} - -/* - * Function used in "arenadata_toolkit.tables_track" view. In order to keep bloom filter - * in consistent state across segments this function dispatches main tracking logic to the - * segments in a distributed transaction. 
- */ -Datum -tracking_get_track(PG_FUNCTION_ARGS) -{ - FuncCallContext *funcctx; - tf_get_func_state_t *state; - HeapTuple result; - Datum values[GET_TRACK_TUPDESC_LEN]; - bool nulls[GET_TRACK_TUPDESC_LEN] = {0}; - - tf_check_shmem_error(); - - if (SRF_IS_FIRSTCALL()) - { - MemoryContext oldcontext = CurrentMemoryContext; - - funcctx = SRF_FIRSTCALL_INIT(); - - /* - * If we use CdbDispatchCommandToSegments, we will face the problem - * that entry db slice won't be part of global transaction and - * immediately commits, killing the chance for bloom filter to - * restore. Therefore, the spi approach for retrieving track at -1 - * segment is chosen. - */ - if (SPI_connect() != SPI_OK_CONNECT) - ereport(ERROR, (errmsg("SPI_connect failed"))); - if (SPI_execute("SELECT * FROM arenadata_toolkit.tracking_get_track_main()", true, 0) != SPI_OK_SELECT) - ereport(ERROR, (errmsg("SPI_execute failed"))); - - MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - state = (tf_get_func_state_t *) palloc0(sizeof(tf_get_func_state_t)); - funcctx->user_fctx = (void *) state; - - state->entry_result = SPI_tuptable; - state->entry_processed = SPI_processed; - state->entry_current_row = 0; - - CdbDispatchCommand("SELECT * FROM arenadata_toolkit.tracking_get_track_main()", DF_NEED_TWO_PHASE | DF_CANCEL_ON_ERROR, - &state->cdb_results); - - state->current_result = 0; - state->current_row = 0; - - funcctx->tuple_desc = CreateTemplateTupleDesc(9, false); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 1, "relid", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 2, "name", NAMEOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 3, "relfilenode", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 4, "size", INT8OID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 5, "state", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 6, "gp_segment_id", INT4OID, -1, 0); - 
TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 7, "relnamespace", OIDOID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 8, "relkind", CHAROID, -1, 0); - TupleDescInitEntry(funcctx->tuple_desc, (AttrNumber) 9, "relstorage", CHAROID, -1, 0); - funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); - - if (state->cdb_results.numResults > 0) - { - int natts = funcctx->tuple_desc->natts; - - state->inputFuncInfos = (FmgrInfo *) palloc0(natts * sizeof(FmgrInfo)); - state->typIOParams = (Oid *) palloc0(natts * sizeof(Oid)); - for (int i = 0; i < natts; i++) - { - Oid type = TupleDescAttr(funcctx->tuple_desc, i)->atttypid; - - getTypeInputInfo(type, &state->inputFuncInfos[i].fn_oid, &state->typIOParams[i]); - fmgr_info(state->inputFuncInfos[i].fn_oid, &state->inputFuncInfos[i]); - } - } - - MemoryContextSwitchTo(oldcontext); - } - - funcctx = SRF_PERCALL_SETUP(); - state = funcctx->user_fctx; - - if (state->entry_current_row < state->entry_processed) + if (tf_get_global_state.bloom) { - HeapTuple inputTuple = state->entry_result->vals[state->entry_current_row]; - TupleDesc inputTupleDesc = state->entry_result->tupdesc; - - for (int i = 0; i < funcctx->tuple_desc->natts; i++) - { - values[i] = SPI_getbinval(inputTuple, inputTupleDesc, i + 1, &nulls[i]); - } - HeapTuple resultTuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); - - state->entry_current_row++; - SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(resultTuple)); + pfree(tf_get_global_state.bloom); + tf_get_global_state.bloom = NULL; } - SPI_finish(); - - while (state->current_result < state->cdb_results.numResults) + if (tf_get_global_state.schema_oids) { - struct pg_result *pgresult = state->cdb_results.pg_results[state->current_result]; - - if (pgresult) - { - int nrows = PQntuples(pgresult); - int ncols = PQnfields(pgresult); - - if (state->current_row < nrows) - { - for (int col = 0; col < ncols; col++) - { - if (PQgetisnull(pgresult, state->current_row, col)) - { - values[col] = 
(Datum) 0; - nulls[col] = true; - } - else - { - char *value = PQgetvalue(pgresult, state->current_row, col); - - values[col] = InputFunctionCall(&state->inputFuncInfos[col], value, state->typIOParams[col], -1); - } - } - result = heap_form_tuple(funcctx->tuple_desc, values, nulls); - state->current_row++; - SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); - } - else - { - state->current_row = 0; - state->current_result++; - } - } - else - state->current_result++; + pfree(tf_get_global_state.schema_oids); + tf_get_global_state.schema_oids = NIL; } SRF_RETURN_DONE(funcctx); @@ -769,6 +675,12 @@ tracking_register_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER()) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_register_db outside query dispatcher"))); + } + if (Gp_role == GP_ROLE_DISPATCH && !is_initialized()) ereport(ERROR, (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), @@ -801,6 +713,12 @@ tracking_unregister_db(PG_FUNCTION_ARGS) tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER()) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_unregister_db outside query dispatcher"))); + } + if (Gp_role == GP_ROLE_DISPATCH && !is_initialized()) ereport(ERROR, (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), @@ -829,7 +747,11 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) bool set = PG_GETARG_BOOL(0); Oid dbid = get_dbid(PG_GETARG_OID(1)); - tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_set_snapshot_on_recovery outside query dispatcher"))); + } A_Const aconst = {.type = T_A_Const,.val = {.type = T_String,.val.str = set ? 
"t" : "f"}}; @@ -843,7 +765,7 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) if (stmt.dbname == NULL) ereport(ERROR, - (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); v_stmt.type = T_VariableSetStmt; v_stmt.kind = VAR_SET_VALUE; @@ -1035,7 +957,11 @@ tracking_register_schema(PG_FUNCTION_ARGS) const char *schema_name = NameStr(*PG_GETARG_NAME(0)); Oid dbid = get_dbid(PG_GETARG_OID(1)); - tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_register_schema outside query dispatcher"))); + } if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, @@ -1055,7 +981,11 @@ tracking_unregister_schema(PG_FUNCTION_ARGS) const char *schema_name = NameStr(*PG_GETARG_NAME(0)); Oid dbid = get_dbid(PG_GETARG_OID(1)); - tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_unregister_schema outside query dispatcher"))); + } if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, @@ -1105,7 +1035,11 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) VariableSetStmt v_stmt; A_Const arg; - tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_set_relkinds outside query dispatcher"))); + } initStringInfo(&buf); str_copy = pstrdup(relkinds_str); @@ -1208,7 +1142,11 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) VariableSetStmt v_stmt; A_Const arg; - tf_check_shmem_error(); + if (Gp_role != GP_ROLE_DISPATCH) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_set_relstorages outside query dispatcher"))); + } initStringInfo(&buf); str_copy = pstrdup(relstorages_str); @@ -1286,14 +1224,36 @@ Datum tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) { Oid dbid = get_dbid(PG_GETARG_OID(0)); + bloom_op_ctx_t ctx = {0}; tf_check_shmem_error(); + if (Gp_role != 
GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER()) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_trigger_initial_snapshot outside query dispatcher"))); + } + elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); - if (!bloom_set_trigger_bits(dbid, true)) + ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (!ctx.entry) + { + bloom_set_release(&ctx); ereport(ERROR, (errmsg("Failed to find corresponding filter to database %u", dbid))); + } + + if (Gp_role == GP_ROLE_DISPATCH && !pg_atomic_unlocked_test_flag(&ctx.entry->capture_in_progress)) + { + bloom_set_release(&ctx); + ereport(ERROR, + (errmsg("Cannot modify track during track acquisition %u", dbid))); + } + + bloom_set_all(&ctx.entry->bloom); + bloom_set_release(&ctx); if (Gp_role == GP_ROLE_DISPATCH) { @@ -1344,3 +1304,64 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/* + * This function should be used as argument for tracking_get_track function to + * follow correct transaction semantics. Several calls of the function within + * the same transaction return ControlVersion, which says tracking_get_track + * to return previous filter state. 
+ */ +Datum +tracking_track_version(PG_FUNCTION_ARGS) +{ + int64 version = (int64) InvalidVersion; + TransactionId current_xid = GetCurrentTransactionIdIfAny(); + + tf_check_shmem_error(); + + bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (!ctx.entry) + { + bloom_set_release(&ctx); + + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); + } + else if (!callbackRegistered && !pg_atomic_test_set_flag(&ctx.entry->capture_in_progress)) + { + bloom_set_release(&ctx); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId))); + } + + version = (int64) ctx.entry->master_version; + bloom_set_release(&ctx); + + if (!callbackRegistered) + { + RegisterXactCallbackOnce(xact_end_version_callback, NULL); + callbackRegistered = true; + + if (current_xid != local_xid) + { + local_xid = current_xid; + controlVersionUsed = false; + } + else if (current_xid != InvalidTransactionId) + { + controlVersionUsed = true; + } + } + + if (controlVersionUsed) + { + version = (int64) ControlVersion; + } + + PG_RETURN_INT64(version); +} From 0a8cf7e2e968050fd1f19578aac7b61498d80a61 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 8 Nov 2024 11:39:05 +0300 Subject: [PATCH 38/69] Exclude explain mode --- .../arenadata_toolkit/src/arenadata_toolkit.c | 3 + .../arenadata_toolkit/src/include/bloom_set.h | 4 +- gpcontrib/arenadata_toolkit/src/track_files.c | 83 ++++++++++++++++++- 3 files changed, 85 insertions(+), 5 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c index 5fbc2e5f3772..6bcac4b17269 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c @@ -11,6 +11,7 @@ 
#include "drops_track.h" #include "file_hook.h" #include "tf_shmem.h" +#include "track_files.h" void _PG_init(void); void _PG_fini(void); @@ -24,6 +25,7 @@ _PG_init(void) tf_guc_define(); tf_shmem_init(); file_hook_init(); + track_setup_ProcessUtility_hook(); drops_track_init(); @@ -34,6 +36,7 @@ void _PG_fini(void) { drops_track_deinit(); + track_uninstall_ProcessUtility_hook(); file_hook_deinit(); tf_shmem_deinit(); } diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index 1e99c3228082..42422525505c 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -15,12 +15,12 @@ typedef struct { Oid dbid; /* dbid of tracked database or InvalidOid */ - uint32 master_version; /* Auxiliary counter, which is send from + uint32 master_version; /* Auxiliary counter, which is sent from * master to support transaction semantics */ uint32 work_version; /* Auxiliary counter which represents current * state of bloom filter */ pg_atomic_flag capture_in_progress; /* indicates whether tracking - * acquisition in progress */ + * acquisition is in progress */ bloom_t bloom; /* bloom filter itself */ } bloom_entry_t; diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 155ad8d49652..f557136b67da 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -11,12 +11,13 @@ #include "cdb/cdbvars.h" #include "cdb/cdbutil.h" #include "commands/dbcommands.h" -#include "executor/spi.h" +#include "commands/defrem.h" #include "fmgr.h" #include "funcapi.h" #include "libpq-fe.h" #include "miscadmin.h" #include "storage/shmem.h" +#include "tcop/utility.h" #include "utils/relcache.h" #include "utils/builtins.h" #include "utils/memutils.h" @@ -30,6 +31,7 @@ #include "dbsize.h" #include "file_hook.h" #include "tf_shmem.h" +#include "track_files.h" 
PG_FUNCTION_INFO_V1(tracking_register_db); PG_FUNCTION_INFO_V1(tracking_register_db_main); @@ -45,6 +47,9 @@ PG_FUNCTION_INFO_V1(tracking_is_initial_snapshot_triggered); PG_FUNCTION_INFO_V1(tracking_get_track); PG_FUNCTION_INFO_V1(tracking_track_version); +/* + * Tuple description for result of tracking_get_track function. + */ #define GET_TRACK_TUPDESC_LEN 9 #define Anum_track_relid ((AttrNumber) 0) #define Anum_track_name ((AttrNumber) 1) @@ -84,6 +89,9 @@ static bool callbackRegistered = false; static bool controlVersionUsed = false; static TransactionId local_xid = InvalidTransactionId; +static bool isExecutorExplainMode = false; +ProcessUtility_hook_type next_ProcessUtility_hook = NULL; + static inline void tf_check_shmem_error(void) { @@ -99,6 +107,16 @@ get_dbid(Oid dbid) return (dbid == InvalidOid) ? MyDatabaseId : dbid; } +static uint32 +track_bump_version(uint32 ver) +{ + ver++; + if (ver == InvalidVersion || ver == ControlVersion) + ver += StartVersion - ver; + + return ver; +} + /* * If transaction called tracking_track_version commits, we * can bump the track version, what leads to consistency with @@ -114,7 +132,7 @@ xact_end_version_callback(XactEvent event, void *arg) if (ctx.entry) { if (event == XACT_EVENT_COMMIT) - ctx.entry->master_version++; + ctx.entry->master_version = track_bump_version(ctx.entry->master_version); pg_atomic_clear_flag(&ctx.entry->capture_in_progress); } @@ -405,7 +423,7 @@ tracking_get_track(PG_FUNCTION_ARGS) if (version != ControlVersion) { bloom_clear(&bloom_ctx.entry->bloom); - bloom_ctx.entry->work_version = version + 1; + bloom_ctx.entry->work_version = track_bump_version(version); } } @@ -1305,6 +1323,58 @@ tracking_is_segment_initialized(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } +static bool +is_explain_analyze(List *options) +{ + ListCell *lc; + + foreach(lc, options) + { + DefElem *opt = (DefElem *) lfirst(lc); + + if (pg_strcasecmp(opt->defname, "analyze") == 0) + { + return defGetBoolean(opt); + } + } + return 
false; +} + +static void +explain_detector_ProcessUtility(Node *parsetree, + const char *queryString, + ProcessUtilityContext context, + ParamListInfo params, + DestReceiver *dest, + char *completionTag) +{ + isExecutorExplainMode = false; + + if (IsA(parsetree, ExplainStmt)) + { + ExplainStmt *stmt = (ExplainStmt *) parsetree; + + if (!is_explain_analyze(stmt->options)) + isExecutorExplainMode = true; + } + + if (next_ProcessUtility_hook) + next_ProcessUtility_hook(parsetree, queryString, context, params, dest, completionTag); +} + +void +track_setup_ProcessUtility_hook(void) +{ + next_ProcessUtility_hook = ProcessUtility_hook ? ProcessUtility_hook : standard_ProcessUtility; + ProcessUtility_hook = explain_detector_ProcessUtility; +} + +void +track_uninstall_ProcessUtility_hook(void) +{ + ProcessUtility_hook = next_ProcessUtility_hook == standard_ProcessUtility ? NULL : next_ProcessUtility_hook; +} + /* * This function should be used as argument for tracking_get_track function to * follow correct transaction semantics. 
Several calls of the function within @@ -1317,6 +1387,13 @@ tracking_track_version(PG_FUNCTION_ARGS) int64 version = (int64) InvalidVersion; TransactionId current_xid = GetCurrentTransactionIdIfAny(); + if (Gp_role != GP_ROLE_DISPATCH) + ereport(ERROR, + (errmsg("Cannot acquire track using such query"))); + + if (isExecutorExplainMode) + PG_RETURN_INT64((int64) InvalidVersion); + tf_check_shmem_error(); bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); From 8f763a44acc352ecb9d730c99d7a22db45ded2c0 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 8 Nov 2024 11:44:22 +0300 Subject: [PATCH 39/69] Smiplify --- gpcontrib/arenadata_toolkit/src/track_files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index f557136b67da..49c3da48af9d 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -112,7 +112,7 @@ track_bump_version(uint32 ver) { ver++; if (ver == InvalidVersion || ver == ControlVersion) - ver += StartVersion - ver; + return StartVersion; return ver; } From 8fcb1e605e7c476caf340f148757584ea457a586 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 8 Nov 2024 12:14:05 +0300 Subject: [PATCH 40/69] new header file --- gpcontrib/arenadata_toolkit/src/include/track_files.h | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 gpcontrib/arenadata_toolkit/src/include/track_files.h diff --git a/gpcontrib/arenadata_toolkit/src/include/track_files.h b/gpcontrib/arenadata_toolkit/src/include/track_files.h new file mode 100644 index 000000000000..c7928dbd2001 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/track_files.h @@ -0,0 +1,7 @@ +#ifndef TRACK_FILES_H +#define TRACK_FILES_H + +void track_setup_ProcessUtility_hook(void); +void track_uninstall_ProcessUtility_hook(void); + +#endif /* TRACK_FILES_H */ From 
83a5c23fc5a5a4fea1ebafbf25db3ca8ebab9d6b Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 12 Nov 2024 09:13:37 +0300 Subject: [PATCH 41/69] remove db reconnection in tests --- .../arenadata_toolkit/expected/arenadata_toolkit_guc.out | 4 ---- gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql | 6 ------ 2 files changed, 10 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index 9ad0562d65c9..03920aefd58b 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -34,8 +34,6 @@ setdatabase=oid WHERE datname=current_database(); ---------+----------- (0 rows) -\c -; -\c tracking1; SELECT arenadata_toolkit.tracking_register_db(); tracking_register_db ---------------------- @@ -61,8 +59,6 @@ SELECT arenadata_toolkit.tracking_unregister_db(); t (1 row) -\c -; -\c tracking1; SHOW arenadata_toolkit.tracking_is_db_tracked; arenadata_toolkit.tracking_is_db_tracked ------------------------------------------ diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index c2790f84be30..ddd67eb7f813 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -28,9 +28,6 @@ SHOW arenadata_toolkit.tracking_is_db_tracked; SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); -\c -; -\c tracking1; - SELECT arenadata_toolkit.tracking_register_db(); SHOW arenadata_toolkit.tracking_is_db_tracked; @@ -40,9 +37,6 @@ setdatabase=oid WHERE datname=current_database(); SELECT arenadata_toolkit.tracking_unregister_db(); -\c -; -\c tracking1; - SHOW arenadata_toolkit.tracking_is_db_tracked; SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON From 
55ecda294d9d3b899c8cfcedd3a853939cbb483e Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 12 Nov 2024 11:48:37 +0300 Subject: [PATCH 42/69] Refactor drops track --- gpcontrib/arenadata_toolkit/src/drops_track.c | 98 +++++-------------- .../src/include/drops_track.h | 2 - 2 files changed, 24 insertions(+), 76 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index 1a4ebc732a90..5e75116ce80d 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -11,8 +11,6 @@ #include "arenadata_toolkit_guc.h" -#define TRACK_NODE_GET(track, i) (void *)(track->nodes + i * sizeof(drops_track_node_t)); - /* * Drop track element. Stores just relfilenode * and dbid. @@ -33,10 +31,9 @@ typedef struct /* Drops track */ typedef struct { - dlist_head head; + dlist_head used_head; + dlist_head free_head; uint32_t used_count; /* count of used nodes */ - int unused_idx; /* next unused idx or -1 if unknown; for - * faster search */ char nodes[FLEXIBLE_ARRAY_MEMBER]; /* array of drops_track_node_t */ } drops_track_t; @@ -44,6 +41,12 @@ static shmem_startup_hook_type next_shmem_startup_hook = NULL; static drops_track_t *drops_track; LWLock *drops_track_lock; +static inline drops_track_node_t * +track_node_get(drops_track_t * track, int i) +{ + return (drops_track_node_t *) (track->nodes + i * sizeof(drops_track_node_t)); +} + static Size drops_track_calc_size() { @@ -68,15 +71,16 @@ drops_track_hook(void) if (!found) { drops_track->used_count = 0; - drops_track->unused_idx = 0; - dlist_init(&drops_track->head); + dlist_init(&drops_track->used_head); + dlist_init(&drops_track->free_head); for (uint32_t i = 0; i < drops_count; i++) { - drops_track_node_t *track_node = TRACK_NODE_GET(drops_track, i); + drops_track_node_t *track_node = track_node_get(drops_track, i); track_node->relfileNode.relNode = InvalidOid; track_node->relfileNode.dbNode = InvalidOid; + 
dlist_push_tail(&drops_track->free_head, &track_node->node); } } @@ -104,36 +108,13 @@ drops_track_deinit(void) shmem_startup_hook = next_shmem_startup_hook; } -/* Find unused node in linked list. */ static drops_track_node_t * -find_empty_node() +get_free_node(void) { - drops_track_node_t *track_node = NULL; + if (dlist_is_empty(&drops_track->free_head)) + return NULL; - if (drops_track->unused_idx >= 0) - { - track_node = TRACK_NODE_GET(drops_track, drops_track->unused_idx); - drops_track->unused_idx++; - if (drops_track->unused_idx >= drops_count) - drops_track->unused_idx = -1; - else - { - drops_track_node_t *unused_node = TRACK_NODE_GET(drops_track, drops_track->unused_idx); - - if (unused_node->relfileNode.relNode != InvalidOid) - drops_track->unused_idx = -1; - } - } - else - { - for (uint32_t i = 0; i < drops_count; i++) - { - track_node = TRACK_NODE_GET(drops_track, i); - if (track_node->relfileNode.relNode == InvalidOid) - break; - } - } - return track_node; + return (drops_track_node_t *) dlist_pop_head_node(&drops_track->free_head); } /* Add relNode to track. Old node is dropped if no space */ @@ -146,19 +127,22 @@ drops_track_add(RelFileNode relfileNode) if (drops_track->used_count >= drops_count) { - track_node = (drops_track_node_t *) dlist_pop_head_node(&drops_track->head); + track_node = (drops_track_node_t *) dlist_pop_head_node(&drops_track->used_head); elog(DEBUG1, "No space for drop track. 
Oldest node removed (%d).", track_node->relfileNode.relNode); } else { - track_node = find_empty_node(); + track_node = get_free_node(); drops_track->used_count++; Assert(track_node); } track_node->relfileNode.relNode = relfileNode.relNode; track_node->relfileNode.dbNode = relfileNode.dbNode; - dlist_push_tail(&drops_track->head, &track_node->node); + dlist_push_tail(&drops_track->used_head, &track_node->node); + + elog(DEBUG1, "added relNode %u for dbNode %u to drops track", + relfileNode.relNode, relfileNode.dbNode); LWLockRelease(drops_track_lock); } @@ -178,11 +162,10 @@ drops_track_move(Oid dbid) return NIL; } - dlist_foreach_modify(iter, &drops_track->head) + dlist_foreach_modify(iter, &drops_track->used_head) { drops_track_node_t *track_node = (drops_track_node_t *) iter.cur; - /* newest in head, oldest in tail */ if (track_node->relfileNode.dbNode == dbid) { oids = lcons_oid(track_node->relfileNode.relNode, oids); @@ -190,6 +173,7 @@ drops_track_move(Oid dbid) track_node->relfileNode.relNode = InvalidOid; track_node->relfileNode.dbNode = InvalidOid; dlist_delete(&track_node->node); + dlist_push_tail(&drops_track->free_head, &track_node->node); } } @@ -197,37 +181,3 @@ drops_track_move(Oid dbid) return oids; } - -/* Return extracted dropped relfilenodes. - * Old nodes are removed if no space. - */ -void -drops_track_move_undo(List *oids, Oid dbid) -{ - ListCell *cell; - - if (oids == NIL) - return; - - LWLockAcquire(drops_track_lock, LW_EXCLUSIVE); - - foreach(cell, oids) - { - Oid oid = lfirst_oid(cell); - drops_track_node_t *track_node; - - if (drops_track->used_count >= drops_count) - { - elog(DEBUG1, "No space for move back. 
Oldest node removed (%d).", oid); - continue; - } - - track_node = find_empty_node(); - drops_track->used_count++; - track_node->relfileNode.relNode = oid; - track_node->relfileNode.dbNode = dbid; - dlist_push_head(&drops_track->head, &track_node->node); - } - - LWLockRelease(drops_track_lock); -} diff --git a/gpcontrib/arenadata_toolkit/src/include/drops_track.h b/gpcontrib/arenadata_toolkit/src/include/drops_track.h index e30c65f05632..b52f5180dbe6 100644 --- a/gpcontrib/arenadata_toolkit/src/include/drops_track.h +++ b/gpcontrib/arenadata_toolkit/src/include/drops_track.h @@ -10,7 +10,5 @@ void drops_track_deinit(void); void drops_track_add(RelFileNode relNode); List *drops_track_move(Oid dbid); -void drops_track_move_undo(List *oids, Oid dbid); - #endif /* DROPS_TRACK_H */ From c8139d47f02ae97a07b7320ff29e97930f069de8 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 12 Nov 2024 21:46:14 +0300 Subject: [PATCH 43/69] Fix locks for set entries --- gpcontrib/arenadata_toolkit/src/bloom_set.c | 67 ++----------------- .../arenadata_toolkit/src/include/bloom_set.h | 4 +- gpcontrib/arenadata_toolkit/src/track_files.c | 5 +- 3 files changed, 12 insertions(+), 64 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 2580277657c6..6b60ab2a9bfd 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -45,20 +45,6 @@ bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) bloom_init(bloom_size, &bloom_entry->bloom); } -/* - * Separate initialization of LWLocks; - */ -static void -init_lwlocks(void) -{ - bloom_set_lock = LWLockAssign(); - - for (int i = 0; i < db_track_count; ++i) - { - bloom_locks[i].lock = LWLockAssign(); - bloom_locks[i].dbid = InvalidOid; - } -} void bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size) @@ -68,14 +54,17 @@ bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size) 
bloom_set->bloom_count = bloom_count; bloom_set->bloom_size = bloom_size; + bloom_set_lock = LWLockAssign(); + for (uint32_t i = 0; i < bloom_count; i++) { bloom_entry_t *bloom_entry = bloom_entry_get(bloom_set, i); bloom_entry_init(bloom_size, bloom_entry); + bloom_locks[i].lock = LWLockAssign(); + bloom_locks[i].entry = (void *) bloom_entry; } - init_lwlocks(); init_bloom_invariants(); } @@ -133,7 +122,6 @@ bloom_set_bind(Oid dbid) bloom_entry->master_version = StartVersion; bloom_entry->work_version = StartVersion; pg_atomic_init_flag(&bloom_entry->capture_in_progress); - LWLockBindEntry(dbid); LWLockRelease(bloom_set_lock); return true; @@ -181,7 +169,6 @@ bloom_set_unbind(Oid dbid) } bloom_entry->dbid = InvalidOid; bloom_clear(&bloom_entry->bloom); - LWLockUnbindEntry(dbid); LWLockRelease(bloom_set_lock); } @@ -284,7 +271,9 @@ LWLockAcquireEntry(Oid dbid, LWLockMode mode) { for (int i = 0; i < db_track_count; ++i) { - if (bloom_locks[i].dbid == dbid) + bloom_entry_t *bloom_entry = (bloom_entry_t *) (bloom_locks[i].entry); + + if (bloom_entry->dbid == dbid) { LWLockAcquire(bloom_locks[i].lock, mode); return bloom_locks[i].lock; @@ -293,45 +282,3 @@ LWLockAcquireEntry(Oid dbid, LWLockMode mode) return NULL; } - -/* - * Bind LWLock to tracked dbid. - */ -void -LWLockBindEntry(Oid dbid) -{ - int i; - - for (i = 0; i < db_track_count; ++i) - { - if (bloom_locks[i].dbid == InvalidOid) - { - bloom_locks[i].dbid = dbid; - break; - } - } - - if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - pg_atomic_test_set_flag(&tf_shared_state->tracking_error); -} - -/* - * Unbind LWLock from tracked dbid. 
- */ -void -LWLockUnbindEntry(Oid dbid) -{ - int i; - - for (i = 0; i < db_track_count; ++i) - { - if (bloom_locks[i].dbid == dbid) - { - bloom_locks[i].dbid = InvalidOid; - break; - } - } - - if (i == db_track_count && pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - pg_atomic_test_set_flag(&tf_shared_state->tracking_error); -} diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index 42422525505c..f0b6f81c3d39 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -45,7 +45,7 @@ typedef struct */ typedef struct { - Oid dbid; + void *entry; /* It's a key that binds lock to bloom_entry */ LWLock *lock; } tf_entry_lock_t; @@ -64,7 +64,5 @@ bool bloom_set_is_all_bits_triggered(Oid dbid); bloom_op_ctx_t bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode); void bloom_set_release(bloom_op_ctx_t * ctx); LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); -void LWLockBindEntry(Oid dbid); -void LWLockUnbindEntry(Oid dbid); #endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 49c3da48af9d..4d1af7d5bb7b 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -645,6 +645,9 @@ is_initialized() CdbPgResults cdb_pgresults = {NULL, 0}; bool all_inited = true; + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) + return false; + CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); for (int i = 0; i < cdb_pgresults.numResults; i++) @@ -739,7 +742,7 @@ tracking_unregister_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH && !is_initialized()) ereport(ERROR, - (errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"), + (errmsg("[arenadata_toolkit] Cannot 
unregister database before workers initialize tracking"), errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again"))); elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid); From 9980cc9f364217aa02dc2b6bfab111b6b109795f Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 12 Nov 2024 21:51:34 +0300 Subject: [PATCH 44/69] Add isolation tests --- .../arenadata_toolkit/isolation2/.gitignore | 3 + .../arenadata_toolkit/isolation2/Makefile | 8 + .../isolation2/expected/recovery_failover.out | 144 ++++++++++++++++++ .../isolation2/expected/track_concurrent.out | 57 +++++++ .../arenadata_toolkit/isolation2/init_file | 0 .../isolation2/sql/recovery_failover.sql | 83 ++++++++++ .../isolation2/sql/track_concurrent.sql | 24 +++ .../isolation2/toolkit_schedule | 3 + 8 files changed, 322 insertions(+) create mode 100644 gpcontrib/arenadata_toolkit/isolation2/.gitignore create mode 100644 gpcontrib/arenadata_toolkit/isolation2/Makefile create mode 100644 gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out create mode 100644 gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out create mode 100644 gpcontrib/arenadata_toolkit/isolation2/init_file create mode 100644 gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql create mode 100644 gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql create mode 100644 gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule diff --git a/gpcontrib/arenadata_toolkit/isolation2/.gitignore b/gpcontrib/arenadata_toolkit/isolation2/.gitignore new file mode 100644 index 000000000000..354653a9b45c --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/.gitignore @@ -0,0 +1,3 @@ +/results/ +regression.diffs +regression.out diff --git a/gpcontrib/arenadata_toolkit/isolation2/Makefile b/gpcontrib/arenadata_toolkit/isolation2/Makefile new file mode 100644 index 000000000000..e088602543d2 --- /dev/null +++ 
b/gpcontrib/arenadata_toolkit/isolation2/Makefile @@ -0,0 +1,8 @@ +top_builddir = ../../../ + +ISOLATION2_ROOT = $(top_builddir)/src/test/isolation2 + +installcheck: + cd $(ISOLATION2_ROOT) && ./pg_isolation2_regress $(EXTRA_REGRESS_OPTS) --init-file=${CURDIR}/init_file --psqldir='$(PSQLDIR)' \ + --dbname=toolkitisolation --schedule=${CURDIR}/toolkit_schedule --inputdir=${CURDIR} --outputdir=${CURDIR} \ + --load-extension=gp_inject_fault --load-extension=plpythonu \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out new file mode 100644 index 000000000000..651fffae62b8 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out @@ -0,0 +1,144 @@ +-- This test triggers failover of content 1 and checks +-- the correct tracking state behaviour after recovery +!\retcode gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; +(exited with code 0) +!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly; +(exited with code 0) +-- Allow extra time for mirror promotion to complete recovery +!\retcode gpconfig -c gp_gang_creation_retry_count -v 120 --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +(exited with code 0) +!\retcode gpstop -raq -M fast; +(exited with code 0) + +CREATE EXTENSION arenadata_toolkit; +CREATE + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); + pg_sleep +---------- + +(1 row) +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) +SELECT tt.segid, 
count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + segid | count +-------+------- + 1 | 264 + 0 | 264 + 2 | 264 + -1 | 264 +(4 rows) + +include: helpers/server_helpers.sql; +CREATE + +-- Helper functions +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON MASTER AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; +CREATE + +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON ALL SEGMENTS AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; +CREATE + +CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) RETURNS bool AS $$ declare retries int; /* in func */ begin /* in func */ retries := 1200; /* in func */ loop /* in func */ if (select count(*) = num_segs from gp_segment_configuration where status = 'd') then /* in func */ return true; /* in func */ end if; /* in func */ if retries <= 0 then /* in func */ return false; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ retries := retries - 1; /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; +CREATE + +-- no segment down. 
+select count(*) from gp_segment_configuration where status = 'd'; + count +------- + 0 +(1 row) + +select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=1), 'stop'); + pg_ctl +-------- + OK +(1 row) + +select wait_until_segments_are_down(1); + wait_until_segments_are_down +------------------------------ + t +(1 row) + +SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM tracking_is_segment_initialized_segments(); + segindex | is_initialized +----------+---------------- + -1 | t + 0 | t + 1 | t + 2 | t +(4 rows) + +-- Track acquisition should retrurn full snapshot from promoted mirror since +-- initial snapshot is activated on recovery by deafult. +SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + segid | count +-------+------- + 1 | 264 + 0 | 1 + 2 | 1 + -1 | 1 +(4 rows) + +-- fully recover the failed primary as new mirror +!\retcode gprecoverseg -aF --no-progress; +(exited with code 0) + +-- loop while segments come in sync +select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) + +!\retcode gprecoverseg -ar; +(exited with code 0) + +-- loop while segments come in sync +select wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) + +-- verify no segment is down after recovery +select count(*) from gp_segment_configuration where status = 'd'; + count +------- + 0 +(1 row) + +-- Track should be returned only from recovered segment since +-- initial snapshot is activated on recovery by deafult. 
+SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + segid | count +-------+------- + 1 | 264 +(1 row) +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +!\retcode gpconfig -r gp_fts_probe_retries --masteronly; +(exited with code 0) +!\retcode gpconfig -r gp_gang_creation_retry_count --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpconfig -r gp_gang_creation_retry_timer --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out new file mode 100644 index 000000000000..a5a69270f76b --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out @@ -0,0 +1,57 @@ +-- start_matchsubs +-- m/ERROR: Track for database \d+ is being acquired in other transaction/ +-- s/\d+/XXX/g +-- end_matchsubs +-- Test concurrent track acquisition. 
+1: CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; +CREATE +1: SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) +1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) +1: BEGIN; +BEGIN +1: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + segid | count +-------+------- + 1 | 264 + 0 | 264 + 2 | 264 + -1 | 264 +(4 rows) + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +ERROR: Track for database 24798 is being acquired in other transaction + +1: ROLLBACK; +ROLLBACK + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + segid | count +-------+------- + 1 | 264 + 0 | 264 + 2 | 264 + -1 | 264 +(4 rows) + +1: SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) +1q: ... +2q: ... 
+ +!\retcode gpconfig -r shared_preload_libraries; +(exited with code 0) +!\retcode gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec; +(exited with code 0) +!\retcode gpstop -raq -M fast; +(exited with code 0) diff --git a/gpcontrib/arenadata_toolkit/isolation2/init_file b/gpcontrib/arenadata_toolkit/isolation2/init_file new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql new file mode 100644 index 000000000000..a025da7f0a6e --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql @@ -0,0 +1,83 @@ +-- This test triggers failover of content 1 and checks +-- the correct tracking state behaviour after recovery +!\retcode gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; +!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly; +-- Allow extra time for mirror promotion to complete recovery +!\retcode gpconfig -c gp_gang_creation_retry_count -v 120 --skipvalidation --masteronly; +!\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +!\retcode gpstop -raq -M fast; + +CREATE EXTENSION arenadata_toolkit; + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); +SELECT arenadata_toolkit.tracking_register_db(); +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); +SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +include: helpers/server_helpers.sql; + +-- Helper functions +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON MASTER AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; + +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() 
RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON ALL SEGMENTS AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; + +CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) +RETURNS bool AS +$$ +declare +retries int; /* in func */ +begin /* in func */ + retries := 1200; /* in func */ + loop /* in func */ + if (select count(*) = num_segs from gp_segment_configuration where status = 'd') then /* in func */ + return true; /* in func */ + end if; /* in func */ + if retries <= 0 then /* in func */ + return false; /* in func */ + end if; /* in func */ + perform pg_sleep(0.1); /* in func */ + retries := retries - 1; /* in func */ + end loop; /* in func */ +end; /* in func */ +$$ language plpgsql; + +-- no segment down. +select count(*) from gp_segment_configuration where status = 'd'; + +select pg_ctl((select datadir from gp_segment_configuration c +where c.role='p' and c.content=1), 'stop'); + +select wait_until_segments_are_down(1); + +SELECT * FROM tracking_is_segment_initialized_master() +UNION ALL +SELECT * FROM tracking_is_segment_initialized_segments(); + +-- Track acquisition should retrurn full snapshot from promoted mirror since +-- initial snapshot is activated on recovery by deafult. +SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +-- fully recover the failed primary as new mirror +!\retcode gprecoverseg -aF --no-progress; + +-- loop while segments come in sync +select wait_until_all_segments_synchronized(); + +!\retcode gprecoverseg -ar; + +-- loop while segments come in sync +select wait_until_all_segments_synchronized(); + +-- verify no segment is down after recovery +select count(*) from gp_segment_configuration where status = 'd'; + +-- Track should be returned only from recovered segment since +-- initial snapshot is activated on recovery by deafult. 
+SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +SELECT arenadata_toolkit.tracking_unregister_db(); + +!\retcode gpconfig -r gp_fts_probe_retries --masteronly; +!\retcode gpconfig -r gp_gang_creation_retry_count --skipvalidation --masteronly; +!\retcode gpconfig -r gp_gang_creation_retry_timer --skipvalidation --masteronly; +!\retcode gpstop -u; \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql new file mode 100644 index 000000000000..a710c780ea7d --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql @@ -0,0 +1,24 @@ +-- start_matchsubs +-- m/ERROR: Track for database \d+ is being acquired in other transaction/ +-- s/\d+/XXX/g +-- end_matchsubs +-- Test concurrent track acquisition. +1: CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; +1: SELECT arenadata_toolkit.tracking_register_db(); +1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); +1: BEGIN; +1: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +1: ROLLBACK; + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +1: SELECT arenadata_toolkit.tracking_unregister_db(); +1q: +2q: + +!\retcode gpconfig -r shared_preload_libraries; +!\retcode gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec; +!\retcode gpstop -raq -M fast; \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule b/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule new file mode 100644 index 000000000000..aa30a135146e --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule @@ -0,0 +1,3 @@ +# isolation tests list +test: recovery_failover +test: track_concurrent From 627fdeccd12d8ee2f87afa6b179982710d463040 Mon Sep 17 
00:00:00 2001 From: Alexander Kondakov Date: Wed, 13 Nov 2024 19:37:21 +0300 Subject: [PATCH 45/69] Improve tests --- .../expected/arenadata_toolkit_tracking.out | 22 +++- .../arenadata_toolkit/isolation2/Makefile | 5 +- .../isolation2/expected/recovery_failover.out | 74 ++++++------ .../isolation2/expected/track_concurrent.out | 29 +++-- .../arenadata_toolkit/isolation2/init_file | 0 .../isolation2/sql/recovery_failover.sql | 109 +++++++++++++----- .../isolation2/sql/track_concurrent.sql | 32 ++++- .../sql/arenadata_toolkit_tracking.sql | 16 ++- 8 files changed, 198 insertions(+), 89 deletions(-) delete mode 100644 gpcontrib/arenadata_toolkit/isolation2/init_file diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index faeadc37efab..1f1eed6254a6 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -39,10 +39,24 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- 3. If user hasn't registered any schema, the default schemas are used. -- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next -- track acquisition will return nothing if database is not modified in between. -SELECT count(*) FROM arenadata_toolkit.tables_track; - count -------- - 1056 +-- Test track acquisition returns the same count of tuples as pg_class when +-- initial snapshot is triggered. 
+WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t (1 row) -- 4. Create table in specific schema and register that schema. diff --git a/gpcontrib/arenadata_toolkit/isolation2/Makefile b/gpcontrib/arenadata_toolkit/isolation2/Makefile index e088602543d2..517f0fd27f9c 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/Makefile +++ b/gpcontrib/arenadata_toolkit/isolation2/Makefile @@ -3,6 +3,7 @@ top_builddir = ../../../ ISOLATION2_ROOT = $(top_builddir)/src/test/isolation2 installcheck: - cd $(ISOLATION2_ROOT) && ./pg_isolation2_regress $(EXTRA_REGRESS_OPTS) --init-file=${CURDIR}/init_file --psqldir='$(PSQLDIR)' \ + cd $(ISOLATION2_ROOT) && ./pg_isolation2_regress $(EXTRA_REGRESS_OPTS) --psqldir='$(PSQLDIR)' \ --dbname=toolkitisolation --schedule=${CURDIR}/toolkit_schedule --inputdir=${CURDIR} --outputdir=${CURDIR} \ - --load-extension=gp_inject_fault --load-extension=plpythonu \ No newline at end of file + --load-extension=gp_inject_fault --load-extension=plpythonu + \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out index 651fffae62b8..b2df7babe9bf 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out @@ -9,14 +9,17 @@ (exited with code 0) 
!\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; (exited with code 0) -!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; -(exited with code 0) !\retcode gpstop -raq -M fast; (exited with code 0) -CREATE EXTENSION arenadata_toolkit; +CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; CREATE +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); pg_sleep ---------- @@ -32,47 +35,52 @@ SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); ----------------------------------- t (1 row) -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; - segid | count --------+------- - 1 | 264 - 0 | 264 - 2 | 264 - -1 | 264 -(4 rows) + +-- Test track acquisition returns the same count of tuples as pg_class has with +-- default filter options. 
+WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) include: helpers/server_helpers.sql; CREATE -- Helper functions -CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON MASTER AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() /* in func */ RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ SELECT segindex, is_initialized /* in func */ FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ $$ LANGUAGE SQL EXECUTE ON MASTER; CREATE -CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON ALL SEGMENTS AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() /* in func */ RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ SELECT segindex, is_initialized /* in func */ FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ $$ LANGUAGE SQL EXECUTE ON ALL SEGMENTS; CREATE -CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) RETURNS bool AS $$ declare retries int; /* in func */ begin /* in func */ retries := 
1200; /* in func */ loop /* in func */ if (select count(*) = num_segs from gp_segment_configuration where status = 'd') then /* in func */ return true; /* in func */ end if; /* in func */ if retries <= 0 then /* in func */ return false; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ retries := retries - 1; /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; +CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) RETURNS BOOL AS $$ DECLARE retries int; /* in func */ BEGIN /* in func */ retries := 1200; /* in func */ loop /* in func */ IF (select count(*) = num_segs FROM gp_segment_configuration WHERE status = 'd') THEN /* in func */ return TRUE; /* in func */ END IF; /* in func */ IF retries <= 0 THEN /* in func */ return FALSE; /* in func */ END IF; /* in func */ perform pg_sleep(0.1); /* in func */ retries := retries - 1; /* in func */ END loop; /* in func */ END; /* in func */ $$ language plpgsql; CREATE -- no segment down. 
-select count(*) from gp_segment_configuration where status = 'd'; +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; count ------- 0 (1 row) -select pg_ctl((select datadir from gp_segment_configuration c where c.role='p' and c.content=1), 'stop'); +SELECT pg_ctl((select datadir FROM gp_segment_configuration c WHERE c.role='p' AND c.content=1), 'stop'); pg_ctl -------- OK (1 row) -select wait_until_segments_are_down(1); +SELECT wait_until_segments_are_down(1); wait_until_segments_are_down ------------------------------ t (1 row) +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); + pg_sleep +---------- + +(1 row) SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM tracking_is_segment_initialized_segments(); segindex | is_initialized ----------+---------------- @@ -83,22 +91,19 @@ SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM t (4 rows) -- Track acquisition should retrurn full snapshot from promoted mirror since --- initial snapshot is activated on recovery by deafult. -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; - segid | count --------+------- - 1 | 264 - 0 | 1 - 2 | 1 - -1 | 1 -(4 rows) +-- initial snapshot is activated on recovery by default. 
+WITH segment_counts AS ( SELECT COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) -- fully recover the failed primary as new mirror !\retcode gprecoverseg -aF --no-progress; (exited with code 0) -- loop while segments come in sync -select wait_until_all_segments_synchronized(); +SELECT wait_until_all_segments_synchronized(); wait_until_all_segments_synchronized -------------------------------------- OK @@ -108,26 +113,27 @@ select wait_until_all_segments_synchronized(); (exited with code 0) -- loop while segments come in sync -select wait_until_all_segments_synchronized(); +SELECT wait_until_all_segments_synchronized(); wait_until_all_segments_synchronized -------------------------------------- OK (1 row) -- verify no segment is down after recovery -select count(*) from gp_segment_configuration where status = 'd'; +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; count ------- 0 (1 row) -- Track should be returned only from recovered segment since --- initial snapshot is activated on recovery by deafult. -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; - segid | count --------+------- - 1 | 264 +-- initial snapshot is activated on recovery by default. 
+WITH segment_counts AS ( SELECT COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t (1 row) + SELECT arenadata_toolkit.tracking_unregister_db(); tracking_unregister_db ------------------------ diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out index a5a69270f76b..5ab8b6b5e501 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out @@ -17,14 +17,11 @@ CREATE (1 row) 1: BEGIN; BEGIN -1: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; - segid | count --------+------- - 1 | 264 - 0 | 264 - 2 | 264 - -1 | 264 -(4 rows) +1: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) 2: SELECT tt.segid, count(*) FROM 
arenadata_toolkit.tables_track tt GROUP BY tt.segid; ERROR: Track for database 24798 is being acquired in other transaction @@ -32,20 +29,20 @@ ERROR: Track for database 24798 is being acquired in other transaction 1: ROLLBACK; ROLLBACK -2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; - segid | count --------+------- - 1 | 264 - 0 | 264 - 2 | 264 - -1 | 264 -(4 rows) +2: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) 1: SELECT arenadata_toolkit.tracking_unregister_db(); tracking_unregister_db ------------------------ t (1 row) +1: DROP EXTENSION arenadata_toolkit; +DROP + 1q: ... 2q: ... 
diff --git a/gpcontrib/arenadata_toolkit/isolation2/init_file b/gpcontrib/arenadata_toolkit/isolation2/init_file deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql index a025da7f0a6e..437f2a7c78a4 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql @@ -5,79 +5,130 @@ -- Allow extra time for mirror promotion to complete recovery !\retcode gpconfig -c gp_gang_creation_retry_count -v 120 --skipvalidation --masteronly; !\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; -!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; !\retcode gpstop -raq -M fast; -CREATE EXTENSION arenadata_toolkit; +CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; + +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +!\retcode gpstop -u; SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); SELECT arenadata_toolkit.tracking_register_db(); SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +-- Test track acquisition returns the same count of tuples as pg_class has with +-- default filter options. 
+WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; include: helpers/server_helpers.sql; -- Helper functions -CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON MASTER AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; - -CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() RETURNS TABLE(segindex INT, is_initialized BOOL) LANGUAGE SQL EXECUTE ON ALL SEGMENTS AS $$ SELECT segindex, is_initialized FROM arenadata_toolkit.tracking_is_segment_initialized(); $$; +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() /* in func */ +RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ +SELECT segindex, is_initialized /* in func */ +FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ +$$ LANGUAGE SQL EXECUTE ON MASTER; + +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() /* in func */ +RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ +SELECT segindex, is_initialized /* in func */ +FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ +$$ LANGUAGE SQL EXECUTE ON ALL SEGMENTS; CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) -RETURNS bool AS +RETURNS BOOL AS $$ -declare +DECLARE retries int; /* in func */ -begin /* in func */ +BEGIN /* 
in func */ retries := 1200; /* in func */ loop /* in func */ - if (select count(*) = num_segs from gp_segment_configuration where status = 'd') then /* in func */ - return true; /* in func */ - end if; /* in func */ - if retries <= 0 then /* in func */ - return false; /* in func */ - end if; /* in func */ + IF (select count(*) = num_segs FROM gp_segment_configuration WHERE status = 'd') THEN /* in func */ + return TRUE; /* in func */ + END IF; /* in func */ + IF retries <= 0 THEN /* in func */ + return FALSE; /* in func */ + END IF; /* in func */ perform pg_sleep(0.1); /* in func */ retries := retries - 1; /* in func */ - end loop; /* in func */ -end; /* in func */ + END loop; /* in func */ +END; /* in func */ $$ language plpgsql; -- no segment down. -select count(*) from gp_segment_configuration where status = 'd'; +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; -select pg_ctl((select datadir from gp_segment_configuration c -where c.role='p' and c.content=1), 'stop'); +SELECT pg_ctl((select datadir FROM gp_segment_configuration c +WHERE c.role='p' AND c.content=1), 'stop'); -select wait_until_segments_are_down(1); +SELECT wait_until_segments_are_down(1); +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM tracking_is_segment_initialized_segments(); -- Track acquisition should retrurn full snapshot from promoted mirror since --- initial snapshot is activated on recovery by deafult. -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +-- initial snapshot is activated on recovery by default. 
+WITH segment_counts AS ( + SELECT COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; -- fully recover the failed primary as new mirror !\retcode gprecoverseg -aF --no-progress; -- loop while segments come in sync -select wait_until_all_segments_synchronized(); +SELECT wait_until_all_segments_synchronized(); !\retcode gprecoverseg -ar; -- loop while segments come in sync -select wait_until_all_segments_synchronized(); +SELECT wait_until_all_segments_synchronized(); -- verify no segment is down after recovery -select count(*) from gp_segment_configuration where status = 'd'; +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; -- Track should be returned only from recovered segment since --- initial snapshot is activated on recovery by deafult. -SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +-- initial snapshot is activated on recovery by default. 
+WITH segment_counts AS ( + SELECT COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + SELECT arenadata_toolkit.tracking_unregister_db(); !\retcode gpconfig -r gp_fts_probe_retries --masteronly; !\retcode gpconfig -r gp_gang_creation_retry_count --skipvalidation --masteronly; !\retcode gpconfig -r gp_gang_creation_retry_timer --skipvalidation --masteronly; -!\retcode gpstop -u; \ No newline at end of file +!\retcode gpstop -u; diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql index a710c780ea7d..d5e7a473194b 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql @@ -7,18 +7,44 @@ 1: SELECT arenadata_toolkit.tracking_register_db(); 1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); 1: BEGIN; -1: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +1: WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY 
(string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; 2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; 1: ROLLBACK; -2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +2: WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; 1: SELECT arenadata_toolkit.tracking_unregister_db(); +1: DROP EXTENSION arenadata_toolkit; + 1q: 2q: !\retcode gpconfig -r shared_preload_libraries; !\retcode gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec; -!\retcode gpstop -raq -M fast; \ No newline at end of file +!\retcode gpstop -raq -M fast; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index b332bc41cb7b..346e8c7311ad 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -29,7 +29,21 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- 3. If user hasn't registered any schema, the default schemas are used. -- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next -- track acquisition will return nothing if database is not modified in between. 
-SELECT count(*) FROM arenadata_toolkit.tables_track; +-- Test track acquisition returns the same count of tuples as pg_class when +-- initial snapshot is triggered. +WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) as cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; -- 4. Create table in specific schema and register that schema. CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) From 857c2456a2abdd65e3c9f4dbfc059f2b878f1412 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 13 Nov 2024 19:37:50 +0300 Subject: [PATCH 46/69] Remove error_flag --- gpcontrib/arenadata_toolkit/src/include/tf_shmem.h | 4 ---- gpcontrib/arenadata_toolkit/src/tf_shmem.c | 1 - gpcontrib/arenadata_toolkit/src/track_files.c | 4 ---- 3 files changed, 9 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h index 603fd7d4505e..0f3d3eaf49b1 100644 --- a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -11,15 +11,11 @@ * tracking_is_initialized - a flag indicating * bgworker bound dbids at startup/recovery. * - * tracking_error - sign of any internal error. If set, - * blocks the work of track_getting procedure until cluster restart. - * * bloom_set - set of db_track_count Bloom filters. 
*/ typedef struct { pg_atomic_flag tracking_is_initialized; - pg_atomic_flag tracking_error; bloom_set_t bloom_set; } tf_shared_state_t; diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c index b5fd78214263..8286af2319fb 100644 --- a/gpcontrib/arenadata_toolkit/src/tf_shmem.c +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -35,7 +35,6 @@ tf_shmem_hook(void) if (!found) { pg_atomic_init_flag(&tf_shared_state->tracking_is_initialized); - pg_atomic_init_flag(&tf_shared_state->tracking_error); bloom_set_init(db_track_count, bloom_size); } diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 4d1af7d5bb7b..9a222ca60d3a 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -353,10 +353,6 @@ tracking_get_track(PG_FUNCTION_ARGS) tf_check_shmem_error(); - if (!pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_error)) - ereport(ERROR, - (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); - if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; From a694039538661b324c10f34640f8981326a5d9a0 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 13 Nov 2024 21:29:33 +0300 Subject: [PATCH 47/69] Fix explain case --- gpcontrib/arenadata_toolkit/src/track_files.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 9a222ca60d3a..3cbcb59ff98c 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -141,6 +141,7 @@ xact_end_version_callback(XactEvent event, void *arg) local_xid = InvalidTransactionId; callbackRegistered = false; controlVersionUsed = false; + isExecutorExplainMode = false; } static void @@ -1347,8 +1348,6 @@ explain_detector_ProcessUtility(Node *parsetree, DestReceiver 
*dest, char *completionTag) { - isExecutorExplainMode = false; - if (IsA(parsetree, ExplainStmt)) { ExplainStmt *stmt = (ExplainStmt *) parsetree; @@ -1359,6 +1358,8 @@ explain_detector_ProcessUtility(Node *parsetree, if (next_ProcessUtility_hook) next_ProcessUtility_hook(parsetree, queryString, context, params, dest, completionTag); + + isExecutorExplainMode = false; } void From d38a4615cc37e805d566aa7fd00e91ecda19ac22 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 13 Nov 2024 21:34:50 +0300 Subject: [PATCH 48/69] Take the error back in --- gpcontrib/arenadata_toolkit/src/track_files.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 3cbcb59ff98c..ecc6a7f1fc81 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -354,6 +354,10 @@ tracking_get_track(PG_FUNCTION_ARGS) tf_check_shmem_error(); + if (version == InvalidVersion) + ereport(ERROR, + (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); + if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; From 8c3979472e7cc96834291703bfe9322304eedcfc Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 14 Nov 2024 09:13:40 +0300 Subject: [PATCH 49/69] Typos --- gpcontrib/arenadata_toolkit/README.md | 5 +++++ .../expected/arenadata_toolkit_tracking.out | 2 +- gpcontrib/arenadata_toolkit/isolation2/Makefile | 1 - .../isolation2/expected/recovery_failover.out | 6 +++--- .../isolation2/expected/track_concurrent.out | 4 ++-- .../arenadata_toolkit/isolation2/sql/recovery_failover.sql | 6 +++--- .../arenadata_toolkit/isolation2/sql/track_concurrent.sql | 4 ++-- .../arenadata_toolkit/sql/arenadata_toolkit_tracking.sql | 2 +- 8 files changed, 17 insertions(+), 13 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index ae80160a5ee4..1f7b8c0ccfca 100644 
--- a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -33,6 +33,11 @@ In order to get the snapshot of all database relations you should call in the da arenadata_toolkit.tracking_trigger_initial_snapshot(); ``` + +***Attention***: Acquiring size track from parallel sessions is not recommended, since there is the only +instance of Bloom filter for a database. I.e. track acquisition can return whole accumulated relation set +in one session, and empty set for acquisition from the second session (the first session acquired data earlier). + #### Choosing optimal Bloom size Choosing the optimal Bloom filter size is crucial for balancing memory usage and accuracy. diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 1f1eed6254a6..08874f7a0c60 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -42,7 +42,7 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- Test track acquisition returns the same count of tuples as pg_class when -- initial snapshot is triggered. 
WITH segment_counts AS ( - SELECT tt.segid, COUNT(*) as cnt + SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), diff --git a/gpcontrib/arenadata_toolkit/isolation2/Makefile b/gpcontrib/arenadata_toolkit/isolation2/Makefile index 517f0fd27f9c..fc0a73dc060c 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/Makefile +++ b/gpcontrib/arenadata_toolkit/isolation2/Makefile @@ -6,4 +6,3 @@ installcheck: cd $(ISOLATION2_ROOT) && ./pg_isolation2_regress $(EXTRA_REGRESS_OPTS) --psqldir='$(PSQLDIR)' \ --dbname=toolkitisolation --schedule=${CURDIR}/toolkit_schedule --inputdir=${CURDIR} --outputdir=${CURDIR} \ --load-extension=gp_inject_fault --load-extension=plpythonu - \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out index b2df7babe9bf..01d5863ba2c5 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out @@ -38,7 +38,7 @@ SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); -- Test track acquisition returns the same count of tuples as pg_class has with -- default filter options. 
-WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and ---------- t @@ -92,7 +92,7 @@ SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM t -- Track acquisition should retrurn full snapshot from promoted mirror since -- initial snapshot is activated on recovery by default. 
-WITH segment_counts AS ( SELECT COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +WITH segment_counts AS ( SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and ---------- t @@ -128,7 +128,7 @@ SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; -- Track should be returned only from recovered segment since -- initial snapshot is activated on recovery by default. 
-WITH segment_counts AS ( SELECT COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +WITH segment_counts AS ( SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and ---------- t diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out index 5ab8b6b5e501..4285f4256b98 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out @@ -17,7 +17,7 @@ CREATE (1 row) 1: BEGIN; BEGIN -1: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY 
(string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +1: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and ---------- t @@ -29,7 +29,7 @@ ERROR: Track for database 24798 is being acquired in other transaction 1: ROLLBACK; ROLLBACK -2: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) as cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +2: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY 
(string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and ---------- t diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql index 437f2a7c78a4..1edc6d382115 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql @@ -19,7 +19,7 @@ SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); -- Test track acquisition returns the same count of tuples as pg_class has with -- default filter options. WITH segment_counts AS ( - SELECT tt.segid, COUNT(*) as cnt + SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), @@ -83,7 +83,7 @@ SELECT * FROM tracking_is_segment_initialized_segments(); -- Track acquisition should retrurn full snapshot from promoted mirror since -- initial snapshot is activated on recovery by default. WITH segment_counts AS ( - SELECT COUNT(*) as cnt + SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), @@ -113,7 +113,7 @@ SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; -- Track should be returned only from recovered segment since -- initial snapshot is activated on recovery by default. 
WITH segment_counts AS ( - SELECT COUNT(*) as cnt + SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql index d5e7a473194b..efef94ef50b7 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql @@ -8,7 +8,7 @@ 1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); 1: BEGIN; 1: WITH segment_counts AS ( - SELECT tt.segid, COUNT(*) as cnt + SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), @@ -26,7 +26,7 @@ FROM segment_counts sc, pg_class_count pc; 1: ROLLBACK; 2: WITH segment_counts AS ( - SELECT tt.segid, COUNT(*) as cnt + SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 346e8c7311ad..3324ff618747 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -32,7 +32,7 @@ SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; -- Test track acquisition returns the same count of tuples as pg_class when -- initial snapshot is triggered. 
WITH segment_counts AS ( - SELECT tt.segid, COUNT(*) as cnt + SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), From cab839b1553bd1fe5b1d3fd6508cc2029bdbb35f Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 14 Nov 2024 09:50:43 +0300 Subject: [PATCH 50/69] return typo --- .../arenadata_toolkit/isolation2/expected/recovery_failover.out | 2 +- .../arenadata_toolkit/isolation2/sql/recovery_failover.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out index 01d5863ba2c5..4ef197fd9f1a 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out @@ -90,7 +90,7 @@ SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM t 2 | t (4 rows) --- Track acquisition should retrurn full snapshot from promoted mirror since +-- Track acquisition should return full snapshot from promoted mirror since -- initial snapshot is activated on recovery by default. 
WITH segment_counts AS ( SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; bool_and diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql index 1edc6d382115..5d7d957df836 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql @@ -80,7 +80,7 @@ SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM tracking_is_segment_initialized_segments(); --- Track acquisition should retrurn full snapshot from promoted mirror since +-- Track acquisition should return full snapshot from promoted mirror since -- initial snapshot is activated on recovery by default. 
WITH segment_counts AS ( SELECT COUNT(*) AS cnt From 9483c06b6966b83266eda20fdb993a8856449d19 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Sun, 17 Nov 2024 12:23:35 +0300 Subject: [PATCH 51/69] Remove unnecessary functions and change privileges --- .../arenadata_toolkit--1.6--1.7.sql | 36 ++++++------ .../expected/arenadata_toolkit_test.out | 58 ++++++++----------- gpcontrib/arenadata_toolkit/src/track_files.c | 29 +++------- 3 files changed, 49 insertions(+), 74 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql index a786f29c2c19..f40c7c811bd2 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql @@ -6,12 +6,6 @@ returns BOOL AS '$libdir/arenadata_toolkit', REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db(dbid OID) FROM public; -CREATE FUNCTION arenadata_toolkit.tracking_register_db_main(reg BOOL, dbid OID DEFAULT 0) -returns BOOL AS '$libdir/arenadata_toolkit', -'tracking_register_db_main' LANGUAGE C; - -REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db_main(reg BOOL, dbid OID) FROM public; - CREATE FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_unregister_db' LANGUAGE C EXECUTE ON MASTER; @@ -52,20 +46,20 @@ CREATE FUNCTION arenadata_toolkit.tracking_trigger_initial_snapshot(dbid OID DEF returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_trigger_initial_snapshot' LANGUAGE C; -CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered(dbid OID DEFAULT 0) -returns BOOL AS '$libdir/arenadata_toolkit', -'tracking_is_initial_snapshot_triggered' LANGUAGE C; - -REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered(dbid OID) FROM public; +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_trigger_initial_snapshot(dbid OID) TO public;
CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_master(dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON master; +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_master(dbid OID) TO public; + CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments(dbid OID DEFAULT 0) returns BOOL AS '$libdir/arenadata_toolkit', 'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON ALL segments; +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments(dbid OID) TO public; + CREATE FUNCTION arenadata_toolkit.tracking_is_segment_initialized() returns TABLE(segindex INT, is_initialized BOOL) AS '$libdir/arenadata_toolkit', 'tracking_is_segment_initialized' LANGUAGE C; @@ -76,21 +70,22 @@ CREATE FUNCTION arenadata_toolkit.tracking_track_version() returns BIGINT AS '$libdir/arenadata_toolkit', 'tracking_track_version' LANGUAGE C STABLE EXECUTE ON MASTER; -REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_track_version() FROM public; +-- Shouldn't be called explicitly +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_track_version() TO public; CREATE FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', 'tracking_get_track' LANGUAGE C EXECUTE ON MASTER; -REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) FROM public; +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) TO public; CREATE FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, relnamespace OID, relkind "char", relstorage "char") AS 
'$libdir/arenadata_toolkit', 'tracking_get_track' LANGUAGE C EXECUTE ON ALL SEGMENTS; -REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) FROM public; +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) TO public; CREATE VIEW arenadata_toolkit.tables_track AS SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid @@ -119,9 +114,12 @@ LEFT JOIN pg_catalog.pg_appendonly AS blk LEFT JOIN pg_catalog.pg_appendonly AS seg ON seg.segrelid = t.relid AND t.relkind = 'o'; +GRANT SELECT ON arenadata_toolkit.tables_track TO public; + CREATE VIEW arenadata_toolkit.is_initial_snapshot_triggered AS -SELECT CASE - WHEN TRUE = ALL(select arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments()) - AND - arenadata_toolkit.tracking_is_initial_snapshot_triggered_master() - THEN 1 ELSE NULL END AS is_triggered; +SELECT CASE +WHEN TRUE = ALL(select arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments()) +AND arenadata_toolkit.tracking_is_initial_snapshot_triggered_master() +THEN 1 ELSE NULL END AS is_triggered; + +GRANT SELECT ON arenadata_toolkit.is_initial_snapshot_triggered TO public; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index 69bc19edb28d..8a27d8a7b9d4 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -108,30 +108,28 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} db_files_history | table | a | {owner=arwdDxt/owner} db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} - db_files_history_backup_YYYYMMDDtHHMMSS | table | a | - 
db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_default_part | table | a | - db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_pYYYYMM | table | a | - is_initial_snapshot_triggered | table | v | + db_files_history_1_prt_p202411 | table | a | {owner=arwdDxt/owner} + db_files_history_backup_20241117t005023 | table | a | + db_files_history_backup_20241117t005023_1_prt_default_part | table | a | + db_files_history_backup_20241117t005023_1_prt_p202411 | table | a | + is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} operation_exclude | table | a | - tables_track | table | v | - tracking_get_track_master | proc | - | {owner=X/owner} - tracking_get_track_segments | proc | - | {owner=X/owner} - tracking_is_initial_snapshot_triggered | proc | - | {owner=X/owner} - tracking_is_initial_snapshot_triggered_master | proc | - | - tracking_is_initial_snapshot_triggered_segments | proc | - | + tables_track | table | v | {owner=arwdDxt/owner,=r/owner} + tracking_get_track_master | proc | - | {=X/owner,owner=X/owner} + tracking_get_track_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_master | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_segments | proc | - | {=X/owner,owner=X/owner} tracking_is_segment_initialized | proc | - | {owner=X/owner} tracking_register_db | proc | - | {owner=X/owner} - tracking_register_db_main | proc | - | {owner=X/owner} tracking_register_schema | proc | - | {owner=X/owner} tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} - tracking_track_version | proc | - | {owner=X/owner} - tracking_trigger_initial_snapshot | proc | - | + tracking_track_version | proc | - | {=X/owner,owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | {=X/owner,owner=X/owner} tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | proc | - | 
{owner=X/owner} -(42 rows) +(40 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -158,12 +156,10 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tables_track | table | arenadata_toolkit | e tracking_get_track_master | proc | arenadata_toolkit | e tracking_get_track_segments | proc | arenadata_toolkit | e - tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e tracking_is_segment_initialized | proc | arenadata_toolkit | e tracking_register_db | proc | arenadata_toolkit | e - tracking_register_db_main | proc | arenadata_toolkit | e tracking_register_schema | proc | arenadata_toolkit | e tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e @@ -172,7 +168,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(32 rows) +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; @@ -210,27 +206,25 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} db_files_history | table | a | {owner=arwdDxt/owner} db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_p202410 | table | a | {owner=arwdDxt/owner} - is_initial_snapshot_triggered | table | v | + db_files_history_1_prt_p202411 | table | a | {owner=arwdDxt/owner} + is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} operation_exclude | table | a | {owner=arwdDxt/owner} - tables_track | table | v | - 
tracking_get_track_master | proc | - | {owner=X/owner} - tracking_get_track_segments | proc | - | {owner=X/owner} - tracking_is_initial_snapshot_triggered | proc | - | {owner=X/owner} - tracking_is_initial_snapshot_triggered_master | proc | - | - tracking_is_initial_snapshot_triggered_segments | proc | - | + tables_track | table | v | {owner=arwdDxt/owner,=r/owner} + tracking_get_track_master | proc | - | {=X/owner,owner=X/owner} + tracking_get_track_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_master | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_segments | proc | - | {=X/owner,owner=X/owner} tracking_is_segment_initialized | proc | - | {owner=X/owner} tracking_register_db | proc | - | {owner=X/owner} - tracking_register_db_main | proc | - | {owner=X/owner} tracking_register_schema | proc | - | {owner=X/owner} tracking_set_relkinds | proc | - | {owner=X/owner} tracking_set_relstorages | proc | - | {owner=X/owner} tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} - tracking_track_version | proc | - | {owner=X/owner} - tracking_trigger_initial_snapshot | proc | - | + tracking_track_version | proc | - | {=X/owner,owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | {=X/owner,owner=X/owner} tracking_unregister_db | proc | - | {owner=X/owner} tracking_unregister_schema | proc | - | {owner=X/owner} -(39 rows) +(37 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN @@ -257,12 +251,10 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tables_track | table | arenadata_toolkit | e tracking_get_track_master | proc | arenadata_toolkit | e tracking_get_track_segments | proc | arenadata_toolkit | e - tracking_is_initial_snapshot_triggered | proc | arenadata_toolkit | e tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e 
tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e tracking_is_segment_initialized | proc | arenadata_toolkit | e tracking_register_db | proc | arenadata_toolkit | e - tracking_register_db_main | proc | arenadata_toolkit | e tracking_register_schema | proc | arenadata_toolkit | e tracking_set_relkinds | proc | arenadata_toolkit | e tracking_set_relstorages | proc | arenadata_toolkit | e @@ -271,7 +263,7 @@ WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e tracking_unregister_db | proc | arenadata_toolkit | e tracking_unregister_schema | proc | arenadata_toolkit | e -(32 rows) +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index ecc6a7f1fc81..2877b71b0b84 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -34,7 +34,6 @@ #include "track_files.h" PG_FUNCTION_INFO_V1(tracking_register_db); -PG_FUNCTION_INFO_V1(tracking_register_db_main); PG_FUNCTION_INFO_V1(tracking_unregister_db); PG_FUNCTION_INFO_V1(tracking_set_snapshot_on_recovery); PG_FUNCTION_INFO_V1(tracking_register_schema); @@ -612,11 +611,12 @@ track_db(Oid dbid, bool reg) tf_guc_unlock_tracked_once(); AlterDatabaseSet(&stmt); - } - tf_guc_unlock_tracked_once(); - SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? "t" : "f", - PGC_S_DATABASE, PGC_S_DATABASE); + tf_guc_unlock_tracked_once(); + /* Will set the GUC in caller session only on coordinator */ + SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? 
"t" : "f", + PGC_S_DATABASE, PGC_S_DATABASE); + } if (!reg) bloom_set_unbind(dbid); @@ -625,21 +625,6 @@ track_db(Oid dbid, bool reg) (errmsg("[arenadata_toolkit] exceeded maximum number of tracked databases"))); } -Datum -tracking_register_db_main(PG_FUNCTION_ARGS) -{ - Oid dbid = get_dbid(PG_GETARG_OID(1)); - bool reg = PG_GETARG_BOOL(0); - - tf_check_shmem_error(); - - elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid); - - track_db(dbid, reg); - - PG_RETURN_BOOL(true); -} - static bool is_initialized() { @@ -715,7 +700,7 @@ tracking_register_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { char *cmd = - psprintf("select arenadata_toolkit.tracking_register_db_main(true, %u)", dbid); + psprintf("select arenadata_toolkit.tracking_register_db(%u)", dbid); CdbDispatchCommand(cmd, 0, NULL); @@ -753,7 +738,7 @@ tracking_unregister_db(PG_FUNCTION_ARGS) if (Gp_role == GP_ROLE_DISPATCH) { char *cmd = - psprintf("select arenadata_toolkit.tracking_register_db_main(false, %u)", dbid); + psprintf("select arenadata_toolkit.tracking_unregister_db(%u)", dbid); CdbDispatchCommand(cmd, 0, NULL); From f740832cbe96a4d08428b823dd7651ac2a51915b Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 18 Nov 2024 16:21:32 +0300 Subject: [PATCH 52/69] Fix control version --- .../expected/arenadata_toolkit_tracking.out | 7 +- .../arenadata_toolkit/src/arenadata_toolkit.c | 9 +- .../src/include/track_files.h | 4 +- gpcontrib/arenadata_toolkit/src/track_files.c | 86 ++++++++++--------- 4 files changed, 54 insertions(+), 52 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 08874f7a0c60..be0cf3f1abfa 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -170,13 +170,10 @@ SELECT relname, size, state, segid, relkind, relstorage FROM 
arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ - tracking_t2 | 229376 | a | 1 | r | h - tracking_t1 | 160696 | a | 1 | r | c - tracking_t2 | 229376 | a | 0 | r | h tracking_t1 | 161088 | a | 0 | r | c - tracking_t2 | 229376 | a | 2 | r | h + tracking_t1 | 160696 | a | 1 | r | c tracking_t1 | 159128 | a | 2 | r | c -(6 rows) +(3 rows) ROLLBACK; SELECT relname, size, state, segid, relkind, relstorage diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c index 6bcac4b17269..e235b6f6c466 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c @@ -25,18 +25,21 @@ _PG_init(void) tf_guc_define(); tf_shmem_init(); file_hook_init(); - track_setup_ProcessUtility_hook(); - drops_track_init(); + if (IS_QUERY_DISPATCHER()) + track_setup_executor_hooks(); + arenadata_toolkit_worker_register(); } void _PG_fini(void) { + if (IS_QUERY_DISPATCHER()) + track_uninstall_executor_hooks(); + drops_track_deinit(); - track_uninstall_ProcessUtility_hook(); file_hook_deinit(); tf_shmem_deinit(); } diff --git a/gpcontrib/arenadata_toolkit/src/include/track_files.h b/gpcontrib/arenadata_toolkit/src/include/track_files.h index c7928dbd2001..2c46a900f082 100644 --- a/gpcontrib/arenadata_toolkit/src/include/track_files.h +++ b/gpcontrib/arenadata_toolkit/src/include/track_files.h @@ -1,7 +1,7 @@ #ifndef TRACK_FILES_H #define TRACK_FILES_H -void track_setup_ProcessUtility_hook(void); -void track_uninstall_ProcessUtility_hook(void); +void track_setup_executor_hooks(void); +void track_uninstall_executor_hooks(void); #endif /* TRACK_FILES_H */ diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 2877b71b0b84..4d20491f6c09 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ 
b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -12,6 +12,7 @@ #include "cdb/cdbutil.h" #include "commands/dbcommands.h" #include "commands/defrem.h" +#include "executor/executor.h" #include "fmgr.h" #include "funcapi.h" #include "libpq-fe.h" @@ -85,11 +86,11 @@ typedef struct static tf_get_global_state_t tf_get_global_state = {0}; static bool callbackRegistered = false; -static bool controlVersionUsed = false; -static TransactionId local_xid = InvalidTransactionId; +static uint32 current_version = InvalidVersion; static bool isExecutorExplainMode = false; ProcessUtility_hook_type next_ProcessUtility_hook = NULL; +ExecutorEnd_hook_type next_ExecutorEnd_hook = NULL; static inline void tf_check_shmem_error(void) @@ -137,9 +138,8 @@ xact_end_version_callback(XactEvent event, void *arg) bloom_set_release(&ctx); - local_xid = InvalidTransactionId; callbackRegistered = false; - controlVersionUsed = false; + current_version = InvalidVersion; isExecutorExplainMode = false; } @@ -1351,17 +1351,37 @@ explain_detector_ProcessUtility(Node *parsetree, isExecutorExplainMode = false; } +/* + * When any query execution ends, current_version is set to ControlVersion. + * If the tracking_track_version registered transaction callback + * and its transaction is still going, then subsequent tracking_track_version + * calls within the transaction will return ControlVersion. + */ +static void +track_ExecutorEnd(QueryDesc *queryDesc) +{ + current_version = ControlVersion; + + if (next_ExecutorEnd_hook) + next_ExecutorEnd_hook(queryDesc); +} + void -track_setup_ProcessUtility_hook(void) +track_setup_executor_hooks(void) { next_ProcessUtility_hook = ProcessUtility_hook ? ProcessUtility_hook : standard_ProcessUtility; ProcessUtility_hook = explain_detector_ProcessUtility; + + next_ExecutorEnd_hook = ExecutorEnd_hook ? 
ExecutorEnd_hook : standard_ExecutorEnd; + ExecutorEnd_hook = track_ExecutorEnd; + } void -track_uninstall_ProcessUtility_hook(void) +track_uninstall_executor_hooks(void) { ProcessUtility_hook = next_ProcessUtility_hook == standard_ProcessUtility ? NULL : next_ProcessUtility_hook; + ExecutorEnd_hook = next_ExecutorEnd_hook == standard_ExecutorEnd ? NULL : next_ExecutorEnd_hook; } /* @@ -1373,9 +1393,6 @@ track_uninstall_ProcessUtility_hook(void) Datum tracking_track_version(PG_FUNCTION_ARGS) { - int64 version = (int64) InvalidVersion; - TransactionId current_xid = GetCurrentTransactionIdIfAny(); - if (Gp_role != GP_ROLE_DISPATCH) ereport(ERROR, (errmsg("Cannot acquire track using such query"))); @@ -1385,49 +1402,34 @@ tracking_track_version(PG_FUNCTION_ARGS) tf_check_shmem_error(); - bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); - - if (!ctx.entry) - { - bloom_set_release(&ctx); - - ereport(ERROR, - (errcode(ERRCODE_GP_COMMAND_ERROR), - errmsg("database %u is not tracked", MyDatabaseId), - errhint("Call 'arenadata_toolkit.tracking_register_db()'" - "to enable tracking"))); - } - else if (!callbackRegistered && !pg_atomic_test_set_flag(&ctx.entry->capture_in_progress)) - { - bloom_set_release(&ctx); - ereport(ERROR, - (errcode(ERRCODE_GP_COMMAND_ERROR), - errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId))); - } - - version = (int64) ctx.entry->master_version; - bloom_set_release(&ctx); - if (!callbackRegistered) { RegisterXactCallbackOnce(xact_end_version_callback, NULL); callbackRegistered = true; - if (current_xid != local_xid) + bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (!ctx.entry) { - local_xid = current_xid; - controlVersionUsed = false; + bloom_set_release(&ctx); + + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable 
tracking"))); } - else if (current_xid != InvalidTransactionId) + else if (!pg_atomic_test_set_flag(&ctx.entry->capture_in_progress)) { - controlVersionUsed = true; + bloom_set_release(&ctx); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId))); } - } - if (controlVersionUsed) - { - version = (int64) ControlVersion; + current_version = (int64) ctx.entry->master_version; + bloom_set_release(&ctx); } - PG_RETURN_INT64(version); + PG_RETURN_INT64((int64) current_version); } From d56241c867c8d18e7e1d2a3d1caafd5855dbabf4 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 18 Nov 2024 16:51:45 +0300 Subject: [PATCH 53/69] Change worker --- .../src/arenadata_toolkit_worker.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index a3dbacf3d862..bf4f4fec8822 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -149,21 +149,24 @@ worker_tracking_status_check() { List *tracked_dbs = NIL; - StartTransactionCommand(); - - tracked_dbs = get_tracked_dbs(); - if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) { + StartTransactionCommand(); + + tracked_dbs = get_tracked_dbs(); + if (list_length(tracked_dbs) > 0) track_dbs(tracked_dbs); + CommitTransactionCommand(); + + if (tracked_dbs) + list_free_deep(tracked_dbs); + pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized); } - if (tracked_dbs) - list_free_deep(tracked_dbs); - CommitTransactionCommand(); + } /* Main worker cycle. 
Scans pg_db_role_setting and binds tracked dbids to From d3b3e4880926fcd18b791c978519f0523a6e84cc Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 18 Nov 2024 16:55:20 +0300 Subject: [PATCH 54/69] Refactor dbsize --- gpcontrib/arenadata_toolkit/src/dbsize.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index ba9e6dc2b491..0ad1b3f6d96b 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -44,7 +44,7 @@ static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, const char *relpath, int segNo); static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); -static int64 get_heap_storage_total_bytes_soft(Relation rel, ForkNumber forknum, char *relpath); +static int64 get_heap_storage_total_bytes_soft(char *relpath); static bool calculate_ao_storage_perSegFile_soft(const int segno, void *ctx); /* @@ -124,7 +124,7 @@ calculate_relation_size(Relation rel, ForkNumber forknum, bool softCalc) char *relpath = relpathbackend(rel->rd_node, rel->rd_backend, forknum); if (RelationIsHeap(rel)) - return softCalc ? get_heap_storage_total_bytes_soft(rel, forknum, relpath) : get_heap_storage_total_bytes(rel, forknum, relpath); + return softCalc ? get_heap_storage_total_bytes_soft(relpath) : get_heap_storage_total_bytes(rel, forknum, relpath); return get_ao_storage_total_bytes(rel, relpath, softCalc); } @@ -401,10 +401,9 @@ Datum adb_hba_file_rules(PG_FUNCTION_ARGS) int64 dbsize_calc_size(Form_pg_class pg_class_data) { - RelationData rel; + RelationData rel = {0}; int64 size = 0; - memset(&rel, 0, sizeof(RelationData)); /* * Initialize Relfilenode field of RelationData. 
*/ @@ -467,7 +466,7 @@ dbsize_calc_size(Form_pg_class pg_class_data) * The errors of stat() call are ignored */ static int64 -get_heap_storage_total_bytes_soft(Relation rel, ForkNumber forknum, char *relpath) +get_heap_storage_total_bytes_soft(char *relpath) { int64 totalsize = 0; char segPath[MAXPATHLEN_WITHSEGNO]; From 4a6fc1b5e6bcb7c9355f78ca8edf51cb30434080 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 18 Nov 2024 17:05:48 +0300 Subject: [PATCH 55/69] Simplify gucs --- .../src/arenadata_toolkit_guc.c | 52 +++++-------------- .../src/include/arenadata_toolkit_guc.h | 6 +-- gpcontrib/arenadata_toolkit/src/track_files.c | 20 +++---- 3 files changed, 23 insertions(+), 55 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index f5bb32033fb9..9c817bb5288b 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -18,43 +18,15 @@ char *tracked_rel_kinds = DEFAULT_TRACKED_REL_KINDS; int tracking_worker_naptime_sec = DEFAULT_NAPTIME_SEC; /* - * Variables controlling GUC setting. Only extension functions are allowed + * Variable controlling GUC setting. Only extension functions are allowed * to set GUC during NormalProcessing mode. 
*/ -static bool is_tracked_unlocked = false; -static bool is_get_full_snapshot_on_recovery_unlocked = false; -static bool is_schemas_unlocked = false; -static bool is_relkinds_unlocked = false; -static bool is_relstorages_unlocked = false; +static bool guc_is_unlocked = false; void -tf_guc_unlock_tracked_once(void) +tf_guc_unlock(void) { - is_tracked_unlocked = true; -} - -void -tf_guc_unlock_full_snapshot_on_recovery_once(void) -{ - is_get_full_snapshot_on_recovery_unlocked = true; -} - -void -tf_guc_unlock_schemas_once(void) -{ - is_schemas_unlocked = true; -} - -void -tf_guc_unlock_relkinds_once(void) -{ - is_relkinds_unlocked = true; -} - -void -tf_guc_unlock_relstorages_once(void) -{ - is_relstorages_unlocked = true; + guc_is_unlocked = true; } /* @@ -62,12 +34,12 @@ tf_guc_unlock_relstorages_once(void) * This is not called for RESET, so RESET is not guarded */ static bool -check_guc(bool *toolkit_guc, GucSource source, bool *manual) +check_guc(bool *guc_unlocked, GucSource source, bool *manual) { if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && *toolkit_guc)) + (Gp_role == GP_ROLE_DISPATCH && *guc_unlocked)) { - *toolkit_guc = false; + *guc_unlocked = false; if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && @@ -89,7 +61,7 @@ check_tracked(bool *newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&is_tracked_unlocked, source, &manual)) + if (check_guc(&guc_is_unlocked, source, &manual)) return true; if (manual) @@ -105,7 +77,7 @@ check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source { bool manual = false; - if (check_guc(&is_get_full_snapshot_on_recovery_unlocked, source, &manual)) + if (check_guc(&guc_is_unlocked, source, &manual)) return true; if (manual) @@ -121,7 +93,7 @@ check_relkinds(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&is_relkinds_unlocked, source, &manual)) + if (check_guc(&guc_is_unlocked, 
source, &manual)) return true; if (manual) @@ -137,7 +109,7 @@ check_schemas(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&is_schemas_unlocked, source, &manual)) + if (check_guc(&guc_is_unlocked, source, &manual)) return true; if (manual) @@ -153,7 +125,7 @@ check_relstorages(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&is_relstorages_unlocked, source, &manual)) + if (check_guc(&guc_is_unlocked, source, &manual)) return true; if (manual) diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h index 91d4307a887c..8edb098c0065 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h +++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h @@ -32,11 +32,7 @@ extern char *tracked_rel_storages; extern char *tracked_rel_kinds; extern int tracking_worker_naptime_sec; -void tf_guc_unlock_tracked_once(void); -void tf_guc_unlock_full_snapshot_on_recovery_once(void); +void tf_guc_unlock(void); void tf_guc_define(void); -void tf_guc_unlock_schemas_once(void); -void tf_guc_unlock_relkinds_once(void); -void tf_guc_unlock_relstorages_once(void); #endif /* ARENADATA_TOOLKIT_GUC_H */ diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 4d20491f6c09..d9e09fea7399 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -608,11 +608,11 @@ track_db(Oid dbid, bool reg) v_stmt.args = lappend(NIL, &aconst); v_stmt.is_local = false; - tf_guc_unlock_tracked_once(); + tf_guc_unlock(); AlterDatabaseSet(&stmt); - tf_guc_unlock_tracked_once(); + tf_guc_unlock(); /* Will set the GUC in caller session only on coordinator */ SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? 
"t" : "f", PGC_S_DATABASE, PGC_S_DATABASE); @@ -780,12 +780,12 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) v_stmt.args = lappend(NIL, &aconst); v_stmt.is_local = false; - tf_guc_unlock_full_snapshot_on_recovery_once(); + tf_guc_unlock(); AlterDatabaseSet(&stmt); /* Will set the GUC in caller session only on coordinator */ - tf_guc_unlock_full_snapshot_on_recovery_once(); + tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_snapshot_on_recovery", set ? "t" : "f", PGC_S_DATABASE, PGC_S_DATABASE); @@ -942,12 +942,12 @@ track_schema(const char *schemaName, Oid dbid, bool reg) v_stmt.args = list_make1(&arg); } - tf_guc_unlock_schemas_once(); + tf_guc_unlock(); AlterDatabaseSet(&stmt); /* Will set the GUC in caller session only on coordinator */ - tf_guc_unlock_schemas_once(); + tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_schemas", new_schemas ? new_schemas : DEFAULT_TRACKED_SCHEMAS, PGC_S_DATABASE, PGC_S_DATABASE); @@ -1104,12 +1104,12 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) elog(LOG, "[arenadata_toolkit] setting relkinds %s in database %u for tracking", buf.data, dbid); } - tf_guc_unlock_relkinds_once(); + tf_guc_unlock(); AlterDatabaseSet(&stmt); /* Will set the GUC in caller session only on coordinator */ - tf_guc_unlock_relkinds_once(); + tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relkinds", buf.len ? buf.data : DEFAULT_TRACKED_REL_KINDS, PGC_S_DATABASE, PGC_S_DATABASE); @@ -1212,12 +1212,12 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) elog(LOG, "[arenadata_toolkit] setting relstorages %s in database %u for tracking", buf.data, dbid); } - tf_guc_unlock_relstorages_once(); + tf_guc_unlock(); AlterDatabaseSet(&stmt); /* Will set the GUC in caller session only on coordinator */ - tf_guc_unlock_relstorages_once(); + tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relstorages", buf.len ? 
buf.data : DEFAULT_TRACKED_REL_STORAGES, PGC_S_DATABASE, PGC_S_DATABASE); From 2c87274fd85f4d6bfa2b94309e3308e59e4c6e44 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 18 Nov 2024 18:12:36 +0300 Subject: [PATCH 56/69] track_files.c changes --- gpcontrib/arenadata_toolkit/src/track_files.c | 127 ++++++++++++------ 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index d9e09fea7399..2c687b65d318 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -61,7 +61,14 @@ PG_FUNCTION_INFO_V1(tracking_track_version); #define Anum_track_gp_segment_relkind ((AttrNumber) 7) #define Anum_track_gp_segment_relstorage ((AttrNumber) 8) -/* Preserved state among the calls of tracking_get_track_main */ +/* + * Macros for string constants, which are used during work with GUCs + */ +#define TRACKING_SCHEMAS_PREFIX "arenadata_toolkit.tracking_schemas=" +#define TRACKING_RELSTORAGES_PREFIX "arenadata_toolkit.tracking_relstorages=" +#define TRACKING_RELKINDS_PREFIX "arenadata_toolkit.tracking_relkinds=" + +/* Preserved state among the calls of tracking_get_track */ typedef struct { Relation pg_class_rel; /* pg_class relation */ @@ -253,8 +260,8 @@ get_filters_from_guc() if (!isnull) { ArrayType *array; - Datum *elems; - bool *nulls; + Datum *elems = NULL; + bool *nulls = NULL; int nelems; array = DatumGetArrayTypeP(str_datum); @@ -264,16 +271,35 @@ get_filters_from_guc() { if (nulls[i]) continue; + char *str = TextDatumGetCString(elems[i]); - if (strncmp(str, "arenadata_toolkit.tracking_schemas=", 35) == 0) - current_schemas = pstrdup(str + 35); - else if (strncmp(str, "arenadata_toolkit.tracking_relstorages=", 39) == 0) - current_relstorages = pstrdup(str + 39); - else if (strncmp(str, "arenadata_toolkit.tracking_relkinds=", 36) == 0) - current_relkinds = pstrdup(str + 36); + if (strncmp(str, + 
TRACKING_SCHEMAS_PREFIX, + sizeof(TRACKING_SCHEMAS_PREFIX) - 1) == 0) + { + current_schemas = pstrdup(str + sizeof(TRACKING_SCHEMAS_PREFIX) - 1); + } + else if (strncmp(str, + TRACKING_RELSTORAGES_PREFIX, + sizeof(TRACKING_RELSTORAGES_PREFIX) - 1) == 0) + { + current_relstorages = pstrdup(str + sizeof(TRACKING_RELSTORAGES_PREFIX) - 1); + } + else if (strncmp(str, + TRACKING_RELKINDS_PREFIX, + sizeof(TRACKING_RELKINDS_PREFIX) - 1) == 0) + { + current_relkinds = pstrdup(str + sizeof(TRACKING_RELKINDS_PREFIX) - 1); + } + pfree(str); } + + if (elems) + pfree(elems); + if (nulls) + pfree(nulls); } } systable_endscan(scan); @@ -409,7 +435,7 @@ tracking_get_track(PG_FUNCTION_ARGS) */ if (tf_get_global_state.bloom == NULL) { - tf_get_global_state.bloom = palloc0(full_bloom_size(bloom_size)); + tf_get_global_state.bloom = palloc(full_bloom_size(bloom_size)); bloom_init(bloom_size, tf_get_global_state.bloom); if (version == ControlVersion) @@ -500,19 +526,28 @@ tracking_get_track(PG_FUNCTION_ARGS) break; } - datums[Anum_track_gp_segment_relkind] = heap_getattr(pg_class_tuple, Anum_pg_class_relkind, RelationGetDescr(state->pg_class_rel), &nulls[7]); + datums[Anum_track_gp_segment_relkind] = heap_getattr(pg_class_tuple, + Anum_pg_class_relkind, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relkind]); relkind = DatumGetChar(datums[Anum_track_gp_segment_relkind]); if (!kind_is_tracked(relkind, tf_get_global_state.relkinds)) continue; - datums[Anum_track_gp_segment_relstorage] = heap_getattr(pg_class_tuple, Anum_pg_class_relstorage, RelationGetDescr(state->pg_class_rel), &nulls[8]); + datums[Anum_track_gp_segment_relstorage] = heap_getattr(pg_class_tuple, + Anum_pg_class_relstorage, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relstorage]); relstorage = DatumGetChar(datums[Anum_track_gp_segment_relstorage]); if (!kind_is_tracked(relstorage, tf_get_global_state.relstorages)) continue; - datums[Anum_track_gp_segment_relnamespace] 
= heap_getattr(pg_class_tuple, Anum_pg_class_relnamespace, RelationGetDescr(state->pg_class_rel), &nulls[6]); + datums[Anum_track_gp_segment_relnamespace] = heap_getattr(pg_class_tuple, + Anum_pg_class_relnamespace, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relnamespace]); relnamespace = DatumGetObjectId(datums[Anum_track_gp_segment_relnamespace]); if (!schema_is_tracked(relnamespace)) @@ -520,9 +555,15 @@ tracking_get_track(PG_FUNCTION_ARGS) datums[Anum_track_relid] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); - datums[Anum_track_name] = heap_getattr(pg_class_tuple, Anum_pg_class_relname, RelationGetDescr(state->pg_class_rel), &nulls[1]); + datums[Anum_track_name] = heap_getattr(pg_class_tuple, + Anum_pg_class_relname, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_name]); - datums[Anum_track_relfilenode] = heap_getattr(pg_class_tuple, Anum_pg_class_relfilenode, RelationGetDescr(state->pg_class_rel), &nulls[2]); + datums[Anum_track_relfilenode] = heap_getattr(pg_class_tuple, + Anum_pg_class_relfilenode, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_relfilenode]); filenode = DatumGetObjectId(datums[Anum_track_relfilenode]); if (nulls[Anum_track_relfilenode]) @@ -615,7 +656,7 @@ track_db(Oid dbid, bool reg) tf_guc_unlock(); /* Will set the GUC in caller session only on coordinator */ SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? "t" : "f", - PGC_S_DATABASE, PGC_S_DATABASE); + PGC_SUSET, PGC_S_DATABASE); } if (!reg) @@ -787,7 +828,7 @@ tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS) /* Will set the GUC in caller session only on coordinator */ tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_snapshot_on_recovery", set ? 
"t" : "f", - PGC_S_DATABASE, PGC_S_DATABASE); + PGC_SUSET, PGC_S_DATABASE); PG_RETURN_BOOL(true); } @@ -885,7 +926,7 @@ track_schema(const char *schemaName, Oid dbid, bool reg) if (!isnull) { ArrayType *array; - Datum *elems; + Datum *elems = NULL; int nelems; array = DatumGetArrayTypeP(str_datum); @@ -895,13 +936,17 @@ track_schema(const char *schemaName, Oid dbid, bool reg) { char *str = TextDatumGetCString(elems[i]); - if (strncmp(str, "arenadata_toolkit.tracking_schemas=", 35) == 0) + if (strncmp(str, TRACKING_SCHEMAS_PREFIX, + sizeof(TRACKING_SCHEMAS_PREFIX) - 1) == 0) { - current_schemas = pstrdup(str + 35); + current_schemas = pstrdup(str + sizeof(TRACKING_SCHEMAS_PREFIX) - 1); break; } pfree(str); } + + if (elems) + pfree(elems); } } systable_endscan(scan); @@ -950,7 +995,7 @@ track_schema(const char *schemaName, Oid dbid, bool reg) tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_schemas", new_schemas ? new_schemas : DEFAULT_TRACKED_SCHEMAS, - PGC_S_DATABASE, PGC_S_DATABASE); + PGC_SUSET, PGC_S_DATABASE); if (current_schemas) pfree(current_schemas); @@ -1011,18 +1056,18 @@ is_valid_relkind(char relkind) { switch (relkind) { - case 'r': - case 'i': - case 'S': - case 't': - case 'v': - case 'c': - case 'f': - case 'u': - case 'm': - case 'o': - case 'b': - case 'M': + case RELKIND_RELATION: + case RELKIND_INDEX: + case RELKIND_SEQUENCE: + case RELKIND_TOASTVALUE: + case RELKIND_VIEW: + case RELKIND_COMPOSITE_TYPE: + case RELKIND_FOREIGN_TABLE: + case RELKIND_UNCATALOGED: + case RELKIND_MATVIEW: + case RELKIND_AOSEGMENTS: + case RELKIND_AOBLOCKDIR: + case RELKIND_AOVISIMAP: return true; default: return false; @@ -1112,7 +1157,7 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relkinds", buf.len ? 
buf.data : DEFAULT_TRACKED_REL_KINDS, - PGC_S_DATABASE, PGC_S_DATABASE); + PGC_SUSET, PGC_S_DATABASE); pfree(buf.data); @@ -1124,12 +1169,12 @@ is_valid_relstorage(char relstorage) { switch (relstorage) { - case 'h': - case 'a': - case 'c': - case 'x': - case 'v': - case 'f': + case RELSTORAGE_HEAP: + case RELSTORAGE_AOROWS: + case RELSTORAGE_AOCOLS: + case RELSTORAGE_EXTERNAL: + case RELSTORAGE_VIRTUAL: + case RELSTORAGE_FOREIGN: return true; default: return false; @@ -1220,7 +1265,7 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relstorages", buf.len ? buf.data : DEFAULT_TRACKED_REL_STORAGES, - PGC_S_DATABASE, PGC_S_DATABASE); + PGC_SUSET, PGC_S_DATABASE); pfree(buf.data); From 3f73780946ab09b3aa745a62fb86aa1d27d8c481 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 19 Nov 2024 14:40:24 +0300 Subject: [PATCH 57/69] remove if clause --- gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index bf4f4fec8822..f3a5926ec118 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -250,8 +250,7 @@ arenadata_toolkit_main(Datum main_arg) current_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(current_time_timeout); } - if (got_sigterm) - ereport(LOG, (errmsg("[arenadata toolkit] stop worker process"))); + ereport(LOG, (errmsg("[arenadata toolkit] stop worker process"))); proc_exit(0); } From 2fb8ff47a556318ea72cd6b0e1b46876cc6cf561 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 19 Nov 2024 18:22:00 +0300 Subject: [PATCH 58/69] Refactor --- .../expected/arenadata_toolkit_test.out | 10 +-- .../isolation2/expected/track_concurrent.out | 61 +++++++++++++++++++ .../isolation2/sql/track_concurrent.sql | 22 +++++++ 
.../src/arenadata_toolkit_guc.c | 16 ++--- .../src/arenadata_toolkit_worker.c | 8 +-- gpcontrib/arenadata_toolkit/src/bloom.c | 20 +++--- gpcontrib/arenadata_toolkit/src/bloom_set.c | 29 ++------- gpcontrib/arenadata_toolkit/src/drops_track.c | 2 +- .../src/include/arenadata_toolkit_guc.h | 2 +- .../arenadata_toolkit/src/include/bloom.h | 22 +++---- .../arenadata_toolkit/src/include/bloom_set.h | 5 +- gpcontrib/arenadata_toolkit/src/track_files.c | 5 +- 12 files changed, 131 insertions(+), 71 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index 8a27d8a7b9d4..0e34a7dee115 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -108,10 +108,10 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} db_files_history | table | a | {owner=arwdDxt/owner} db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_p202411 | table | a | {owner=arwdDxt/owner} - db_files_history_backup_20241117t005023 | table | a | - db_files_history_backup_20241117t005023_1_prt_default_part | table | a | - db_files_history_backup_20241117t005023_1_prt_p202411 | table | a | + db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} + db_files_history_backup_YYYYMMDDtHHMMSS | table | a | + db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_default_part | table | a | + db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_pYYYYMM | table | a | is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} operation_exclude | table | a | tables_track | table | v | {owner=arwdDxt/owner,=r/owner} @@ -206,7 +206,7 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} 
db_files_history | table | a | {owner=arwdDxt/owner} db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_p202411 | table | a | {owner=arwdDxt/owner} + db_files_history_1_prt_p202410 | table | a | {owner=arwdDxt/owner} is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} operation_exclude | table | a | {owner=arwdDxt/owner} tables_track | table | v | {owner=arwdDxt/owner,=r/owner} diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out index 4285f4256b98..16a58472033b 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out @@ -35,6 +35,67 @@ ROLLBACK t (1 row) +-- Test uncommited file creation is not seen from other transaction until the +-- first one is commited. +1: BEGIN; +BEGIN +1: CREATE TABLE tracking_t1 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +CREATE 100 + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +--------------------------+-------+-------+-------+---------+------------ + gp_distribution_policy | 32768 | a | -1 | r | h + gp_policy_localoid_index | 65536 | a | -1 | i | h + gp_distribution_policy | 32768 | a | 1 | r | h + gp_policy_localoid_index | 65536 | a | 1 | i | h + gp_distribution_policy | 32768 | a | 0 | r | h + gp_policy_localoid_index | 65536 | a | 0 | i | h + gp_distribution_policy | 32768 | a | 2 | r | h + gp_policy_localoid_index | 65536 | a | 2 | i | h +(8 rows) + +1: COMMIT; +COMMIT + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | h + tracking_t1 | 32768 | a | 1 
| r | h + tracking_t1 | 32768 | a | 0 | r | h + tracking_t1 | 32768 | a | 2 | r | h +(4 rows) + +-- Test file creation is seen from other transaction after the first transaction +-- has taken the track. +1: BEGIN; +BEGIN +1: CREATE TABLE tracking_t2 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +CREATE 100 +1: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t2 | 32768 | a | 1 | r | h + tracking_t2 | 32768 | a | 2 | r | h + tracking_t2 | 32768 | a | 0 | r | h +(3 rows) +1: COMMIT; +COMMIT + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t2 | 0 | a | -1 | r | h + tracking_t2 | 32768 | a | 2 | r | h + tracking_t2 | 32768 | a | 0 | r | h + tracking_t2 | 32768 | a | 1 | r | h +(4 rows) + +1: DROP TABLE tracking_t1; +DROP +1: DROP TABLE tracking_t2; +DROP 1: SELECT arenadata_toolkit.tracking_unregister_db(); tracking_unregister_db ------------------------ diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql index efef94ef50b7..9ddc4a31a01c 100644 --- a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql @@ -39,6 +39,28 @@ pg_class_count AS ( SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; +-- Test uncommited file creation is not seen from other transaction until the +-- first one is commited. 
+1: BEGIN; +1: CREATE TABLE tracking_t1 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +1: COMMIT; + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +-- Test file creation is seen from other transaction after the first transaction +-- has taken the track. +1: BEGIN; +1: CREATE TABLE tracking_t2 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +1: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; +1: COMMIT; + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +1: DROP TABLE tracking_t1; +1: DROP TABLE tracking_t2; 1: SELECT arenadata_toolkit.tracking_unregister_db(); 1: DROP EXTENSION arenadata_toolkit; diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index 9c817bb5288b..afc22701c738 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -34,12 +34,12 @@ tf_guc_unlock(void) * This is not called for RESET, so RESET is not guarded */ static bool -check_guc(bool *guc_unlocked, GucSource source, bool *manual) +check_guc(GucSource source, bool *manual) { if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || - (Gp_role == GP_ROLE_DISPATCH && *guc_unlocked)) + (Gp_role == GP_ROLE_DISPATCH && guc_is_unlocked)) { - *guc_unlocked = false; + guc_is_unlocked = false; if (source != PGC_S_DATABASE && source != PGC_S_DEFAULT && @@ -61,7 +61,7 @@ check_tracked(bool *newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&guc_is_unlocked, source, &manual)) + if (check_guc(source, &manual)) return true; if (manual) @@ -77,7 +77,7 @@ check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source { bool manual = false; - if 
(check_guc(&guc_is_unlocked, source, &manual)) + if (check_guc(source, &manual)) return true; if (manual) @@ -93,7 +93,7 @@ check_relkinds(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&guc_is_unlocked, source, &manual)) + if (check_guc(source, &manual)) return true; if (manual) @@ -109,7 +109,7 @@ check_schemas(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&guc_is_unlocked, source, &manual)) + if (check_guc(source, &manual)) return true; if (manual) @@ -125,7 +125,7 @@ check_relstorages(char **newval, void **extra, GucSource source) { bool manual = false; - if (check_guc(&guc_is_unlocked, source, &manual)) + if (check_guc(source, &manual)) return true; if (manual) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c index f3a5926ec118..bb28ae85e337 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c @@ -158,11 +158,11 @@ worker_tracking_status_check() if (list_length(tracked_dbs) > 0) track_dbs(tracked_dbs); - CommitTransactionCommand(); - if (tracked_dbs) list_free_deep(tracked_dbs); + CommitTransactionCommand(); + pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized); } @@ -258,9 +258,7 @@ arenadata_toolkit_main(Datum main_arg) void arenadata_toolkit_worker_register() { - BackgroundWorker worker; - - memset(&worker, 0, sizeof(BackgroundWorker)); + BackgroundWorker worker = {0}; worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION; worker.bgw_start_time = BgWorkerStart_RecoveryFinished; diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c index 38c2f8ea9cc0..26062395b423 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom.c +++ b/gpcontrib/arenadata_toolkit/src/bloom.c @@ -81,7 +81,7 @@ tracking_hashes(Oid node, uint32 bloom_size, uint32 
*out_hashes) * Returns true if element might be in set, false if definitely not in set. */ bool -bloom_isset(bloom_t * bloom, Oid relnode) +bloom_isset(bloom_t *bloom, Oid relnode) { uint32 hashes[MAX_BLOOM_HASH_FUNCS]; @@ -110,7 +110,7 @@ bloom_isset(bloom_t * bloom, Oid relnode) * relnode - relation file node OID to insert */ void -bloom_set_bits(bloom_t * bloom, Oid relnode) +bloom_set_bits(bloom_t *bloom, Oid relnode) { uint32 hashes[MAX_BLOOM_HASH_FUNCS]; @@ -122,7 +122,7 @@ bloom_set_bits(bloom_t * bloom, Oid relnode) } void -bloom_init(const uint32 bloom_size, bloom_t * bloom) +bloom_init(const uint32 bloom_size, bloom_t *bloom) { bloom->size = bloom_size; bloom->current_bloom = bloom->map; @@ -155,21 +155,21 @@ init_bloom_invariants(void) } void -bloom_set_all(bloom_t * bloom) +bloom_set_all(bloom_t *bloom) { memset(bloom->current_bloom, 0xFF, bloom->size); bloom->is_set_all = 1; } void -bloom_clear(bloom_t * bloom) +bloom_clear(bloom_t *bloom) { memset(bloom->current_bloom, 0, bloom->size); bloom->is_set_all = 0; } void -bloom_merge(bloom_t * dst, bloom_t * src) +bloom_merge(bloom_t *dst, bloom_t *src) { if (src->is_set_all) { @@ -183,7 +183,7 @@ bloom_merge(bloom_t * dst, bloom_t * src) } void -bloom_copy(bloom_t * dest, bloom_t * src) +bloom_copy(bloom_t *dest, bloom_t *src) { dest->size = src->size; memcpy(dest->current_bloom, src->current_bloom, src->size); @@ -191,7 +191,7 @@ bloom_copy(bloom_t * dest, bloom_t * src) } void -bloom_switch_current(bloom_t * bloom) +bloom_switch_current(bloom_t *bloom) { uint8 *map_base = bloom->map; uint8 *map_off = bloom->map + bloom->size; @@ -201,7 +201,7 @@ bloom_switch_current(bloom_t * bloom) } uint8 * -bloom_get_other(bloom_t * bloom) +bloom_get_other(bloom_t *bloom) { uint8 *map_base = bloom->map; uint8 *map_off = bloom->map + bloom->size; @@ -210,7 +210,7 @@ bloom_get_other(bloom_t * bloom) } void -bloom_merge_internal(bloom_t * bloom) +bloom_merge_internal(bloom_t *bloom) { if (bloom->is_set_all) return; diff 
--git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c index 6b60ab2a9bfd..b48407e6bea9 100644 --- a/gpcontrib/arenadata_toolkit/src/bloom_set.c +++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c @@ -16,7 +16,7 @@ bloom_entry_size(uint32 size) } static inline void * -bloom_entry_get(bloom_set_t * set, int idx) +bloom_entry_get(bloom_set_t *set, int idx) { return (void *) ((uint8 *) set->bloom_entries + idx * bloom_entry_size(set->bloom_size)); } @@ -37,12 +37,12 @@ bloom_set_check_state(void) } static void -bloom_entry_init(const uint32_t bloom_size, bloom_entry_t * bloom_entry) +bloom_entry_init(const uint32_t size, bloom_entry_t *bloom_entry) { bloom_entry->dbid = InvalidOid; bloom_entry->master_version = InvalidVersion; bloom_entry->work_version = InvalidVersion; - bloom_init(bloom_size, &bloom_entry->bloom); + bloom_init(size, &bloom_entry->bloom); } @@ -186,28 +186,9 @@ bloom_set_set(Oid dbid, Oid relNode) } -/* Find bloom by dbid, copy all bytes to new filter */ -bool -bloom_set_move(Oid dbid, bloom_t * dest) -{ - bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE); - - if (ctx.entry) - { - bloom_copy(dest, &ctx.entry->bloom); - bloom_clear(&ctx.entry->bloom); - bloom_set_release(&ctx); - return true; - } - - bloom_set_release(&ctx); - - return false; -} - /* Find bloom by dbid, merge bytes from another bloom to it */ bool -bloom_set_merge(Oid dbid, bloom_t * from) +bloom_set_merge(Oid dbid, bloom_t *from) { if (!from) return false; @@ -256,7 +237,7 @@ bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode) return ctx; } void -bloom_set_release(bloom_op_ctx_t * ctx) +bloom_set_release(bloom_op_ctx_t *ctx) { if (ctx->entry_lock) LWLockRelease(ctx->entry_lock); diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c index 5e75116ce80d..f1106e798f63 100644 --- a/gpcontrib/arenadata_toolkit/src/drops_track.c +++ 
b/gpcontrib/arenadata_toolkit/src/drops_track.c @@ -42,7 +42,7 @@ static drops_track_t *drops_track; LWLock *drops_track_lock; static inline drops_track_node_t * -track_node_get(drops_track_t * track, int i) +track_node_get(drops_track_t *track, int i) { return (drops_track_node_t *) (track->nodes + i * sizeof(drops_track_node_t)); } diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h index 8edb098c0065..e328ec4bcfa0 100644 --- a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h +++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h @@ -21,7 +21,7 @@ #define MAX_BLOOM_SIZE_BYTES 128000000 #define MAX_DB_TRACK_COUNT 1000 #define MAX_DROPS_COUNT 1000000 -#define MAX_NAPTIME_SEC OID_MAX & 0x7FFFFFFF +#define MAX_NAPTIME_SEC (OID_MAX & 0x7FFFFFFF) extern int bloom_size; extern int db_track_count; diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h index 986e2c9f37ad..2ac4ad7ef821 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -13,7 +13,7 @@ typedef struct uint8 *current_bloom; uint32 size; /* equal to bloom_size, half a map size */ uint8 is_set_all; /* indicates all bits are set */ - uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* core bit array */ ; + uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* core bit array */ } bloom_t; static inline Size @@ -25,16 +25,16 @@ full_bloom_size(uint32 size) extern uint64 bloom_hash_seed; extern int bloom_hash_num; -void bloom_init(const uint32 bloom_size, bloom_t * bloom); +void bloom_init(const uint32 bloom_size, bloom_t *bloom); void init_bloom_invariants(void); -bool bloom_isset(bloom_t * bloom, Oid relnode); -void bloom_set_bits(bloom_t * bloom, Oid relnode); -void bloom_set_all(bloom_t * bloom); -void bloom_clear(bloom_t * bloom); -void bloom_merge(bloom_t * dst, bloom_t * src); -void bloom_copy(bloom_t 
* src, bloom_t * dest); -void bloom_switch_current(bloom_t * bloom); -uint8 *bloom_get_other(bloom_t * bloom); -void bloom_merge_internal(bloom_t * bloom); +bool bloom_isset(bloom_t *bloom, Oid relnode); +void bloom_set_bits(bloom_t *bloom, Oid relnode); +void bloom_set_all(bloom_t *bloom); +void bloom_clear(bloom_t *bloom); +void bloom_merge(bloom_t *dst, bloom_t *src); +void bloom_copy(bloom_t *dst, bloom_t *src); +void bloom_switch_current(bloom_t *bloom); +uint8 *bloom_get_other(bloom_t *bloom); +void bloom_merge_internal(bloom_t *bloom); #endif /* BLOOM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h index f0b6f81c3d39..35345e63e3aa 100644 --- a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -57,12 +57,11 @@ void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size); bool bloom_set_bind(Oid dbid); void bloom_set_unbind(Oid dbid); void bloom_set_set(Oid dbid, Oid relNode); -bool bloom_set_move(Oid dbid, bloom_t * dest); -bool bloom_set_merge(Oid dbid, bloom_t * from); +bool bloom_set_merge(Oid dbid, bloom_t *from); bool bloom_set_trigger_bits(Oid dbid, bool on); bool bloom_set_is_all_bits_triggered(Oid dbid); bloom_op_ctx_t bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode); -void bloom_set_release(bloom_op_ctx_t * ctx); +void bloom_set_release(bloom_op_ctx_t *ctx); LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); #endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 2c687b65d318..525909aaf6ef 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -463,10 +463,9 @@ tracking_get_track(PG_FUNCTION_ARGS) } /* - * Let's retrieve tracking information only once for the transaction. + * Let's retrieve tracking information. 
*/ - if (tf_get_global_state.schema_oids == NIL) - get_filters_from_guc(); + get_filters_from_guc(); if (tf_get_global_state.relstorages == 0 || tf_get_global_state.relkinds == 0 || From 6dfaeca1cf61e91e6df035706aa1d350836514f0 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Tue, 19 Nov 2024 19:54:01 +0300 Subject: [PATCH 59/69] readme --- gpcontrib/arenadata_toolkit/README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index 1f7b8c0ccfca..3067869a8ea8 100644 --- a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -11,11 +11,15 @@ Since extension uses shared memory, configuration on all GPDB segments must be c gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' ``` Extension may track restricted number of databases. The maximum number of them is defined by GUC +|||| +--|--|-- | arenadata_toolkit.tracking_db_track_count | Need restart |Possible values [1, 1000]; Default 5| -|--|--|--| + For each tracked database there allocated a Bloom filter in shared memory. 
The size of each filter is controlled via +|||| +--|--|-- | arenadata_toolkit.tracking_bloom_size | Need restart |Possible values (bytes) [64, 128000000] Default 1048576| -|--|--|--| + The specific database can be bound to unoccupied filter with function ```shell script psql -d my_db -c select arenadata_toolkit.tracking_register_db() From 05a0a248968a318aeb9f057ed363adac39572ac7 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 20 Nov 2024 07:47:29 +0300 Subject: [PATCH 60/69] int64 --- gpcontrib/arenadata_toolkit/src/track_files.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 525909aaf6ef..4ed8b2162beb 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -1471,7 +1471,7 @@ tracking_track_version(PG_FUNCTION_ARGS) errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId))); } - current_version = (int64) ctx.entry->master_version; + current_version = ctx.entry->master_version; bloom_set_release(&ctx); } From bfd00a82a48eeb4dd9732731bbdc4f16274b1311 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 20 Nov 2024 09:34:32 +0300 Subject: [PATCH 61/69] isolation2 tests in main installcheck --- gpcontrib/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 1ef54b62b414..17634644ecf3 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -105,4 +105,5 @@ installcheck: $(MAKE) -C gp_subtransaction_overflow installcheck $(MAKE) -C gp_check_functions installcheck $(MAKE) -C arenadata_toolkit installcheck + $(MAKE) -C arenadata_toolkit/isolation2 installcheck From 1d7a32564770990b328c7fa1bef5f36ba4e09b4c Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Wed, 20 Nov 2024 13:08:50 +0300 Subject: [PATCH 62/69] fix typos --- .../expected/arenadata_toolkit_guc.out | 42 ++++++++++--------- 
.../expected/arenadata_toolkit_tracking.out | 3 -- .../sql/arenadata_toolkit_guc.sql | 4 +- .../sql/arenadata_toolkit_tracking.sql | 2 - 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index 03920aefd58b..f7dbef215dbc 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -140,8 +140,10 @@ setdatabase=oid WHERE datname=current_database(); (1 row) -- Prohibit manual GUC setting. -SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; -ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; +SET arenadata_toolkit.tracking_snapshot_on_recovery = false; +ERROR: cannot change tracking status outside the tracking_set_snapshot_on_recovery function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_snapshot_on_recovery = false; +ERROR: cannot change tracking status outside the tracking_set_snapshot_on_recovery function -- Test arenadata_toolkit.tracking_relstorages GUC SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); tracking_set_relstorages @@ -151,9 +153,9 @@ SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,"arenadata_toolkit.tracking_relstorages=f,a,x"} + datname | setconfig 
+-----------+----------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relstorages=f,a,x"} (1 row) SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); @@ -164,9 +166,9 @@ SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v} + datname | setconfig +-----------+----------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v} (1 row) SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); @@ -186,9 +188,9 @@ SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | 
{arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} + datname | setconfig +-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} (1 row) SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); @@ -199,9 +201,9 @@ SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o"} + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o"} (1 row) SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); @@ -227,9 +229,9 @@ SELECT 
arenadata_toolkit.tracking_register_schema('public'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o","arenadata_toolkit.tracking_schemas=arenadata_toolkit,public"} + datname | setconfig +-----------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o","arenadata_toolkit.tracking_schemas=arenadata_toolkit,public"} (1 row) SELECT arenadata_toolkit.tracking_unregister_schema('public'); @@ -240,9 +242,9 @@ SELECT arenadata_toolkit.tracking_unregister_schema('public'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | 
{arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_set_snapshot_on_recovery=false,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o",arenadata_toolkit.tracking_schemas=arenadata_toolkit} + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o",arenadata_toolkit.tracking_schemas=arenadata_toolkit} (1 row) SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index be0cf3f1abfa..142ea1004eb5 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -190,9 +190,6 @@ FROM arenadata_toolkit.tables_track; (7 rows) -- 10. Test relkind filtering. -CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) -WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); -ERROR: relation "tracking_t1" already exists INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); CREATE INDEX ON arenadata_toolkit.tracking_t1(i); -- Want to see index and block dir relation. 
diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index ddd67eb7f813..164aa717807d 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -100,9 +100,9 @@ SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); -- Prohibit manual GUC setting. -SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; +SET arenadata_toolkit.tracking_snapshot_on_recovery = false; -ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_set_snapshot_on_recovery = false; +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_snapshot_on_recovery = false; -- Test arenadata_toolkit.tracking_relstorages GUC SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 3324ff618747..6748a192f4c0 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -102,8 +102,6 @@ SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; -- 10. Test relkind filtering. 
-CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) -WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); CREATE INDEX ON arenadata_toolkit.tracking_t1(i); From 881c81e69d5f5859bbdc1c57063982a45458f599 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Thu, 21 Nov 2024 09:14:48 +0300 Subject: [PATCH 63/69] Minor changes --- .../src/arenadata_toolkit_guc.c | 49 +++---------------- gpcontrib/arenadata_toolkit/src/track_files.c | 14 +++--- 2 files changed, 14 insertions(+), 49 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c index afc22701c738..4797c3eb7731 100644 --- a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c +++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c @@ -34,7 +34,7 @@ tf_guc_unlock(void) * This is not called for RESET, so RESET is not guarded */ static bool -check_guc(GucSource source, bool *manual) +check_guc(GucSource source, const char *handle) { if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE || (Gp_role == GP_ROLE_DISPATCH && guc_is_unlocked)) @@ -49,7 +49,7 @@ check_guc(GucSource source, bool *manual) return true; } - *manual = true; + GUC_check_errmsg("cannot change tracking status outside the %s function", handle); return false; } @@ -59,14 +59,7 @@ check_guc(GucSource source, bool *manual) static bool check_tracked(bool *newval, void **extra, GucSource source) { - bool manual = false; - - if (check_guc(source, &manual)) - return true; - - if (manual) - GUC_check_errmsg("cannot change tracking status outside the tracking_register_db function"); - return false; + return check_guc(source, "tracking_register_db"); } /* @@ -75,14 +68,7 @@ check_tracked(bool *newval, void **extra, GucSource source) static bool check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source) { - bool manual = false; - - if (check_guc(source, 
&manual)) - return true; - - if (manual) - GUC_check_errmsg("cannot change tracking status outside the tracking_set_snapshot_on_recovery function"); - return false; + return check_guc(source, "tracking_set_snapshot_on_recovery"); } /* @@ -91,14 +77,7 @@ check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source static bool check_relkinds(char **newval, void **extra, GucSource source) { - bool manual = false; - - if (check_guc(source, &manual)) - return true; - - if (manual) - GUC_check_errmsg("cannot change tracking status outside the tracking_register_relkinds function"); - return false; + return check_guc(source, "tracking_register_relkinds"); } /* @@ -107,14 +86,7 @@ check_relkinds(char **newval, void **extra, GucSource source) static bool check_schemas(char **newval, void **extra, GucSource source) { - bool manual = false; - - if (check_guc(source, &manual)) - return true; - - if (manual) - GUC_check_errmsg("cannot change tracking status outside the tracking_register_schema function"); - return false; + return check_guc(source, "tracking_register_schema"); } /* @@ -123,14 +95,7 @@ check_schemas(char **newval, void **extra, GucSource source) static bool check_relstorages(char **newval, void **extra, GucSource source) { - bool manual = false; - - if (check_guc(source, &manual)) - return true; - - if (manual) - GUC_check_errmsg("cannot change tracking status outside the tracking_register_relstorages function"); - return false; + return check_guc(source, "tracking_register_relstorages"); } void diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 4ed8b2162beb..06648367a112 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -93,7 +93,7 @@ typedef struct static tf_get_global_state_t tf_get_global_state = {0}; static bool callbackRegistered = false; -static uint32 current_version = InvalidVersion; +static uint32 
CurrentVersion = InvalidVersion; static bool isExecutorExplainMode = false; ProcessUtility_hook_type next_ProcessUtility_hook = NULL; @@ -146,7 +146,7 @@ xact_end_version_callback(XactEvent event, void *arg) bloom_set_release(&ctx); callbackRegistered = false; - current_version = InvalidVersion; + CurrentVersion = InvalidVersion; isExecutorExplainMode = false; } @@ -1404,7 +1404,7 @@ explain_detector_ProcessUtility(Node *parsetree, static void track_ExecutorEnd(QueryDesc *queryDesc) { - current_version = ControlVersion; + CurrentVersion = ControlVersion; if (next_ExecutorEnd_hook) next_ExecutorEnd_hook(queryDesc); @@ -1424,8 +1424,8 @@ track_setup_executor_hooks(void) void track_uninstall_executor_hooks(void) { - ProcessUtility_hook = next_ProcessUtility_hook == standard_ProcessUtility ? NULL : next_ProcessUtility_hook; - ExecutorEnd_hook = next_ExecutorEnd_hook == standard_ExecutorEnd ? NULL : next_ExecutorEnd_hook; + ProcessUtility_hook = (next_ProcessUtility_hook == standard_ProcessUtility) ? NULL : next_ProcessUtility_hook; + ExecutorEnd_hook = (next_ExecutorEnd_hook == standard_ExecutorEnd) ? NULL : next_ExecutorEnd_hook; } /* @@ -1471,9 +1471,9 @@ tracking_track_version(PG_FUNCTION_ARGS) errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId))); } - current_version = ctx.entry->master_version; + CurrentVersion = ctx.entry->master_version; bloom_set_release(&ctx); } - PG_RETURN_INT64((int64) current_version); + PG_RETURN_INT64((int64) CurrentVersion); } From 510d87ede600533ad8e9634eb0694fe3ff6616c9 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 25 Nov 2024 17:13:07 +0300 Subject: [PATCH 64/69] Fix size calculation of AO table size.
--- gpcontrib/arenadata_toolkit/src/dbsize.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 0ad1b3f6d96b..58930d7732e3 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -514,6 +514,9 @@ calculate_ao_storage_perSegFile_soft(const int segno, void *ctx) if (stat(segPath, &fst) < 0) { + if (errno == ENOENT) + return false; + ereport(DEBUG1, (errcode_for_file_access(), errmsg("[arenadata_toolkit] could not access file %s: %m", segPath))); } From f2151130e94939ac98d71005acc6a957bcaf4986 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Mon, 25 Nov 2024 17:16:03 +0300 Subject: [PATCH 65/69] Return false on any error --- gpcontrib/arenadata_toolkit/src/dbsize.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/dbsize.c b/gpcontrib/arenadata_toolkit/src/dbsize.c index 58930d7732e3..d2efd367962c 100644 --- a/gpcontrib/arenadata_toolkit/src/dbsize.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -514,11 +514,9 @@ calculate_ao_storage_perSegFile_soft(const int segno, void *ctx) if (stat(segPath, &fst) < 0) { - if (errno == ENOENT) - return false; - ereport(DEBUG1, (errcode_for_file_access(), errmsg("[arenadata_toolkit] could not access file %s: %m", segPath))); + return false; } else calcCtx->total_size += fst.st_size; From 4eb6e14eca0da28f0f628f3f823282ccfc208a37 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 29 Nov 2024 04:19:19 +0300 Subject: [PATCH 66/69] Change default parameters handling --- .../expected/arenadata_toolkit_guc.out | 91 ++++++++++++---- .../expected/arenadata_toolkit_tracking.out | 101 +++++++++++++++--- .../sql/arenadata_toolkit_guc.sql | 24 +++++ .../sql/arenadata_toolkit_tracking.sql | 28 ++++- gpcontrib/arenadata_toolkit/src/track_files.c | 73 ++++++------- 5 files changed, 239 insertions(+), 78 deletions(-) diff --git 
a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out index f7dbef215dbc..e8f64f6443c3 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -174,11 +174,26 @@ setdatabase=oid WHERE datname=current_database(); SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); ERROR: Invalid relstorage type: d HINT: Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f' +SELECT arenadata_toolkit.tracking_set_relstorages(''); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=} +(1 row) + -- Prohibit manual GUC setting. SET arenadata_toolkit.tracking_relstorages = "h, a, x"; ERROR: cannot change tracking status outside the tracking_register_relstorages function ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; ERROR: cannot change tracking status outside the tracking_register_relstorages function +-- Resetting case is allowed. 
+ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relstorages; -- Test arenadata_toolkit.tracking_relkinds GUC SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); tracking_set_relkinds @@ -188,9 +203,9 @@ SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} + datname | setconfig +-----------+---------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} (1 row) SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); @@ -201,20 +216,48 @@ SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o"} + datname | setconfig 
+-----------+-------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relkinds=m,M,o"} (1 row) SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); ERROR: Invalid relkind: d HINT: Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M' +SELECT arenadata_toolkit.tracking_set_relkinds(''); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relkinds=} +(1 row) + -- Prohibit manual GUC setting. SET arenadata_toolkit.tracking_relkinds = "h, a, x"; ERROR: cannot change tracking status outside the tracking_register_relkinds function ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; ERROR: cannot change tracking status outside the tracking_register_relkinds function +-- Resetting case is allowed. 
+ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relkinds; -- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema"} +(1 row) + SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); tracking_register_schema -------------------------- @@ -229,9 +272,9 @@ SELECT arenadata_toolkit.tracking_register_schema('public'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o","arenadata_toolkit.tracking_schemas=arenadata_toolkit,public"} + datname | setconfig +-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | 
{arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema,public"} (1 row) SELECT arenadata_toolkit.tracking_unregister_schema('public'); @@ -242,9 +285,9 @@ SELECT arenadata_toolkit.tracking_unregister_schema('public'); SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON setdatabase=oid WHERE datname=current_database(); - datname | setconfig ------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ - tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v,"arenadata_toolkit.tracking_relkinds=m,M,o",arenadata_toolkit.tracking_schemas=arenadata_toolkit} + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema"} (1 row) SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); @@ -254,6 +297,8 @@ SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ERROR: cannot change tracking status outside the tracking_register_schema function ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ERROR: cannot change tracking status outside the tracking_register_schema function +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_schemas; -- Test GUCs are set in the caller's session. 
SELECT arenadata_toolkit.tracking_register_db(); tracking_register_db @@ -304,9 +349,9 @@ SHOW arenadata_toolkit.tracking_snapshot_on_recovery; (1 row) SHOW arenadata_toolkit.tracking_schemas; - arenadata_toolkit.tracking_schemas ------------------------------------- - arenadata_toolkit + arenadata_toolkit.tracking_schemas +------------------------------------------------------------------- + arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema (1 row) SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); @@ -316,9 +361,9 @@ SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); (1 row) SHOW arenadata_toolkit.tracking_schemas; - arenadata_toolkit.tracking_schemas ------------------------------------- - arenadata_toolkit + arenadata_toolkit.tracking_schemas +-------------------------------------------------------------------------- + public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema (1 row) SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); @@ -328,15 +373,15 @@ SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); (1 row) SHOW arenadata_toolkit.tracking_schemas; - arenadata_toolkit.tracking_schemas --------------------------------------------------------------------------- - public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema + arenadata_toolkit.tracking_schemas +-------------------------------------------------------- + public,pg_catalog,pg_toast,pg_aoseg,information_schema (1 row) SHOW arenadata_toolkit.tracking_relkinds; arenadata_toolkit.tracking_relkinds ------------------------------------- - m,M,o + (1 row) SELECT arenadata_toolkit.tracking_set_relkinds('r,t'); @@ -354,7 +399,7 @@ SHOW arenadata_toolkit.tracking_relkinds; SHOW arenadata_toolkit.tracking_relstorages; arenadata_toolkit.tracking_relstorages ---------------------------------------- - v + (1 row) SELECT arenadata_toolkit.tracking_set_relstorages('a'); diff --git 
a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 142ea1004eb5..26f92f2fa60f 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -59,12 +59,37 @@ FROM segment_counts sc, pg_class_count pc; t (1 row) --- 4. Create table in specific schema and register that schema. +-- 4. Create table in one of default schemas. Then unregister all +-- default schemas except this one. CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); -SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); - tracking_register_schema --------------------------- +SELECT arenadata_toolkit.tracking_unregister_schema('information_schema'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_aoseg'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_toast'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_catalog'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- t (1 row) @@ -74,19 +99,44 @@ FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+------+-------+-------+---------+------------ tracking_t1 | 0 | a | -1 | r | c - tracking_t1 | 0 | a | 1 | r | c tracking_t1 | 0 | a | 2 | r | c tracking_t1 | 0 | a | 0 | r | c + tracking_t1 | 0 | a | 1 | r | c (4 rows) +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); + tracking_unregister_schema +---------------------------- + t 
+(1 row) + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +--Empty track is expected +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ +(0 rows) + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + -- 5. Test data extending event. Bloom should capture it. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ - tracking_t1 | 134064 | a | 0 | r | c tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 134064 | a | 0 | r | c tracking_t1 | 133064 | a | 2 | r | c (3 rows) @@ -104,14 +154,14 @@ FROM arenadata_toolkit.tables_track; | 0 | d | 2 | | | 0 | d | 2 | | | 0 | d | 2 | | - | 0 | d | 0 | | - | 0 | d | 0 | | - | 0 | d | 0 | | - | 0 | d | 0 | | | 0 | d | 1 | | | 0 | d | 1 | | | 0 | d | 1 | | | 0 | d | 1 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | (16 rows) -- 8. 
Test actions on commit and rollback @@ -125,8 +175,8 @@ FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ tracking_t1 | 0 | a | -1 | r | c - tracking_t1 | 133528 | a | 1 | r | c tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c tracking_t1 | 133064 | a | 2 | r | c (4 rows) @@ -158,9 +208,9 @@ SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; relname | size | state | segid | relkind | relstorage -------------+--------+-------+-------+---------+------------ - tracking_t1 | 146096 | a | 2 | r | c tracking_t1 | 147576 | a | 0 | r | c tracking_t1 | 147112 | a | 1 | r | c + tracking_t1 | 146096 | a | 2 | r | c (3 rows) CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); @@ -211,15 +261,34 @@ FROM arenadata_toolkit.tables_track; ---------+-------+-------+---------+------------ 32768 | a | -1 | i | h 32768 | a | -1 | i | h - 65536 | a | 1 | i | h + 1638400 | a | 0 | i | h + 65536 | a | 0 | i | h 1638400 | a | 1 | i | h - 65536 | a | 2 | i | h + 65536 | a | 1 | i | h 1638400 | a | 2 | i | h - 65536 | a | 0 | i | h - 1638400 | a | 0 | i | h + 65536 | a | 2 | i | h (8 rows) DROP TABLE arenadata_toolkit.tracking_t1; +-- Set empty relkinds. The track result set should be empty. 
+SELECT arenadata_toolkit.tracking_set_relkinds(''); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + size | state | segid | relkind | relstorage +------+-------+-------+---------+------------ +(0 rows) + -- Clean up SELECT arenadata_toolkit.tracking_unregister_db(); tracking_unregister_db diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql index 164aa717807d..f46b257782fd 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -117,11 +117,19 @@ setdatabase=oid WHERE datname=current_database(); SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); +SELECT arenadata_toolkit.tracking_set_relstorages(''); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + -- Prohibit manual GUC setting. SET arenadata_toolkit.tracking_relstorages = "h, a, x"; ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relstorages; + -- Test arenadata_toolkit.tracking_relkinds GUC SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); @@ -135,12 +143,25 @@ setdatabase=oid WHERE datname=current_database(); SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); +SELECT arenadata_toolkit.tracking_set_relkinds(''); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + -- Prohibit manual GUC setting. 
SET arenadata_toolkit.tracking_relkinds = "h, a, x"; ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relkinds; + -- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); SELECT arenadata_toolkit.tracking_register_schema('public'); @@ -160,6 +181,9 @@ SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_schemas; + -- Test GUCs are set in the caller's session. SELECT arenadata_toolkit.tracking_register_db(); SHOW arenadata_toolkit.tracking_is_db_tracked; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index 6748a192f4c0..c5226c062e3d 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -9,7 +9,7 @@ -- s/\d+/XXX/g -- end_matchsubs --start_ignore -DROP DATABASE IF EXISTS tracking1; +DROP DATABASE IF EXISTS tracking_db1; --end_ignore CREATE DATABASE tracking_db1; \c tracking_db1; @@ -45,16 +45,30 @@ pg_class_count AS ( SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; --- 4. Create table in specific schema and register that schema. +-- 4. Create table in one of default schemas. Then unregister all +-- default schemas except this one. 
CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); -SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); +SELECT arenadata_toolkit.tracking_unregister_schema('information_schema'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_aoseg'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_toast'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_catalog'); +SELECT arenadata_toolkit.tracking_unregister_schema('public'); -- Getting the track. Only created table with size 0 is expected; SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + +--Empty track is expected +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + -- 5. Test data extending event. Bloom should capture it. INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); SELECT relname, size, state, segid, relkind, relstorage @@ -114,6 +128,14 @@ FROM arenadata_toolkit.tables_track; DROP TABLE arenadata_toolkit.tracking_t1; +-- Set empty relkinds. The track result set should be empty. 
+SELECT arenadata_toolkit.tracking_set_relkinds(''); + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + -- Clean up SELECT arenadata_toolkit.tracking_unregister_db(); diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 06648367a112..ff396fc51e14 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -470,8 +470,9 @@ tracking_get_track(PG_FUNCTION_ARGS) if (tf_get_global_state.relstorages == 0 || tf_get_global_state.relkinds == 0 || tf_get_global_state.schema_oids == NIL) - ereport(ERROR, - (errmsg("Cannot get tracking configuration (schemas, relkinds, reltorage) for database %u", MyDatabaseId))); + ereport(LOG, + (errmsg("One of the tracking parameters (schemas, relkinds," + "reltorage) for database %u is empty", MyDatabaseId))); MemoryContextSwitchTo(oldcontext); @@ -843,6 +844,23 @@ add_or_remove_schema(const char *schema_string, const char *schemaName, bool add initStringInfo(&buf); + /* + * consider NULL value as a need for applying operation + * to default schema set + */ + if (schema_string == NULL){ + schema_string = DEFAULT_TRACKED_SCHEMAS; + } + + /* + * If string is empty, we can only add + */ + if (schema_string[0] == '\0' && !add) + { + pfree(buf.data); + return NULL; + } + if (schema_string && schema_string[0] != '\0') { str = pstrdup(schema_string); @@ -974,18 +992,17 @@ track_schema(const char *schemaName, Oid dbid, bool reg) if (new_schemas == NULL) { /* - * If new_schemas is NULL, we're removing the last schema, so let's - * just RESET the variable + * If new_schemas is NULL, we're removing the last schema, that should + * lead to empty result set during track acquisition. But we anyway + * need to store an empty string to distinguish state when the GUC has + * default value and when the get_track() filers out all schemas. 
*/ - v_stmt.kind = VAR_RESET; - v_stmt.args = NIL; - } - else - { - v_stmt.kind = VAR_SET_VALUE; - v_stmt.args = list_make1(&arg); + arg.val.val.str = pstrdup(""); } + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + tf_guc_unlock(); AlterDatabaseSet(&stmt); @@ -993,7 +1010,7 @@ track_schema(const char *schemaName, Oid dbid, bool reg) /* Will set the GUC in caller session only on coordinator */ tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_schemas", - new_schemas ? new_schemas : DEFAULT_TRACKED_SCHEMAS, + new_schemas ? new_schemas : "", PGC_SUSET, PGC_S_DATABASE); if (current_schemas) @@ -1136,17 +1153,9 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) buf.len--; } - if (buf.len == 0) - { - v_stmt.kind = VAR_RESET; - v_stmt.args = NIL; - } - else - { - v_stmt.kind = VAR_SET_VALUE; - v_stmt.args = list_make1(&arg); - elog(LOG, "[arenadata_toolkit] setting relkinds %s in database %u for tracking", buf.data, dbid); - } + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + elog(LOG, "[arenadata_toolkit] setting relkinds %s in database %u for tracking", buf.data, dbid); tf_guc_unlock(); @@ -1155,7 +1164,7 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) /* Will set the GUC in caller session only on coordinator */ tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relkinds", - buf.len ? 
buf.data : DEFAULT_TRACKED_REL_KINDS, + buf.data, PGC_SUSET, PGC_S_DATABASE); pfree(buf.data); @@ -1244,17 +1253,9 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) buf.len--; } - if (buf.len == 0) - { - v_stmt.kind = VAR_RESET; - v_stmt.args = NIL; - } - else - { - v_stmt.kind = VAR_SET_VALUE; - v_stmt.args = list_make1(&arg); - elog(LOG, "[arenadata_toolkit] setting relstorages %s in database %u for tracking", buf.data, dbid); - } + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + elog(LOG, "[arenadata_toolkit] setting relstorages %s in database %u for tracking", buf.data, dbid); tf_guc_unlock(); @@ -1263,7 +1264,7 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) /* Will set the GUC in caller session only on coordinator */ tf_guc_unlock(); SetConfigOption("arenadata_toolkit.tracking_relstorages", - buf.len ? buf.data : DEFAULT_TRACKED_REL_STORAGES, + buf.data, PGC_SUSET, PGC_S_DATABASE); pfree(buf.data); From 476730936abe50c63c855d89a3d1f7757d19ed32 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 29 Nov 2024 07:33:57 +0300 Subject: [PATCH 67/69] Improve validation and emit warning --- gpcontrib/arenadata_toolkit/README.md | 10 ++++- .../expected/arenadata_toolkit_tracking.out | 4 ++ .../sql/arenadata_toolkit_tracking.sql | 2 + gpcontrib/arenadata_toolkit/src/track_files.c | 39 +++++++++++++++---- 4 files changed, 46 insertions(+), 9 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index 3067869a8ea8..d2fb82f4fdc6 100644 --- a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -37,11 +37,19 @@ In order to get the snapshot of all database relations you should call in the da arenadata_toolkit.tracking_trigger_initial_snapshot(); ``` - ***Attention***: Acquiring size track from parallel sessions is not recommended, since there is the only instance of Bloom filter for a database. I.e. 
track acquisition can return whole accumulated relation set in one session, and empty set for acquisition from the second session (the first session acquired data earlier). +The result of track acquisiton can be filtered via following GUC +|GUC|Setter|Default value| +--|--|-- +| arenadata_toolkit.tracking_schemas | arenadata_toolkit.tracking_register_schema(schema name) |public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema +| arenadata_toolkit.tracking_relkinds | arenadata_toolkit.tracking_set_relkinds(relkinds name) |r,i,t,m,o,b,M| +| arenadata_toolkit.tracking_relstorages | arenadata_toolkit.tracking_set_relstorages(relstorages name) |h,a,c| + +If any of these parameters is empty, the track acquisition will return an empty track as well. + #### Choosing optimal Bloom size Choosing the optimal Bloom filter size is crucial for balancing memory usage and accuracy. diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out index 26f92f2fa60f..ef6c0082ec5b 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -2,6 +2,8 @@ -- start_matchsubs -- m/ERROR: database \d+ is not tracked/ -- s/\d+/XXX/g +-- m/for database \d+ is empty/ +-- s/\d+/XXX/g -- end_matchsubs CREATE DATABASE tracking_db1; \c tracking_db1; @@ -119,6 +121,7 @@ SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); --Empty track is expected SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; +WARNING: One of the tracking parameters (schemas,relkinds, relstorages) for database 167165 is empty.
relname | size | state | segid | relkind | relstorage ---------+------+-------+-------+---------+------------ (0 rows) @@ -285,6 +288,7 @@ SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); SELECT size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; +WARNING: One of the tracking parameters (schemas,relkinds, relstorages) for database 167165 is empty. size | state | segid | relkind | relstorage ------+-------+-------+---------+------------ (0 rows) diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql index c5226c062e3d..75b0d29785ba 100644 --- a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -7,6 +7,8 @@ -- start_matchsubs -- m/ERROR: database \d+ is not tracked/ -- s/\d+/XXX/g +-- m/for database \d+ is empty/ +-- s/\d+/XXX/g -- end_matchsubs --start_ignore DROP DATABASE IF EXISTS tracking_db1; diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index ff396fc51e14..538527aeb15e 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -467,12 +467,14 @@ tracking_get_track(PG_FUNCTION_ARGS) */ get_filters_from_guc(); - if (tf_get_global_state.relstorages == 0 || + /* emit warning only at coordinator */ + if ((tf_get_global_state.relstorages == 0 || tf_get_global_state.relkinds == 0 || - tf_get_global_state.schema_oids == NIL) - ereport(LOG, - (errmsg("One of the tracking parameters (schemas, relkinds," - "reltorage) for database %u is empty", MyDatabaseId))); + tf_get_global_state.schema_oids == NIL) && + IS_QUERY_DISPATCHER()) + ereport(WARNING, + (errmsg("One of the tracking parameters (schemas," + "relkinds, relstorages) for database %u is empty.", MyDatabaseId))); MemoryContextSwitchTo(oldcontext); @@ -848,7 +850,8 @@ add_or_remove_schema(const 
char *schema_string, const char *schemaName, bool add * consider NULL value as a need for applying operation * to default schema set */ - if (schema_string == NULL){ + if (schema_string == NULL) + { schema_string = DEFAULT_TRACKED_SCHEMAS; } @@ -1026,10 +1029,13 @@ tracking_register_schema(PG_FUNCTION_ARGS) Oid dbid = get_dbid(PG_GETARG_OID(1)); if (Gp_role != GP_ROLE_DISPATCH) - { ereport(ERROR, (errmsg("Cannot execute tracking_register_schema outside query dispatcher"))); - } + + if (schema_name == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema does not exist"))); if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, @@ -1055,6 +1061,11 @@ tracking_unregister_schema(PG_FUNCTION_ARGS) (errmsg("Cannot execute tracking_unregister_schema outside query dispatcher"))); } + if (schema_name == NULL) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema does not exist"))); + if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name))) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_SCHEMA), @@ -1109,6 +1120,12 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) (errmsg("Cannot execute tracking_set_relkinds outside query dispatcher"))); } + if (relkinds_str == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Invalid relkind set"), + errhint("Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M'"))); + initStringInfo(&buf); str_copy = pstrdup(relkinds_str); token = strtok(str_copy, ","); @@ -1208,6 +1225,12 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) (errmsg("Cannot execute tracking_set_relstorages outside query dispatcher"))); } + if (relstorages_str == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("Invalid relstorage set"), + errhint("Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f'"))); + initStringInfo(&buf); str_copy = pstrdup(relstorages_str); token = strtok(str_copy, ","); From 
64df2b57496b5ad7052a9624f0068baa1f057d8e Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 29 Nov 2024 07:53:32 +0300 Subject: [PATCH 68/69] Typo --- gpcontrib/arenadata_toolkit/README.md | 2 +- gpcontrib/arenadata_toolkit/src/track_files.c | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md index d2fb82f4fdc6..8f3ee0c4496f 100644 --- a/gpcontrib/arenadata_toolkit/README.md +++ b/gpcontrib/arenadata_toolkit/README.md @@ -41,7 +41,7 @@ arenadata_toolkit.tracking_trigger_initial_snapshot(); instance of Bloom filter for a database. I.e. track acquisition can return whole accumulated relation set in one session, and empty set for acquisition from the second session (the first session acquired data earlier). -The result of track acquisiton can be filtered via following GUC +The result of track acquisition can be filtered via following GUC |GUC|Setter|Default value| --|--|-- | arenadata_toolkit.tracking_schemas | arenadata_toolkit.tracking_register_schema(schema name) |public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index 538527aeb15e..ec9d43fe7025 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -1123,8 +1123,7 @@ tracking_set_relkinds(PG_FUNCTION_ARGS) if (relkinds_str == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("Invalid relkind set"), - errhint("Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M'"))); + errmsg("relkinds argument cannot be NULL"))); initStringInfo(&buf); str_copy = pstrdup(relkinds_str); @@ -1228,8 +1227,7 @@ tracking_set_relstorages(PG_FUNCTION_ARGS) if (relstorages_str == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("Invalid relstorage set"), - errhint("Valid 
relstorages are: 'h', 'x', 'a', 'v', 'c', 'f'"))); + errmsg("relstorages argument cannot be NULL"))); initStringInfo(&buf); str_copy = pstrdup(relstorages_str); From b6fec63087998e7bb6b22d61080e05a9a37d5194 Mon Sep 17 00:00:00 2001 From: Alexander Kondakov Date: Fri, 29 Nov 2024 11:58:37 +0300 Subject: [PATCH 69/69] Use default settings at track acquisition --- gpcontrib/arenadata_toolkit/src/track_files.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c index ec9d43fe7025..638a84452ae4 100644 --- a/gpcontrib/arenadata_toolkit/src/track_files.c +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -308,15 +308,15 @@ get_filters_from_guc() if (current_schemas) schema_names = split_string_to_list(current_schemas); else - schema_names = split_string_to_list(tracked_schemas); + schema_names = split_string_to_list(DEFAULT_TRACKED_SCHEMAS); if (current_relstorages) tf_get_global_state.relstorages = list_to_bits(current_relstorages); else - tf_get_global_state.relstorages = list_to_bits(tracked_rel_storages); + tf_get_global_state.relstorages = list_to_bits(DEFAULT_TRACKED_REL_STORAGES); if (current_relkinds) tf_get_global_state.relkinds = list_to_bits(current_relkinds); else - tf_get_global_state.relkinds = list_to_bits(tracked_rel_kinds); + tf_get_global_state.relkinds = list_to_bits(DEFAULT_TRACKED_REL_KINDS); foreach(lc, schema_names) { @@ -998,7 +998,8 @@ track_schema(const char *schemaName, Oid dbid, bool reg) * If new_schemas is NULL, we're removing the last schema, that should * lead to empty result set during track acquisition. But we anyway * need to store an empty string to distinguish state when the GUC has - * default value and when the get_track() filers out all schemas. + * default value and when the get_track() is supposed to + * filter out all schemas. */ arg.val.val.str = pstrdup(""); }