diff --git a/gpcontrib/Makefile b/gpcontrib/Makefile index 1ef54b62b414..17634644ecf3 100644 --- a/gpcontrib/Makefile +++ b/gpcontrib/Makefile @@ -105,4 +105,5 @@ installcheck: $(MAKE) -C gp_subtransaction_overflow installcheck $(MAKE) -C gp_check_functions installcheck $(MAKE) -C arenadata_toolkit installcheck + $(MAKE) -C arenadata_toolkit/isolation2 installcheck diff --git a/gpcontrib/arenadata_toolkit/Makefile b/gpcontrib/arenadata_toolkit/Makefile index 446afcbc399d..665c4ae401e5 100644 --- a/gpcontrib/arenadata_toolkit/Makefile +++ b/gpcontrib/arenadata_toolkit/Makefile @@ -3,7 +3,7 @@ MODULES = arenadata_toolkit EXTENSION = arenadata_toolkit -EXTENSION_VERSION = 1.6 +EXTENSION_VERSION = 1.7 DATA = \ arenadata_toolkit--1.0.sql \ arenadata_toolkit--1.0--1.1.sql \ @@ -12,15 +12,29 @@ DATA = \ arenadata_toolkit--1.3--1.4.sql \ arenadata_toolkit--1.4--1.5.sql \ arenadata_toolkit--1.5--1.6.sql \ + arenadata_toolkit--1.6--1.7.sql \ DATA_built = $(EXTENSION)--$(EXTENSION_VERSION).sql -$(DATA_built): $(DATA) - cat $(DATA) > $(DATA_built) +MODULE_big = arenadata_toolkit +OBJS = \ + src/arenadata_toolkit_guc.o \ + src/bloom.o \ + src/bloom_set.o \ + src/drops_track.o \ + src/file_hook.o \ + src/tf_shmem.o \ + src/arenadata_toolkit.o \ + src/arenadata_toolkit_worker.o \ + src/track_files.o \ + src/dbsize.o \ + +PG_CFLAGS = -I$(libpq_srcdir) -I$(CURDIR)/src/include REGRESS = arenadata_toolkit_test arenadata_toolkit_skew_test adb_get_relfilenodes_test \ adb_collect_table_stats_test adb_vacuum_strategy_test adb_relation_storage_size_test \ - tablespace_location upgrade_test adb_hba_file_rules_view_test + tablespace_location upgrade_test adb_hba_file_rules_view_test \ + arenadata_toolkit_guc arenadata_toolkit_tracking REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS @@ -33,3 +47,6 @@ top_builddir = ../.. 
include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif + +$(DATA_built): $(DATA) + cat $(DATA) > $(DATA_built) diff --git a/gpcontrib/arenadata_toolkit/README.md b/gpcontrib/arenadata_toolkit/README.md new file mode 100644 index 000000000000..8f3ee0c4496f --- /dev/null +++ b/gpcontrib/arenadata_toolkit/README.md @@ -0,0 +1,76 @@ +## arenadata_toolkit - database objects tracking extension for GPDB + +`arenadata_toolkit`, starting from version 1.7, is a GPDB extension that efficiently tracks file system changes (extend, truncate, create, unlink operations) using space-efficient Bloom filters stored in shared memory. This extension is particularly useful for monitoring and maintaining database file sizes across a distributed environment. + +The main purpose of this code is to achieve fast database size calculation and to track file changes at the relation +level. The extension implements a probabilistic tracking system using Bloom filters to monitor file changes across Greenplum segments. It utilizes shared memory for state management and employs background workers to maintain consistency. + +#### Configuring GPDB and extension usage +Since the extension uses shared memory, the configuration of all GPDB segments must be changed by setting +```shell script +gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +``` +The extension can track only a limited number of databases. The maximum number is defined by the GUC +|GUC|Change takes effect|Values| +|--|--|--| +| arenadata_toolkit.tracking_db_track_count | After restart |Possible values [1, 1000]; default 5| + +For each tracked database, a Bloom filter is allocated in shared memory. 
The size of each filter is controlled via the GUC +|GUC|Change takes effect|Values| +|--|--|--| +| arenadata_toolkit.tracking_bloom_size | After restart |Possible values (bytes) [64, 128000000]; default 1048576| + +A specific database can be bound to an unoccupied filter with the function +```shell script +psql -d my_db -c 'select arenadata_toolkit.tracking_register_db()' +# or, passing the database OID explicitly: +psql -c 'select arenadata_toolkit.tracking_register_db(12345)' +``` +After registration, each relation file change within the database is recorded in the Bloom filter. +Using the Bloom filter allows us to calculate sizes only for relations whose relfilenode is present in the filter. +The current size snapshot can be taken via the view: +``` +select * from arenadata_toolkit.tables_track; +``` +To get a snapshot of all database relations, run the following in the database of interest: +``` +select arenadata_toolkit.tracking_trigger_initial_snapshot(); +``` + +***Attention***: Acquiring the size track from parallel sessions is not recommended, since there is only one +instance of the Bloom filter per database. I.e. track acquisition can return the whole accumulated relation set +in one session and an empty set in the second session (because the first session acquired the data earlier). + +The result of track acquisition can be filtered via the following GUCs +|GUC|Setter|Default value| +|--|--|--| +| arenadata_toolkit.tracking_schemas | arenadata_toolkit.tracking_register_schema(schema name) |public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema| +| arenadata_toolkit.tracking_relkinds | arenadata_toolkit.tracking_set_relkinds(relkinds name) |r,i,t,m,o,b,M| +| arenadata_toolkit.tracking_relstorages | arenadata_toolkit.tracking_set_relstorages(relstorages name) |h,a,c| + +If any of these parameters is empty, track acquisition returns an empty track as well. + +#### Choosing optimal Bloom size + +Choosing the optimal Bloom filter size is crucial for balancing memory usage and accuracy. 
+First of all, when choosing the filter size, you should take into account your system resources, because Bloom filters are allocated in shared memory on each segment, and overly large structures (tracking_db_track_count * tracking_bloom_size) could decrease overall performance. + +Next, choose a filter size that satisfies your performance goals: +- Define the false positive tolerance, p. Since a Bloom filter is a probabilistic data structure, there is a chance that the size of a relation that has not been modified will be recalculated. The smaller the filter is, the more often this occurs. +- Memory constraints +- Query patterns: if queries are mostly read-only, huge filter sizes are unnecessary. + +If you can estimate the number of objects in your database, you can calculate the theoretical size: +$$m = -\frac{n \ln p}{(\ln 2)^2}$$ +- n = estimated number of elements +- p = target false positive rate +- m = filter size in bits (divide by 8 to get the value of tracking_bloom_size, which is set in bytes) + +Quick Reference Table + +| Deployment Size | Files | Target FPR | Recommended Size, bytes| +|----------------|------------|------------|------------------------| +| Small | < 100K | 1% | 1048576 | +| Medium | 100K - 1M | 1% | 8388608 | +| Large | 1M - 10M | 1% | 33554432 | +| Enterprise | > 10M | 1% | 128000000 | \ No newline at end of file diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql new file mode 100644 index 000000000000..f40c7c811bd2 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql @@ -0,0 +1,125 @@ +/* gpcontrib/arenadata_toolkit/arenadata_toolkit--1.6--1.7.sql */ + +CREATE FUNCTION arenadata_toolkit.tracking_register_db(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_register_db' LANGUAGE C EXECUTE ON MASTER; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_db(dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', 
+'tracking_unregister_db' LANGUAGE C EXECUTE ON MASTER; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_unregister_db(dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_register_schema(schemaname NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_register_schema' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_register_schema(schema NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_unregister_schema' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_unregister_schema(schema NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_relkinds(relkinds NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_relkinds' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_relkinds(relkinds NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_relstorages(relstorages NAME, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_relstorages' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_relstorages(relstorages NAME, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_set_snapshot_on_recovery(val BOOL, dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_set_snapshot_on_recovery' LANGUAGE C EXECUTE ON master; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_set_snapshot_on_recovery(val BOOL, dbid OID) FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_trigger_initial_snapshot(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_trigger_initial_snapshot' LANGUAGE C; + +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_trigger_initial_snapshot(dbid OID) TO public; + +CREATE 
FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_master(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON master; + +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_master(dbid OID) TO public; + +CREATE FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments(dbid OID DEFAULT 0) +returns BOOL AS '$libdir/arenadata_toolkit', +'tracking_is_initial_snapshot_triggered' LANGUAGE C EXECUTE ON ALL segments; + +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments(dbid OID) TO public; + +CREATE FUNCTION arenadata_toolkit.tracking_is_segment_initialized() +returns TABLE(segindex INT, is_initialized BOOL) AS '$libdir/arenadata_toolkit', +'tracking_is_segment_initialized' LANGUAGE C; + +REVOKE ALL ON FUNCTION arenadata_toolkit.tracking_is_segment_initialized() FROM public; + +CREATE FUNCTION arenadata_toolkit.tracking_track_version() +returns BIGINT AS '$libdir/arenadata_toolkit', +'tracking_track_version' LANGUAGE C STABLE EXECUTE ON MASTER; + +-- Shouldn't be called explicitly +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_track_version() TO public; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) +RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, +relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', +'tracking_get_track' LANGUAGE C EXECUTE ON MASTER; + +GRANT EXECUTE ON FUNCTION arenadata_toolkit.tracking_get_track_master(version BIGINT) TO public; + +CREATE FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) +RETURNS TABLE(relid OID, relname NAME, relfilenode OID, size BIGINT, state "char", segid INT, +relnamespace OID, relkind "char", relstorage "char") AS '$libdir/arenadata_toolkit', +'tracking_get_track' LANGUAGE C EXECUTE ON ALL SEGMENTS; + +GRANT 
EXECUTE ON FUNCTION arenadata_toolkit.tracking_get_track_segments(version BIGINT) TO public; + +CREATE VIEW arenadata_toolkit.tables_track AS +SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid +FROM arenadata_toolkit.tracking_get_track_master(arenadata_toolkit.tracking_track_version()) AS t +LEFT JOIN pg_class AS c + ON c.reltoastrelid = t.relid AND t.relkind = 't' +LEFT JOIN pg_index AS i + ON i.indexrelid = t.relid AND t.relkind = 'i' +LEFT JOIN pg_catalog.pg_appendonly AS vm + ON vm.visimaprelid = t.relid AND t.relkind = 'M' +LEFT JOIN pg_catalog.pg_appendonly AS blk + ON blk.blkdirrelid = t.relid AND t.relkind = 'b' +LEFT JOIN pg_catalog.pg_appendonly AS seg + ON seg.segrelid = t.relid AND t.relkind = 'o' +UNION ALL +SELECT t.*, coalesce(c.oid, i.indrelid, vm.relid, blk.relid, seg.relid) AS parent_relid +FROM arenadata_toolkit.tracking_get_track_segments(arenadata_toolkit.tracking_track_version()) AS t +LEFT JOIN pg_class AS c + ON c.reltoastrelid = t.relid AND t.relkind = 't' +LEFT JOIN pg_index AS i + ON i.indexrelid = t.relid AND t.relkind = 'i' +LEFT JOIN pg_catalog.pg_appendonly AS vm + ON vm.visimaprelid = t.relid AND t.relkind = 'M' +LEFT JOIN pg_catalog.pg_appendonly AS blk + ON blk.blkdirrelid = t.relid AND t.relkind = 'b' +LEFT JOIN pg_catalog.pg_appendonly AS seg + ON seg.segrelid = t.relid AND t.relkind = 'o'; + +GRANT SELECT ON arenadata_toolkit.tables_track TO public; + +CREATE VIEW arenadata_toolkit.is_initial_snapshot_triggered AS +SELECT CASE +WHEN TRUE = ALL(select arenadata_toolkit.tracking_is_initial_snapshot_triggered_segments()) +AND arenadata_toolkit.tracking_is_initial_snapshot_triggered_master() +THEN 1 ELSE NULL END AS is_triggered; + +GRANT SELECT ON arenadata_toolkit.is_initial_snapshot_triggered TO public; diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control index 9d7f496cca3d..cf6ec33cb566 100644 --- 
a/gpcontrib/arenadata_toolkit/arenadata_toolkit.control +++ b/gpcontrib/arenadata_toolkit/arenadata_toolkit.control @@ -1,5 +1,5 @@ # arenadata_toolkit extension comment = 'extension is used for manipulation of objects created by adb-bundle' -default_version = '1.6' +default_version = '1.7' module_pathname = '$libdir/arenadata_toolkit' relocatable = false diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out new file mode 100644 index 000000000000..e8f64f6443c3 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_guc.out @@ -0,0 +1,418 @@ +-- start_matchsubs +-- +-- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking1; +NOTICE: database "tracking1" does not exist, skipping +DROP DATABASE IF EXISTS tracking2; +NOTICE: database "tracking2" does not exist, skipping +DROP DATABASE IF EXISTS tracking3; +NOTICE: database "tracking3" does not exist, skipping +DROP DATABASE IF EXISTS tracking4; +NOTICE: database "tracking4" does not exist, skipping +DROP DATABASE IF EXISTS tracking5; +NOTICE: database "tracking5" does not exist, skipping +DROP DATABASE IF EXISTS tracking6; +NOTICE: database "tracking6" does not exist, skipping +--end_ignore +-- Test database registering GUC. 
+CREATE DATABASE tracking1; +\c tracking1; +CREATE EXTENSION arenadata_toolkit; +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +---------+----------- +(0 rows) + +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + on +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=t} +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f} +(1 row) + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_is_db_tracked = true; +ERROR: cannot change tracking status outside the tracking_register_db function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_is_db_tracked = true; +ERROR: cannot change tracking status outside the tracking_register_db function +-- Test limit of tracking databases. 
+SHOW arenadata_toolkit.tracking_db_track_count; + arenadata_toolkit.tracking_db_track_count +------------------------------------------- + 5 +(1 row) + +CREATE DATABASE tracking2; +CREATE DATABASE tracking3; +CREATE DATABASE tracking4; +CREATE DATABASE tracking5; +CREATE DATABASE tracking6; +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_register_db(db_oid); + END LOOP; +END; +$$; +ERROR: [arenadata_toolkit] exceeded maximum number of tracked databases (track_files.c:714) +CONTEXT: SQL statement "SELECT arenadata_toolkit.tracking_register_db(db_oid)" +PL/pgSQL function inline_code_block line 11 at PERFORM +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_unregister_db(db_oid); + END LOOP; +END; +$$; +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; +-- Test arenadata_toolkit.tracking_snapshot_on_recovery GUC +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------ + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t} +(1 row) + +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_snapshot_on_recovery = false; +ERROR: cannot change tracking status outside the tracking_set_snapshot_on_recovery function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_snapshot_on_recovery = false; +ERROR: cannot change tracking status outside the tracking_set_snapshot_on_recovery function +-- Test arenadata_toolkit.tracking_relstorages GUC +SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+----------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relstorages=f,a,x"} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+----------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=v} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); +ERROR: Invalid relstorage type: d +HINT: Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f' +SELECT arenadata_toolkit.tracking_set_relstorages(''); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | 
setconfig +-----------+---------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relstorages=} +(1 row) + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relstorages = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relstorages function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relstorages function +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relstorages; +-- Test arenadata_toolkit.tracking_relkinds GUC +SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+---------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relkinds=r,t,o,S"} +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+-------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_relkinds=m,M,o"} +(1 row) + 
+SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); +ERROR: Invalid relkind: d +HINT: Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M' +SELECT arenadata_toolkit.tracking_set_relkinds(''); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,arenadata_toolkit.tracking_relkinds=} +(1 row) + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relkinds = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relkinds function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; +ERROR: cannot change tracking status outside the tracking_register_relkinds function +-- Resetting case is allowed. 
+ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relkinds; +-- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema"} +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('public'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig +-----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema,public"} +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + datname | setconfig 
+-----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + tracking1 | {arenadata_toolkit.tracking_is_db_tracked=f,arenadata_toolkit.tracking_snapshot_on_recovery=t,"arenadata_toolkit.tracking_schemas=arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema"} +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); +ERROR: schema pg_pg does not exist +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; +ERROR: cannot change tracking status outside the tracking_register_schema function +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; +ERROR: cannot change tracking status outside the tracking_register_schema function +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_schemas; +-- Test GUCs are set in the caller's session. 
+SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + on +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +SHOW arenadata_toolkit.tracking_is_db_tracked; + arenadata_toolkit.tracking_is_db_tracked +------------------------------------------ + off +(1 row) + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + arenadata_toolkit.tracking_snapshot_on_recovery +------------------------------------------------- + on +(1 row) + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(false); + tracking_set_snapshot_on_recovery +----------------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + arenadata_toolkit.tracking_snapshot_on_recovery +------------------------------------------------- + off +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + arenadata_toolkit.tracking_schemas +------------------------------------------------------------------- + arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema +(1 row) + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + arenadata_toolkit.tracking_schemas +-------------------------------------------------------------------------- + public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_schemas; + 
arenadata_toolkit.tracking_schemas +-------------------------------------------------------- + public,pg_catalog,pg_toast,pg_aoseg,information_schema +(1 row) + +SHOW arenadata_toolkit.tracking_relkinds; + arenadata_toolkit.tracking_relkinds +------------------------------------- + +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('r,t'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_relkinds; + arenadata_toolkit.tracking_relkinds +------------------------------------- + r,t +(1 row) + +SHOW arenadata_toolkit.tracking_relstorages; + arenadata_toolkit.tracking_relstorages +---------------------------------------- + +(1 row) + +SELECT arenadata_toolkit.tracking_set_relstorages('a'); + tracking_set_relstorages +-------------------------- + t +(1 row) + +SHOW arenadata_toolkit.tracking_relstorages; + arenadata_toolkit.tracking_relstorages +---------------------------------------- + a +(1 row) + +\c contrib_regression; +DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out index 97e9c1ac4dd8..0e34a7dee115 100644 --- a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_test.out @@ -112,31 +112,63 @@ SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY o db_files_history_backup_YYYYMMDDtHHMMSS | table | a | db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_default_part | table | a | db_files_history_backup_YYYYMMDDtHHMMSS_1_prt_pYYYYMM | table | a | + is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} operation_exclude | table | a | -(24 rows) + tables_track | table | v | {owner=arwdDxt/owner,=r/owner} + tracking_get_track_master | proc | - | {=X/owner,owner=X/owner} + tracking_get_track_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_master | 
proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_segment_initialized | proc | - | {owner=X/owner} + tracking_register_db | proc | - | {owner=X/owner} + tracking_register_schema | proc | - | {owner=X/owner} + tracking_set_relkinds | proc | - | {owner=X/owner} + tracking_set_relstorages | proc | - | {owner=X/owner} + tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + tracking_track_version | proc | - | {=X/owner,owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | {=X/owner,owner=X/owner} + tracking_unregister_db | proc | - | {owner=X/owner} + tracking_unregister_schema | proc | - | {owner=X/owner} +(40 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN toolkit_objects_info objs ON d.objid = objs.objid JOIN pg_extension e ON d.refobjid = e.oid WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; - objname | objtype | extname | deptype ----------------------------------------+---------+-------------------+--------- - __db_files_current | table | arenadata_toolkit | e - __db_files_current_unmapped | table | arenadata_toolkit | e - __db_segment_files | table | arenadata_toolkit | e - adb_collect_table_stats | proc | arenadata_toolkit | e - adb_create_tables | proc | arenadata_toolkit | e - adb_get_relfilenodes | proc | arenadata_toolkit | e - adb_hba_file_rules | proc | arenadata_toolkit | e - adb_hba_file_rules_view | table | arenadata_toolkit | e - adb_relation_storage_size | proc | arenadata_toolkit | e - adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e - adb_skew_coefficients | table | arenadata_toolkit | e - adb_vacuum_strategy | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e -(14 rows) + objname | objtype | extname | deptype 
+-------------------------------------------------+---------+-------------------+--------- + __db_files_current | table | arenadata_toolkit | e + __db_files_current_unmapped | table | arenadata_toolkit | e + __db_segment_files | table | arenadata_toolkit | e + adb_collect_table_stats | proc | arenadata_toolkit | e + adb_create_tables | proc | arenadata_toolkit | e + adb_get_relfilenodes | proc | arenadata_toolkit | e + adb_hba_file_rules | proc | arenadata_toolkit | e + adb_hba_file_rules_view | table | arenadata_toolkit | e + adb_relation_storage_size | proc | arenadata_toolkit | e + adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e + adb_skew_coefficients | table | arenadata_toolkit | e + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e + is_initial_snapshot_triggered | table | arenadata_toolkit | e + tables_track | table | arenadata_toolkit | e + tracking_get_track_master | proc | arenadata_toolkit | e + tracking_get_track_segments | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e + tracking_is_segment_initialized | proc | arenadata_toolkit | e + tracking_register_db | proc | arenadata_toolkit | e + tracking_register_schema | proc | arenadata_toolkit | e + tracking_set_relkinds | proc | arenadata_toolkit | e + tracking_set_relstorages | proc | arenadata_toolkit | e + tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_track_version | proc | arenadata_toolkit | e + tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e + tracking_unregister_db | proc | arenadata_toolkit | e + tracking_unregister_schema | proc | arenadata_toolkit | e +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; @@ -153,53 +185,85 @@ 
SELECT arenadata_toolkit.adb_create_tables(); -- show toolkit objects (and their grants) that belongs to arenadata_toolkit schema after creating -- extension and calling adb_create_tables SELECT objname, objtype, objstorage, objacl FROM toolkit_objects_info ORDER BY objname; - objname | objtype | objstorage | objacl ----------------------------------------+---------+------------+-------------------------------- - __db_files_current | table | v | {owner=arwdDxt/owner,=r/owner} - __db_files_current_unmapped | table | v | {owner=arwdDxt/owner,=r/owner} - __db_segment_files | table | v | {owner=arwdDxt/owner,=r/owner} - adb_collect_table_stats | proc | - | {owner=X/owner} - adb_create_tables | proc | - | {owner=X/owner} - adb_get_relfilenodes | proc | - | {=X/owner,owner=X/owner} - adb_hba_file_rules | proc | - | {owner=X/owner} - adb_hba_file_rules_view | table | v | {owner=arwdDxt/owner} - adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} - adb_relation_storage_size_on_segments | proc | - | {=X/owner,owner=X/owner} - adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} - adb_vacuum_strategy | proc | - | {owner=X/owner} - adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} - adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} - arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} - daily_operation | table | a | {owner=arwdDxt/owner} - db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} - db_files_history | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} - db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} - operation_exclude | table | a | {owner=arwdDxt/owner} -(21 rows) + objname | objtype | objstorage | objacl +-------------------------------------------------+---------+------------+-------------------------------- + __db_files_current | table | v | {owner=arwdDxt/owner,=r/owner} + __db_files_current_unmapped | table | v | 
{owner=arwdDxt/owner,=r/owner} + __db_segment_files | table | v | {owner=arwdDxt/owner,=r/owner} + adb_collect_table_stats | proc | - | {owner=X/owner} + adb_create_tables | proc | - | {owner=X/owner} + adb_get_relfilenodes | proc | - | {=X/owner,owner=X/owner} + adb_hba_file_rules | proc | - | {owner=X/owner} + adb_hba_file_rules_view | table | v | {owner=arwdDxt/owner} + adb_relation_storage_size | proc | - | {=X/owner,owner=X/owner} + adb_relation_storage_size_on_segments | proc | - | {=X/owner,owner=X/owner} + adb_skew_coefficients | table | v | {owner=arwdDxt/owner,=r/owner} + adb_vacuum_strategy | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_first | proc | - | {owner=X/owner} + adb_vacuum_strategy_newest_last | proc | - | {owner=X/owner} + arenadata_toolkit | schema | - | {owner=UC/owner,=U/owner} + daily_operation | table | a | {owner=arwdDxt/owner} + db_files_current | table | h | {owner=arwdDxt/owner,=r/owner} + db_files_history | table | a | {owner=arwdDxt/owner} + db_files_history_1_prt_default_part | table | a | {owner=arwdDxt/owner} + db_files_history_1_prt_pYYYYMM | table | a | {owner=arwdDxt/owner} + is_initial_snapshot_triggered | table | v | {owner=arwdDxt/owner,=r/owner} + operation_exclude | table | a | {owner=arwdDxt/owner} + tables_track | table | v | {owner=arwdDxt/owner,=r/owner} + tracking_get_track_master | proc | - | {=X/owner,owner=X/owner} + tracking_get_track_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_master | proc | - | {=X/owner,owner=X/owner} + tracking_is_initial_snapshot_triggered_segments | proc | - | {=X/owner,owner=X/owner} + tracking_is_segment_initialized | proc | - | {owner=X/owner} + tracking_register_db | proc | - | {owner=X/owner} + tracking_register_schema | proc | - | {owner=X/owner} + tracking_set_relkinds | proc | - | {owner=X/owner} + tracking_set_relstorages | proc | - | {owner=X/owner} + tracking_set_snapshot_on_recovery | proc | - | {owner=X/owner} + 
tracking_track_version | proc | - | {=X/owner,owner=X/owner} + tracking_trigger_initial_snapshot | proc | - | {=X/owner,owner=X/owner} + tracking_unregister_db | proc | - | {owner=X/owner} + tracking_unregister_schema | proc | - | {owner=X/owner} +(37 rows) -- check that toolkit objects now depends on extension SELECT objname, objtype, extname, deptype FROM pg_depend d JOIN toolkit_objects_info objs ON d.objid = objs.objid JOIN pg_extension e ON d.refobjid = e.oid WHERE d.deptype = 'e' AND e.extname = 'arenadata_toolkit' ORDER BY objname; - objname | objtype | extname | deptype ----------------------------------------+---------+-------------------+--------- - __db_files_current | table | arenadata_toolkit | e - __db_files_current_unmapped | table | arenadata_toolkit | e - __db_segment_files | table | arenadata_toolkit | e - adb_collect_table_stats | proc | arenadata_toolkit | e - adb_create_tables | proc | arenadata_toolkit | e - adb_get_relfilenodes | proc | arenadata_toolkit | e - adb_hba_file_rules | proc | arenadata_toolkit | e - adb_hba_file_rules_view | table | arenadata_toolkit | e - adb_relation_storage_size | proc | arenadata_toolkit | e - adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e - adb_skew_coefficients | table | arenadata_toolkit | e - adb_vacuum_strategy | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e - adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e -(14 rows) + objname | objtype | extname | deptype +-------------------------------------------------+---------+-------------------+--------- + __db_files_current | table | arenadata_toolkit | e + __db_files_current_unmapped | table | arenadata_toolkit | e + __db_segment_files | table | arenadata_toolkit | e + adb_collect_table_stats | proc | arenadata_toolkit | e + adb_create_tables | proc | arenadata_toolkit | e + adb_get_relfilenodes | proc | arenadata_toolkit | e + adb_hba_file_rules | proc | arenadata_toolkit | e 
+ adb_hba_file_rules_view | table | arenadata_toolkit | e + adb_relation_storage_size | proc | arenadata_toolkit | e + adb_relation_storage_size_on_segments | proc | arenadata_toolkit | e + adb_skew_coefficients | table | arenadata_toolkit | e + adb_vacuum_strategy | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_first | proc | arenadata_toolkit | e + adb_vacuum_strategy_newest_last | proc | arenadata_toolkit | e + is_initial_snapshot_triggered | table | arenadata_toolkit | e + tables_track | table | arenadata_toolkit | e + tracking_get_track_master | proc | arenadata_toolkit | e + tracking_get_track_segments | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_master | proc | arenadata_toolkit | e + tracking_is_initial_snapshot_triggered_segments | proc | arenadata_toolkit | e + tracking_is_segment_initialized | proc | arenadata_toolkit | e + tracking_register_db | proc | arenadata_toolkit | e + tracking_register_schema | proc | arenadata_toolkit | e + tracking_set_relkinds | proc | arenadata_toolkit | e + tracking_set_relstorages | proc | arenadata_toolkit | e + tracking_set_snapshot_on_recovery | proc | arenadata_toolkit | e + tracking_track_version | proc | arenadata_toolkit | e + tracking_trigger_initial_snapshot | proc | arenadata_toolkit | e + tracking_unregister_db | proc | arenadata_toolkit | e + tracking_unregister_schema | proc | arenadata_toolkit | e +(30 rows) DROP EXTENSION arenadata_toolkit; DROP SCHEMA arenadata_toolkit CASCADE; diff --git a/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out new file mode 100644 index 000000000000..ef6c0082ec5b --- /dev/null +++ b/gpcontrib/arenadata_toolkit/expected/arenadata_toolkit_tracking.out @@ -0,0 +1,304 @@ +-- Tests for size tracking logic introduced in version 1.7 +-- start_matchsubs +-- m/ERROR: database \d+ is not tracked/ +-- s/\d+/XXX/g +-- m/for database \d+ is empty/ +-- s/\d+/XXX/g 
+-- end_matchsubs +CREATE DATABASE tracking_db1; +\c tracking_db1; +CREATE EXTENSION arenadata_toolkit; +-- 1. Test getting track on not registered database; +SELECT * FROM arenadata_toolkit.tables_track; +ERROR: database 167165 is not tracked +HINT: Call 'arenadata_toolkit.tracking_register_db()'to enable tracking +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); + pg_sleep +---------- + +(1 row) + +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) + +-- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to +-- setting up the bloom filter such that all relfilenodes are considered. +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; + is_triggered +-------------- + 1 +(1 row) + +-- 3. If user hasn't registered any schema, the default schemas are used. +-- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next +-- track acquisition will return nothing if database is not modified in between. +-- Test track acquisition returns the same count of tuples as pg_class when +-- initial snapshot is triggered. +WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +-- 4. 
Create table in one of default schemas. Then unregister all +-- default schemas except this one. +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +SELECT arenadata_toolkit.tracking_unregister_schema('information_schema'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_aoseg'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_toast'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('pg_catalog'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +-- Getting the track. Only created table with size 0 is expected; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 0 | a | 2 | r | c + tracking_t1 | 0 | a | 0 | r | c + tracking_t1 | 0 | a | 1 | r | c +(4 rows) + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); + tracking_unregister_schema +---------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +--Empty track is expected +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; +WARNING: One of the tracking parameters (schemas,relkinds, relstorages) for database 167165 is empty. 
+ relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ +(0 rows) + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + tracking_register_schema +-------------------------- + t +(1 row) + +-- 5. Test data extending event. Bloom should capture it. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(3 rows) + +-- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. +DROP TABLE arenadata_toolkit.tracking_t1; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | -1 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 2 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 1 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | + | 0 | d | 0 | | +(16 rows) + +-- 8. Test actions on commit and rollback +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +-- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. 
+BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(4 rows) + +ROLLBACK; +-- If commits, filter is cleared. +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | c + tracking_t1 | 134064 | a | 0 | r | c + tracking_t1 | 133528 | a | 1 | r | c + tracking_t1 | 133064 | a | 2 | r | c +(4 rows) + +COMMIT; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +---------+------+-------+-------+---------+------------ +(0 rows) + +-- 9. Test repetitive track call within the same transaction. All the +-- calls should return the same relation set. 
+INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 147576 | a | 0 | r | c + tracking_t1 | 147112 | a | 1 | r | c + tracking_t1 | 146096 | a | 2 | r | c +(3 rows) + +CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); +INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + tracking_t1 | 161088 | a | 0 | r | c + tracking_t1 | 160696 | a | 1 | r | c + tracking_t1 | 159128 | a | 2 | r | c +(3 rows) + +ROLLBACK; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+--------+-------+-------+---------+------------ + | 0 | d | -1 | | + | 0 | d | 0 | | + tracking_t1 | 161088 | a | 0 | r | c + | 0 | d | 2 | | + tracking_t1 | 159128 | a | 2 | r | c + | 0 | d | 1 | | + tracking_t1 | 160696 | a | 1 | r | c +(7 rows) + +-- 10. Test relkind filtering. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +CREATE INDEX ON arenadata_toolkit.tracking_t1(i); +-- Want to see index and block dir relation. 
+SELECT arenadata_toolkit.tracking_register_schema('pg_aoseg'); + tracking_register_schema +-------------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + size | state | segid | relkind | relstorage +---------+-------+-------+---------+------------ + 32768 | a | -1 | i | h + 32768 | a | -1 | i | h + 1638400 | a | 0 | i | h + 65536 | a | 0 | i | h + 1638400 | a | 1 | i | h + 65536 | a | 1 | i | h + 1638400 | a | 2 | i | h + 65536 | a | 2 | i | h +(8 rows) + +DROP TABLE arenadata_toolkit.tracking_t1; +-- Set empty relkinds. The track result set should be empty. +SELECT arenadata_toolkit.tracking_set_relkinds(''); + tracking_set_relkinds +----------------------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; +WARNING: One of the tracking parameters (schemas,relkinds, relstorages) for database 167165 is empty. 
+ size | state | segid | relkind | relstorage +------+-------+-------+---------+------------ +(0 rows) + +-- Clean up +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +\c contrib_regression; +DROP DATABASE tracking_db1; diff --git a/gpcontrib/arenadata_toolkit/expected/upgrade_test.out b/gpcontrib/arenadata_toolkit/expected/upgrade_test.out index 35cb2c00ccab..22915ab53bc9 100644 --- a/gpcontrib/arenadata_toolkit/expected/upgrade_test.out +++ b/gpcontrib/arenadata_toolkit/expected/upgrade_test.out @@ -175,7 +175,12 @@ ORDER BY 1; 1.5: column tablespace_location check 1.5: create the latest check 1.5: only alter check -(34 rows) + 1.6: alter and create_tables check + 1.6: alter, create_tables and collect_table_stats check + 1.6: column tablespace_location check + 1.6: create the latest check + 1.6: only alter check +(39 rows) -- Cleanup DROP FUNCTION do_upgrade_test_for_arenadata_toolkit(TEXT); diff --git a/gpcontrib/arenadata_toolkit/isolation2/.gitignore b/gpcontrib/arenadata_toolkit/isolation2/.gitignore new file mode 100644 index 000000000000..354653a9b45c --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/.gitignore @@ -0,0 +1,3 @@ +/results/ +regression.diffs +regression.out
diff --git a/gpcontrib/arenadata_toolkit/isolation2/Makefile b/gpcontrib/arenadata_toolkit/isolation2/Makefile
new file mode 100644
index 000000000000..fc0a73dc060c
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/isolation2/Makefile
@@ -0,0 +1,15 @@
+# Runs the arenadata_toolkit isolation2 schedule (toolkit_schedule) through pg_isolation2_regress.
+top_builddir = ../../..
+
+ISOLATION2_ROOT = $(top_builddir)/src/test/isolation2
+
+# installcheck names an action, not a file, so it must always run.
+.PHONY: installcheck
+
+# The recipe changes into ISOLATION2_ROOT before starting the runner, so the
+# schedule, input and output locations are passed as absolute paths via $(CURDIR).
+# EXTRA_REGRESS_OPTS and PSQLDIR are not assigned in this Makefile.
+installcheck:
+	cd $(ISOLATION2_ROOT) && ./pg_isolation2_regress $(EXTRA_REGRESS_OPTS) --psqldir='$(PSQLDIR)' \
+	--dbname=toolkitisolation --schedule=$(CURDIR)/toolkit_schedule --inputdir=$(CURDIR) --outputdir=$(CURDIR) \
+	--load-extension=gp_inject_fault --load-extension=plpythonu
diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out 
b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out new file mode 100644 index 000000000000..4ef197fd9f1a --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/recovery_failover.out @@ -0,0 +1,150 @@ +-- This test triggers failover of content 1 and checks +-- the correct tracking state behaviour after recovery +!\retcode gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; +(exited with code 0) +!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly; +(exited with code 0) +-- Allow extra time for mirror promotion to complete recovery +!\retcode gpconfig -c gp_gang_creation_retry_count -v 120 --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpstop -raq -M fast; +(exited with code 0) + +CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; +CREATE + +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); + pg_sleep +---------- + +(1 row) +SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) + +-- Test track acquisition returns the same count of tuples as pg_class has with +-- default filter options. 
+WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +include: helpers/server_helpers.sql; +CREATE + +-- Helper functions +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() /* in func */ RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ SELECT segindex, is_initialized /* in func */ FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ $$ LANGUAGE SQL EXECUTE ON MASTER; +CREATE + +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() /* in func */ RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ SELECT segindex, is_initialized /* in func */ FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ $$ LANGUAGE SQL EXECUTE ON ALL SEGMENTS; +CREATE + +CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) RETURNS BOOL AS $$ DECLARE retries int; /* in func */ BEGIN /* in func */ retries := 1200; /* in func */ loop /* in func */ IF (select count(*) = num_segs FROM gp_segment_configuration WHERE status = 'd') THEN /* in func */ return TRUE; /* in func */ END IF; /* in func */ IF retries <= 0 THEN /* in func */ return FALSE; /* in func */ END IF; /* in func */ perform pg_sleep(0.1); /* in func */ retries := retries - 1; /* in func */ END loop; /* in func */ END; /* in func */ $$ language plpgsql; +CREATE + +-- no segment down. 
+SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; + count +------- + 0 +(1 row) + +SELECT pg_ctl((select datadir FROM gp_segment_configuration c WHERE c.role='p' AND c.content=1), 'stop'); + pg_ctl +-------- + OK +(1 row) + +SELECT wait_until_segments_are_down(1); + wait_until_segments_are_down +------------------------------ + t +(1 row) + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); + pg_sleep +---------- + +(1 row) +SELECT * FROM tracking_is_segment_initialized_master() UNION ALL SELECT * FROM tracking_is_segment_initialized_segments(); + segindex | is_initialized +----------+---------------- + -1 | t + 0 | t + 1 | t + 2 | t +(4 rows) + +-- Track acquisition should return full snapshot from promoted mirror since +-- initial snapshot is activated on recovery by default. +WITH segment_counts AS ( SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +-- fully recover the failed primary as new mirror +!\retcode gprecoverseg -aF --no-progress; +(exited with code 0) + +-- loop while segments come in sync +SELECT wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized +-------------------------------------- + OK +(1 row) + +!\retcode gprecoverseg -ar; +(exited with code 0) + +-- loop while segments come in sync +SELECT wait_until_all_segments_synchronized(); + wait_until_all_segments_synchronized 
+-------------------------------------- + OK +(1 row) + +-- verify no segment is down after recovery +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; + count +------- + 0 +(1 row) + +-- Track should be returned only from recovered segment since +-- initial snapshot is activated on recovery by default. +WITH segment_counts AS ( SELECT COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) + +!\retcode gpconfig -r gp_fts_probe_retries --masteronly; +(exited with code 0) +!\retcode gpconfig -r gp_gang_creation_retry_count --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpconfig -r gp_gang_creation_retry_timer --skipvalidation --masteronly; +(exited with code 0) +!\retcode gpstop -u; +(exited with code 0) diff --git a/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out new file mode 100644 index 000000000000..16a58472033b --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/expected/track_concurrent.out @@ -0,0 +1,115 @@ +-- start_matchsubs +-- m/ERROR: Track for database \d+ is being acquired in other transaction/ +-- s/\d+/XXX/g +-- end_matchsubs +-- Test concurrent track acquisition. 
+1: CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; +CREATE +1: SELECT arenadata_toolkit.tracking_register_db(); + tracking_register_db +---------------------- + t +(1 row) +1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + tracking_trigger_initial_snapshot +----------------------------------- + t +(1 row) +1: BEGIN; +BEGIN +1: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; +ERROR: Track for database 24798 is being acquired in other transaction + +1: ROLLBACK; +ROLLBACK + +2: WITH segment_counts AS ( SELECT tt.segid, COUNT(*) AS cnt FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid ), pg_class_count AS ( SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) ) SELECT bool_and(sc.cnt = pc.cnt) FROM segment_counts sc, pg_class_count pc; + bool_and +---------- + t +(1 row) + +-- Test uncommited file creation is not seen from other transaction until the +-- first one is commited. 
+1: BEGIN; +BEGIN +1: CREATE TABLE tracking_t1 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +CREATE 100 + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +--------------------------+-------+-------+-------+---------+------------ + gp_distribution_policy | 32768 | a | -1 | r | h + gp_policy_localoid_index | 65536 | a | -1 | i | h + gp_distribution_policy | 32768 | a | 1 | r | h + gp_policy_localoid_index | 65536 | a | 1 | i | h + gp_distribution_policy | 32768 | a | 0 | r | h + gp_policy_localoid_index | 65536 | a | 0 | i | h + gp_distribution_policy | 32768 | a | 2 | r | h + gp_policy_localoid_index | 65536 | a | 2 | i | h +(8 rows) + +1: COMMIT; +COMMIT + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t1 | 0 | a | -1 | r | h + tracking_t1 | 32768 | a | 1 | r | h + tracking_t1 | 32768 | a | 0 | r | h + tracking_t1 | 32768 | a | 2 | r | h +(4 rows) + +-- Test file creation is seen from other transaction after the first transaction +-- has taken the track. 
+1: BEGIN; +BEGIN +1: CREATE TABLE tracking_t2 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +CREATE 100 +1: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t2 | 32768 | a | 1 | r | h + tracking_t2 | 32768 | a | 2 | r | h + tracking_t2 | 32768 | a | 0 | r | h +(3 rows) +1: COMMIT; +COMMIT + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + relname | size | state | segid | relkind | relstorage +-------------+-------+-------+-------+---------+------------ + tracking_t2 | 0 | a | -1 | r | h + tracking_t2 | 32768 | a | 2 | r | h + tracking_t2 | 32768 | a | 0 | r | h + tracking_t2 | 32768 | a | 1 | r | h +(4 rows) + +1: DROP TABLE tracking_t1; +DROP +1: DROP TABLE tracking_t2; +DROP +1: SELECT arenadata_toolkit.tracking_unregister_db(); + tracking_unregister_db +------------------------ + t +(1 row) +1: DROP EXTENSION arenadata_toolkit; +DROP + +1q: ... +2q: ... 
+ +!\retcode gpconfig -r shared_preload_libraries; +(exited with code 0) +!\retcode gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec; +(exited with code 0) +!\retcode gpstop -raq -M fast; +(exited with code 0) diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql new file mode 100644 index 000000000000..5d7d957df836 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/recovery_failover.sql @@ -0,0 +1,134 @@ +-- This test triggers failover of content 1 and checks +-- the correct tracking state behaviour after recovery +!\retcode gpconfig -c shared_preload_libraries -v 'arenadata_toolkit'; +!\retcode gpconfig -c gp_fts_probe_retries -v 2 --masteronly; +-- Allow extra time for mirror promotion to complete recovery +!\retcode gpconfig -c gp_gang_creation_retry_count -v 120 --skipvalidation --masteronly; +!\retcode gpconfig -c gp_gang_creation_retry_timer -v 1000 --skipvalidation --masteronly; +!\retcode gpstop -raq -M fast; + +CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; + +!\retcode gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5'; +!\retcode gpstop -u; + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); +SELECT arenadata_toolkit.tracking_register_db(); +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + +-- Test track acquisition returns the same count of tuples as pg_class has with +-- default filter options. 
+WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +include: helpers/server_helpers.sql; + +-- Helper functions +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_master() /* in func */ +RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ +SELECT segindex, is_initialized /* in func */ +FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ +$$ LANGUAGE SQL EXECUTE ON MASTER; + +CREATE OR REPLACE FUNCTION tracking_is_segment_initialized_segments() /* in func */ +RETURNS TABLE(segindex INT, is_initialized BOOL) AS $$ /* in func */ +SELECT segindex, is_initialized /* in func */ +FROM arenadata_toolkit.tracking_is_segment_initialized(); /* in func */ +$$ LANGUAGE SQL EXECUTE ON ALL SEGMENTS; + +CREATE or REPLACE FUNCTION wait_until_segments_are_down(num_segs int) +RETURNS BOOL AS +$$ +DECLARE +retries int; /* in func */ +BEGIN /* in func */ + retries := 1200; /* in func */ + loop /* in func */ + IF (select count(*) = num_segs FROM gp_segment_configuration WHERE status = 'd') THEN /* in func */ + return TRUE; /* in func */ + END IF; /* in func */ + IF retries <= 0 THEN /* in func */ + return FALSE; /* in func */ + END IF; /* in func */ + perform pg_sleep(0.1); /* in func */ + retries := retries - 1; /* in func */ + END loop; /* in func */ +END; /* in func */ +$$ language plpgsql; + +-- no segment down. 
+SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; + +SELECT pg_ctl((select datadir FROM gp_segment_configuration c +WHERE c.role='p' AND c.content=1), 'stop'); + +SELECT wait_until_segments_are_down(1); + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int); +SELECT * FROM tracking_is_segment_initialized_master() +UNION ALL +SELECT * FROM tracking_is_segment_initialized_segments(); + +-- Track acquisition should return full snapshot from promoted mirror since +-- initial snapshot is activated on recovery by default. +WITH segment_counts AS ( + SELECT COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt WHERE tt.segid = 1 + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +-- fully recover the failed primary as new mirror +!\retcode gprecoverseg -aF --no-progress; + +-- loop while segments come in sync +SELECT wait_until_all_segments_synchronized(); + +!\retcode gprecoverseg -ar; + +-- loop while segments come in sync +SELECT wait_until_all_segments_synchronized(); + +-- verify no segment is down after recovery +SELECT count(*) FROM gp_segment_configuration WHERE status = 'd'; + +-- Track should be returned only from recovered segment since +-- initial snapshot is activated on recovery by default. 
+WITH segment_counts AS ( + SELECT COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +SELECT arenadata_toolkit.tracking_unregister_db(); + +!\retcode gpconfig -r gp_fts_probe_retries --masteronly; +!\retcode gpconfig -r gp_gang_creation_retry_count --skipvalidation --masteronly; +!\retcode gpconfig -r gp_gang_creation_retry_timer --skipvalidation --masteronly; +!\retcode gpstop -u; diff --git a/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql new file mode 100644 index 000000000000..9ddc4a31a01c --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/sql/track_concurrent.sql @@ -0,0 +1,72 @@ +-- start_matchsubs +-- m/ERROR: Track for database \d+ is being acquired in other transaction/ +-- s/\d+/XXX/g +-- end_matchsubs +-- Test concurrent track acquisition. 
+1: CREATE EXTENSION IF NOT EXISTS arenadata_toolkit; +1: SELECT arenadata_toolkit.tracking_register_db(); +1: SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); +1: BEGIN; +1: WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +2: SELECT tt.segid, count(*) FROM arenadata_toolkit.tables_track tt GROUP BY tt.segid; + +1: ROLLBACK; + +2: WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +-- Test uncommited file creation is not seen from other transaction until the +-- first one is commited. 
+1: BEGIN; +1: CREATE TABLE tracking_t1 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +1: COMMIT; + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +-- Test file creation is seen from other transaction after the first transaction +-- has taken the track. +1: BEGIN; +1: CREATE TABLE tracking_t2 AS SELECT generate_series (1, 100) i DISTRIBUTED BY (i); +1: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; +1: COMMIT; + +2: SELECT relname, size, state, segid, relkind, relstorage FROM arenadata_toolkit.tables_track; + +1: DROP TABLE tracking_t1; +1: DROP TABLE tracking_t2; +1: SELECT arenadata_toolkit.tracking_unregister_db(); +1: DROP EXTENSION arenadata_toolkit; + +1q: +2q: + +!\retcode gpconfig -r shared_preload_libraries; +!\retcode gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec; +!\retcode gpstop -raq -M fast; diff --git a/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule b/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule new file mode 100644 index 000000000000..aa30a135146e --- /dev/null +++ b/gpcontrib/arenadata_toolkit/isolation2/toolkit_schedule @@ -0,0 +1,3 @@ +# isolation tests list +test: recovery_failover +test: track_concurrent diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql new file mode 100644 index 000000000000..f46b257782fd --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_guc.sql @@ -0,0 +1,217 @@ +-- start_ignore +\! gpconfig -c shared_preload_libraries -v 'arenadata_toolkit' +\! 
gpstop -raq -M fast +\c +-- end_ignore +-- start_matchsubs +-- +-- m/ERROR: \[arenadata_toolkit\] exceeded maximum number of tracked databases \(track_files\.c:\d+\)/ +-- s/\d+/XXX/g +-- +-- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking1; +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; +--end_ignore + +-- Test database registering GUC. +CREATE DATABASE tracking1; +\c tracking1; +CREATE EXTENSION arenadata_toolkit; + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_register_db(); + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_unregister_db(); + +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_is_db_tracked = true; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_is_db_tracked = true; + +-- Test limit of tracking databases. 
+SHOW arenadata_toolkit.tracking_db_track_count; + +CREATE DATABASE tracking2; +CREATE DATABASE tracking3; +CREATE DATABASE tracking4; +CREATE DATABASE tracking5; +CREATE DATABASE tracking6; + +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_register_db(db_oid); + END LOOP; +END; +$$; + + +DO $$ +DECLARE + db_oid oid; +BEGIN + FOR db_oid IN + SELECT oid + FROM pg_database + WHERE datname IN ('tracking1', 'tracking2', 'tracking3', + 'tracking4', 'tracking5', 'tracking6') + LOOP + PERFORM arenadata_toolkit.tracking_unregister_db(db_oid); + END LOOP; +END; +$$; + +DROP DATABASE IF EXISTS tracking2; +DROP DATABASE IF EXISTS tracking3; +DROP DATABASE IF EXISTS tracking4; +DROP DATABASE IF EXISTS tracking5; +DROP DATABASE IF EXISTS tracking6; + +-- Test arenadata_toolkit.tracking_snapshot_on_recovery GUC +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. 
+SET arenadata_toolkit.tracking_snapshot_on_recovery = false; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_snapshot_on_recovery = false; + +-- Test arenadata_toolkit.tracking_relstorages GUC +SELECT arenadata_toolkit.tracking_set_relstorages('f,a,x'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relstorages('v,v,v,,,'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relstorages('d,b,c'); + +SELECT arenadata_toolkit.tracking_set_relstorages(''); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relstorages = "h, a, x"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relstorages = "h, a, x"; + +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relstorages; + +-- Test arenadata_toolkit.tracking_relkinds GUC +SELECT arenadata_toolkit.tracking_set_relkinds('r,t,o,S'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relkinds('m,M,o,,,'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_set_relkinds('d,b,c'); + +SELECT arenadata_toolkit.tracking_set_relkinds(''); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_relkinds = "h, a, x"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_relkinds = "h, a, x"; + +-- Resetting case is allowed. 
+ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_relkinds; + +-- Test arenadata_toolkit.tracking_schemas GUC +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + +SELECT arenadata_toolkit.tracking_register_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + +SELECT datname, setconfig FROM pg_db_role_setting JOIN pg_database ON +setdatabase=oid WHERE datname=current_database(); + +SELECT arenadata_toolkit.tracking_register_schema('pg_pg'); + +-- Prohibit manual GUC setting. +SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; + +ALTER DATABASE tracking1 SET arenadata_toolkit.tracking_schemas = "pg_catalog, mychema"; + +-- Resetting case is allowed. +ALTER DATABASE tracking1 RESET arenadata_toolkit.tracking_schemas; + +-- Test GUCs are set in the caller's session. 
+SELECT arenadata_toolkit.tracking_register_db(); +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT arenadata_toolkit.tracking_unregister_db(); +SHOW arenadata_toolkit.tracking_is_db_tracked; + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(true); +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + +SELECT arenadata_toolkit.tracking_set_snapshot_on_recovery(false); +SHOW arenadata_toolkit.tracking_snapshot_on_recovery; + +SHOW arenadata_toolkit.tracking_schemas; +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); +SHOW arenadata_toolkit.tracking_schemas; + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); +SHOW arenadata_toolkit.tracking_schemas; + +SHOW arenadata_toolkit.tracking_relkinds; +SELECT arenadata_toolkit.tracking_set_relkinds('r,t'); +SHOW arenadata_toolkit.tracking_relkinds; + +SHOW arenadata_toolkit.tracking_relstorages; +SELECT arenadata_toolkit.tracking_set_relstorages('a'); +SHOW arenadata_toolkit.tracking_relstorages; + +\c contrib_regression; + +DROP DATABASE tracking1; diff --git a/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql new file mode 100644 index 000000000000..75b0d29785ba --- /dev/null +++ b/gpcontrib/arenadata_toolkit/sql/arenadata_toolkit_tracking.sql @@ -0,0 +1,150 @@ +-- Tests for size tracking logic introduced in version 1.7 +-- start_ignore +\! gpconfig -c arenadata_toolkit.tracking_worker_naptime_sec -v '5' +\! gpstop -u +\c +-- end_ignore +-- start_matchsubs +-- m/ERROR: database \d+ is not tracked/ +-- s/\d+/XXX/g +-- m/for database \d+ is empty/ +-- s/\d+/XXX/g +-- end_matchsubs +--start_ignore +DROP DATABASE IF EXISTS tracking_db1; +--end_ignore +CREATE DATABASE tracking_db1; +\c tracking_db1; +CREATE EXTENSION arenadata_toolkit; + +-- 1. 
Test getting track on not registered database; +SELECT * FROM arenadata_toolkit.tables_track; + +SELECT pg_sleep(current_setting('arenadata_toolkit.tracking_worker_naptime_sec')::int * 2); +SELECT arenadata_toolkit.tracking_register_db(); + +-- 2. Test initial snapshot behaviour. Triggering initial snapshot leads to +-- setting up the bloom filter such that all relfilenodes are considered. +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); +SELECT is_triggered FROM arenadata_toolkit.is_initial_snapshot_triggered; + +-- 3. If user hasn't registered any schema, the default schemas are used. +-- See arenadata_toolkit_guc.c. At commit the bloom filter is cleared. The next +-- track acquisition will return nothing if database is not modified in between. +-- Test track acquisition returns the same count of tuples as pg_class when +-- initial snapshot is triggered. +WITH segment_counts AS ( + SELECT tt.segid, COUNT(*) AS cnt + FROM arenadata_toolkit.tables_track tt + GROUP BY tt.segid +), +pg_class_count AS ( + SELECT COUNT(*) AS cnt FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE nspname = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_schemas'), ',')) + AND c.relstorage = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relstorages'), ',')) + AND c.relkind = ANY (string_to_array(current_setting('arenadata_toolkit.tracking_relkinds'), ',')) +) +SELECT bool_and(sc.cnt = pc.cnt) +FROM segment_counts sc, pg_class_count pc; + +-- 4. Create table in one of default schemas. Then unregister all +-- default schemas except this one. 
+CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); + +SELECT arenadata_toolkit.tracking_unregister_schema('information_schema'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_aoseg'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_toast'); +SELECT arenadata_toolkit.tracking_unregister_schema('pg_catalog'); +SELECT arenadata_toolkit.tracking_unregister_schema('public'); + +-- Getting the track. Only created table with size 0 is expected; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +SELECT arenadata_toolkit.tracking_unregister_schema('arenadata_toolkit'); +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + +--Empty track is expected +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +SELECT arenadata_toolkit.tracking_register_schema('arenadata_toolkit'); + +-- 5. Test data extending event. Bloom should capture it. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +-- 6. Dropping table. The track shows only relfilenodes without names and other additional info with status 'd'. +DROP TABLE arenadata_toolkit.tracking_t1; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +-- 8. Test actions on commit and rollback +CREATE TABLE arenadata_toolkit.tracking_t1 (i INT) +WITH (appendonly=true, orientation=column) DISTRIBUTED BY (i); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); + +-- If the wrapping transaction rollbacks, the Bloom filter is not cleared up. +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; +ROLLBACK; + +-- If commits, filter is cleared. 
+BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; +COMMIT; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +-- 9. Test repetitive track call within the same transaction. All the +-- calls should return the same relation set. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); +BEGIN; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +CREATE TABLE arenadata_toolkit.tracking_t2 (j BIGINT) DISTRIBUTED BY (j); +INSERT INTO arenadata_toolkit.tracking_t2 SELECT generate_series(1,10000); +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,10000); + +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; +ROLLBACK; +SELECT relname, size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +-- 10. Test relkind filtering. +INSERT INTO arenadata_toolkit.tracking_t1 SELECT generate_series(1,100000); +CREATE INDEX ON arenadata_toolkit.tracking_t1(i); + +-- Want to see index and block dir relation. +SELECT arenadata_toolkit.tracking_register_schema('pg_aoseg'); +SELECT arenadata_toolkit.tracking_set_relkinds('o,i'); + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +DROP TABLE arenadata_toolkit.tracking_t1; + +-- Set empty relkinds. The track result set should be empty. +SELECT arenadata_toolkit.tracking_set_relkinds(''); + +SELECT arenadata_toolkit.tracking_trigger_initial_snapshot(); + +SELECT size, state, segid, relkind, relstorage +FROM arenadata_toolkit.tables_track; + +-- Clean up +SELECT arenadata_toolkit.tracking_unregister_db(); + +\c contrib_regression; +DROP DATABASE tracking_db1; +-- start_ignore +\! gpconfig -r shared_preload_libraries +\! gpconfig -r arenadata_toolkit.tracking_worker_naptime_sec +\! 
gpstop -u
+-- end_ignore
diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c
new file mode 100644
index 000000000000..e235b6f6c466
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit.c
@@ -0,0 +1,45 @@
+#include "postgres.h"
+
+#include "access/xlog.h"
+#include "cdb/cdbvars.h"
+#include "miscadmin.h"
+#include "postmaster/bgworker.h"
+#include "storage/shmem.h"
+
+#include "arenadata_toolkit_guc.h"
+#include "arenadata_toolkit_worker.h"
+#include "drops_track.h"
+#include "file_hook.h"
+#include "tf_shmem.h"
+#include "track_files.h"
+
+void _PG_init(void);
+void _PG_fini(void);
+/*
+ * Library load hook.
+ *
+ * Does nothing unless the library is being loaded while
+ * process_shared_preload_libraries_in_progress is set. Otherwise it defines
+ * the extension GUCs, calls the shared memory, file hook and drops tracking
+ * init routines, installs the executor hooks on the query dispatcher only,
+ * and registers the background worker.
+ */
+void
+_PG_init(void)
+{
+	if (!process_shared_preload_libraries_in_progress)
+		return;
+
+	tf_guc_define();
+	tf_shmem_init();
+	file_hook_init();
+	drops_track_init();
+
+	if (IS_QUERY_DISPATCHER())	/* executor hooks are dispatcher-only */
+		track_setup_executor_hooks();
+
+	arenadata_toolkit_worker_register();
+}
+/*
+ * Library unload hook: calls the uninstall/deinit counterparts of the
+ * routines used in _PG_init, in reverse order.
+ *
+ * NOTE(review): unlike _PG_init, this runs unconditionally, i.e. also when
+ * _PG_init returned early without initializing anything - confirm that the
+ * deinit routines tolerate that.
+ */
+void
+_PG_fini(void)
+{
+	if (IS_QUERY_DISPATCHER())
+		track_uninstall_executor_hooks();
+
+	drops_track_deinit();
+	file_hook_deinit();
+	tf_shmem_deinit();
+}
diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c
new file mode 100644
index 000000000000..4797c3eb7731
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_guc.c
@@ -0,0 +1,213 @@
+#include "arenadata_toolkit_guc.h"
+
+#include "cdb/cdbvars.h"
+#include "catalog/objectaccess.h"
+#include "catalog/pg_db_role_setting.h"
+#include	/* NOTE(review): the header name is missing here (looks lost in extraction) - restore it */
+#include "utils/guc.h"
+#include "tf_shmem.h"
+/*
+ * Backing variables of the custom GUCs registered in tf_guc_define(); the
+ * GUC bound to each variable is named next to it.
+ */
+int bloom_size = DEFAULT_BLOOM_SIZE_BYTES;	/* arenadata_toolkit.tracking_bloom_size */
+int db_track_count = DEFAULT_DB_TRACK_COUNT;	/* arenadata_toolkit.tracking_db_track_count */
+bool is_tracked = DEFAULT_IS_TRACKED;	/* arenadata_toolkit.tracking_is_db_tracked */
+bool get_full_snapshot_on_recovery = DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY;	/* arenadata_toolkit.tracking_snapshot_on_recovery */
+int drops_count = DEFAULT_DROPS_COUNT;	/* arenadata_toolkit.tracking_drops_count */
+char *tracked_schemas = DEFAULT_TRACKED_SCHEMAS;	/* arenadata_toolkit.tracking_schemas */
+char *tracked_rel_storages = DEFAULT_TRACKED_REL_STORAGES;	/* arenadata_toolkit.tracking_relstorages */
+char *tracked_rel_kinds = DEFAULT_TRACKED_REL_KINDS;	/* arenadata_toolkit.tracking_relkinds */
+int tracking_worker_naptime_sec = DEFAULT_NAPTIME_SEC;	/* arenadata_toolkit.tracking_worker_naptime_sec */
+
+/*
+ * Variable controlling GUC setting. Only extension functions are allowed
+ * to set GUC during NormalProcessing mode.
+ *
+ * Set by tf_guc_unlock() and cleared again by check_guc() whenever it takes
+ * its permissive branch.
+ */
+static bool guc_is_unlocked = false;
+/*
+ * Arm the unlock flag that check_guc() consults (and clears) on the query
+ * dispatcher.
+ */
+void
+tf_guc_unlock(void)
+{
+	guc_is_unlocked = true;
+}
+
+/*
+ * Prohibit changing the GUC value manually except several cases.
+ * This is not called for RESET, so RESET is not guarded
+ *
+ * The change is considered only when the process is in init processing
+ * mode, runs with Gp_role == GP_ROLE_EXECUTE, or runs with
+ * Gp_role == GP_ROLE_DISPATCH after tf_guc_unlock() was called. In that case
+ * the unlock flag is cleared and the value is accepted only for the sources
+ * PGC_S_DATABASE, PGC_S_DEFAULT and PGC_S_TEST; any other source is rejected
+ * without a custom message. In every other situation the check fails with a
+ * message that names "handle".
+ *
+ * source: origin of the new value, as passed to the GUC check hook.
+ * handle: name of the SQL function reported in the error message.
+ * Returns true when the new value may be applied, false otherwise.
+ */
+static bool
+check_guc(GucSource source, const char *handle)
+{
+	if (IsInitProcessingMode() || Gp_role == GP_ROLE_EXECUTE ||
+		(Gp_role == GP_ROLE_DISPATCH && guc_is_unlocked))
+	{
+		guc_is_unlocked = false;	/* the unlock is consumed by this check */
+
+		if (source != PGC_S_DATABASE &&
+			source != PGC_S_DEFAULT &&
+			source != PGC_S_TEST)
+			return false;
+
+		return true;
+	}
+
+	GUC_check_errmsg("cannot change tracking status outside the %s function", handle);
+	return false;
+}
+
+/*
+ * Prohibit changing the arenadata_toolkit.tracking_is_db_tracked value manually
+ */
+static bool
+check_tracked(bool *newval, void **extra, GucSource source)
+{
+	return check_guc(source, "tracking_register_db");
+}
+
+/*
+ * Prohibit changing the arenadata_toolkit.tracking_snapshot_on_recovery value manually
+ */
+static bool
+check_get_full_snapshot_on_recovery(bool *newval, void **extra, GucSource source)
+{
+	return check_guc(source, "tracking_set_snapshot_on_recovery");
+}
+
+/*
+ * Prohibit changing the arenadata_toolkit.tracking_relkinds value manually
+ *
+ * NOTE(review): the regression tests in this patch change this GUC through
+ * tracking_set_relkinds(); the function name reported below does not match
+ * it - confirm which name is intended (the expected test output contains
+ * the message, so both must change together).
+ */
+static bool
+check_relkinds(char **newval, void **extra, GucSource source)
+{
+	return check_guc(source, "tracking_register_relkinds");
+}
+
+/*
+ * Prohibit changing the arenadata_toolkit.tracking_schemas value manually
+ */
+static bool
+check_schemas(char **newval, void **extra, GucSource source)
+{
+	return check_guc(source, "tracking_register_schema");
+}
+
+/*
+ * Prohibit changing the arenadata_toolkit.tracking_relstorages value manually
+ *
+ * NOTE(review): the regression tests in this patch change this GUC through
+ * tracking_set_relstorages(); the function name reported below does not
+ * match it - confirm which name is intended (the expected test output
+ * contains the message, so both must change together).
+ */
+static bool
+check_relstorages(char **newval, void **extra, GucSource source)
+{
+	return check_guc(source, "tracking_register_relstorages");
+}
+/*
+ * Register the custom GUCs of the extension.
+ *
+ * The bloom size, the tracked database count and the drops count are
+ * PGC_POSTMASTER; the worker nap time is PGC_SIGHUP; the remaining GUCs are
+ * PGC_SUSET and are guarded by the check hooks defined above.
+ */
+void
+tf_guc_define(void)
+{
+	DefineCustomIntVariable("arenadata_toolkit.tracking_bloom_size",
+							"Size of bloom filter in bytes for each tracked database",
+							NULL,
+							&bloom_size,
+							DEFAULT_BLOOM_SIZE_BYTES,
+							MIN_BLOOM_SIZE_BYTES,
+							MAX_BLOOM_SIZE_BYTES,
+							PGC_POSTMASTER,
+							0,
+							NULL,
+							NULL,
+							NULL
+		);
+
+	DefineCustomIntVariable("arenadata_toolkit.tracking_db_track_count",
+							"Count of tracked databases.",
+							NULL,
+							&db_track_count,
+							DEFAULT_DB_TRACK_COUNT,
+							MIN_DB_TRACK_COUNT,
+							MAX_DB_TRACK_COUNT,
+							PGC_POSTMASTER,
+							0,
+							NULL,
+							NULL,
+							NULL
+		);
+
+	DefineCustomBoolVariable("arenadata_toolkit.tracking_is_db_tracked",
+							 "Is current database tracked.",
+							 NULL,
+							 &is_tracked,
+							 DEFAULT_IS_TRACKED,
+							 PGC_SUSET,
+							 0,
+							 check_tracked,
+							 NULL,
+							 NULL);
+
+	DefineCustomBoolVariable("arenadata_toolkit.tracking_snapshot_on_recovery",
+							 "Return full snapshot at startup/recovery.",
+							 NULL,
+							 &get_full_snapshot_on_recovery,
+							 DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY,
+							 PGC_SUSET,
+							 0,
+							 check_get_full_snapshot_on_recovery,
+							 NULL,
+							 NULL);
+
+	DefineCustomIntVariable("arenadata_toolkit.tracking_drops_count",
+							"Count of max monitored drop events.",
+							NULL,
+							&drops_count,
+							DEFAULT_DROPS_COUNT,
+							MIN_DROPS_COUNT,
+							MAX_DROPS_COUNT,
+							PGC_POSTMASTER,
+							0,
+							NULL,
+							NULL,
+							NULL);
+
+	DefineCustomStringVariable("arenadata_toolkit.tracking_schemas",
+							   "Tracked schema names.",
+							   NULL,
+							   &tracked_schemas,
+							   DEFAULT_TRACKED_SCHEMAS,
+							   PGC_SUSET,
+							   0,
+							   check_schemas,
+							   NULL,
+							   NULL);
+
+	DefineCustomStringVariable("arenadata_toolkit.tracking_relstorages",
+							   "Tracked relation storage types.",
+							   NULL,
+							   &tracked_rel_storages,
+							   DEFAULT_TRACKED_REL_STORAGES,
+							   PGC_SUSET,
+							   0,
+							   check_relstorages,
+							   NULL,
+							   NULL);
+
+	DefineCustomStringVariable("arenadata_toolkit.tracking_relkinds",
+							   "Tracked relation kinds.",
+							   NULL,
+							   &tracked_rel_kinds,
+							   DEFAULT_TRACKED_REL_KINDS,
+							   PGC_SUSET,
+							   0,
+							   check_relkinds,
+							   NULL,
+							   NULL);
+
+
+	DefineCustomIntVariable("arenadata_toolkit.tracking_worker_naptime_sec",
+							"Toolkit background worker nap time",
+							NULL,
+							&tracking_worker_naptime_sec,
+							DEFAULT_NAPTIME_SEC,
+							MIN_NAPTIME_SEC,
+							MAX_NAPTIME_SEC,
+							PGC_SIGHUP,
+							0,
+							NULL,
+							NULL,
+							NULL);
+}
diff --git a/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c
new file mode 100644
index 000000000000..bb28ae85e337
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/arenadata_toolkit_worker.c
@@ -0,0 +1,272 @@
+#include "postgres.h"
+
+#include "access/xact.h"
+#include "access/genam.h"
+#include "access/heapam.h"
+#include "catalog/pg_db_role_setting.h"
+#include "catalog/pg_extension.h"
+#include "catalog/indexing.h"
+#include "cdb/cdbdisp_query.h"
+#include "cdb/cdbdispatchresult.h"
+#include "cdb/cdbvars.h"
+#include "executor/spi.h"
+#include "libpq-fe.h"
+#include "postmaster/bgworker.h"
+#include "storage/proc.h"
+#include "storage/ipc.h"
+#include "utils/snapmgr.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+
+#include "arenadata_toolkit_worker.h"
+#include "arenadata_toolkit_guc.h"
+#include "bloom_set.h"
+#include "tf_shmem.h"
+
+#define TOOLKIT_BINARY_NAME "arenadata_toolkit"
+#define SQL(...) #__VA_ARGS__	/* turns its arguments into one string literal */
+/*
+ * One tracked database as read from pg_db_role_setting: its OID and the
+ * per-database value of the snapshot-on-recovery setting.
+ */
+typedef struct
+{
+	Oid dbid;
+	bool get_full_snapshot_on_recovery;
+} tracked_db_t;
+
+/* flags set by signal handlers */
+static volatile sig_atomic_t got_sighup = false;
+static volatile sig_atomic_t got_sigterm = false;
+
+void arenadata_toolkit_main(Datum);
+
+/*
+ * Signal handler for SIGTERM
+ * Set a flag to let the main loop terminate, and set our latch to wake
+ * it up.
+ */
+static void
+tracking_sigterm(SIGNAL_ARGS)
+{
+	int save_errno = errno;	/* SetLatch() may change errno; restore it on exit */
+
+	got_sigterm = true;
+	if (MyProc)
+		SetLatch(&MyProc->procLatch);
+
+	errno = save_errno;
+}
+
+/*
+ * Signal handler for SIGHUP
+ * Set a flag to tell the main loop to reread the config file, and set
+ * our latch to wake it up.
+ */
+static void
+tracking_sighup(SIGNAL_ARGS)
+{
+    int         save_errno = errno;
+
+    got_sighup = true;
+    if (MyProc)
+        SetLatch(&MyProc->procLatch);
+
+    errno = save_errno;
+}
+
+/*
+ * Build the list of databases that have tracking switched on.
+ *
+ * Runs an SPI query over pg_db_role_setting (rows with setrole = 0) and keeps
+ * the databases whose arenadata_toolkit.tracking_is_db_tracked setting is
+ * true. When a database has no tracking_snapshot_on_recovery setting, the
+ * current value of the get_full_snapshot_on_recovery GUC is used instead.
+ *
+ * Returns a List of palloc'd tracked_db_t. The list and its elements are
+ * allocated in the memory context that was current on entry, so they outlive
+ * SPI_finish(). Must be called inside a transaction.
+ */
+static List *
+get_tracked_dbs()
+{
+    StringInfoData query;
+    List       *tracked_dbs = NIL;
+    tracked_db_t *trackedDb;
+    MemoryContext topcontext = CurrentMemoryContext;
+
+    initStringInfo(&query);
+
+    /*
+     * The statement is a constant, so append it as a plain string instead of
+     * handing it to the printf-style formatter.
+     */
+    appendStringInfoString(&query, SQL(
+        WITH _ AS (
+            WITH _ AS (
+                SELECT "setdatabase", regexp_split_to_array(UNNEST("setconfig"), '=') AS "setconfig"
+                FROM "pg_db_role_setting" WHERE "setrole"=0)
+            SELECT "setdatabase", json_object(array_agg("setconfig"[1]), array_agg("setconfig"[2])) AS "setconfig"
+            FROM _ GROUP BY 1)
+        SELECT "setdatabase",
+            ("setconfig"->>'arenadata_toolkit.tracking_snapshot_on_recovery')::bool as "snapshot" FROM _ WHERE
+            ("setconfig"->>'arenadata_toolkit.tracking_is_db_tracked')::bool IS TRUE));
+
+    if (SPI_connect() != SPI_OK_CONNECT)
+        ereport(ERROR, (errmsg("SPI_connect failed")));
+
+    PushActiveSnapshot(GetTransactionSnapshot());
+
+    if (SPI_execute(query.data, true, 0) != SPI_OK_SELECT)
+        ereport(ERROR, (errmsg("SPI_execute failed")));
+
+    for (uint64 row = 0; row < SPI_processed; row++)
+    {
+        HeapTuple   val = SPI_tuptable->vals[row];
+        TupleDesc   tupdesc = SPI_tuptable->tupdesc;
+        bool        isnull = false;
+        Oid         dbid = DatumGetObjectId(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "setdatabase"), &isnull));
+        bool        get_snapshot_on_recovery = DatumGetBool(SPI_getbinval(val, tupdesc, SPI_fnumber(tupdesc, "snapshot"), &isnull));
+
+        /* isnull now describes the "snapshot" column only */
+        if (isnull)
+            get_snapshot_on_recovery = get_full_snapshot_on_recovery;
+
+        /* Allocate outside the SPI context so the result survives SPI_finish() */
+        MemoryContext oldcontext = MemoryContextSwitchTo(topcontext);
+
+        trackedDb = (tracked_db_t *) palloc0(sizeof(tracked_db_t));
+        trackedDb->dbid = dbid;
+        trackedDb->get_full_snapshot_on_recovery = get_snapshot_on_recovery;
+        tracked_dbs = lappend(tracked_dbs, trackedDb);
+
+        MemoryContextSwitchTo(oldcontext);
+    }
+    SPI_finish();
+    PopActiveSnapshot();
+
+    pfree(query.data);
+
+    return tracked_dbs;
+}
+
+/*
+ * Bind a Bloom filter to every database in the list and preset its bits
+ * according to the database's snapshot-on-recovery flag.
+ */
+static void
+track_dbs(List *tracked_dbs)
+{
+    ListCell   *cell;
+    tracked_db_t *trackedDb;
+
+    foreach(cell, tracked_dbs)
+    {
+        trackedDb = (tracked_db_t *) lfirst(cell);
+
+        bloom_set_bind(trackedDb->dbid);
+        bloom_set_trigger_bits(trackedDb->dbid,
+                               trackedDb->get_full_snapshot_on_recovery);
+    }
+}
+
+/*
+ * Initialize tracking once: while the shared tracking_is_initialized flag is
+ * still clear, read the tracked databases in a transaction, bind their
+ * filters and then set the flag.
+ */
+static void
+worker_tracking_status_check()
+{
+    List       *tracked_dbs = NIL;
+
+    if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized))
+    {
+        StartTransactionCommand();
+
+        tracked_dbs = get_tracked_dbs();
+
+        if (list_length(tracked_dbs) > 0)
+            track_dbs(tracked_dbs);
+
+        if (tracked_dbs)
+            list_free_deep(tracked_dbs);
+
+        CommitTransactionCommand();
+
+        pg_atomic_test_set_flag(&tf_shared_state->tracking_is_initialized);
+    }
+}
+
+/* Main worker cycle. Scans pg_db_role_setting and binds tracked dbids to
+ * corresponding Bloom filter. Lives on segments.
+ *
+ * The loop runs worker_tracking_status_check() once per nap period, rereads
+ * the configuration on SIGHUP and leaves on SIGTERM or postmaster death.
+ */
+void
+arenadata_toolkit_main(Datum main_arg)
+{
+    instr_time  current_time_timeout;
+    instr_time  start_time_timeout;
+    long        current_timeout = -1;
+
+    elog(LOG, "[arenadata toolkit] Starting background worker");
+
+    /*
+     * The worker shouldn't exist when the master boots in utility mode.
+     * Otherwise BackgroundWorkerInitializeConnection will explode with FATAL.
+     */
+    if (IS_QUERY_DISPATCHER() && Gp_role != GP_ROLE_DISPATCH)
+    {
+        proc_exit(0);
+    }
+
+    /*
+     * Kludge for scanning pg_db_role_setting on segments.
+     */
+    if (!IS_QUERY_DISPATCHER() && Gp_role == GP_ROLE_DISPATCH)
+    {
+        Gp_role = GP_ROLE_UTILITY;
+        Gp_session_role = GP_ROLE_UTILITY;
+    }
+
+    pqsignal(SIGHUP, tracking_sighup);
+    pqsignal(SIGTERM, tracking_sigterm);
+
+    BackgroundWorkerUnblockSignals();
+
+    BackgroundWorkerInitializeConnection(DB_FOR_COMMON_ACCESS, NULL);
+
+    while (!got_sigterm)
+    {
+        int         rc;
+
+        /*
+         * Widen before multiplying. The GUC is an int whose allowed maximum
+         * is far above INT_MAX / 1000, so multiplying in int would overflow
+         * and could yield a negative timeout.
+         */
+        long        timeout = (long) tracking_worker_naptime_sec * 1000L;
+
+        if (current_timeout <= 0)
+        {
+            worker_tracking_status_check();
+
+            INSTR_TIME_SET_CURRENT(start_time_timeout);
+            current_timeout = timeout;
+        }
+
+        rc = WaitLatch(&MyProc->procLatch, WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
+                       current_timeout);
+
+        if (rc & WL_LATCH_SET)
+        {
+            ResetLatch(&MyProc->procLatch);
+            CHECK_FOR_INTERRUPTS();
+        }
+
+        /* Emergency bailout if postmaster has died */
+        if (rc & WL_POSTMASTER_DEATH)
+        {
+            ereport(LOG, (errmsg("[arenadata toolkit] bgworker is being terminated by postmaster death.")));
+            proc_exit(1);
+        }
+
+        if (got_sighup)
+        {
+            elog(DEBUG1, "[arenadata toolkit] got sighup");
+            got_sighup = false;
+            ProcessConfigFile(PGC_SIGHUP);
+        }
+
+        /*
+         * We can wake up during WaitLatch very often, thus, timeout is
+         * calculated manually.
+         */
+        INSTR_TIME_SET_CURRENT(current_time_timeout);
+        INSTR_TIME_SUBTRACT(current_time_timeout, start_time_timeout);
+        current_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(current_time_timeout);
+    }
+
+    ereport(LOG, (errmsg("[arenadata toolkit] stop worker process")));
+
+    proc_exit(0);
+}
+
+/*
+ * Register the tracking background worker. It is started once recovery has
+ * finished, has shared memory and database access, and is restarted after
+ * the default interval.
+ */
+void
+arenadata_toolkit_worker_register()
+{
+    BackgroundWorker worker = {0};
+
+    worker.bgw_flags = BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
+    worker.bgw_start_time = BgWorkerStart_RecoveryFinished;
+    worker.bgw_restart_time = BGW_DEFAULT_RESTART_INTERVAL;
+    snprintf(worker.bgw_library_name, BGW_MAXLEN, TOOLKIT_BINARY_NAME);
+    snprintf(worker.bgw_function_name, BGW_MAXLEN, "arenadata_toolkit_main");
+    worker.bgw_notify_pid = 0;
+    snprintf(worker.bgw_name, BGW_MAXLEN, "arenadata_toolkit");
+
+    RegisterBackgroundWorker(&worker);
+}
diff --git a/gpcontrib/arenadata_toolkit/src/bloom.c b/gpcontrib/arenadata_toolkit/src/bloom.c
new file mode 100644
index 000000000000..26062395b423
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/bloom.c
@@ -0,0 +1,222 @@
+/*
+ * Simple bloom filter without using postgres primitives.
+ */
+
+#include 
+
+#include "arenadata_toolkit_guc.h"
+#include "bloom.h"
+#include "hashimpl.h"
+#include "tf_shmem.h"
+
+uint64      bloom_hash_seed;
+int         bloom_hash_num;
+
+/*
+ * Reduce val modulo m. When m is a power of two the remainder is taken with
+ * a bit mask instead of a division.
+ */
+static inline uint32
+mod_m(uint32 val, uint64 m)
+{
+    if (((m - 1) & m) == 0)
+        return val & (m - 1);
+    else
+        return val % m;
+}
+
+/*
+ * Generate k independent bit positions in a Bloom filter.
+ *
+ * Implements Enhanced Double Hashing technique (Dillinger & Manolios, 2004) which
+ * generates k hash values using only 2 independent hash functions. This approach
+ * provides comparable performance to using k independent hash functions while
+ * being more computationally efficient.
+ *
+ * Algorithm:
+ * 1. Generate two independent 32-bit hashes (x, y) from a 64-bit wyhash
+ * 2. Apply modulo operation to fit within filter size
+ * 3. 
Generate subsequent indices using linear combination: x = (x + y) mod m
+ * y = (y + i) mod m
+ *
+ * Parameters:
+ * node - relation file node OID to hash
+ * bloom_size - size of Bloom filter in bytes
+ * out_hashes - output array to store k bit positions
+ *
+ * Reference: GPDB7 codebase.
+ */
+static void
+tracking_hashes(Oid node, uint32 bloom_size, uint32 *out_hashes)
+{
+    /* Number of addressable bits in a filter of bloom_size bytes */
+    const uint64 nbits = bloom_size * 8;
+    /* One 64-bit hash supplies both 32-bit halves */
+    const uint64 digest = wyhash(node, bloom_hash_seed);
+    uint32      pos = mod_m((uint32) digest, nbits);
+    uint32      step = mod_m((uint32) (digest >> 32), nbits);
+    int         k;
+
+    out_hashes[0] = pos;
+
+    /* Every further position advances by a step that itself grows by k */
+    for (k = 1; k < bloom_hash_num; k++)
+    {
+        pos = mod_m(pos + step, nbits);
+        step = mod_m(step + k, nbits);
+        out_hashes[k] = pos;
+    }
+}
+
+/*
+ * Probe the filter for relnode.
+ *
+ * Computes the bit positions for relnode and checks each of them in the
+ * currently active bit array. A filter flagged as "all set" matches
+ * everything without hashing.
+ *
+ * Returns false as soon as one probed bit is clear (the element was never
+ * inserted), true otherwise (the element may have been inserted).
+ */
+bool
+bloom_isset(bloom_t *bloom, Oid relnode)
+{
+    uint32      positions[MAX_BLOOM_HASH_FUNCS];
+    int         k;
+
+    if (bloom->is_set_all)
+        return true;
+
+    tracking_hashes(relnode, bloom->size, positions);
+
+    for (k = 0; k < bloom_hash_num; k++)
+    {
+        const uint32 pos = positions[k];
+        const int   bit = 1 << (pos & 7);
+
+        /* byte index is pos / 8, bit index is pos % 8 */
+        if ((bloom->current_bloom[pos >> 3] & bit) == 0)
+            return false;
+    }
+
+    return true;
+}
+
+/*
+ * Insert an element into Bloom filter
+ *
+ * Sets k bits in the Bloom filter's bit array corresponding to the k hash
+ * values generated for the input element. This operation is irreversible -
+ * elements cannot be removed without rebuilding the entire filter.
+ *
+ * Parameters:
+ * bloom - pointer to Bloom filter structure
+ * relnode - relation file node OID to insert
+ */
+void
+bloom_set_bits(bloom_t *bloom, Oid relnode)
+{
+    uint32      positions[MAX_BLOOM_HASH_FUNCS];
+    int         k = 0;
+
+    tracking_hashes(relnode, bloom->size, positions);
+
+    /* Turn on one bit per position: byte index pos / 8, bit index pos % 8 */
+    while (k < bloom_hash_num)
+    {
+        const uint32 pos = positions[k];
+
+        bloom->current_bloom[pos >> 3] |= 1 << (pos & 7);
+        k++;
+    }
+}
+
+/*
+ * Prepare a filter of bloom_size bytes for use: the active bit array is set
+ * to the start of the map and the filter is cleared.
+ */
+void
+bloom_init(const uint32 bloom_size, bloom_t *bloom)
+{
+    /* Both fields have to be in place before bloom_clear() touches the map */
+    bloom->current_bloom = bloom->map;
+    bloom->size = bloom_size;
+
+    bloom_clear(bloom);
+}
+
+/*
+ * Initialize optimal Bloom filter parameters
+ *
+ * This function calculates and sets optimal parameters for the Bloom filter
+ * based on established widespread principles.
+ *
+ * Calculates the optimal number of hash functions using the formula:
+ * k = (m/n)ln(2), which minimizes the false positive probability
+ * p = (1 - e^(-kn/m))^k.
+ * where:
+ * - m = total_bits (size of bit array)
+ * - n = TOTAL_ELEMENTS (expected number of insertions)
+ *
+ * Initializes bloom_hash_seed with a random value to prevent deterministic
+ * hash collisions and ensure independent hash distributions across runs.
+ */
+void
+init_bloom_invariants(void)
+{
+    int         hash_count = rint(log(2.0) * (bloom_size * 8) / TOTAL_ELEMENTS);
+
+    /* Keep the hash count within [1, MAX_BLOOM_HASH_FUNCS] */
+    if (hash_count > MAX_BLOOM_HASH_FUNCS)
+        hash_count = MAX_BLOOM_HASH_FUNCS;
+    if (hash_count < 1)
+        hash_count = 1;
+
+    bloom_hash_num = hash_count;
+    bloom_hash_seed = (uint64) random();
+}
+
+/* Set every bit of the active array and flag the filter as "all set". */
+void
+bloom_set_all(bloom_t *bloom)
+{
+    const uint32 nbytes = bloom->size;
+
+    memset(bloom->current_bloom, 0xFF, nbytes);
+    bloom->is_set_all = 1;
+}
+
+/* Zero the active array and drop the "all set" flag. */
+void
+bloom_clear(bloom_t *bloom)
+{
+    const uint32 nbytes = bloom->size;
+
+    memset(bloom->current_bloom, 0, nbytes);
+    bloom->is_set_all = 0;
+}
+
+/*
+ * OR the active array of src into the active array of dst. When src is
+ * flagged "all set", dst is filled with ones and inherits the flag instead.
+ */
+void
+bloom_merge(bloom_t *dst, bloom_t *src)
+{
+    uint32      idx;
+
+    if (!src->is_set_all)
+    {
+        for (idx = 0; idx < dst->size; idx++)
+            dst->current_bloom[idx] |= src->current_bloom[idx];
+
+        return;
+    }
+
+    memset(dst->current_bloom, 0xFF, dst->size);
+    dst->is_set_all = src->is_set_all;
+}
+
+/* Copy size, active array contents and the "all set" flag from src to dest. */
+void
+bloom_copy(bloom_t *dest, bloom_t *src)
+{
+    const uint32 nbytes = src->size;
+
+    dest->size = nbytes;
+    memcpy(dest->current_bloom, src->current_bloom, nbytes);
+    dest->is_set_all = src->is_set_all;
+}
+
+/*
+ * Make the other half of the map the active array and drop the "all set"
+ * flag.
+ */
+void
+bloom_switch_current(bloom_t *bloom)
+{
+    bloom->current_bloom = bloom_get_other(bloom);
+    bloom->is_set_all = false;
+}
+
+/* Return the half of the map that is not currently active. */
+uint8 *
+bloom_get_other(bloom_t *bloom)
+{
+    uint8      *first_half = bloom->map;
+
+    if (bloom->current_bloom == first_half)
+        return first_half + bloom->size;
+
+    return first_half;
+}
+
+/*
+ * OR the inactive half of the map into the active one. Nothing is done when
+ * the filter is flagged "all set".
+ */
+void
+bloom_merge_internal(bloom_t *bloom)
+{
+    if (!bloom->is_set_all)
+    {
+        uint8      *inactive = bloom_get_other(bloom);
+        uint32      idx = 0;
+
+        while (idx < bloom->size)
+        {
+            bloom->current_bloom[idx] |= inactive[idx];
+            idx++;
+        }
+    }
+}
diff --git a/gpcontrib/arenadata_toolkit/src/bloom_set.c b/gpcontrib/arenadata_toolkit/src/bloom_set.c
new file mode 100644
index 000000000000..b48407e6bea9
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/bloom_set.c
@@ -0,0 +1,265 @@
+/*
+ * Set of blooms. Main entry point to find a bloom and work with it.
+ * Used to track create, extend, truncate events.
+ */
+#include "arenadata_toolkit_guc.h"
+#include "bloom_set.h"
+#include "tf_shmem.h"
+
+LWLock     *bloom_set_lock;
+tf_entry_lock_t bloom_locks[MAX_DB_TRACK_COUNT];
+
+/*
+ * Bytes taken by one set entry: the entry header followed by a filter whose
+ * map holds two arrays of "size" bytes.
+ */
+static inline Size
+bloom_entry_size(uint32 size)
+{
+    const Size  filter_bytes = full_bloom_size(2 * size);
+
+    return (offsetof(bloom_entry_t, bloom) + filter_bytes);
+}
+
+/* Address of entry number idx; entries are laid out back to back. */
+static inline void *
+bloom_entry_get(bloom_set_t *set, int idx)
+{
+    uint8      *first_entry = (uint8 *) set->bloom_entries;
+
+    return (void *) (first_entry + idx * bloom_entry_size(set->bloom_size));
+}
+
+/*
+ * The bloom_set API works on exactly one set, reachable through this
+ * pointer.
+ */
+bloom_set_t *bloom_set = NULL;
+
+/* Raise an ERROR unless the shared state and the set pointer are both set. */
+static inline void
+bloom_set_check_state(void)
+{
+    if (tf_shared_state != NULL && bloom_set != NULL)
+        return;
+
+    ereport(ERROR,
+            (errmsg("Failed to access shared memory due to wrong extension initialization"),
+             errhint("Load extension's code through shared_preload_library configuration")));
+}
+
+/* Put an entry into the unbound state and initialize its filter. */
+static void
+bloom_entry_init(const uint32_t size, bloom_entry_t *bloom_entry)
+{
+    bloom_init(size, &bloom_entry->bloom);
+    bloom_entry->work_version = InvalidVersion;
+    bloom_entry->master_version = InvalidVersion;
+    bloom_entry->dbid = InvalidOid;
+}
+
+
+/*
+ * Set up the set inside the shared state: record its dimensions, assign the
+ * set-wide lock and one lock per entry, initialize every entry and compute
+ * the filter invariants.
+ */
+void
+bloom_set_init(const uint32_t bloom_count, const uint32_t bloom_size)
+{
+    uint32_t    slot;
+
+    bloom_set = &tf_shared_state->bloom_set;
+
+    bloom_set->bloom_size = bloom_size;
+    bloom_set->bloom_count = bloom_count;
+
+    bloom_set_lock = LWLockAssign();
+
+    for (slot = 0; slot < bloom_count; slot++)
+    {
+        bloom_entry_t *entry = bloom_entry_get(bloom_set, slot);
+
+        bloom_entry_init(bloom_size, entry);
+        bloom_locks[slot].lock = LWLockAssign();
+        bloom_locks[slot].entry = (void *) entry;
+    }
+
+    init_bloom_invariants();
+}
+
+/* Bytes needed for a set of "count" entries with filters of "size" bytes. */
+Size
+bloom_set_required_size(uint32 size, int count)
+{
+    const Size  header_bytes = offsetof(bloom_set_t, bloom_entries);
+
+    return (header_bytes + count * bloom_entry_size(size));
+}
+
+/*
+ * Finds the entry in bloom_set by given dbid.
+ * That's a simple linear search, should be reworked (depends on target dbs count).
+ */
+static bloom_entry_t *
+find_bloom_entry(Oid dbid)
+{
+    int         slot;
+
+    for (slot = 0; slot < bloom_set->bloom_count; slot++)
+    {
+        bloom_entry_t *candidate = bloom_entry_get(bloom_set, slot);
+
+        if (candidate->dbid == dbid)
+            return candidate;
+    }
+
+    /* No entry carries this dbid */
+    return NULL;
+}
+
+/*
+ * Attach a free filter to dbid.
+ *
+ * Returns true when dbid is bound on return (including the case where it was
+ * bound already) and false when no unbound entry is left. The whole
+ * operation runs under the exclusive set lock.
+ */
+bool
+bloom_set_bind(Oid dbid)
+{
+    bool        bound = true;
+
+    bloom_set_check_state();
+
+    LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE);
+
+    if (find_bloom_entry(dbid) == NULL)
+    {
+        /* An unbound entry is one whose dbid is InvalidOid */
+        bloom_entry_t *free_entry = find_bloom_entry(InvalidOid);
+
+        if (free_entry == NULL)
+            bound = false;
+        else
+        {
+            free_entry->dbid = dbid;
+            free_entry->master_version = StartVersion;
+            free_entry->work_version = StartVersion;
+            pg_atomic_init_flag(&free_entry->capture_in_progress);
+        }
+    }
+
+    LWLockRelease(bloom_set_lock);
+
+    return bound;
+}
+
+/*
+ * Fill the Bloom filter with 0 or 1. Used for setting
+ * full snapshots.
+ */
+bool
+bloom_set_trigger_bits(Oid dbid, bool on)
+{
+    bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE);
+
+    if (ctx.entry)
+    {
+        if (on)
+            bloom_set_all(&ctx.entry->bloom);
+        else
+            bloom_clear(&ctx.entry->bloom);
+
+        bloom_set_release(&ctx);
+        return true;
+    }
+
+    bloom_set_release(&ctx);    /* the set lock is held even when no entry matched */
+
+    return false;
+}
+
+/* Unbind used filter by given dbid */
+void
+bloom_set_unbind(Oid dbid)
+{
+    bloom_entry_t *bloom_entry;
+
+    bloom_set_check_state();
+
+    LWLockAcquire(bloom_set_lock, LW_EXCLUSIVE);
+    bloom_entry = find_bloom_entry(dbid);
+    if (bloom_entry == NULL)
+    {
+        LWLockRelease(bloom_set_lock);
+        return;
+    }
+    bloom_entry->dbid = InvalidOid;    /* InvalidOid marks the entry as free for bloom_set_bind() */
+    bloom_clear(&bloom_entry->bloom);
+    LWLockRelease(bloom_set_lock);
+}
+
+/* Find bloom by dbid, set bit based on relNode hash */
+void
+bloom_set_set(Oid dbid, Oid relNode)
+{
+    bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE);
+
+    if (ctx.entry)
+    {
+        bloom_set_bits(&ctx.entry->bloom, relNode);
+    }
+    bloom_set_release(&ctx);
+
+}
+
+/* Find bloom by dbid, merge bytes from another bloom to it */
+bool
+bloom_set_merge(Oid dbid, bloom_t *from)
+{
+    if (!from)
+        return false;
+
+    bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_EXCLUSIVE);
+
+    if (ctx.entry)
+    {
+        bloom_merge(&ctx.entry->bloom, from);
+        bloom_set_release(&ctx);
+        return true;
+    }
+    bloom_set_release(&ctx);
+
+    return false;
+}
+/*
+ * Report whether the filter bound to dbid is flagged "all set". Returns
+ * false when no filter is bound to dbid.
+ */
+bool
+bloom_set_is_all_bits_triggered(Oid dbid)
+{
+    bool        is_triggered = false;
+    bloom_op_ctx_t ctx = bloom_set_get_entry(dbid, LW_SHARED, LW_SHARED);
+
+    if (ctx.entry)
+    {
+        is_triggered = ctx.entry->bloom.is_set_all;
+    }
+
+    bloom_set_release(&ctx);
+
+    return is_triggered;
+}
+/*
+ * Look up the entry bound to dbid and lock it.
+ *
+ * The set lock is taken in s_mode first, then the lock of the matching entry
+ * in e_mode. The returned context always carries the set lock; entry and
+ * entry_lock stay NULL when nothing is bound to dbid. Every call has to be
+ * paired with bloom_set_release().
+ */
+bloom_op_ctx_t
+bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode)
+{
+    bloom_op_ctx_t ctx = {0};
+
+    bloom_set_check_state();
+
+    LWLockAcquire(bloom_set_lock, s_mode);
+    ctx.entry_lock = LWLockAcquireEntry(dbid, e_mode);
+    ctx.entry = find_bloom_entry(dbid);
+    ctx.set_lock = bloom_set_lock;
+
+    return ctx;
+}
+void
+bloom_set_release(bloom_op_ctx_t *ctx)
+{
+    if (ctx->entry_lock)    /* NULL when no entry matched the dbid */
+        LWLockRelease(ctx->entry_lock);
+    LWLockRelease(ctx->set_lock);
+}
+
+/*
+ * Acquire lock corresponding to dbid in bloom_set.
+ *
+ * Returns the acquired lock, or NULL (with nothing acquired) when no entry
+ * carries dbid.
+ */
+LWLock *
+LWLockAcquireEntry(Oid dbid, LWLockMode mode)
+{
+    for (int i = 0; i < db_track_count; ++i)
+    {
+        bloom_entry_t *bloom_entry = (bloom_entry_t *) (bloom_locks[i].entry);
+
+        if (bloom_entry->dbid == dbid)
+        {
+            LWLockAcquire(bloom_locks[i].lock, mode);
+            return bloom_locks[i].lock;
+        }
+    }
+
+    return NULL;
+}
diff --git a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c b/gpcontrib/arenadata_toolkit/src/dbsize.c similarity index 66% rename from gpcontrib/arenadata_toolkit/arenadata_toolkit.c rename to gpcontrib/arenadata_toolkit/src/dbsize.c index 11a5bd9868ff..d2efd367962c 100644 --- a/gpcontrib/arenadata_toolkit/arenadata_toolkit.c +++ b/gpcontrib/arenadata_toolkit/src/dbsize.c @@ -9,6 +9,7 @@ #include "access/heapam.h" #include "cdb/cdbvars.h" #include "common/relpath.h" +#include "catalog/namespace.h" #include "fmgr.h" #include "funcapi.h" #include "miscadmin.h" @@ -25,6 +26,8 @@ #include "catalog/pg_tablespace.h" #include "storage/lock.h" +#include "dbsize.h" + PG_MODULE_MAGIC; /* @@ -33,14 +36,16 @@ PG_MODULE_MAGIC; */ #define MAXPATHLEN_WITHSEGNO (MAXPGPATH + 12) -static int64 calculate_relation_size(Relation rel, ForkNumber forknum); +static int64 calculate_relation_size(Relation rel, ForkNumber forknum, bool softCalc); static int64 get_heap_storage_total_bytes(Relation rel, ForkNumber forknum, char *relpath); -static int64 get_ao_storage_total_bytes(Relation rel, char *relpath); +static int64 get_ao_storage_total_bytes(Relation rel, char *relpath, bool softCalc); static bool calculate_ao_storage_perSegFile(const int segno, void *ctx); static void fill_relation_seg_path(char *buf, int bufLen, const char *relpath, int segNo); static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum); +static 
int64 get_heap_storage_total_bytes_soft(char *relpath); +static bool calculate_ao_storage_perSegFile_soft(const int segno, void *ctx); /* * Structure used to accumulate the size of AO/CO relation from callback. @@ -75,7 +80,7 @@ adb_relation_storage_size(PG_FUNCTION_ARGS) if (relOid == 0 || rel->rd_node.relNode == 0) size = 0; else - size = calculate_relation_size(rel, forkNumber); + size = calculate_relation_size(rel, forkNumber, false); if (Gp_role == GP_ROLE_DISPATCH) { @@ -102,9 +107,14 @@ adb_relation_storage_size(PG_FUNCTION_ARGS) * dbsize.c. Thus calculation of size for heap/AO/CO relations is supported * (AO/CO relations don't have any extra forks, so only main fork is supported) * In other cases zero value is returned. + * + * softCalc parameter stands for indicator whether one can ignore stat() call + * errors. In this mode rel is not fully initialized and the lock on relation + * is not acquired. Therefore, the size calculation will be fuzzy since + * anything could happen with segment files. */ static int64 -calculate_relation_size(Relation rel, ForkNumber forknum) +calculate_relation_size(Relation rel, ForkNumber forknum, bool softCalc) { bool isAOMainFork = RelationIsAppendOptimized(rel) && forknum == MAIN_FORKNUM; @@ -114,9 +124,9 @@ calculate_relation_size(Relation rel, ForkNumber forknum) char *relpath = relpathbackend(rel->rd_node, rel->rd_backend, forknum); if (RelationIsHeap(rel)) - return get_heap_storage_total_bytes(rel, forknum, relpath); + return softCalc ? 
get_heap_storage_total_bytes_soft(relpath) : get_heap_storage_total_bytes(rel, forknum, relpath); - return get_ao_storage_total_bytes(rel, relpath); + return get_ao_storage_total_bytes(rel, relpath, softCalc); } static void @@ -163,7 +173,7 @@ static int64 calculate_toast_table_size(Oid toastrelid, ForkNumber forknum) { Relation toastRel = relation_open(toastrelid, AccessShareLock); - int64 size = calculate_relation_size(toastRel, forknum); + int64 size = calculate_relation_size(toastRel, forknum, false); relation_close(toastRel, AccessShareLock); return size; @@ -207,9 +217,14 @@ get_heap_storage_total_bytes(Relation rel, ForkNumber forknum, char *relpath) /* * Function calculates the size of AO/CO tables. + * + * softCalc parameter stands for indicator whether one can ignore stat() call + * errors. In this mode rel is not fully initialized and the lock on relation + * is not acquired. Therefore, the size calculation will be fuzzy since + * anything could happen with segment files. */ static int64 -get_ao_storage_total_bytes(Relation rel, char *relpath) +get_ao_storage_total_bytes(Relation rel, char *relpath, bool softCalc) { struct calculate_ao_storage_callback_ctx ctx = { .relfilenode_path = relpath, @@ -223,9 +238,17 @@ get_ao_storage_total_bytes(Relation rel, char *relpath) * operations (for ex: CTAS) zero segment will store tuples). Thus * calculate segno=0 manually. */ - (void) calculate_ao_storage_perSegFile(0, &ctx); + if (softCalc) + { + (void) calculate_ao_storage_perSegFile_soft(0, &ctx); + ao_foreach_extent_file(calculate_ao_storage_perSegFile_soft, &ctx); + } + else + { + (void) calculate_ao_storage_perSegFile(0, &ctx); + ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); + } - ao_foreach_extent_file(calculate_ao_storage_perSegFile, &ctx); return ctx.total_size; } @@ -370,3 +393,133 @@ Datum adb_hba_file_rules(PG_FUNCTION_ARGS) { return pg_hba_file_rules(fcinfo); } +/* + * Calculates relation size among all the forks. 
No lock is acquired on table.
+ * RelationData is partially initialized. Only necessary fields are taken from
+ * pg_class tuple to determine segment file location.
+ */
+int64
+dbsize_calc_size(Form_pg_class pg_class_data)
+{
+    static const ForkNumber forks[] = {
+        MAIN_FORKNUM, FSM_FORKNUM, VISIBILITYMAP_FORKNUM, INIT_FORKNUM
+    };
+    RelationData rel = {0};
+    int64       total = 0;
+    char        persistence;
+    int         f;
+
+    /*
+     * Fill in the relfilenode triple: a zero reltablespace means the
+     * database default tablespace, and relations in the global tablespace
+     * belong to no database.
+     */
+    rel.rd_node.spcNode = pg_class_data->reltablespace
+        ? pg_class_data->reltablespace
+        : MyDatabaseTableSpace;
+    rel.rd_node.dbNode = (rel.rd_node.spcNode == GLOBALTABLESPACE_OID)
+        ? InvalidOid
+        : MyDatabaseId;
+    rel.rd_node.relNode = pg_class_data->relfilenode;
+
+    /* Nothing to measure without a relfilenode */
+    if (rel.rd_node.relNode == 0)
+        return total;
+
+    rel.rd_rel = pg_class_data;
+
+    /*
+     * Derive the backend id and the local-temp flag from the persistence.
+     */
+    persistence = rel.rd_rel->relpersistence;
+    if (persistence == RELPERSISTENCE_UNLOGGED ||
+        persistence == RELPERSISTENCE_PERMANENT)
+    {
+        rel.rd_backend = InvalidBackendId;
+        rel.rd_islocaltemp = false;
+    }
+    else if (persistence == RELPERSISTENCE_TEMP)
+    {
+        rel.rd_backend = TempRelBackendId;
+        rel.rd_islocaltemp = isTempOrToastNamespace(rel.rd_rel->relnamespace) ? true : false;
+    }
+    else
+        elog(ERROR, "invalid relpersistence: %c",
+             rel.rd_rel->relpersistence);
+
+    /* Sum the forks in soft mode, i.e. tolerating stat() failures */
+    for (f = 0; f < (int) (sizeof(forks) / sizeof(forks[0])); f++)
+        total += calculate_relation_size(&rel, forks[f], true);
+
+    return total;
+}
+
+/*
+ * Sum the sizes of the segment files of a heap relation.
+ *
+ * A failing stat() is only logged at DEBUG1 and ends the scan.
+ */
+static int64
+get_heap_storage_total_bytes_soft(char *relpath)
+{
+    char        path[MAXPATHLEN_WITHSEGNO];
+    int64       total = 0;
+    int         segno = 0;
+
+    /*
+     * Segment files are named relationpath, then relationpath.%d, and are
+     * scanned in order; the first file that cannot be stat'ed ends the walk.
+     */
+    for (;;)
+    {
+        struct stat st;
+
+        CHECK_FOR_INTERRUPTS();
+
+        fill_relation_seg_path(path, MAXPATHLEN_WITHSEGNO, relpath, segno);
+        if (stat(path, &st) < 0)
+        {
+            ereport(DEBUG1, (errcode_for_file_access(),
+                             errmsg("[arenadata_toolkit] could not stat file %s: %m", path)));
+            break;
+        }
+
+        total += st.st_size;
+        segno++;
+    }
+
+    return total;
+}
+
+/*
+ * Per-segment callback for AO relations: adds the size of segment file segno
+ * to the context total.
+ *
+ * A failing stat() is only logged at DEBUG1; the callback then returns false.
+ */
+static bool
+calculate_ao_storage_perSegFile_soft(const int segno, void *ctx)
+{
+    struct calculate_ao_storage_callback_ctx *acc = ctx;
+    char        path[MAXPATHLEN_WITHSEGNO];
+    struct stat st;
+
+    CHECK_FOR_INTERRUPTS();
+
+    fill_relation_seg_path(path, MAXPATHLEN_WITHSEGNO,
+                           acc->relfilenode_path, segno);
+
+    if (stat(path, &st) >= 0)
+    {
+        acc->total_size += st.st_size;
+        return true;
+    }
+
+    ereport(DEBUG1, (errcode_for_file_access(),
+                     errmsg("[arenadata_toolkit] could not access file %s: %m", path)));
+    return false;
+}
diff --git a/gpcontrib/arenadata_toolkit/src/drops_track.c b/gpcontrib/arenadata_toolkit/src/drops_track.c
new file mode 100644
index 000000000000..f1106e798f63
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/drops_track.c
@@ -0,0 +1,183 @@
+/*
+ * Track unlink hook events.
+ */
+
+#include "drops_track.h"
+
+#include "lib/ilist.h"
+#include "storage/lwlock.h"
+#include "storage/ipc.h"
+#include "storage/shmem.h"
+
+#include "arenadata_toolkit_guc.h"
+
+/*
+ * Drop track element. Stores just relfilenode
+ * and dbid.
+ */
+typedef struct
+{
+    Oid         relNode;
+    Oid         dbNode;
+} track_relfilenode_t;
+
+/* Doubly linked list node of dropped file nodes */
+typedef struct
+{
+    dlist_node  node;           /* must stay first: list nodes are cast to this struct */
+    track_relfilenode_t relfileNode;
+} drops_track_node_t;
+
+/* Drops track */
+typedef struct
+{
+    dlist_head  used_head;
+    dlist_head  free_head;
+    uint32_t    used_count;     /* count of used nodes */
+
+    /*
+     * Pool of drops_count nodes. Declared with the element type rather than
+     * as a char array, so that the compiler aligns the array start for the
+     * pointers stored in every node.
+     */
+    drops_track_node_t nodes[FLEXIBLE_ARRAY_MEMBER];
+} drops_track_t;
+
+static shmem_startup_hook_type next_shmem_startup_hook = NULL;
+static drops_track_t *drops_track;
+LWLock     *drops_track_lock;
+
+/* Return node number i of the pool. */
+static inline drops_track_node_t *
+track_node_get(drops_track_t *track, int i)
+{
+    return &track->nodes[i];
+}
+
+/* Shared memory needed for the header plus drops_count nodes. */
+static Size
+drops_track_calc_size()
+{
+    Size        size;
+
+    size = offsetof(drops_track_t, nodes);
+    size = add_size(size, mul_size(drops_count, sizeof(drops_track_node_t)));
+
+    return size;
+}
+
+/*
+ * shmem_startup_hook: attach to (or create and initialize) the drops track
+ * in shared memory, assign its lock, then call the previously installed hook.
+ */
+static void
+drops_track_hook(void)
+{
+    bool        found;
+    Size        size = drops_track_calc_size();
+
+    LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
+
+    drops_track = ShmemInitStruct("adb_track_files_drops", size, &found);
+
+    if (!found)
+    {
+        drops_track->used_count = 0;
+        dlist_init(&drops_track->used_head);
+        dlist_init(&drops_track->free_head);
+
+        /* Initially every node of the pool is on the free list */
+        for (uint32_t i = 0; i < drops_count; i++)
+        {
+            drops_track_node_t *track_node = track_node_get(drops_track, i);
+
+            track_node->relfileNode.relNode = InvalidOid;
+            track_node->relfileNode.dbNode = InvalidOid;
+            dlist_push_tail(&drops_track->free_head, &track_node->node);
+        }
+    }
+
+    drops_track_lock = LWLockAssign();
+
+    LWLockRelease(AddinShmemInitLock);
+
+    if (next_shmem_startup_hook)
+        next_shmem_startup_hook();
+}
+
+/* Request the lock and shared memory and install the startup hook. */
+void
+drops_track_init(void)
+{
+    RequestAddinLWLocks(1);
+    RequestAddinShmemSpace(drops_track_calc_size());
+
+    next_shmem_startup_hook = shmem_startup_hook;
+    shmem_startup_hook = drops_track_hook;
+}
+
+/* Restore the startup hook that was installed before drops_track_init(). */
+void
+drops_track_deinit(void)
+{
+    shmem_startup_hook = next_shmem_startup_hook;
+}
+
+/* Take a node from the free list, or return NULL when the list is empty. */
+static drops_track_node_t *
+get_free_node(void)
+{
+    if (dlist_is_empty(&drops_track->free_head))
+        return NULL;
+
+    return (drops_track_node_t *) dlist_pop_head_node(&drops_track->free_head);
+}
+
+/* Add relNode to track. Old node is dropped if no space */
+void
+drops_track_add(RelFileNode relfileNode)
+{
+    drops_track_node_t *track_node;
+
+    LWLockAcquire(drops_track_lock, LW_EXCLUSIVE);
+
+    if (drops_track->used_count >= drops_count)
+    {
+        /* Pool exhausted: recycle the oldest used node */
+        track_node = (drops_track_node_t *) dlist_pop_head_node(&drops_track->used_head);
+        /* Oid is unsigned, hence %u */
+        elog(DEBUG1, "No space for drop track. Oldest node removed (%u).", track_node->relfileNode.relNode);
+    }
+    else
+    {
+        track_node = get_free_node();
+        drops_track->used_count++;
+        Assert(track_node);
+    }
+
+    track_node->relfileNode.relNode = relfileNode.relNode;
+    track_node->relfileNode.dbNode = relfileNode.dbNode;
+    dlist_push_tail(&drops_track->used_head, &track_node->node);
+
+    elog(DEBUG1, "added relNode %u for dbNode %u to drops track",
+         relfileNode.relNode, relfileNode.dbNode);
+
+    LWLockRelease(drops_track_lock);
+}
+
+/*
+ * Extract relfilenodes corresponding to specific db into separate list.
+ *
+ * Matching nodes are removed from the used list and returned to the free
+ * list. Returns an OID list (NIL when nothing matched).
+ */
+List *
+drops_track_move(Oid dbid)
+{
+    List       *oids = NIL;
+    dlist_mutable_iter iter;
+
+    LWLockAcquire(drops_track_lock, LW_EXCLUSIVE);
+
+    if (drops_track->used_count == 0)
+    {
+        LWLockRelease(drops_track_lock);
+        return NIL;
+    }
+
+    dlist_foreach_modify(iter, &drops_track->used_head)
+    {
+        drops_track_node_t *track_node = (drops_track_node_t *) iter.cur;
+
+        if (track_node->relfileNode.dbNode == dbid)
+        {
+            oids = lcons_oid(track_node->relfileNode.relNode, oids);
+            drops_track->used_count--;
+            track_node->relfileNode.relNode = InvalidOid;
+            track_node->relfileNode.dbNode = InvalidOid;
+            dlist_delete(&track_node->node);
+            dlist_push_tail(&drops_track->free_head, &track_node->node);
+        }
+    }
+
+    LWLockRelease(drops_track_lock);
+
+    return oids;
+}
diff --git a/gpcontrib/arenadata_toolkit/src/file_hook.c 
b/gpcontrib/arenadata_toolkit/src/file_hook.c
new file mode 100644
index 000000000000..344678a7cfb8
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/file_hook.c
@@ -0,0 +1,155 @@
+/*
+ * File hooks to track events.
+ */
+
+#include "file_hook.h"
+
+#include "postgres.h"
+#include "storage/smgr.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "utils/lsyscache.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "catalog/objectaccess.h"
+
+#include "tf_shmem.h"
+#include "arenadata_toolkit_guc.h"
+#include "drops_track.h"
+
+static file_create_hook_type next_file_create_hook = NULL;
+static file_extend_hook_type next_file_extend_hook = NULL;
+static file_truncate_hook_type next_file_truncate_hook = NULL;
+static file_unlink_hook_type next_file_unlink_hook = NULL;
+
+static bloom_t *non_committed_bloom = NULL;
+static Oid  non_committed_dbid = InvalidOid;
+
+/* A file node is trackable when it belongs to a database. */
+static bool
+is_file_node_trackable(RelFileNodeBackend *rnode)
+{
+    return rnode->node.dbNode != InvalidOid;
+}
+
+/* Record a trackable file node in the filter of its database. */
+static void
+file_node_set(RelFileNodeBackend *rnode)
+{
+    if (is_file_node_trackable(rnode))
+        bloom_set_set(rnode->node.dbNode, rnode->node.relNode);
+}
+
+/*
+ * End-of-transaction handler for the backend-local filter of 'create'
+ * events. On commit the local filter is merged into the shared one; on both
+ * commit and abort it is freed and the local state is reset. All other
+ * events are ignored.
+ */
+static void
+xact_end_create_callback(XactEvent event, void *arg)
+{
+    const bool  committed = (event == XACT_EVENT_COMMIT);
+
+    if (!committed && event != XACT_EVENT_ABORT)
+        return;
+
+    elog(DEBUG1, "xact_end_create_callback");
+
+    if (committed)
+        bloom_set_merge(non_committed_dbid, non_committed_bloom);
+
+    pfree(non_committed_bloom);
+    non_committed_dbid = InvalidOid;
+    non_committed_bloom = NULL;
+}
+
+/*
+ * Since we can't be sure that created rnode will be commited, the create events
+ * are stored in a separate bloom filter.
+ */
+static void
+hook_create(RelFileNodeBackend rnode)
+{
+    if (next_file_create_hook)
+        next_file_create_hook(rnode);
+
+    if (!is_file_node_trackable(&rnode))
+        return;
+
+    /*
+     * The first create of a transaction allocates the local filter and
+     * registers the end-of-transaction callback that merges and frees it.
+     */
+    if (!non_committed_bloom)
+    {
+        non_committed_bloom =
+            MemoryContextAlloc(TopMemoryContext, full_bloom_size(bloom_size));
+        bloom_init(bloom_size, non_committed_bloom);
+        non_committed_dbid = rnode.node.dbNode;
+        RegisterXactCallbackOnce(xact_end_create_callback, NULL);
+    }
+
+    /* backend is a signed id; the three node fields are Oids, hence %u */
+    elog(DEBUG1, "hook_create: %d %u %u %u",
+         rnode.backend, rnode.node.dbNode,
+         rnode.node.spcNode, rnode.node.relNode);
+
+    bloom_set_bits(non_committed_bloom, rnode.node.relNode);
+
+}
+
+/* Extend event: chain to the previous hook, then mark the node in the shared filter. */
+static void
+hook_extend(RelFileNodeBackend rnode)
+{
+    if (next_file_extend_hook)
+        next_file_extend_hook(rnode);
+
+    elog(DEBUG1, "hook_extend: %d %u %u %u",
+         rnode.backend, rnode.node.dbNode,
+         rnode.node.spcNode, rnode.node.relNode);
+
+    file_node_set(&rnode);
+}
+
+/* Truncate event: chain to the previous hook, then mark the node in the shared filter. */
+static void
+hook_truncate(RelFileNodeBackend rnode)
+{
+    if (next_file_truncate_hook)
+        next_file_truncate_hook(rnode);
+
+    elog(DEBUG1, "hook_truncate: %d %u %u %u",
+         rnode.backend, rnode.node.dbNode,
+         rnode.node.spcNode, rnode.node.relNode);
+
+    file_node_set(&rnode);
+}
+
+/*
+ * Unlink event: chain to the previous hook, then queue the node in the drops
+ * track.
+ *
+ * NOTE(review): unlike the other hooks this one does not filter nodes with
+ * dbNode == InvalidOid - confirm that this is intended.
+ */
+static void
+hook_unlink(RelFileNodeBackend rnode)
+{
+    if (next_file_unlink_hook)
+        next_file_unlink_hook(rnode);
+
+    elog(DEBUG1, "hook_unlink: %d %u %u %u",
+         rnode.backend, rnode.node.dbNode,
+         rnode.node.spcNode, rnode.node.relNode);
+
+    drops_track_add(rnode.node);
+}
+
+/* Install the four file hooks, remembering the previously installed ones. */
+void
+file_hook_init()
+{
+    next_file_create_hook = file_create_hook;
+    file_create_hook = hook_create;
+
+    next_file_extend_hook = file_extend_hook;
+    file_extend_hook = hook_extend;
+
+    next_file_truncate_hook = file_truncate_hook;
+    file_truncate_hook = hook_truncate;
+
+    next_file_unlink_hook = file_unlink_hook;
+    file_unlink_hook = hook_unlink;
+}
+
+/* Put back the hooks that were installed before file_hook_init(). */
+void
+file_hook_deinit()
+{
+    file_create_hook = next_file_create_hook;
+    file_extend_hook = next_file_extend_hook;
+    file_truncate_hook = next_file_truncate_hook;
+    file_unlink_hook = next_file_unlink_hook;
+}
diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h
new file mode 100644
index 000000000000..e328ec4bcfa0
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_guc.h
@@ -0,0 +1,38 @@
+#ifndef ARENADATA_TOOLKIT_GUC_H
+#define ARENADATA_TOOLKIT_GUC_H
+
+#include "postgres.h"
+
+#define DEFAULT_BLOOM_SIZE_BYTES 1048576
+#define DEFAULT_DB_TRACK_COUNT 5
+#define DEFAULT_IS_TRACKED false
+#define DEFAULT_DROPS_COUNT 100000
+#define DEFAULT_TRACKED_SCHEMAS "public,arenadata_toolkit,pg_catalog,pg_toast,pg_aoseg,information_schema"
+#define DEFAULT_GET_FULL_SNAPSHOT_ON_RECOVERY true
+#define DEFAULT_TRACKED_REL_STORAGES "h,a,c"
+#define DEFAULT_TRACKED_REL_KINDS "r,i,t,m,o,b,M"
+#define DEFAULT_NAPTIME_SEC 60
+
+#define MIN_BLOOM_SIZE_BYTES 64
+#define MIN_DB_TRACK_COUNT 1
+#define MIN_DROPS_COUNT 1
+#define MIN_NAPTIME_SEC 1
+
+#define MAX_BLOOM_SIZE_BYTES 128000000
+#define MAX_DB_TRACK_COUNT 1000
+#define MAX_DROPS_COUNT 1000000
+#define MAX_NAPTIME_SEC (OID_MAX & 0x7FFFFFFF)
+
+extern int  bloom_size;
+extern int  db_track_count;
+extern int  drops_count;
+extern bool get_full_snapshot_on_recovery;
+extern char *tracked_schemas;
+extern char *tracked_rel_storages;
+extern char *tracked_rel_kinds;
+extern int  tracking_worker_naptime_sec;
+
+void        tf_guc_unlock(void);
+void        tf_guc_define(void);
+
+#endif   /* ARENADATA_TOOLKIT_GUC_H */
diff --git a/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h
new file mode 100644
index 000000000000..7481463c391d
--- /dev/null
+++ b/gpcontrib/arenadata_toolkit/src/include/arenadata_toolkit_worker.h
@@ -0,0 +1,6 @@
+#ifndef ARENADATA_TOOLKIT_WORKER_H
+#define ARENADATA_TOOLKIT_WORKER_H
+
+void        arenadata_toolkit_worker_register(void);
+
+#endif   /* ARENADATA_TOOLKIT_WORKER_H */
diff --git 
a/gpcontrib/arenadata_toolkit/src/include/bloom.h b/gpcontrib/arenadata_toolkit/src/include/bloom.h new file mode 100644 index 000000000000..2ac4ad7ef821 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/bloom.h @@ -0,0 +1,40 @@ +#ifndef BLOOM_H +#define BLOOM_H + +#include "postgres.h" + +#include + +#define MAX_BLOOM_HASH_FUNCS 6 +#define TOTAL_ELEMENTS 10000000UL + +typedef struct +{ + uint8 *current_bloom; + uint32 size; /* equal to bloom_size, half a map size */ + uint8 is_set_all; /* indicates all bits are set */ + uint8 map[FLEXIBLE_ARRAY_MEMBER]; /* core bit array */ +} bloom_t; + +static inline Size +full_bloom_size(uint32 size) +{ + return (offsetof(bloom_t, map) + size); +} + +extern uint64 bloom_hash_seed; +extern int bloom_hash_num; + +void bloom_init(const uint32 bloom_size, bloom_t *bloom); +void init_bloom_invariants(void); +bool bloom_isset(bloom_t *bloom, Oid relnode); +void bloom_set_bits(bloom_t *bloom, Oid relnode); +void bloom_set_all(bloom_t *bloom); +void bloom_clear(bloom_t *bloom); +void bloom_merge(bloom_t *dst, bloom_t *src); +void bloom_copy(bloom_t *dst, bloom_t *src); +void bloom_switch_current(bloom_t *bloom); +uint8 *bloom_get_other(bloom_t *bloom); +void bloom_merge_internal(bloom_t *bloom); + +#endif /* BLOOM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/bloom_set.h b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h new file mode 100644 index 000000000000..35345e63e3aa --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/bloom_set.h @@ -0,0 +1,67 @@ +#ifndef BLOOM_SET_H +#define BLOOM_SET_H + +#include "postgres.h" +#include "storage/lwlock.h" +#include "port/atomics.h" + +#include "bloom.h" + +#define InvalidVersion ((uint32) 0) +#define ControlVersion ((uint32) 1) +#define StartVersion ((uint32) 2) + +/* Bloom set entry. 
*/ +typedef struct +{ + Oid dbid; /* dbid of tracked database or InvalidOid */ + uint32 master_version; /* Auxiliary counter, which is sent from + * master to support transaction semantics */ + uint32 work_version; /* Auxiliary counter which represents current + * state of bloom filter */ + pg_atomic_flag capture_in_progress; /* indicates whether tracking + * acquisition is in progress */ + bloom_t bloom; /* bloom filter itself */ +} bloom_entry_t; + +/* Set of all allocated bloom filters*/ +typedef struct +{ + uint8 bloom_count; /* count of bloom_entry_t in bloom_entries */ + uint32 bloom_size; /* size of bloom filter */ + bloom_entry_t bloom_entries[FLEXIBLE_ARRAY_MEMBER]; /* array of + * bloom_entry_t */ +} bloom_set_t; + +typedef struct +{ + LWLock *set_lock; + LWLock *entry_lock; + bloom_entry_t *entry; +} bloom_op_ctx_t; + +/* + * Locks on each bloom_entry_t in bloom_set. + */ +typedef struct +{ + void *entry; /* It's a key that binds lock to bloom_entry */ + LWLock *lock; +} tf_entry_lock_t; + +extern LWLock *bloom_set_lock; +extern tf_entry_lock_t bloom_locks[]; + +Size bloom_set_required_size(uint32 size, int count); +void bloom_set_init(const uint32 bloom_count, const uint32 bloom_size); +bool bloom_set_bind(Oid dbid); +void bloom_set_unbind(Oid dbid); +void bloom_set_set(Oid dbid, Oid relNode); +bool bloom_set_merge(Oid dbid, bloom_t *from); +bool bloom_set_trigger_bits(Oid dbid, bool on); +bool bloom_set_is_all_bits_triggered(Oid dbid); +bloom_op_ctx_t bloom_set_get_entry(Oid dbid, LWLockMode s_mode, LWLockMode e_mode); +void bloom_set_release(bloom_op_ctx_t *ctx); +LWLock *LWLockAcquireEntry(Oid dbid, LWLockMode mode); + +#endif /* BLOOM_SET_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/dbsize.h b/gpcontrib/arenadata_toolkit/src/include/dbsize.h new file mode 100644 index 000000000000..bd6cdcebce76 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/dbsize.h @@ -0,0 +1,8 @@ +#ifndef DBSIZE_H +#define DBSIZE_H + +#include 
"catalog/pg_class.h" + +int64 dbsize_calc_size(Form_pg_class relInfo); + +#endif /* DBSIZE_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/drops_track.h b/gpcontrib/arenadata_toolkit/src/include/drops_track.h new file mode 100644 index 000000000000..b52f5180dbe6 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/drops_track.h @@ -0,0 +1,14 @@ +#ifndef DROPS_TRACK_H +#define DROPS_TRACK_H + +#include "postgres.h" +#include "nodes/pg_list.h" +#include "storage/relfilenode.h" + +void drops_track_init(void); +void drops_track_deinit(void); + +void drops_track_add(RelFileNode relNode); +List *drops_track_move(Oid dbid); + +#endif /* DROPS_TRACK_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/file_hook.h b/gpcontrib/arenadata_toolkit/src/include/file_hook.h new file mode 100644 index 000000000000..d5f4c3048b10 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/file_hook.h @@ -0,0 +1,7 @@ +#ifndef FILE_HOOK_H +#define FILE_HOOK_H + +void file_hook_init(void); +void file_hook_deinit(void); + +#endif /* FILE_HOOK_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/hashimpl.h b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h new file mode 100644 index 000000000000..0f3a08b98af7 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/hashimpl.h @@ -0,0 +1,89 @@ +#ifndef HASHIMPL_H +#define HASHIMPL_H + +#include "c.h" + +#define _wyrot(x) (((x)>>32)|((x)<<32)) + +/* + * This is an adaptation of WyHash - a fast, modern non-cryptographic hash function. + * + * Originally designed by Wang Yi, whose hash implementation is published + * without license. 
+ * + * Core components: + * _wymum: Implements multiplication-based mixing + * _wymix: Two-step mixing function + * - Combines _wymum multiplication with XOR operations + * - Enhances bit diffusion and avalanche properties + * wyhash: Main hashing function + * - Processes 32-bit keys with a seed value + * For Bloom filter implementation this hash is considered having + * better statistical properties than Postgres's default Jenkins hash. + */ +static inline void +_wymum(uint64 *A, uint64 *B) +{ +#if(SIZEOF_VOID_P < 8) + uint64 hh = (*A >> 32) * (*B >> 32), + hl = (*A >> 32) * (uint32) *B, + lh = (uint32) *A * (*B >> 32), + ll = (uint64) (uint32) *A * (uint32) *B; + + *A = _wyrot(hl) ^ hh; + *B = _wyrot(lh) ^ ll; +#elif defined(HAVE_INT128) + uint128 r = *A; + + r *= *B; + *A = (uint64) r; + *B = (uint64) (r >> 64); +#else + uint64 ha = *A >> 32, + hb = *B >> 32, + la = (uint32) *A, + lb = (uint32) *B, + hi, + lo; + uint64 rh = ha * hb, + rm0 = ha * lb, + rm1 = hb * la, + rl = la * lb, + t = rl + (rm0 << 32), + c = t < rl; + + lo = t + (rm1 << 32); + c += lo < t; + hi = rh + (rm0 >> 32) + (rm1 >> 32) + c; + *A = lo; + *B = hi; +#endif +} + +static inline uint64 _wymix(uint64 A, uint64 B) +{ + _wymum(&A, &B); + return A ^ B; +} + +static inline uint64 +wyhash(uint32 key, uint64 seed) +{ + seed ^= _wymix(seed ^ 0x2d358dccaa6c78a5ull, 0x8bb84b93962eacc9ull); +#if (WORDS_BIGENDIAN) +#if defined(HAVE__BUILTIN_BSWAP32) + key = __builtin_bswap32(key); +#else + key = (((key >> 24) & 0xff) | ((key >> 8) & 0xff00) | ((key << 8) & 0xff0000) | ((key << 24) & 0xff000000)); +#endif +#endif + uint64 a = ((uint64) key << 32) | key; + uint64 b = 0; + + a ^= 0x8bb84b93962eacc9ull; + b ^= seed; + _wymum(&a, &b); + return _wymix(a ^ 0x2d358dccaa6c78a5ull ^ 4, b ^ 0x8bb84b93962eacc9ull); +} + +#endif /* HASHIMPL_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h new file mode 100644 index 000000000000..0f3d3eaf49b1 
--- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/tf_shmem.h @@ -0,0 +1,27 @@ +#ifndef TF_SHMEM_H +#define TF_SHMEM_H + +#include "storage/lwlock.h" +#include "port/atomics.h" + +#include "bloom_set.h" + +/* + * Stores the Bloom filter in shared memory. + * tracking_is_initialized - a flag indicating + * bgworker bound dbids at startup/recovery. + * + * bloom_set - set of db_track_count Bloom filters. + */ +typedef struct +{ + pg_atomic_flag tracking_is_initialized; + bloom_set_t bloom_set; +} tf_shared_state_t; + +extern tf_shared_state_t *tf_shared_state; + +void tf_shmem_init(void); +void tf_shmem_deinit(void); + +#endif /* TF_SHMEM_H */ diff --git a/gpcontrib/arenadata_toolkit/src/include/track_files.h b/gpcontrib/arenadata_toolkit/src/include/track_files.h new file mode 100644 index 000000000000..2c46a900f082 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/include/track_files.h @@ -0,0 +1,7 @@ +#ifndef TRACK_FILES_H +#define TRACK_FILES_H + +void track_setup_executor_hooks(void); +void track_uninstall_executor_hooks(void); + +#endif /* TRACK_FILES_H */ diff --git a/gpcontrib/arenadata_toolkit/src/tf_shmem.c b/gpcontrib/arenadata_toolkit/src/tf_shmem.c new file mode 100644 index 000000000000..8286af2319fb --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/tf_shmem.c @@ -0,0 +1,65 @@ +#include "bloom_set.h" +#include "tf_shmem.h" + +#include "storage/ipc.h" +#include "storage/shmem.h" + +#include "arenadata_toolkit_guc.h" + +static shmem_startup_hook_type next_shmem_startup_hook = NULL; +tf_shared_state_t *tf_shared_state = NULL; + +static Size +tf_shmem_calc_size(void) +{ + Size size; + + size = offsetof(tf_shared_state_t, bloom_set); + size = add_size(size, bloom_set_required_size(bloom_size, db_track_count)); + + return size; +} + +static void +tf_shmem_hook(void) +{ + bool found; + Size size; + + size = tf_shmem_calc_size(); + + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + + tf_shared_state = ShmemInitStruct("toolkit_track_files", size, 
&found); + + if (!found) + { + pg_atomic_init_flag(&tf_shared_state->tracking_is_initialized); + + bloom_set_init(db_track_count, bloom_size); + } + + LWLockRelease(AddinShmemInitLock); + + if (next_shmem_startup_hook) + next_shmem_startup_hook(); +} + +void +tf_shmem_init() +{ + /* + * tf_state_lock and bloom_set_lock locks plus one lock for each db entry. + */ + RequestAddinLWLocks(2 + db_track_count); + RequestAddinShmemSpace(tf_shmem_calc_size()); + + next_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = tf_shmem_hook; +} + +void +tf_shmem_deinit(void) +{ + shmem_startup_hook = next_shmem_startup_hook; +} diff --git a/gpcontrib/arenadata_toolkit/src/track_files.c b/gpcontrib/arenadata_toolkit/src/track_files.c new file mode 100644 index 000000000000..638a84452ae4 --- /dev/null +++ b/gpcontrib/arenadata_toolkit/src/track_files.c @@ -0,0 +1,1502 @@ +#include "postgres.h" + + +#include "access/genam.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_namespace.h" +#include "catalog/pg_db_role_setting.h" +#include "cdb/cdbdisp_query.h" +#include "cdb/cdbdispatchresult.h" +#include "cdb/cdbvars.h" +#include "cdb/cdbutil.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "executor/executor.h" +#include "fmgr.h" +#include "funcapi.h" +#include "libpq-fe.h" +#include "miscadmin.h" +#include "storage/shmem.h" +#include "tcop/utility.h" +#include "utils/relcache.h" +#include "utils/builtins.h" +#include "utils/memutils.h" +#include "utils/fmgroids.h" +#include "utils/syscache.h" +#include "utils/lsyscache.h" + +#include "arenadata_toolkit_guc.h" +#include "arenadata_toolkit_worker.h" +#include "drops_track.h" +#include "dbsize.h" +#include "file_hook.h" +#include "tf_shmem.h" +#include "track_files.h" + +PG_FUNCTION_INFO_V1(tracking_register_db); +PG_FUNCTION_INFO_V1(tracking_unregister_db); +PG_FUNCTION_INFO_V1(tracking_set_snapshot_on_recovery); +PG_FUNCTION_INFO_V1(tracking_register_schema); 
+PG_FUNCTION_INFO_V1(tracking_unregister_schema); +PG_FUNCTION_INFO_V1(tracking_set_relkinds); +PG_FUNCTION_INFO_V1(tracking_set_relstorages); +PG_FUNCTION_INFO_V1(tracking_is_segment_initialized); +PG_FUNCTION_INFO_V1(tracking_trigger_initial_snapshot); +PG_FUNCTION_INFO_V1(tracking_is_initial_snapshot_triggered); +PG_FUNCTION_INFO_V1(tracking_get_track); +PG_FUNCTION_INFO_V1(tracking_track_version); + +/* + * Tuple description for result of tracking_get_track function. + */ +#define GET_TRACK_TUPDESC_LEN 9 +#define Anum_track_relid ((AttrNumber) 0) +#define Anum_track_name ((AttrNumber) 1) +#define Anum_track_relfilenode ((AttrNumber) 2) +#define Anum_track_size ((AttrNumber) 3) +#define Anum_track_state ((AttrNumber) 4) +#define Anum_track_gp_segment_id ((AttrNumber) 5) +#define Anum_track_gp_segment_relnamespace ((AttrNumber) 6) +#define Anum_track_gp_segment_relkind ((AttrNumber) 7) +#define Anum_track_gp_segment_relstorage ((AttrNumber) 8) + +/* + * Macros for string constants, which are used during work with GUCs + */ +#define TRACKING_SCHEMAS_PREFIX "arenadata_toolkit.tracking_schemas=" +#define TRACKING_RELSTORAGES_PREFIX "arenadata_toolkit.tracking_relstorages=" +#define TRACKING_RELKINDS_PREFIX "arenadata_toolkit.tracking_relkinds=" + +/* Preserved state among the calls of tracking_get_track */ +typedef struct +{ + Relation pg_class_rel; /* pg_class relation */ + SysScanDesc scan; /* for scans of system table */ +} tf_main_func_state_t; + +/* + * Main state during tracking_get_track_main call. Stores + * copy of shared Bloom and tracking filtering parameters. 
+ */ +typedef struct +{ + bloom_t *bloom; /* local copy of shared bloom */ + + List *drops; /* drop list for current db */ + ListCell *next_drop; + uint64 relkinds; /* tracking relkinds */ + uint64 relstorages; /* tracking relstorages */ + List *schema_oids; /* tracking schemas */ +} tf_get_global_state_t; + +static tf_get_global_state_t tf_get_global_state = {0}; + +static bool callbackRegistered = false; +static uint32 CurrentVersion = InvalidVersion; + +static bool isExecutorExplainMode = false; +ProcessUtility_hook_type next_ProcessUtility_hook = NULL; +ExecutorEnd_hook_type next_ExecutorEnd_hook = NULL; + +static inline void +tf_check_shmem_error(void) +{ + if (tf_shared_state == NULL) + ereport(ERROR, + (errmsg("Failed to access shared memory due to wrong extension initialization"), + errhint("Load extension's code through shared_preload_library configuration"))); +} + +static inline Oid +get_dbid(Oid dbid) +{ + return (dbid == InvalidOid) ? MyDatabaseId : dbid; +} + +static uint32 +track_bump_version(uint32 ver) +{ + ver++; + if (ver == InvalidVersion || ver == ControlVersion) + return StartVersion; + + return ver; +} + +/* + * If transaction called tracking_track_version commits, we + * can bump the track version, what leads to consistency with + * state on segments. In case of abort version on master differs from + * segment's and during track acquisition the previous + * filter is used on segments. 
+ */ +static void +xact_end_version_callback(XactEvent event, void *arg) +{ + bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (ctx.entry) + { + if (event == XACT_EVENT_COMMIT) + ctx.entry->master_version = track_bump_version(ctx.entry->master_version); + pg_atomic_clear_flag(&ctx.entry->capture_in_progress); + } + + bloom_set_release(&ctx); + + callbackRegistered = false; + CurrentVersion = InvalidVersion; + isExecutorExplainMode = false; +} + +static void +xact_end_track_callback(XactEvent event, void *arg) +{ + tf_get_global_state.bloom = NULL; + tf_get_global_state.drops = NIL; + tf_get_global_state.next_drop = NULL; + tf_get_global_state.relkinds = 0; + tf_get_global_state.relstorages = 0; + tf_get_global_state.schema_oids = NIL; +} + +static List * +split_string_to_list(const char *input) +{ + List *result = NIL; + char *input_copy; + char *token; + + if (input == NULL) + return NIL; + + input_copy = pstrdup(input); + + token = strtok(input_copy, ","); + + while (token != NULL) + { + if (*token != '\0') + { + result = lappend(result, pstrdup(token)); + } + + token = strtok(NULL, ","); + } + + pfree(input_copy); + + return result; +} + +/* + * Tracked relkinds and relstorage types + * are coded into 64 bits via ascii offsets. 
+ */ +static uint64 +list_to_bits(const char *input) +{ + char *input_copy; + char *token; + uint64 bits = 0; + + if (input == NULL) + return 0; + + input_copy = pstrdup(input); + + token = strtok(input_copy, ","); + + while (token != NULL) + { + if (*token != '\0') + bits |= (1ULL << (*token - 'A')); + + token = strtok(NULL, ","); + } + + pfree(input_copy); + + return bits; +} + +static void +get_filters_from_guc() +{ + Relation rel; + ScanKeyData skey[2]; + SysScanDesc scan; + HeapTuple tuple; + char *current_schemas = NULL; + char *current_relkinds = NULL; + char *current_relstorages = NULL; + List *schema_names = NIL; + ListCell *lc; + + rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock); + ScanKeyInit(&skey[0], + Anum_pg_db_role_setting_setdatabase, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(MyDatabaseId)); + + /* + * Lookup for not role specific configuration + */ + ScanKeyInit(&skey[1], + Anum_pg_db_role_setting_setrole, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(InvalidOid)); + scan = systable_beginscan(rel, DbRoleSettingDatidRolidIndexId, true, NULL, 2, skey); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + bool isnull; + Datum str_datum; + + str_datum = heap_getattr(tuple, Anum_pg_db_role_setting_setconfig, + RelationGetDescr(rel), &isnull); + if (!isnull) + { + ArrayType *array; + Datum *elems = NULL; + bool *nulls = NULL; + int nelems; + + array = DatumGetArrayTypeP(str_datum); + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, &nulls, &nelems); + for (int i = 0; i < nelems; i++) + { + if (nulls[i]) + continue; + + char *str = TextDatumGetCString(elems[i]); + + if (strncmp(str, + TRACKING_SCHEMAS_PREFIX, + sizeof(TRACKING_SCHEMAS_PREFIX) - 1) == 0) + { + current_schemas = pstrdup(str + sizeof(TRACKING_SCHEMAS_PREFIX) - 1); + } + else if (strncmp(str, + TRACKING_RELSTORAGES_PREFIX, + sizeof(TRACKING_RELSTORAGES_PREFIX) - 1) == 0) + { + current_relstorages = pstrdup(str + 
sizeof(TRACKING_RELSTORAGES_PREFIX) - 1); + } + else if (strncmp(str, + TRACKING_RELKINDS_PREFIX, + sizeof(TRACKING_RELKINDS_PREFIX) - 1) == 0) + { + current_relkinds = pstrdup(str + sizeof(TRACKING_RELKINDS_PREFIX) - 1); + } + + pfree(str); + } + + if (elems) + pfree(elems); + if (nulls) + pfree(nulls); + } + } + systable_endscan(scan); + heap_close(rel, RowExclusiveLock); + + if (current_schemas) + schema_names = split_string_to_list(current_schemas); + else + schema_names = split_string_to_list(DEFAULT_TRACKED_SCHEMAS); + if (current_relstorages) + tf_get_global_state.relstorages = list_to_bits(current_relstorages); + else + tf_get_global_state.relstorages = list_to_bits(DEFAULT_TRACKED_REL_STORAGES); + if (current_relkinds) + tf_get_global_state.relkinds = list_to_bits(current_relkinds); + else + tf_get_global_state.relkinds = list_to_bits(DEFAULT_TRACKED_REL_KINDS); + + foreach(lc, schema_names) + { + Oid nspOid; + char *name = (char *) lfirst(lc); + + nspOid = GetSysCacheOid1(NAMESPACENAME, CStringGetDatum(name)); + + if (!OidIsValid(nspOid)) + { + elog(DEBUG1, "[tracking_get_track] schema \"%s\" does not exist", name); + continue; + } + + tf_get_global_state.schema_oids = lappend_oid(tf_get_global_state.schema_oids, nspOid); + } + + if (schema_names) + pfree(schema_names); +} + + +static bool +schema_is_tracked(Oid schema) +{ + ListCell *lc; + + if (tf_get_global_state.schema_oids == NIL) + return false; + + foreach(lc, tf_get_global_state.schema_oids) + { + Oid tracked_schema = lfirst_oid(lc); + + if (tracked_schema == schema) + return true; + } + + return false; +} + +static bool +kind_is_tracked(char type, uint64 allowed_kinds) +{ + return (allowed_kinds & (1ULL << (type - 'A'))) != 0; +} + +/* + * Main function for relation size track acquisition. 
+ */ +Datum +tracking_get_track(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + tf_main_func_state_t *state; + HeapTuple result; + Datum datums[GET_TRACK_TUPDESC_LEN]; + bool nulls[GET_TRACK_TUPDESC_LEN] = {0}; + uint32 version = PG_GETARG_INT64(0); + + tf_check_shmem_error(); + + if (version == InvalidVersion) + ereport(ERROR, + (errmsg("Can't perform tracking for database %u properly due to internal error", MyDatabaseId))); + + if (SRF_IS_FIRSTCALL()) + { + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + + RegisterXactCallbackOnce(xact_end_track_callback, NULL); + + oldcontext = MemoryContextSwitchTo(CurTransactionContext); + + bloom_op_ctx_t bloom_ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (bloom_ctx.entry == NULL) + { + bloom_set_release(&bloom_ctx); + ereport(ERROR, + (errcode(ERRCODE_GP_COMMAND_ERROR), + errmsg("database %u is not tracked", MyDatabaseId), + errhint("Call 'arenadata_toolkit.tracking_register_db()'" + "to enable tracking"))); + } + + /* + * If current bloom's version differs from incoming, we suppose that + * the rollback of previous track acquisition have occured. In this + * situation we merge previous filter to current active filter. + * + * If the ControlVersion comes, it means that track is acquired + * several times in the same transaction. And the same filter is used + * in this situation. + */ + if (version != ControlVersion && version != bloom_ctx.entry->work_version) + { + bloom_merge_internal(&bloom_ctx.entry->bloom); + } + + /* + * This block handles 2 scenarios: + * 1. First track acquisition in transaction: + * - Copy current active bloom filter to local array. + * - Switch active bloom filter to preserve the state, which has just + * been copied. + * - Clear active filter. + * - Increment current version. + * 2. 
Subsequent track acquisition in same transaction (ControlVersion) + * - Temporarily switch to previous filter state + * - Copy switched bloom filter to local array + * - Switch back to active filter + * - Keep existing current version + */ + if (tf_get_global_state.bloom == NULL) + { + tf_get_global_state.bloom = palloc(full_bloom_size(bloom_size)); + bloom_init(bloom_size, tf_get_global_state.bloom); + + if (version == ControlVersion) + { + bloom_switch_current(&bloom_ctx.entry->bloom); + } + + bloom_copy(tf_get_global_state.bloom, &bloom_ctx.entry->bloom); + bloom_switch_current(&bloom_ctx.entry->bloom); + + if (version != ControlVersion) + { + bloom_clear(&bloom_ctx.entry->bloom); + bloom_ctx.entry->work_version = track_bump_version(version); + } + } + + bloom_set_release(&bloom_ctx); + + /* initial snapshot shouldn't return drops */ + if (!tf_get_global_state.bloom->is_set_all) + { + tf_get_global_state.drops = drops_track_move(MyDatabaseId); + tf_get_global_state.next_drop = list_head(tf_get_global_state.drops); + } + + /* + * Let's retrieve tracking information. 
+ */ + get_filters_from_guc(); + + /* emit warning only at coordinator */ + if ((tf_get_global_state.relstorages == 0 || + tf_get_global_state.relkinds == 0 || + tf_get_global_state.schema_oids == NIL) && + IS_QUERY_DISPATCHER()) + ereport(WARNING, + (errmsg("One of the tracking parameters (schemas," + "relkinds, relstorages) for database %u is empty.", MyDatabaseId))); + + MemoryContextSwitchTo(oldcontext); + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + funcctx->tuple_desc = CreateTemplateTupleDesc(GET_TRACK_TUPDESC_LEN, false); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_relid + 1, "relid", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_name + 1, "name", NAMEOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_relfilenode + 1, "relfilenode", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_size + 1, "size", INT8OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_state + 1, "state", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_id + 1, "gp_segment_id", INT4OID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relnamespace + 1, "relnamespace", OIDOID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relkind + 1, "relkind", CHAROID, -1, 0); + TupleDescInitEntry(funcctx->tuple_desc, Anum_track_gp_segment_relstorage + 1, "relstorage", CHAROID, -1, 0); + funcctx->tuple_desc = BlessTupleDesc(funcctx->tuple_desc); + + state = (tf_main_func_state_t *) palloc0(sizeof(tf_main_func_state_t)); + funcctx->user_fctx = (void *) state; + + state->pg_class_rel = heap_open(RelationRelationId, AccessShareLock); + state->scan = systable_beginscan(state->pg_class_rel, InvalidOid, false, NULL, 0, NULL); + + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + state = funcctx->user_fctx; + + while (true) + { + Oid filenode; + Oid relnamespace; + char relkind; + char relstorage; + 
HeapTuple pg_class_tuple; + Form_pg_class relp; + int64 size; + + if (!state->scan) + break; + + pg_class_tuple = systable_getnext(state->scan); + + if (!HeapTupleIsValid(pg_class_tuple)) + { + systable_endscan(state->scan); + heap_close(state->pg_class_rel, AccessShareLock); + state->scan = NULL; + state->pg_class_rel = NULL; + break; + } + + datums[Anum_track_gp_segment_relkind] = heap_getattr(pg_class_tuple, + Anum_pg_class_relkind, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relkind]); + relkind = DatumGetChar(datums[Anum_track_gp_segment_relkind]); + + if (!kind_is_tracked(relkind, tf_get_global_state.relkinds)) + continue; + + datums[Anum_track_gp_segment_relstorage] = heap_getattr(pg_class_tuple, + Anum_pg_class_relstorage, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relstorage]); + relstorage = DatumGetChar(datums[Anum_track_gp_segment_relstorage]); + + if (!kind_is_tracked(relstorage, tf_get_global_state.relstorages)) + continue; + + datums[Anum_track_gp_segment_relnamespace] = heap_getattr(pg_class_tuple, + Anum_pg_class_relnamespace, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_gp_segment_relnamespace]); + relnamespace = DatumGetObjectId(datums[Anum_track_gp_segment_relnamespace]); + + if (!schema_is_tracked(relnamespace)) + continue; + + datums[Anum_track_relid] = ObjectIdGetDatum(HeapTupleGetOid(pg_class_tuple)); + + datums[Anum_track_name] = heap_getattr(pg_class_tuple, + Anum_pg_class_relname, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_name]); + + datums[Anum_track_relfilenode] = heap_getattr(pg_class_tuple, + Anum_pg_class_relfilenode, + RelationGetDescr(state->pg_class_rel), + &nulls[Anum_track_relfilenode]); + filenode = DatumGetObjectId(datums[Anum_track_relfilenode]); + + if (nulls[Anum_track_relfilenode]) + continue; + + /* Bloom filter check */ + if (!bloom_isset(tf_get_global_state.bloom, filenode)) + continue; + + relp = (Form_pg_class) 
GETSTRUCT(pg_class_tuple); + size = dbsize_calc_size(relp); + datums[Anum_track_size] = Int64GetDatum(size); + datums[Anum_track_state] = CharGetDatum(tf_get_global_state.bloom->is_set_all ? 'i' : 'a'); + datums[Anum_track_gp_segment_id] = Int32GetDatum(GpIdentity.segindex); + + result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); + } + + while (true) + { + Oid filenode; + + if (!tf_get_global_state.next_drop) + break; + + filenode = lfirst_oid(tf_get_global_state.next_drop); + tf_get_global_state.next_drop = lnext(tf_get_global_state.next_drop); + + nulls[Anum_track_relid] = true; + nulls[Anum_track_name] = true; + datums[Anum_track_relfilenode] = filenode; + datums[Anum_track_size] = Int64GetDatum(0); + datums[Anum_track_state] = CharGetDatum('d'); + datums[Anum_track_gp_segment_id] = Int32GetDatum(GpIdentity.segindex); + nulls[Anum_track_gp_segment_relnamespace] = true; + nulls[Anum_track_gp_segment_relkind] = true; + nulls[Anum_track_gp_segment_relstorage] = true; + + result = heap_form_tuple(funcctx->tuple_desc, datums, nulls); + + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(result)); + } + + if (tf_get_global_state.bloom) + { + pfree(tf_get_global_state.bloom); + tf_get_global_state.bloom = NULL; + } + + if (tf_get_global_state.schema_oids) + { + pfree(tf_get_global_state.schema_oids); + tf_get_global_state.schema_oids = NIL; + } + + SRF_RETURN_DONE(funcctx); +} + +static void +track_db(Oid dbid, bool reg) +{ + if (Gp_role == GP_ROLE_DISPATCH) + { + AlterDatabaseSetStmt stmt; + VariableSetStmt v_stmt; + A_Const aconst = + {.type = T_A_Const,.val = {.type = T_String,.val.str = reg ? 
"t" : "f"}}; + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.kind = VAR_SET_VALUE; + v_stmt.name = "arenadata_toolkit.tracking_is_db_tracked"; + v_stmt.args = lappend(NIL, &aconst); + v_stmt.is_local = false; + + tf_guc_unlock(); + + AlterDatabaseSet(&stmt); + + tf_guc_unlock(); + /* Will set the GUC in caller session only on coordinator */ + SetConfigOption("arenadata_toolkit.tracking_is_db_tracked", reg ? "t" : "f", + PGC_SUSET, PGC_S_DATABASE); + } + + if (!reg) + bloom_set_unbind(dbid); + else if (!bloom_set_bind(dbid)) + ereport(ERROR, + (errmsg("[arenadata_toolkit] exceeded maximum number of tracked databases"))); +} + +static bool +is_initialized() +{ + CdbPgResults cdb_pgresults = {NULL, 0}; + bool all_inited = true; + + if (pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized)) + return false; + + CdbDispatchCommand("select * from arenadata_toolkit.tracking_is_segment_initialized()", 0, &cdb_pgresults); + + for (int i = 0; i < cdb_pgresults.numResults; i++) + { + struct pg_result *pgresult = cdb_pgresults.pg_results[i]; + + if (PQresultStatus(pgresult) != PGRES_TUPLES_OK) + { + cdbdisp_clearCdbPgResults(&cdb_pgresults); + ereport(ERROR, + (errmsg("Failed to check segments status"))); + } + else + { + int32 segindex = 0; + bool is_initialized = false; + + segindex = atoi(PQgetvalue(pgresult, 0, 0)); + is_initialized = strcmp(PQgetvalue(pgresult, 0, 1), "t") == 0; + + elog(LOG, "[arenadata_toolkit] tracking_register_db initialization check" + " segindex: %d, is_initialized: %d", segindex, is_initialized); + + if (!is_initialized) + { + all_inited = false; + break; + } + } + } + + if (cdb_pgresults.numResults > 0) + cdbdisp_clearCdbPgResults(&cdb_pgresults); + + return all_inited; +} + +/* + * Registers current (if dbid is 
0) or specific database as tracked by arenadata_toolkit tables tracking.
+ * Dispatches call to segments by itself. Binds a bloom filter to the registered database if possible.
+ */
+Datum
+tracking_register_db(PG_FUNCTION_ARGS)
+{
+	Oid			dbid = get_dbid(PG_GETARG_OID(0));
+
+	tf_check_shmem_error();
+	/* Refuse to run on a query dispatcher process whose Gp_role is not GP_ROLE_DISPATCH. */
+	if (Gp_role != GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER())
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_register_db outside query dispatcher")));
+	}
+	/* In dispatch role, proceed only once is_initialized() returns true. */
+	if (Gp_role == GP_ROLE_DISPATCH && !is_initialized())
+		ereport(ERROR,
+				(errmsg("[arenadata_toolkit] Cannot register database before workers initialize tracking"),
+				 errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again")));
+
+	elog(LOG, "[arenadata_toolkit] registering database %u for tracking", dbid);
+	/* Apply the registration in this process first ... */
+	track_db(dbid, true);
+	/* ... then, in dispatch role, issue the same SQL call through CdbDispatchCommand(). */
+	if (Gp_role == GP_ROLE_DISPATCH)
+	{
+		char	   *cmd =
+		psprintf("select arenadata_toolkit.tracking_register_db(%u)", dbid);
+
+		CdbDispatchCommand(cmd, 0, NULL);
+
+		pfree(cmd);
+	}
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * Stop tracking given database and unbind from bloom.
+ */
+Datum
+tracking_unregister_db(PG_FUNCTION_ARGS)
+{
+	Oid			dbid = get_dbid(PG_GETARG_OID(0));
+
+	tf_check_shmem_error();
+	/* Refuse to run on a query dispatcher process whose Gp_role is not GP_ROLE_DISPATCH. */
+	if (Gp_role != GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER())
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_unregister_db outside query dispatcher")));
+	}
+	/* In dispatch role, proceed only once is_initialized() returns true. */
+	if (Gp_role == GP_ROLE_DISPATCH && !is_initialized())
+		ereport(ERROR,
+				(errmsg("[arenadata_toolkit] Cannot unregister database before workers initialize tracking"),
+				 errhint("Wait arenadata_toolkit.tracking_worker_naptime_sec and try again")));
+
+	elog(LOG, "[arenadata_toolkit] unregistering database %u from tracking", dbid);
+	/* Apply the change in this process first ... */
+	track_db(dbid, false);
+	/* ... then, in dispatch role, issue the same SQL call through CdbDispatchCommand(). */
+	if (Gp_role == GP_ROLE_DISPATCH)
+	{
+		char	   *cmd =
+		psprintf("select arenadata_toolkit.tracking_unregister_db(%u)", dbid);
+
+		CdbDispatchCommand(cmd, 0, NULL);
+
+		pfree(cmd);
+	}
+
+	PG_RETURN_BOOL(true);
+}
+/*
+ * Store arenadata_toolkit.tracking_snapshot_on_recovery ("t" or "f", taken
+ * from the first argument) for the database resolved from the second
+ * argument via AlterDatabaseSet(), then set it with SetConfigOption().
+ * Raises ERROR unless Gp_role is GP_ROLE_DISPATCH, or when the database name
+ * cannot be resolved.
+ */
+Datum
+tracking_set_snapshot_on_recovery(PG_FUNCTION_ARGS)
+{
+	bool		set = PG_GETARG_BOOL(0);
+	Oid			dbid = get_dbid(PG_GETARG_OID(1));
+
+	if (Gp_role != GP_ROLE_DISPATCH)
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_set_snapshot_on_recovery outside query dispatcher")));
+	}
+	/* Hand-built parse nodes equivalent to a VAR_SET_VALUE assignment of a string constant. */
+	A_Const		aconst =
+	{.type = T_A_Const,.val = {.type = T_String,.val.str = set ? "t" : "f"}};
+
+	AlterDatabaseSetStmt stmt;
+	VariableSetStmt v_stmt;
+
+	stmt.type = T_AlterDatabaseSetStmt;
+	stmt.dbname = get_database_name(dbid);
+	stmt.setstmt = &v_stmt;
+
+	if (stmt.dbname == NULL)
+		ereport(ERROR,
+				(errmsg("[arenadata_toolkit] database %u does not exist", dbid)));
+
+	v_stmt.type = T_VariableSetStmt;
+	v_stmt.kind = VAR_SET_VALUE;
+	v_stmt.name = "arenadata_toolkit.tracking_snapshot_on_recovery";
+	v_stmt.args = lappend(NIL, &aconst);
+	v_stmt.is_local = false;
+	/* NOTE(review): tf_guc_unlock() is called once before each GUC write; presumably the unlock is single-use - confirm. */
+	tf_guc_unlock();
+
+	AlterDatabaseSet(&stmt);
+
+	/* Will set the GUC in caller session only on coordinator */
+	tf_guc_unlock();
+	SetConfigOption("arenadata_toolkit.tracking_snapshot_on_recovery", set ?
"t" : "f",
+					PGC_SUSET, PGC_S_DATABASE);
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * Helper function to add or remove schema from configuration string.
+ *
+ * schema_string is a comma-separated list of schema names; NULL means
+ * "start from DEFAULT_TRACKED_SCHEMAS". With add == true schemaName is
+ * appended unless it is already listed; with add == false every occurrence
+ * of schemaName is dropped.
+ *
+ * Returns a palloc'd comma-separated list without a trailing comma, or NULL
+ * when the resulting list is empty.
+ */
+static char *
+add_or_remove_schema(const char *schema_string, const char *schemaName, bool add)
+{
+	StringInfoData buf;
+	bool		found = false;
+
+	initStringInfo(&buf);
+
+	/*
+	 * consider NULL value as a need for applying operation
+	 * to default schema set
+	 */
+	if (schema_string == NULL)
+		schema_string = DEFAULT_TRACKED_SCHEMAS;
+
+	/*
+	 * If string is empty, we can only add
+	 */
+	if (schema_string[0] == '\0' && !add)
+	{
+		pfree(buf.data);
+		return NULL;
+	}
+
+	if (schema_string[0] != '\0')
+	{
+		char	   *str = pstrdup(schema_string);
+		char	   *saveptr = NULL;
+		char	   *token;
+
+		/*
+		 * strtok_r keeps its position in saveptr, so the scan neither
+		 * depends on nor disturbs strtok()'s hidden global state.
+		 */
+		for (token = strtok_r(str, ",", &saveptr);
+			 token != NULL;
+			 token = strtok_r(NULL, ",", &saveptr))
+		{
+			bool		match = (strcmp(token, schemaName) == 0);
+
+			if (match)
+				found = true;
+
+			/* Copy every entry except the one being removed. */
+			if (!match || add)
+				appendStringInfo(&buf, "%s,", token);
+		}
+		pfree(str);
+	}
+
+	if (add && !found)
+		appendStringInfo(&buf, "%s,", schemaName);
+
+	/* Every appended entry ends with ','; strip the last one. */
+	if (buf.len > 0 && buf.data[buf.len - 1] == ',')
+	{
+		buf.data[buf.len - 1] = '\0';
+		buf.len--;
+	}
+
+	if (buf.len == 0)
+	{
+		pfree(buf.data);
+		return NULL;
+	}
+
+	return buf.data;
+}
+
+static void
+track_schema(const char *schemaName, Oid dbid, bool reg)
+{
+	Relation	rel;
+	ScanKeyData skey[2];
+	SysScanDesc scan;
+	HeapTuple	tuple;
+	char	   *current_schemas = NULL;
+	char	   *new_schemas = NULL;
+	AlterDatabaseSetStmt stmt;
+	VariableSetStmt v_stmt;
+	A_Const		arg;
+
+	rel = heap_open(DbRoleSettingRelationId, RowExclusiveLock);
+	ScanKeyInit(&skey[0],
+				Anum_pg_db_role_setting_setdatabase,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(dbid));
+
+	/*
+	 * Lookup for not role specific configuration
+	 */
+	ScanKeyInit(&skey[1],
+				Anum_pg_db_role_setting_setrole,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(InvalidOid));
+	scan = systable_beginscan(rel,
DbRoleSettingDatidRolidIndexId, true, NULL, 2, skey); + + tuple = systable_getnext(scan); + if (HeapTupleIsValid(tuple)) + { + bool isnull; + Datum str_datum; + + str_datum = heap_getattr(tuple, Anum_pg_db_role_setting_setconfig, + RelationGetDescr(rel), &isnull); + if (!isnull) + { + ArrayType *array; + Datum *elems = NULL; + int nelems; + + array = DatumGetArrayTypeP(str_datum); + deconstruct_array(array, TEXTOID, -1, false, 'i', + &elems, NULL, &nelems); + for (int i = 0; i < nelems; i++) + { + char *str = TextDatumGetCString(elems[i]); + + if (strncmp(str, TRACKING_SCHEMAS_PREFIX, + sizeof(TRACKING_SCHEMAS_PREFIX) - 1) == 0) + { + current_schemas = pstrdup(str + sizeof(TRACKING_SCHEMAS_PREFIX) - 1); + break; + } + pfree(str); + } + + if (elems) + pfree(elems); + } + } + systable_endscan(scan); + heap_close(rel, RowExclusiveLock); + + new_schemas = add_or_remove_schema(current_schemas, schemaName, reg); + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.name = "arenadata_toolkit.tracking_schemas"; + v_stmt.is_local = false; + + arg.type = T_A_Const; + arg.val.type = T_String; + arg.val.val.str = new_schemas; + arg.location = -1; + + if (new_schemas == NULL) + { + /* + * If new_schemas is NULL, we're removing the last schema, that should + * lead to empty result set during track acquisition. But we anyway + * need to store an empty string to distinguish state when the GUC has + * default value and when the get_track() is supposed to + * filter out all schemas. 
+		 */
+		arg.val.val.str = pstrdup("");
+	}
+
+	v_stmt.kind = VAR_SET_VALUE;
+	v_stmt.args = list_make1(&arg);
+
+	tf_guc_unlock();
+
+	AlterDatabaseSet(&stmt);
+
+	/* Will set the GUC in caller session only on coordinator */
+	tf_guc_unlock();
+	SetConfigOption("arenadata_toolkit.tracking_schemas",
+					new_schemas ? new_schemas : "",
+					PGC_SUSET, PGC_S_DATABASE);
+
+	if (current_schemas)
+		pfree(current_schemas);
+	if (new_schemas)
+		pfree(new_schemas);
+}
+
+/*
+ * Add a schema (first argument) to the tracked schema list of the database
+ * resolved from the second argument.
+ *
+ * Raises ERROR unless Gp_role is GP_ROLE_DISPATCH, or when the schema name
+ * is not found through the NAMESPACENAME syscache. Returns true.
+ */
+Datum
+tracking_register_schema(PG_FUNCTION_ARGS)
+{
+	const char *schema_name = NameStr(*PG_GETARG_NAME(0));
+	Oid			dbid = get_dbid(PG_GETARG_OID(1));
+
+	if (Gp_role != GP_ROLE_DISPATCH)
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_register_schema outside query dispatcher")));
+
+	if (schema_name == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_SCHEMA),
+				 errmsg("schema does not exist")));
+
+	if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_SCHEMA),
+				 errmsg("schema %s does not exist", schema_name)));
+
+	elog(LOG, "[arenadata_toolkit] registering schema %s in database %u for tracking", schema_name, dbid);
+
+	track_schema(schema_name, dbid, true);
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * Remove a schema (first argument) from the tracked schema list of the
+ * database resolved from the second argument.
+ *
+ * Raises ERROR unless Gp_role is GP_ROLE_DISPATCH, or when the schema name
+ * is not found through the NAMESPACENAME syscache.
+ */
+Datum
+tracking_unregister_schema(PG_FUNCTION_ARGS)
+{
+	const char *schema_name = NameStr(*PG_GETARG_NAME(0));
+	Oid			dbid = get_dbid(PG_GETARG_OID(1));
+
+	if (Gp_role != GP_ROLE_DISPATCH)
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_unregister_schema outside query dispatcher")));
+	}
+
+	if (schema_name == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_SCHEMA),
+				 errmsg("schema does not exist")));
+
+	/* The lookup is by name, so the message reports a name (not an OID). */
+	if (!SearchSysCacheExists1(NAMESPACENAME, CStringGetDatum(schema_name)))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_SCHEMA),
+				 errmsg("schema %s does not exist", schema_name)));
+
+	elog(LOG, "[arenadata_toolkit] unregistering schema %s in database %u from tracking", schema_name, dbid);
+
+	track_schema(schema_name, dbid, false);
+
+	PG_RETURN_BOOL(true);
+}
+/* Return true when relkind is one of the RELKIND_* codes listed below, false otherwise. */
+static bool
+is_valid_relkind(char relkind)
+{
+	switch (relkind)
+	{
+		case RELKIND_RELATION:
+		case RELKIND_INDEX:
+		case RELKIND_SEQUENCE:
+		case RELKIND_TOASTVALUE:
+		case RELKIND_VIEW:
+		case RELKIND_COMPOSITE_TYPE:
+		case RELKIND_FOREIGN_TABLE:
+		case RELKIND_UNCATALOGED:
+		case RELKIND_MATVIEW:
+		case RELKIND_AOSEGMENTS:
+		case RELKIND_AOBLOCKDIR:
+		case RELKIND_AOVISIMAP:
+			return true;
+		default:
+			return false;
+	}
+}
+/*
+ * Validate a comma-separated list of one-character relkinds (first argument)
+ * and prepare an AlterDatabaseSetStmt that assigns the de-duplicated list to
+ * arenadata_toolkit.tracking_relkinds for the database resolved from the
+ * second argument. Raises ERROR unless Gp_role is GP_ROLE_DISPATCH, and on
+ * any token that is not a single valid relkind character.
+ */
+Datum
+tracking_set_relkinds(PG_FUNCTION_ARGS)
+{
+	char	   *relkinds_str = NameStr(*PG_GETARG_NAME(0));
+	Oid			dbid = get_dbid(PG_GETARG_OID(1));
+	char	   *token;
+	char	   *str_copy;
+	bool		seen_relkinds[256] = {false};	/* indexed by (unsigned char) relkind */
+	StringInfoData buf;
+	AlterDatabaseSetStmt stmt;
+	VariableSetStmt v_stmt;
+	A_Const		arg;
+
+	if (Gp_role != GP_ROLE_DISPATCH)
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_set_relkinds outside query dispatcher")));
+	}
+
+	if (relkinds_str == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("relkinds argument cannot be NULL")));
+	/* strtok() writes into its input, hence the private copy of the argument. */
+	initStringInfo(&buf);
+	str_copy = pstrdup(relkinds_str);
+	token = strtok(str_copy, ",");
+	while (token != NULL)
+	{
+		if (strlen(token) != 1 || !is_valid_relkind(token[0]))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Invalid relkind: %s", token),
+					 errhint("Valid relkinds are: 'r', 'i', 'S', 't', 'v', 'c', 'f', 'u', 'm', 'o', 'b', 'M'")));
+		/* Emit each relkind once, followed by ','. */
+		if (!seen_relkinds[(unsigned char) token[0]])
+		{
+			appendStringInfoChar(&buf, token[0]);
+			appendStringInfoChar(&buf, ',');
+			seen_relkinds[(unsigned char) token[0]] = true;
+		}
+		token = strtok(NULL, ",");
+	}
+	pfree(str_copy);
+
+	stmt.type = T_AlterDatabaseSetStmt;
+	stmt.dbname = get_database_name(dbid);
+	stmt.setstmt = &v_stmt;
+
+	if (stmt.dbname == NULL)
+		ereport(ERROR,
+				(errmsg("[arenadata_toolkit] database %u does not exist", dbid)));
+
+	v_stmt.type = T_VariableSetStmt;
+	v_stmt.name = "arenadata_toolkit.tracking_relkinds";
+	v_stmt.is_local =
false;
+
+	arg.type = T_A_Const;
+	arg.val.type = T_String;
+	arg.val.val.str = buf.data;
+	arg.location = -1;
+	/* Drop the trailing ',' in place; arg.val.val.str points at the same buffer. */
+	if (buf.len > 0 && buf.data[buf.len - 1] == ',')
+	{
+		buf.data[buf.len - 1] = '\0';
+		buf.len--;
+	}
+
+	v_stmt.kind = VAR_SET_VALUE;
+	v_stmt.args = list_make1(&arg);
+	elog(LOG, "[arenadata_toolkit] setting relkinds %s in database %u for tracking", buf.data, dbid);
+
+	tf_guc_unlock();
+
+	AlterDatabaseSet(&stmt);
+
+	/* Will set the GUC in caller session only on coordinator */
+	tf_guc_unlock();
+	SetConfigOption("arenadata_toolkit.tracking_relkinds",
+					buf.data,
+					PGC_SUSET, PGC_S_DATABASE);
+
+	pfree(buf.data);
+
+	PG_RETURN_BOOL(true);
+}
+/* Return true when relstorage is one of the RELSTORAGE_* codes listed below, false otherwise. */
+static bool
+is_valid_relstorage(char relstorage)
+{
+	switch (relstorage)
+	{
+		case RELSTORAGE_HEAP:
+		case RELSTORAGE_AOROWS:
+		case RELSTORAGE_AOCOLS:
+		case RELSTORAGE_EXTERNAL:
+		case RELSTORAGE_VIRTUAL:
+		case RELSTORAGE_FOREIGN:
+			return true;
+		default:
+			return false;
+	}
+}
+/*
+ * Validate a comma-separated list of one-character relstorage codes (first
+ * argument) and collect the de-duplicated list for the database resolved
+ * from the second argument. Raises ERROR unless Gp_role is GP_ROLE_DISPATCH,
+ * and on any token that is not a single valid relstorage character.
+ */
+Datum
+tracking_set_relstorages(PG_FUNCTION_ARGS)
+{
+	char	   *relstorages_str = NameStr(*PG_GETARG_NAME(0));
+	Oid			dbid = get_dbid(PG_GETARG_OID(1));
+	char	   *token;
+	char	   *str_copy;
+	bool		seen_relstorages[256] = {false};	/* indexed by (unsigned char) relstorage */
+	StringInfoData buf;
+	AlterDatabaseSetStmt stmt;
+	VariableSetStmt v_stmt;
+	A_Const		arg;
+
+	if (Gp_role != GP_ROLE_DISPATCH)
+	{
+		ereport(ERROR,
+				(errmsg("Cannot execute tracking_set_relstorages outside query dispatcher")));
+	}
+
+	if (relstorages_str == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("relstorages argument cannot be NULL")));
+	/* strtok() writes into its input, hence the private copy of the argument. */
+	initStringInfo(&buf);
+	str_copy = pstrdup(relstorages_str);
+	token = strtok(str_copy, ",");
+	while (token != NULL)
+	{
+		if (strlen(token) != 1 || !is_valid_relstorage(token[0]))
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("Invalid relstorage type: %s", token),
+					 errhint("Valid relstorages are: 'h', 'x', 'a', 'v', 'c', 'f'")));
+
+		if (!seen_relstorages[(unsigned char) token[0]])
+		{
+
appendStringInfoChar(&buf, token[0]); + appendStringInfoChar(&buf, ','); + seen_relstorages[(unsigned char) token[0]] = true; + } + token = strtok(NULL, ","); + } + pfree(str_copy); + + stmt.type = T_AlterDatabaseSetStmt; + stmt.dbname = get_database_name(dbid); + + if (stmt.dbname == NULL) + ereport(ERROR, + (errmsg("[arenadata_toolkit] database %u does not exist", dbid))); + + stmt.setstmt = &v_stmt; + + v_stmt.type = T_VariableSetStmt; + v_stmt.name = "arenadata_toolkit.tracking_relstorages"; + v_stmt.is_local = false; + + arg.type = T_A_Const; + arg.val.type = T_String; + arg.val.val.str = buf.data; + arg.location = -1; + + if (buf.len > 0 && buf.data[buf.len - 1] == ',') + { + buf.data[buf.len - 1] = '\0'; + buf.len--; + } + + v_stmt.kind = VAR_SET_VALUE; + v_stmt.args = list_make1(&arg); + elog(LOG, "[arenadata_toolkit] setting relstorages %s in database %u for tracking", buf.data, dbid); + + tf_guc_unlock(); + + AlterDatabaseSet(&stmt); + + /* Will set the GUC in caller session only on coordinator */ + tf_guc_unlock(); + SetConfigOption("arenadata_toolkit.tracking_relstorages", + buf.data, + PGC_SUSET, PGC_S_DATABASE); + + pfree(buf.data); + + PG_RETURN_BOOL(true); +} + +Datum +tracking_trigger_initial_snapshot(PG_FUNCTION_ARGS) +{ + Oid dbid = get_dbid(PG_GETARG_OID(0)); + bloom_op_ctx_t ctx = {0}; + + tf_check_shmem_error(); + + if (Gp_role != GP_ROLE_DISPATCH && IS_QUERY_DISPATCHER()) + { + ereport(ERROR, + (errmsg("Cannot execute tracking_trigger_initial_snapshot outside query dispatcher"))); + } + + elog(LOG, "[arenadata_toolkit] tracking_trigger_initial_snapshot dbid: %u", dbid); + + ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE); + + if (!ctx.entry) + { + bloom_set_release(&ctx); + ereport(ERROR, + (errmsg("Failed to find corresponding filter to database %u", dbid))); + } + + if (Gp_role == GP_ROLE_DISPATCH && !pg_atomic_unlocked_test_flag(&ctx.entry->capture_in_progress)) + { + bloom_set_release(&ctx); + ereport(ERROR, + 
(errmsg("Cannot modify track during track acquisition %u", dbid))); + } + + bloom_set_all(&ctx.entry->bloom); + bloom_set_release(&ctx); + + if (Gp_role == GP_ROLE_DISPATCH) + { + char *cmd = psprintf("select arenadata_toolkit.tracking_trigger_initial_snapshot(%u)", dbid); + + CdbDispatchCommand(cmd, 0, NULL); + } + + PG_RETURN_BOOL(true); +} + +Datum +tracking_is_initial_snapshot_triggered(PG_FUNCTION_ARGS) +{ + Oid dbid = get_dbid(PG_GETARG_OID(0)); + bool is_triggered = false; + + tf_check_shmem_error(); + + is_triggered = bloom_set_is_all_bits_triggered(dbid); + + elog(LOG, "[arenadata_toolkit] is_initial_snapshot_triggered:%d dbid: %u", is_triggered, dbid); + + PG_RETURN_BOOL(is_triggered); +} + +Datum +tracking_is_segment_initialized(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsi; + TupleDesc tupdesc; + HeapTuple tuple; + Datum values[2]; + bool nulls[2] = {false, false}; + Datum result; + + tf_check_shmem_error(); + + rsi = (ReturnSetInfo *) fcinfo->resultinfo; + tupdesc = rsi->expectedDesc; + + /* Populate an output tuple. 
*/
+	values[0] = Int32GetDatum(GpIdentity.segindex);
+	values[1] = BoolGetDatum(pg_atomic_unlocked_test_flag(&tf_shared_state->tracking_is_initialized) == false);
+
+	tuple = heap_form_tuple(tupdesc, values, nulls);
+	result = HeapTupleGetDatum(tuple);
+
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * Report the effective value of the "analyze" option in an EXPLAIN option
+ * list (case-insensitive name match). When the option is repeated the last
+ * occurrence decides; returns false when the option is absent.
+ */
+static bool
+is_explain_analyze(List *options)
+{
+	ListCell   *lc;
+	bool		analyze = false;
+
+	foreach(lc, options)
+	{
+		DefElem    *opt = (DefElem *) lfirst(lc);
+
+		if (pg_strcasecmp(opt->defname, "analyze") == 0)
+			analyze = defGetBoolean(opt);
+	}
+
+	return analyze;
+}
+
+/*
+ * ProcessUtility hook: raises isExecutorExplainMode while an EXPLAIN without
+ * ANALYZE is processed by the next hook in the chain, and lowers it again
+ * afterwards - also when the chained hook exits with an ERROR.
+ */
+static void
+explain_detector_ProcessUtility(Node *parsetree,
+								const char *queryString,
+								ProcessUtilityContext context,
+								ParamListInfo params,
+								DestReceiver *dest,
+								char *completionTag)
+{
+	if (IsA(parsetree, ExplainStmt))
+	{
+		ExplainStmt *stmt = (ExplainStmt *) parsetree;
+
+		if (!is_explain_analyze(stmt->options))
+			isExecutorExplainMode = true;
+	}
+
+	if (next_ProcessUtility_hook)
+	{
+		/*
+		 * An ERROR raised below would otherwise skip the reset and leave the
+		 * flag set for later statements of this backend.
+		 */
+		PG_TRY();
+		{
+			next_ProcessUtility_hook(parsetree, queryString, context, params, dest, completionTag);
+		}
+		PG_CATCH();
+		{
+			isExecutorExplainMode = false;
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
+	}
+
+	isExecutorExplainMode = false;
+}
+
+/*
+ * When any query execution ends, current_version is set to control.
+ * If the tracking_track_version registered transaction callback
+ * and its transaction is still going, then subsequent tracking_track_version
+ * calls within the transaction will return ControlVersion.
+ */
+static void
+track_ExecutorEnd(QueryDesc *queryDesc)
+{
+	CurrentVersion = ControlVersion;
+
+	if (next_ExecutorEnd_hook)
+		next_ExecutorEnd_hook(queryDesc);
+}
+
+/*
+ * Install the ProcessUtility and ExecutorEnd hooks of this module. The
+ * previously installed hook (or the standard_* implementation when none was
+ * set) is remembered so that the chain can be continued.
+ */
+void
+track_setup_executor_hooks(void)
+{
+	next_ProcessUtility_hook = ProcessUtility_hook ? ProcessUtility_hook : standard_ProcessUtility;
+	ProcessUtility_hook = explain_detector_ProcessUtility;
+
+	next_ExecutorEnd_hook = ExecutorEnd_hook ? ExecutorEnd_hook : standard_ExecutorEnd;
+	ExecutorEnd_hook = track_ExecutorEnd;
+}
+
+void
+track_uninstall_executor_hooks(void)
+{
+	ProcessUtility_hook = (next_ProcessUtility_hook == standard_ProcessUtility) ?
NULL : next_ProcessUtility_hook;
+	ExecutorEnd_hook = (next_ExecutorEnd_hook == standard_ExecutorEnd) ? NULL : next_ExecutorEnd_hook;
+}
+
+/*
+ * This function should be used as argument for tracking_get_track function to
+ * follow correct transaction semantics. Several calls of the function within
+ * the same transaction return ControlVersion, which says tracking_get_track
+ * to return previous filter state.
+ *
+ * Returns InvalidVersion while isExecutorExplainMode is set. On the first
+ * call with callbackRegistered unset it registers xact_end_version_callback,
+ * takes the capture_in_progress flag of the current database's filter entry
+ * and loads CurrentVersion from the entry's master_version.
+ *
+ * Raises ERROR unless Gp_role is GP_ROLE_DISPATCH, when the current database
+ * has no filter entry, or when capture_in_progress is already taken.
+ */
+Datum
+tracking_track_version(PG_FUNCTION_ARGS)
+{
+	if (Gp_role != GP_ROLE_DISPATCH)
+		ereport(ERROR,
+				(errmsg("Cannot acquire track using such query")));
+
+	if (isExecutorExplainMode)
+		PG_RETURN_INT64((int64) InvalidVersion);
+
+	tf_check_shmem_error();
+
+	if (!callbackRegistered)
+	{
+		RegisterXactCallbackOnce(xact_end_version_callback, NULL);
+		callbackRegistered = true;
+
+		bloom_op_ctx_t ctx = bloom_set_get_entry(MyDatabaseId, LW_SHARED, LW_EXCLUSIVE);
+
+		if (!ctx.entry)
+		{
+			bloom_set_release(&ctx);
+
+			/* Adjacent literals are concatenated as-is: keep the separating space. */
+			ereport(ERROR,
+					(errcode(ERRCODE_GP_COMMAND_ERROR),
+					 errmsg("database %u is not tracked", MyDatabaseId),
+					 errhint("Call 'arenadata_toolkit.tracking_register_db()' "
+							 "to enable tracking")));
+		}
+		else if (!pg_atomic_test_set_flag(&ctx.entry->capture_in_progress))
+		{
+			bloom_set_release(&ctx);
+			ereport(ERROR,
+					(errcode(ERRCODE_GP_COMMAND_ERROR),
+					 errmsg("Track for database %u is being acquired in other transaction", MyDatabaseId)));
+		}
+
+		CurrentVersion = ctx.entry->master_version;
+		bloom_set_release(&ctx);
+	}
+
+	PG_RETURN_INT64((int64) CurrentVersion);
+}