Invalidate diskquota.table_size entries during startup (#27)
Diskquota periodically calculates table sizes and stores them in the
diskquota.table_size table, pausing for diskquota.naptime (2 seconds by
default) between runs. If the cluster is restarted during this pause,
diskquota loses all changes made since the last flush to the
diskquota.table_size table. For example, we could create temporary tables,
wait until they are flushed to the diskquota.table_size table, and restart
the cluster: diskquota would still remember the temporary tables. Likewise,
we could drop tables, restart the cluster, and diskquota would still remember
the dropped tables. This happens because on cluster start diskquota loads
everything written to the diskquota.table_size table, but does not check
whether some of those tables have already been dropped.

As a solution, we invalidate diskquota.table_size entries during diskquota
worker startup, in addition to validating against pg_class.
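
Before this fix the stale state was easy to observe after a restart. The new isolation tests below check for it with a query along these lines (a sketch based on those tests; segid = -1 is the row the tests inspect, presumably the cluster-wide aggregate):

-- Entries in diskquota.table_size whose table no longer exists in pg_class.
-- With this fix applied, the result is empty even right after a cluster restart.
SELECT ts.tableid
FROM diskquota.table_size AS ts
WHERE ts.segid = -1
  AND NOT EXISTS (SELECT 1 FROM pg_catalog.pg_class c WHERE c.oid = ts.tableid);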
RekGRpth authored and red1452 committed Nov 23, 2023
1 parent 49a9a2d commit 14b861d
Showing 8 changed files with 182 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/diskquota.h
@@ -290,7 +290,7 @@ extern bool diskquota_hardlimit;
extern int SEGCOUNT;
extern int worker_spi_get_extension_version(int *major, int *minor);
extern void truncateStringInfo(StringInfo str, int nchars);
extern List *get_rel_oid_list(void);
extern List *get_rel_oid_list(bool is_init);
extern int64 calculate_relation_size_all_forks(RelFileNodeBackend *rnode, char relstorage, Oid relam);
extern Relation diskquota_relation_open(Oid relid);
extern bool get_rel_name_namespace(Oid relid, Oid *nsOid, char *relname);
18 changes: 13 additions & 5 deletions src/diskquota_utility.c
@@ -113,8 +113,6 @@ static float4 get_per_segment_ratio(Oid spcoid);
static bool to_delete_quota(QuotaType type, int64 quota_limit_mb, float4 segratio);
static void check_role(Oid roleoid, char *rolname, int64 quota_limit_mb);

List *get_rel_oid_list(void);

/* ---- Help Functions to set quota limit. ---- */
/*
* Initialize table diskquota.table_size.
@@ -1296,24 +1294,34 @@ worker_spi_get_extension_version(int *major, int *minor)
* Get the list of oids of the tables which diskquota
* needs to care about in the database.
* Firstly the all the table oids which relkind is 'r'
* or 'm' and not system table.
* or 'm' and not system table. On init stage, oids from
* diskquota.table_size are added to invalidate them.
* Then, fetch the indexes of those tables.
*/

List *
get_rel_oid_list(void)
get_rel_oid_list(bool is_init)
{
List *oidlist = NIL;
int ret;

ret = SPI_execute_with_args("select oid from pg_class where oid >= $1 and (relkind='r' or relkind='m')", 1,
#define SELECT_FROM_PG_CATALOG_PG_CLASS "select oid from pg_catalog.pg_class where oid >= $1 and relkind in ('r', 'm')"

ret = SPI_execute_with_args(is_init ? SELECT_FROM_PG_CATALOG_PG_CLASS
" union distinct"
" select tableid from diskquota.table_size where segid = -1"
: SELECT_FROM_PG_CATALOG_PG_CLASS,
1,
(Oid[]){
OIDOID,
},
(Datum[]){
ObjectIdGetDatum(FirstNormalObjectId),
},
NULL, false, 0);

#undef SELECT_FROM_PG_CATALOG_PG_CLASS

if (ret != SPI_OK_SELECT) elog(ERROR, "cannot fetch in pg_class. error code %d", ret);

TupleDesc tupdesc = SPI_tuptable->tupdesc;
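
When is_init is true, the SPI call above effectively executes the statement below (assembled from the string literals in this hunk; $1 is bound to FirstNormalObjectId). When is_init is false, only the pg_class part runs, as before.

-- User tables and materialized views from pg_class, plus every tableid already
-- recorded in diskquota.table_size (segid = -1), so that stale entries are also
-- returned and can be invalidated by the caller.
select oid from pg_catalog.pg_class where oid >= $1 and relkind in ('r', 'm')
 union distinct
 select tableid from diskquota.table_size where segid = -1;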
28 changes: 27 additions & 1 deletion src/quotamodel.c
@@ -222,6 +222,8 @@ static bool get_table_size_entry_flag(TableSizeEntry *entry, TableSizeEntryFlag
static void reset_table_size_entry_flag(TableSizeEntry *entry, TableSizeEntryFlag flag);
static void set_table_size_entry_flag(TableSizeEntry *entry, TableSizeEntryFlag flag);

static void delete_from_table_size_map(char *str);

/*
* put QuotaInfoEntry into quota_info_map and return this entry.
* return NULL: no free SHM for quota_info_map
@@ -911,6 +913,10 @@ calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map)
TableEntryKey active_table_key;
List *oidlist;
ListCell *l;
int delete_entries_num = 0;
StringInfoData delete_statement;

initStringInfo(&delete_statement);

/*
* unset is_exist flag for tsentry in table_size_map this is used to
@@ -927,7 +933,7 @@ calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map)
* calculate the file size for active table and update namespace_size_map
* and role_size_map
*/
oidlist = get_rel_oid_list();
oidlist = get_rel_oid_list(is_init);

oidlist = merge_uncommitted_table_to_oidlist(oidlist);

@@ -961,6 +967,23 @@ calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map)
{
elog(WARNING, "cache lookup failed for relation %u", relOid);
LWLockRelease(diskquota_locks.relation_cache_lock);

if (!is_init) continue;

for (int i = -1; i < SEGCOUNT; i++)
{
appendStringInfo(&delete_statement, "%s(%u,%d)", (delete_entries_num == 0) ? " " : ", ", relOid, i);

delete_entries_num++;

if (delete_entries_num > SQL_MAX_VALUES_NUMBER)
{
delete_from_table_size_map(delete_statement.data);
resetStringInfo(&delete_statement);
delete_entries_num = 0;
}
}

continue;
}
relnamespace = relation_entry->namespaceoid;
@@ -1100,6 +1123,9 @@ calculate_table_disk_usage(bool is_init, HTAB *local_active_table_stat_map)
}
}

if (delete_entries_num) delete_from_table_size_map(delete_statement.data);

pfree(delete_statement.data);
list_free(oidlist);

/*
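
During the is_init pass, the hunk above collects (relOid, segid) pairs for every relation whose cache lookup fails and flushes them through delete_from_table_size_map whenever the batch grows past SQL_MAX_VALUES_NUMBER, with one final flush after the loop. delete_from_table_size_map itself is not shown in this diff, so the statement it builds is an assumption; presumably the accumulated pairs end up in a DELETE of roughly this shape (OID 16384 and a two-segment cluster are made-up example values):

-- Hypothetical batched delete assembled from delete_statement; pairs are
-- (tableid, segid), with -1 for the aggregate row and 0..SEGCOUNT-1 for segments.
-- The real SQL is built inside delete_from_table_size_map, not shown here.
DELETE FROM diskquota.table_size
WHERE (tableid, segid) IN ((16384, -1), (16384, 0), (16384, 1));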
49 changes: 49 additions & 0 deletions tests/isolation2/expected/test_dropped_table.out
@@ -0,0 +1,49 @@
-- Ensure diskquota does not save information about dropped table during restart cluster by invalidates it at startup

1: CREATE SCHEMA dropped_schema;
CREATE
1: SET search_path TO dropped_schema;
SET
1: SELECT diskquota.set_schema_quota('dropped_schema', '1 MB');
set_schema_quota
------------------

(1 row)
1: SELECT diskquota.wait_for_worker_new_epoch();
wait_for_worker_new_epoch
---------------------------
t
(1 row)
1: CREATE TABLE dropped_table(id int) DISTRIBUTED BY (id);
CREATE
1: INSERT INTO dropped_table SELECT generate_series(1, 100000);
INSERT 100000
-- Wait for the diskquota bgworker refreshing the size of 'dropped_table'.
1: SELECT diskquota.wait_for_worker_new_epoch();
wait_for_worker_new_epoch
---------------------------
t
(1 row)
1: DROP TABLE dropped_table;
DROP
1q: ... <quitting>

-- Restart cluster fastly
!\retcode gpstop -afr;
-- start_ignore
-- end_ignore
(exited with code 0)

-- Indicates that there is no dropped table in pg_catalog.pg_class
1: SELECT oid FROM pg_catalog.pg_class WHERE relname = 'dropped_table';
oid
-----
(0 rows)
-- Indicates that there are no entries in diskquota.table_size that are not present in pg_catalog.pg_class
1: SELECT tableid FROM diskquota.table_size WHERE NOT EXISTS (SELECT 1 FROM pg_catalog.pg_class WHERE tableid = oid) AND segid = -1;
tableid
---------
(0 rows)
1: DROP SCHEMA dropped_schema CASCADE;
DROP
1q: ... <quitting>
47 changes: 47 additions & 0 deletions tests/isolation2/expected/test_temporary_table.out
@@ -0,0 +1,47 @@
-- Ensure diskquota does not save information about temporary table during restart cluster by invalidates it at startup

1: CREATE SCHEMA temporary_schema;
CREATE
1: SET search_path TO temporary_schema;
SET
1: SELECT diskquota.set_schema_quota('temporary_schema', '1 MB');
set_schema_quota
------------------

(1 row)
1: SELECT diskquota.wait_for_worker_new_epoch();
wait_for_worker_new_epoch
---------------------------
t
(1 row)
1: CREATE TEMPORARY TABLE temporary_table(id int) DISTRIBUTED BY (id);
CREATE
1: INSERT INTO temporary_table SELECT generate_series(1, 100000);
INSERT 100000
-- Wait for the diskquota bgworker refreshing the size of 'temporary_table'.
1: SELECT diskquota.wait_for_worker_new_epoch();
wait_for_worker_new_epoch
---------------------------
t
(1 row)
1q: ... <quitting>

-- Restart cluster fastly
!\retcode gpstop -afr;
-- start_ignore
-- end_ignore
(exited with code 0)

-- Indicates that there is no temporary table in pg_catalog.pg_class
1: SELECT oid FROM pg_catalog.pg_class WHERE relname = 'temporary_table';
oid
-----
(0 rows)
-- Indicates that there are no entries in diskquota.table_size that are not present in pg_catalog.pg_class
1: SELECT tableid FROM diskquota.table_size WHERE NOT EXISTS (SELECT 1 FROM pg_catalog.pg_class WHERE tableid = oid) AND segid = -1;
tableid
---------
(0 rows)
1: DROP SCHEMA temporary_schema CASCADE;
DROP
1q: ... <quitting>
2 changes: 2 additions & 0 deletions tests/isolation2/isolation2_schedule
@@ -5,6 +5,8 @@ test: test_relation_size
test: test_rejectmap
test: test_vacuum
test: test_truncate
test: test_temporary_table
test: test_dropped_table
test: test_postmaster_restart
test: test_worker_timeout
test: test_per_segment_config
22 changes: 22 additions & 0 deletions tests/isolation2/sql/test_dropped_table.sql
@@ -0,0 +1,22 @@
-- Ensure diskquota does not save information about dropped table during restart cluster by invalidates it at startup

1: CREATE SCHEMA dropped_schema;
1: SET search_path TO dropped_schema;
1: SELECT diskquota.set_schema_quota('dropped_schema', '1 MB');
1: SELECT diskquota.wait_for_worker_new_epoch();
1: CREATE TABLE dropped_table(id int) DISTRIBUTED BY (id);
1: INSERT INTO dropped_table SELECT generate_series(1, 100000);
-- Wait for the diskquota bgworker refreshing the size of 'dropped_table'.
1: SELECT diskquota.wait_for_worker_new_epoch();
1: DROP TABLE dropped_table;
1q:

-- Restart cluster fastly
!\retcode gpstop -afr;

-- Indicates that there is no dropped table in pg_catalog.pg_class
1: SELECT oid FROM pg_catalog.pg_class WHERE relname = 'dropped_table';
-- Indicates that there are no entries in diskquota.table_size that are not present in pg_catalog.pg_class
1: SELECT tableid FROM diskquota.table_size WHERE NOT EXISTS (SELECT 1 FROM pg_catalog.pg_class WHERE tableid = oid) AND segid = -1;
1: DROP SCHEMA dropped_schema CASCADE;
1q:
21 changes: 21 additions & 0 deletions tests/isolation2/sql/test_temporary_table.sql
@@ -0,0 +1,21 @@
-- Ensure diskquota does not save information about temporary table during restart cluster by invalidates it at startup

1: CREATE SCHEMA temporary_schema;
1: SET search_path TO temporary_schema;
1: SELECT diskquota.set_schema_quota('temporary_schema', '1 MB');
1: SELECT diskquota.wait_for_worker_new_epoch();
1: CREATE TEMPORARY TABLE temporary_table(id int) DISTRIBUTED BY (id);
1: INSERT INTO temporary_table SELECT generate_series(1, 100000);
-- Wait for the diskquota bgworker refreshing the size of 'temporary_table'.
1: SELECT diskquota.wait_for_worker_new_epoch();
1q:

-- Restart cluster fastly
!\retcode gpstop -afr;

-- Indicates that there is no temporary table in pg_catalog.pg_class
1: SELECT oid FROM pg_catalog.pg_class WHERE relname = 'temporary_table';
-- Indicates that there are no entries in diskquota.table_size that are not present in pg_catalog.pg_class
1: SELECT tableid FROM diskquota.table_size WHERE NOT EXISTS (SELECT 1 FROM pg_catalog.pg_class WHERE tableid = oid) AND segid = -1;
1: DROP SCHEMA temporary_schema CASCADE;
1q:
