From 2fa1f766abe989a274aa916f13f40fc9ded2c293 Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Fri, 5 Apr 2024 20:31:26 +0300 Subject: [PATCH 1/5] XLog encryption prototype Encrypt/decrypt whole Pages while pwrite/pread see TODOs in the code --- pg.patch | 113 ++++++++++++++++++++++ src/access/pg_tde_xlog.c | 159 +++++++++++++++++++++++++++++++ src/include/access/pg_tde_xlog.h | 12 +++ src/pg_tde.c | 1 + 4 files changed, 285 insertions(+) create mode 100644 pg.patch diff --git a/pg.patch b/pg.patch new file mode 100644 index 00000000..b494d9f3 --- /dev/null +++ b/pg.patch @@ -0,0 +1,113 @@ +diff --git a/contrib/meson.build b/contrib/meson.build +index bd4a57c43c..faf90133ec 100644 +--- a/contrib/meson.build ++++ b/contrib/meson.build +@@ -52,6 +52,7 @@ subdir('pg_trgm') + subdir('pg_visibility') + subdir('pg_walinspect') + subdir('postgres_fdw') ++subdir('postgres-tde-ext') + subdir('seg') + subdir('sepgsql') + subdir('spi') +diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c +index 8b0710abe6..e2c89a6e1f 100644 +--- a/src/backend/access/transam/xlog.c ++++ b/src/backend/access/transam/xlog.c +@@ -62,6 +62,7 @@ + #include "access/xlogprefetcher.h" + #include "access/xlogreader.h" + #include "access/xlogrecovery.h" ++#include "access/xlog_smgr.h" + #include "access/xlogutils.h" + #include "backup/basebackup.h" + #include "catalog/catversion.h" +@@ -2194,7 +2195,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) + INSTR_TIME_SET_ZERO(start); + + pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); +- written = pg_pwrite(openLogFile, from, nleft, startoffset); ++ written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset); + pgstat_report_wait_end(); + + /* +diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c +index c9f9f6e98f..b7a8ab0b79 100644 +--- a/src/backend/access/transam/xlogreader.c ++++ b/src/backend/access/transam/xlogreader.c +@@ -29,6 +29,7 @@ + #include 
"access/xlog_internal.h" + #include "access/xlogreader.h" + #include "access/xlogrecord.h" ++#include "access/xlog_smgr.h" + #include "catalog/pg_control.h" + #include "common/pg_lzcompress.h" + #include "replication/origin.h" +@@ -65,6 +66,21 @@ static void WALOpenSegmentInit(WALOpenSegment *seg, WALSegmentContext *segcxt, + */ + #define DEFAULT_DECODE_BUFFER_SIZE (64 * 1024) + ++/* ++ * XLog storage manager ++ * ++ * TODO: should be in xlog.c or new xlog_smgr.c ? ++ * Now it's here because pg_rewind and other tools compile only ++ * w/ xlogreader.c ++ */ ++XLogSmgr *xlog_smgr = &xlog_smgr_standard; ++ ++void ++SetXLogSmgr(XLogSmgr *xlsmgr) ++{ ++ xlog_smgr = xlsmgr; ++} ++ + /* + * Construct a string in state->errormsg_buf explaining what's wrong with + * the current record being read. +@@ -1543,7 +1559,7 @@ WALRead(XLogReaderState *state, + + /* Reset errno first; eases reporting non-errno-affecting errors */ + errno = 0; +- readbytes = pg_pread(state->seg.ws_file, p, segbytes, (off_t) startoff); ++ readbytes = xlog_smgr->seg_read(state->seg.ws_file, p, segbytes, (off_t) startoff); + + #ifndef FRONTEND + pgstat_report_wait_end(); +diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c +index becc2bda62..3d5010554f 100644 +--- a/src/backend/access/transam/xlogrecovery.c ++++ b/src/backend/access/transam/xlogrecovery.c +@@ -39,6 +39,7 @@ + #include "access/xlogprefetcher.h" + #include "access/xlogreader.h" + #include "access/xlogrecovery.h" ++#include "access/xlog_smgr.h" + #include "access/xlogutils.h" + #include "backup/basebackup.h" + #include "catalog/pg_control.h" +@@ -3313,7 +3314,7 @@ retry: + readOff = targetPageOff; + + pgstat_report_wait_start(WAIT_EVENT_WAL_READ); +- r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); ++ r = xlog_smgr->seg_read(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff); + if (r != XLOG_BLCKSZ) + { + char fname[MAXFNAMELEN]; +diff --git a/src/include/access/xlog_internal.h 
b/src/include/access/xlog_internal.h +index b0fd338a00..e42b131b36 100644 +--- a/src/include/access/xlog_internal.h ++++ b/src/include/access/xlog_internal.h +@@ -78,8 +78,10 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; + #define XLP_BKP_REMOVABLE 0x0004 + /* Replaces a missing contrecord; see CreateOverwriteContrecordRecord */ + #define XLP_FIRST_IS_OVERWRITE_CONTRECORD 0x0008 ++/* The page is encrypted */ ++#define XLP_ENCRYPTED 0x0010 + /* All defined flag bits in xlp_info (used for validity checking of header) */ +-#define XLP_ALL_FLAGS 0x000F ++#define XLP_ALL_FLAGS 0x001F + + #define XLogPageHeaderSize(hdr) \ + (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) diff --git a/src/access/pg_tde_xlog.c b/src/access/pg_tde_xlog.c index b87344e9..149e8751 100644 --- a/src/access/pg_tde_xlog.c +++ b/src/access/pg_tde_xlog.c @@ -15,11 +15,20 @@ #include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xloginsert.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" #include "access/pg_tde_tdemap.h" #include "access/pg_tde_xlog.h" #include "catalog/tde_master_key.h" +#include "encryption/enc_tde.h" + +/* Buffer for the XLog encryption */ +static char *TDEXLogEncryptBuf; + +static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, uint32 offset, char* iv_prefix); +static int XLOGChooseNumBuffers(void); /* * TDE fork XLog */ @@ -103,3 +112,153 @@ pg_tde_rmgr_identify(uint8 info) return NULL; } + +/* + * XLog Storage Manager + * TODO: + * - Should be a config option "on/off"? + * - Currently it encrypts WAL XLog Pages, should we encrypt whole Segments? `initdb` for + * example generates a write of 312 pages - so 312 "gen IV" and "encrypt" runs instead of one. + * Would require though an extra read() during recovery/was_send etc to check `XLogPageHeader` + * if segment is encrypted. + * We could also encrypt Records while adding them to the XLog Buf but it'll be the slowest (?). 
+ */ +static int +XLOGChooseNumBuffers(void) +{ + int xbuffers; + + xbuffers = NBuffers / 32; + if (xbuffers > (wal_segment_size / XLOG_BLCKSZ)) + xbuffers = (wal_segment_size / XLOG_BLCKSZ); + if (xbuffers < 8) + xbuffers = 8; + return xbuffers; +} + +void +TDEInitXLogSmgr(void) +{ + int xbuffers; + + /* + * Alloc memory for encrypition buffer. I should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). + * We can't (re)alloc this buf in pg_tde_xlog_seg_write() based on the write sezie as + * it's called in the CRIT section, hence no allocations are allowed. + * + * TODO: + * - alloc in the shmem to save memory + * - ? alloc smaller (config option) and write in chunks (slower)? + */ + xbuffers = (XLOGbuffers == -1) ? XLOGChooseNumBuffers() : XLOGbuffers; + TDEXLogEncryptBuf = (char *) MemoryContextAlloc(TopMemoryContext, (Size) XLOG_BLCKSZ * xbuffers); + memset(TDEXLogEncryptBuf, 0, (Size) XLOG_BLCKSZ * 512); + + SetXLogSmgr(&tde_xlog_smgr); +} + +/* + * TODO: proper key management + * where to store the ref to the master and internal key? 
+ */ +static InternalKey XLogInternalKey = {.key = {0xD,}}; + +ssize_t +pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) +{ + Size page_off = 0; + char iv_prefix[16] = {0,}; + uint32 data_size = 0; + XLogPageHeader page; + XLogPageHeader crypt_page; + RelKeyData key = {.internal_key = XLogInternalKey}; + + Assert((count % (Size) XLOG_BLCKSZ) == 0); + + elog(DEBUG1, "==> pg_tde_xlog_seg_WRITE, pages: %d", count / (Size) XLOG_BLCKSZ); + + /* Encrypt pages */ + for (page_off = 0; page_off < count; page_off += (Size) XLOG_BLCKSZ) + { + page = (XLogPageHeader) ((char *) buf + page_off); + + Assert(page->xlp_magic == XLOG_PAGE_MAGIC); + + crypt_page = (XLogPageHeader) (((char *) TDEXLogEncryptBuf) + page_off); + memcpy(crypt_page, page, (Size) XLogPageHeaderSize(page)); + crypt_page->xlp_info |= XLP_ENCRYPTED; + + data_size = (uint32) XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(crypt_page); + SetXLogPageIVPrefix(crypt_page->xlp_tli, crypt_page->xlp_pageaddr, offset + page_off, iv_prefix); + PG_TDE_ENCRYPT_DATA(iv_prefix, 0, (char *) page + XLogPageHeaderSize(page), data_size, (char *) crypt_page + (Size) XLogPageHeaderSize(crypt_page), &key); + } + + return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset); +} + +ssize_t +pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) +{ + ssize_t readsz; + Size page_off; + char iv_prefix[16] = {0,}; + uint32 data_size = 0; + XLogPageHeader page; + RelKeyData key = {.internal_key = XLogInternalKey}; + char *decrypt_buf = NULL; + + elog(DEBUG1, "==> pg_tde_xlog_seg_READ, pages: %d", count / (Size) XLOG_BLCKSZ); + + readsz = pg_pread(fd, buf, count, offset); + + for (page_off = 0; page_off < count; page_off += (Size) XLOG_BLCKSZ) + { + page = (XLogPageHeader) ((char *) buf + page_off); + + Assert(page->xlp_magic == XLOG_PAGE_MAGIC); + + if (page->xlp_info & XLP_ENCRYPTED) + { + if (decrypt_buf == NULL) { + decrypt_buf = (char *) palloc(XLOG_BLCKSZ - SizeOfXLogShortPHD); + } + data_size = (uint32) 
XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(page); + SetXLogPageIVPrefix(page->xlp_tli, page->xlp_pageaddr, offset + page_off, iv_prefix); + PG_TDE_DECRYPT_DATA(iv_prefix, 0, (char *) page + XLogPageHeaderSize(page), data_size, decrypt_buf, &key); + + memcpy((char *) page + XLogPageHeaderSize(page), decrypt_buf, data_size); + } + } + + if (decrypt_buf != NULL) { + pfree(decrypt_buf); + } + + return readsz; +} + +/* IV: TLI(uint32) + XLogRecPtr(uint64) + Off(uint32)*/ +static void +SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, uint32 offset, char* iv_prefix) +{ + elog(DEBUG1, "==> XlogIV %u, %lu, %u", tli, lsn, offset); + + iv_prefix[0] = (tli >> 24); + iv_prefix[1] = ((tli >> 16) & 0xFF); + iv_prefix[2] = ((tli >> 8) & 0xFF); + iv_prefix[3] = (tli & 0xFF); + + iv_prefix[4] = (lsn >> 56); + iv_prefix[5] = ((lsn >> 48) & 0xFF); + iv_prefix[6] = ((lsn >> 40) & 0xFF); + iv_prefix[7] = ((lsn >> 32) & 0xFF); + iv_prefix[8] = ((lsn >> 24) & 0xFF); + iv_prefix[9] = ((lsn >> 16) & 0xFF); + iv_prefix[10] = ((lsn >> 8) & 0xFF); + iv_prefix[11] = (lsn & 0xFF); + + iv_prefix[12] = (offset >> 24); + iv_prefix[13] = ((offset >> 16) & 0xFF); + iv_prefix[14] = ((offset >> 8) & 0xFF); + iv_prefix[15] = (offset & 0xFF); +} \ No newline at end of file diff --git a/src/include/access/pg_tde_xlog.h b/src/include/access/pg_tde_xlog.h index bc32c979..14aba09d 100644 --- a/src/include/access/pg_tde_xlog.h +++ b/src/include/access/pg_tde_xlog.h @@ -9,7 +9,9 @@ #ifndef PG_TDE_XLOG_H #define PG_TDE_XLOG_H +#include "postgres.h" #include "access/xlog_internal.h" +#include "access/xlog_smgr.h" /* TDE XLOG resource manager */ #define XLOG_TDE_ADD_RELATION_KEY 0x00 @@ -32,4 +34,14 @@ static const RmgrData pg_tde_rmgr = { .rm_identify = pg_tde_rmgr_identify }; +extern ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset); +extern ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset); + +static const XLogSmgr tde_xlog_smgr = { + .seg_read = 
pg_tde_xlog_seg_read, + .seg_write = pg_tde_xlog_seg_write, +}; + +extern void TDEInitXLogSmgr(void); + #endif /* PG_TDE_XLOG_H */ diff --git a/src/pg_tde.c b/src/pg_tde.c index 7b076592..324f7281 100644 --- a/src/pg_tde.c +++ b/src/pg_tde.c @@ -98,6 +98,7 @@ _PG_init(void) InstallFileKeyring(); InstallVaultV2Keyring(); RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr); + TDEInitXLogSmgr(); } Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS) From 332044877225ae31fb2ee2dc1d8017c639269629 Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Mon, 8 Apr 2024 20:08:59 +0300 Subject: [PATCH 2/5] Use proper IV and counter --- src/access/pg_tde_xlog.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/access/pg_tde_xlog.c b/src/access/pg_tde_xlog.c index 149e8751..d20d96a6 100644 --- a/src/access/pg_tde_xlog.c +++ b/src/access/pg_tde_xlog.c @@ -27,7 +27,7 @@ /* Buffer for the XLog encryption */ static char *TDEXLogEncryptBuf; -static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, uint32 offset, char* iv_prefix); +static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix); static int XLOGChooseNumBuffers(void); /* * TDE fork XLog @@ -189,8 +189,8 @@ pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) crypt_page->xlp_info |= XLP_ENCRYPTED; data_size = (uint32) XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(crypt_page); - SetXLogPageIVPrefix(crypt_page->xlp_tli, crypt_page->xlp_pageaddr, offset + page_off, iv_prefix); - PG_TDE_ENCRYPT_DATA(iv_prefix, 0, (char *) page + XLogPageHeaderSize(page), data_size, (char *) crypt_page + (Size) XLogPageHeaderSize(crypt_page), &key); + SetXLogPageIVPrefix(crypt_page->xlp_tli, crypt_page->xlp_pageaddr, iv_prefix); + PG_TDE_ENCRYPT_DATA(iv_prefix, (uint32) offset + page_off, (char *) page + XLogPageHeaderSize(page), data_size, (char *) crypt_page + (Size) XLogPageHeaderSize(crypt_page), &key); } return pg_pwrite(fd, TDEXLogEncryptBuf, count, 
offset); @@ -223,8 +223,8 @@ pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) decrypt_buf = (char *) palloc(XLOG_BLCKSZ - SizeOfXLogShortPHD); } data_size = (uint32) XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(page); - SetXLogPageIVPrefix(page->xlp_tli, page->xlp_pageaddr, offset + page_off, iv_prefix); - PG_TDE_DECRYPT_DATA(iv_prefix, 0, (char *) page + XLogPageHeaderSize(page), data_size, decrypt_buf, &key); + SetXLogPageIVPrefix(page->xlp_tli, page->xlp_pageaddr, iv_prefix); + PG_TDE_DECRYPT_DATA(iv_prefix, (uint32) offset + page_off, (char *) page + XLogPageHeaderSize(page), data_size, decrypt_buf, &key); memcpy((char *) page + XLogPageHeaderSize(page), decrypt_buf, data_size); } @@ -237,11 +237,11 @@ pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) return readsz; } -/* IV: TLI(uint32) + XLogRecPtr(uint64) + Off(uint32)*/ +/* IV: TLI(uint32) + XLogRecPtr(uint64)*/ static void -SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, uint32 offset, char* iv_prefix) +SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix) { - elog(DEBUG1, "==> XlogIV %u, %lu, %u", tli, lsn, offset); + elog(DEBUG1, "==> XlogIV %u, %lu", tli, lsn); iv_prefix[0] = (tli >> 24); iv_prefix[1] = ((tli >> 16) & 0xFF); @@ -256,9 +256,4 @@ SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, uint32 offset, char* iv_pref iv_prefix[9] = ((lsn >> 16) & 0xFF); iv_prefix[10] = ((lsn >> 8) & 0xFF); iv_prefix[11] = (lsn & 0xFF); - - iv_prefix[12] = (offset >> 24); - iv_prefix[13] = ((offset >> 16) & 0xFF); - iv_prefix[14] = ((offset >> 8) & 0xFF); - iv_prefix[15] = (offset & 0xFF); } \ No newline at end of file From b0a60e9df9856d28b9730ce0d5f24e90de63e4bc Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Thu, 11 Apr 2024 14:25:36 +0300 Subject: [PATCH 3/5] Use shmem for the encrypt buffer --- src/access/pg_tde_xlog.c | 68 +++++++++++++++++++++----------- src/include/access/pg_tde_xlog.h | 9 +++++ src/include/pg_tde_defines.h | 1 + src/pg_tde.c | 6 
++- 4 files changed, 60 insertions(+), 24 deletions(-) diff --git a/src/access/pg_tde_xlog.c b/src/access/pg_tde_xlog.c index d20d96a6..0e9ea9bd 100644 --- a/src/access/pg_tde_xlog.c +++ b/src/access/pg_tde_xlog.c @@ -12,10 +12,12 @@ #include "postgres.h" +#include "pg_tde_defines.h" #include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xloginsert.h" #include "storage/bufmgr.h" +#include "storage/shmem.h" #include "utils/memutils.h" #include "access/pg_tde_tdemap.h" @@ -24,8 +26,7 @@ #include "encryption/enc_tde.h" -/* Buffer for the XLog encryption */ -static char *TDEXLogEncryptBuf; +static char *TDEXLogEncryptBuf = NULL; static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix); static int XLOGChooseNumBuffers(void); @@ -123,6 +124,7 @@ pg_tde_rmgr_identify(uint8 info) * if segment is encrypted. * We could also encrypt Records while adding them to the XLog Buf but it'll be the slowest (?). */ + static int XLOGChooseNumBuffers(void) { @@ -136,30 +138,49 @@ XLOGChooseNumBuffers(void) return xbuffers; } -void -TDEInitXLogSmgr(void) +/* + * Defines the size of the XLog encryption buffer + */ +Size +TDEXLogEncryptBuffSize() { - int xbuffers; - - /* - * Alloc memory for encrypition buffer. I should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). - * We can't (re)alloc this buf in pg_tde_xlog_seg_write() based on the write sezie as - * it's called in the CRIT section, hence no allocations are allowed. - * - * TODO: - * - alloc in the shmem to save memory - * - ? alloc smaller (config option) and write in chunks (slower)? - */ + int xbuffers; + xbuffers = (XLOGbuffers == -1) ? XLOGChooseNumBuffers() : XLOGbuffers; - TDEXLogEncryptBuf = (char *) MemoryContextAlloc(TopMemoryContext, (Size) XLOG_BLCKSZ * xbuffers); - memset(TDEXLogEncryptBuf, 0, (Size) XLOG_BLCKSZ * 512); + return (Size) XLOG_BLCKSZ * xbuffers; +} +/* + * Alloc memory for encrypition buffer. + * + * It should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). 
We can't + * (re)alloc this buf in pg_tde_xlog_seg_write() based on the write size as + * it's called in the CRIT section, hence no allocations are allowed. + * + * Access to this buffer happens during XLogWrite() call which should + * be called with WALWriteLock held, hence no need in extra locks. + */ +void +TDEXLogShmemInit(void) +{ + bool foundBuf; + + TDEXLogEncryptBuf = (char *) + TYPEALIGN(PG_IO_ALIGN_SIZE, + ShmemInitStruct("TDE XLog Encrypt Buffer", + XLOG_TDE_ENC_BUFF_ALIGNED_SIZE, + &foundBuf)); +} + +void +TDEInitXLogSmgr(void) +{ SetXLogSmgr(&tde_xlog_smgr); } /* * TODO: proper key management - * where to store the ref to the master and internal key? + * where to store refs to the master and internal keys? */ static InternalKey XLogInternalKey = {.key = {0xD,}}; @@ -175,8 +196,9 @@ pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) Assert((count % (Size) XLOG_BLCKSZ) == 0); - elog(DEBUG1, "==> pg_tde_xlog_seg_WRITE, pages: %d", count / (Size) XLOG_BLCKSZ); - +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "Write to a WAL segment, pages amount: %d", count / (Size) XLOG_BLCKSZ); +#endif /* Encrypt pages */ for (page_off = 0; page_off < count; page_off += (Size) XLOG_BLCKSZ) { @@ -207,7 +229,9 @@ pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) RelKeyData key = {.internal_key = XLogInternalKey}; char *decrypt_buf = NULL; - elog(DEBUG1, "==> pg_tde_xlog_seg_READ, pages: %d", count / (Size) XLOG_BLCKSZ); +#ifdef TDE_XLOG_DEBUG + elog(DEBUG1, "Read from a WAL segment, pages amount: %d", count / (Size) XLOG_BLCKSZ); +#endif readsz = pg_pread(fd, buf, count, offset); @@ -241,8 +265,6 @@ pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix) { - elog(DEBUG1, "==> XlogIV %u, %lu", tli, lsn); - iv_prefix[0] = (tli >> 24); iv_prefix[1] = ((tli >> 16) & 0xFF); iv_prefix[2] = ((tli >> 8) & 0xFF); diff --git a/src/include/access/pg_tde_xlog.h 
b/src/include/access/pg_tde_xlog.h index 14aba09d..80601241 100644 --- a/src/include/access/pg_tde_xlog.h +++ b/src/include/access/pg_tde_xlog.h @@ -10,6 +10,7 @@ #define PG_TDE_XLOG_H #include "postgres.h" +#include "access/xlog.h" #include "access/xlog_internal.h" #include "access/xlog_smgr.h" @@ -34,6 +35,14 @@ static const RmgrData pg_tde_rmgr = { .rm_identify = pg_tde_rmgr_identify }; +/* XLog encryption stuff */ + +extern Size TDEXLogEncryptBuffSize(); + +#define XLOG_TDE_ENC_BUFF_ALIGNED_SIZE add_size(TDEXLogEncryptBuffSize(), PG_IO_ALIGN_SIZE) + +extern void TDEXLogShmemInit(void); + extern ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset); extern ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset); diff --git a/src/include/pg_tde_defines.h b/src/include/pg_tde_defines.h index aaa49722..0c9847bb 100644 --- a/src/include/pg_tde_defines.h +++ b/src/include/pg_tde_defines.h @@ -22,6 +22,7 @@ //#define ENCRYPTION_DEBUG 1 //#define KEYRING_DEBUG 1 //#define TDE_FORK_DEBUG 1 +// #define TDE_XLOG_DEBUG 1 #define pg_tde_fill_tuple heap_fill_tuple #define pg_tde_form_tuple heap_form_tuple diff --git a/src/pg_tde.c b/src/pg_tde.c index 324f7281..6070e0c9 100644 --- a/src/pg_tde.c +++ b/src/pg_tde.c @@ -59,6 +59,9 @@ tde_shmem_request(void) { Size sz = TdeRequiredSharedMemorySize(); int required_locks = TdeRequiredLocksCount(); + + sz = add_size(sz, XLOG_TDE_ENC_BUFF_ALIGNED_SIZE); + if (prev_shmem_request_hook) prev_shmem_request_hook(); RequestAddinShmemSpace(sz); @@ -74,6 +77,8 @@ tde_shmem_startup(void) TdeShmemInit(); AesInit(); + TDEXLogShmemInit(); + TDEInitXLogSmgr(); } void @@ -98,7 +103,6 @@ _PG_init(void) InstallFileKeyring(); InstallVaultV2Keyring(); RegisterCustomRmgr(RM_TDERMGR_ID, &pg_tde_rmgr); - TDEInitXLogSmgr(); } Datum pg_tde_extension_initialize(PG_FUNCTION_ARGS) From 555cdc30d218ba6c9a0bf453efeb9c180e79a7db Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Wed, 24 Apr 2024 15:44:07 
+0300 Subject: [PATCH 4/5] Handle streaming replication --- src/access/pg_tde_xlog.c | 139 +++++++++++++++++++++++++++++---------- 1 file changed, 103 insertions(+), 36 deletions(-) diff --git a/src/access/pg_tde_xlog.c b/src/access/pg_tde_xlog.c index 0e9ea9bd..b2ca4350 100644 --- a/src/access/pg_tde_xlog.c +++ b/src/access/pg_tde_xlog.c @@ -28,6 +28,9 @@ static char *TDEXLogEncryptBuf = NULL; +static XLogPageHeaderData EncryptCurrentPageHrd; +static XLogPageHeaderData DecryptCurrentPageHrd; + static void SetXLogPageIVPrefix(TimeLineID tli, XLogRecPtr lsn, char* iv_prefix); static int XLOGChooseNumBuffers(void); /* @@ -151,7 +154,7 @@ TDEXLogEncryptBuffSize() } /* - * Alloc memory for encrypition buffer. + * Alloc memory for the encryption buffer. * * It should fit XLog buffers (XLOG_BLCKSZ * wal_buffers). We can't * (re)alloc this buf in pg_tde_xlog_seg_write() based on the write size as @@ -184,78 +187,142 @@ TDEInitXLogSmgr(void) */ static InternalKey XLogInternalKey = {.key = {0xD,}}; +/* + * Encrypt XLog page(s) from the buf and write to the segment file. + */ ssize_t pg_tde_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset) { - Size page_off = 0; char iv_prefix[16] = {0,}; uint32 data_size = 0; - XLogPageHeader page; - XLogPageHeader crypt_page; + XLogPageHeader curr_page_hdr = &EncryptCurrentPageHrd; + XLogPageHeader enc_buf_page; RelKeyData key = {.internal_key = XLogInternalKey}; + off_t enc_off; + size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; + uint32 iv_ctr = 0; - Assert((count % (Size) XLOG_BLCKSZ) == 0); #ifdef TDE_XLOG_DEBUG - elog(DEBUG1, "Write to a WAL segment, pages amount: %d", count / (Size) XLOG_BLCKSZ); + elog(DEBUG1, "write to a WAL segment, pages amount: %d, size: %lu offset: %ld", count / (Size) XLOG_BLCKSZ, count, offset); #endif - /* Encrypt pages */ - for (page_off = 0; page_off < count; page_off += (Size) XLOG_BLCKSZ) + + /* + * Go through the buf page-by-page and encrypt them. 
+ * We may start or finish writing from/in the middle of the page + * (walsender or `full_page_writes = off`). So preserve a page header + * for the IV init data. + * + * TODO: check if walsender restarts from the beginning of the page + * in case of the crash. + */ + for (enc_off = 0; enc_off < count;) { - page = (XLogPageHeader) ((char *) buf + page_off); + if (page_size == XLOG_BLCKSZ) + { + memcpy((char *) curr_page_hdr, (char *) buf + enc_off, SizeOfXLogShortPHD); + + /* + * Need to use a separate buf for the encryption so the page remains non-crypted + * in the XLog buf (XLogInsert has to have access to records' lsn). + */ + enc_buf_page = (XLogPageHeader) (((char *) TDEXLogEncryptBuf) + enc_off); + memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) XLogPageHeaderSize(curr_page_hdr)); + enc_buf_page->xlp_info |= XLP_ENCRYPTED; + + enc_off += XLogPageHeaderSize(curr_page_hdr); + /* it's a beginning of the page */ + iv_ctr = 0; + } + else + { + /* we're in the middle of the page */ + iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); + } - Assert(page->xlp_magic == XLOG_PAGE_MAGIC); + data_size = Min((enc_off / XLOG_BLCKSZ +1) * XLOG_BLCKSZ, count) - enc_off; - crypt_page = (XLogPageHeader) (((char *) TDEXLogEncryptBuf) + page_off); - memcpy(crypt_page, page, (Size) XLogPageHeaderSize(page)); - crypt_page->xlp_info |= XLP_ENCRYPTED; + /* the page is zeroed (no data), no sense to encrypt */ + if (curr_page_hdr->xlp_magic == 0) + { + memcpy((char *) enc_buf_page, (char *) buf + enc_off, (Size) data_size); + } + else + { + SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); + PG_TDE_ENCRYPT_DATA(iv_prefix, iv_ctr, (char *) buf + enc_off, data_size, + (char *) TDEXLogEncryptBuf + enc_off, &key); + } - data_size = (uint32) XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(crypt_page); - SetXLogPageIVPrefix(crypt_page->xlp_tli, crypt_page->xlp_pageaddr, iv_prefix); - PG_TDE_ENCRYPT_DATA(iv_prefix, (uint32) offset + 
page_off, (char *) page + XLogPageHeaderSize(page), data_size, (char *) crypt_page + (Size) XLogPageHeaderSize(crypt_page), &key); + page_size = XLOG_BLCKSZ; + enc_off += data_size; } return pg_pwrite(fd, TDEXLogEncryptBuf, count, offset); } +/* + * Read the XLog pages from the segment file and decrypt if needed. + */ ssize_t pg_tde_xlog_seg_read(int fd, void *buf, size_t count, off_t offset) { ssize_t readsz; - Size page_off; char iv_prefix[16] = {0,}; uint32 data_size = 0; - XLogPageHeader page; + XLogPageHeader curr_page_hdr = &DecryptCurrentPageHrd; RelKeyData key = {.internal_key = XLogInternalKey}; - char *decrypt_buf = NULL; + size_t page_size = XLOG_BLCKSZ - offset % XLOG_BLCKSZ; + off_t dec_off; + uint32 iv_ctr = 0; #ifdef TDE_XLOG_DEBUG - elog(DEBUG1, "Read from a WAL segment, pages amount: %d", count / (Size) XLOG_BLCKSZ); + elog(DEBUG1, "read from a WAL segment, pages amount: %d / sz: %lu, off: %lu", count / (Size) XLOG_BLCKSZ, count, offset); #endif readsz = pg_pread(fd, buf, count, offset); - for (page_off = 0; page_off < count; page_off += (Size) XLOG_BLCKSZ) + /* + * Read the buf page by page and decrypt encrypted pages. + * We may start or finish reading from/in the middle of the page (walreceiver) + * in such a case we should preserve the last read page header for + * the IV data and the encryption state. + * + * TODO: check if walsender/receiver restarts from the beginning of the page + * in case of the crash. 
+ */ + for (dec_off = 0; dec_off < count;) { - page = (XLogPageHeader) ((char *) buf + page_off); + if (page_size == XLOG_BLCKSZ) + { + memcpy((char *) curr_page_hdr, (char *) buf + dec_off, SizeOfXLogShortPHD); + + /* set the flag to "not encrypted" for the walreceiver */ + ((XLogPageHeader) ((char *) buf + dec_off))->xlp_info &= ~XLP_ENCRYPTED; - Assert(page->xlp_magic == XLOG_PAGE_MAGIC); + Assert(curr_page_hdr->xlp_magic == XLOG_PAGE_MAGIC || curr_page_hdr->xlp_magic == 0); + dec_off += XLogPageHeaderSize(curr_page_hdr); + /* it's a beginning of the page */ + iv_ctr = 0; + } + else + { + /* we're in the middle of the page */ + iv_ctr = (offset % XLOG_BLCKSZ) - XLogPageHeaderSize(curr_page_hdr); + } - if (page->xlp_info & XLP_ENCRYPTED) + data_size = Min((dec_off / XLOG_BLCKSZ +1) * XLOG_BLCKSZ, count) - dec_off; + + if (curr_page_hdr->xlp_info & XLP_ENCRYPTED) { - if (decrypt_buf == NULL) { - decrypt_buf = (char *) palloc(XLOG_BLCKSZ - SizeOfXLogShortPHD); - } - data_size = (uint32) XLOG_BLCKSZ - (uint32) XLogPageHeaderSize(page); - SetXLogPageIVPrefix(page->xlp_tli, page->xlp_pageaddr, iv_prefix); - PG_TDE_DECRYPT_DATA(iv_prefix, (uint32) offset + page_off, (char *) page + XLogPageHeaderSize(page), data_size, decrypt_buf, &key); - - memcpy((char *) page + XLogPageHeaderSize(page), decrypt_buf, data_size); + SetXLogPageIVPrefix(curr_page_hdr->xlp_tli, curr_page_hdr->xlp_pageaddr, iv_prefix); + PG_TDE_DECRYPT_DATA( + iv_prefix, iv_ctr, + (char *) buf + dec_off, data_size, (char *) buf + dec_off, &key); } - } - - if (decrypt_buf != NULL) { - pfree(decrypt_buf); + + page_size = XLOG_BLCKSZ; + dec_off += data_size; } return readsz; From 0713871d06061c49a9df9fda0abe1e34998c74ed Mon Sep 17 00:00:00 2001 From: Andrew Pogrebnoy Date: Wed, 24 Apr 2024 15:58:55 +0300 Subject: [PATCH 5/5] Update PG patch --- pg.patch | 70 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/pg.patch b/pg.patch index 
b494d9f3..fb2f96ea 100644 --- a/pg.patch +++ b/pg.patch @@ -1,17 +1,5 @@ -diff --git a/contrib/meson.build b/contrib/meson.build -index bd4a57c43c..faf90133ec 100644 ---- a/contrib/meson.build -+++ b/contrib/meson.build -@@ -52,6 +52,7 @@ subdir('pg_trgm') - subdir('pg_visibility') - subdir('pg_walinspect') - subdir('postgres_fdw') -+subdir('postgres-tde-ext') - subdir('seg') - subdir('sepgsql') - subdir('spi') diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c -index 8b0710abe6..e2c89a6e1f 100644 +index 8b0710abe6..a0cac5e0f5 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -62,6 +62,7 @@ @@ -22,9 +10,11 @@ index 8b0710abe6..e2c89a6e1f 100644 #include "access/xlogutils.h" #include "backup/basebackup.h" #include "catalog/catversion.h" -@@ -2194,7 +2195,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) +@@ -2193,8 +2194,9 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) + else INSTR_TIME_SET_ZERO(start); ++ pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE); - written = pg_pwrite(openLogFile, from, nleft, startoffset); + written = xlog_smgr->seg_write(openLogFile, from, nleft, startoffset); @@ -95,6 +85,27 @@ index becc2bda62..3d5010554f 100644 if (r != XLOG_BLCKSZ) { char fname[MAXFNAMELEN]; +diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c +index feff709435..f78eef1266 100644 +--- a/src/backend/replication/walreceiver.c ++++ b/src/backend/replication/walreceiver.c +@@ -57,6 +57,7 @@ + #include "access/xlog_internal.h" + #include "access/xlogarchive.h" + #include "access/xlogrecovery.h" ++#include "access/xlog_smgr.h" + #include "catalog/pg_authid.h" + #include "catalog/pg_type.h" + #include "common/ip.h" +@@ -936,7 +937,7 @@ XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr, TimeLineID tli) + /* OK to write the logs */ + errno = 0; + +- byteswritten = pg_pwrite(recvFile, buf, segbytes, (off_t) startoff); ++ 
byteswritten = xlog_smgr->seg_write(recvFile, buf, segbytes, (off_t) startoff); + if (byteswritten <= 0) + { + char xlogfname[MAXFNAMELEN]; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index b0fd338a00..e42b131b36 100644 --- a/src/include/access/xlog_internal.h @@ -111,3 +122,34 @@ index b0fd338a00..e42b131b36 100644 #define XLogPageHeaderSize(hdr) \ (((hdr)->xlp_info & XLP_LONG_HEADER) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD) +diff --git a/src/include/access/xlog_smgr.h b/src/include/access/xlog_smgr.h +new file mode 100644 +index 0000000000..e79e290ddc +--- /dev/null ++++ b/src/include/access/xlog_smgr.h +@@ -0,0 +1,24 @@ ++#ifndef XLOG_SMGR_H ++#define XLOG_SMGR_H ++ ++#include "postgres.h" ++ ++#include ++ ++/* XLog storage manager interface */ ++typedef struct XLogSmgr { ++ ssize_t (*seg_read) (int fd, void *buf, size_t count, off_t offset); ++ ++ ssize_t (*seg_write) (int fd, const void *buf, size_t count, off_t offset); ++} XLogSmgr; ++ ++/* Default (standard) XLog storage manager */ ++static const XLogSmgr xlog_smgr_standard = { ++ .seg_read = pg_pread, ++ .seg_write = pg_pwrite, ++}; ++ ++extern XLogSmgr *xlog_smgr; ++extern void SetXLogSmgr(XLogSmgr *xlsmgr); ++ ++#endif /* XLOG_SMGR_H */ +\ No newline at end of file