Skip to content

Commit 6a8bd08

Browse files
committed
Ensure decrypted internal keys never swapped
We have already had a locked memory for internal keys. But we read and created keys into usually palloced memory before copying them to the locked memory. This commit changes the approach. Now, the user must allocate (acquire) a locked memory first and use it as a buffer for decryption/creating the key. This locked memory is kinda generic, so the user requests an amount of bytes (rather than objects). Although currently, during the key search, we assume that everything the memory contains only `RelKeyCacheRec` objects. With two exceptions: 1) server creates its own locked page to store the WAL write key; 2) `pg_tde_perform_rotate_key` cheekily allocates a space there for the keys re-encryption but releases this memory when it's done. In future, if we switch to the full _map files encryption, we can gulp (mmap) the whole file into that memory. Fixes PG-1445
1 parent 2616e27 commit 6a8bd08

File tree

9 files changed

+205
-181
lines changed

9 files changed

+205
-181
lines changed

contrib/pg_tde/src/access/pg_tde_tdemap.c

+136-145
Large diffs are not rendered by default.

contrib/pg_tde/src/access/pg_tde_xlog_encrypt.c

+37-24
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,7 @@ typedef struct EncryptionStateData
7070

7171
static EncryptionStateData *EncryptionState = NULL;
7272

73-
/* TODO: can be swapped out to the disk */
74-
static InternalKey EncryptionKey =
75-
{
76-
.rel_type = MAP_ENTRY_EMPTY,
77-
.start_lsn = InvalidXLogRecPtr,
78-
};
73+
static InternalKey *EncryptionKey = NULL;
7974
static void *EncryptionCryptCtx = NULL;
8075

8176
static int XLOGChooseNumBuffers(void);
@@ -165,48 +160,67 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
165160
TimeLineID tli, XLogSegNo segno)
166161
{
167162
char iv_prefix[16];
168-
InternalKey *key = &EncryptionKey;
169163
char *enc_buff = EncryptionState->segBuf;
170164

171165
Assert(count <= TDEXLogEncryptBuffSize());
172166

173167
#ifdef TDE_XLOG_DEBUG
174168
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X",
175-
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn));
169+
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(EncryptionKey->start_lsn));
176170
#endif
177171

178-
CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
172+
CalcXLogPageIVPrefix(tli, segno, EncryptionKey->base_iv, iv_prefix);
179173
PG_TDE_ENCRYPT_DATA(iv_prefix, offset,
180174
(char *) buf, count,
181-
enc_buff, key, &EncryptionCryptCtx);
175+
enc_buff, EncryptionKey, &EncryptionCryptCtx);
182176

183177
return pg_pwrite(fd, enc_buff, count, offset);
184178
}
185179

186-
#endif /* !FRONTEND */
187-
188-
void
189-
TDEXLogSmgrInit(void)
180+
static void
181+
tde_init_wal_encryption_key(void)
190182
{
191-
#ifndef FRONTEND
192-
/* TODO: move to the separate func, it's not an SMGR init */
193183
InternalKey *key = pg_tde_read_last_wal_key();
194184

195185
/* TDOO: clean-up this mess */
196186
if ((!key && EncryptXLog) || (key &&
197187
((key->rel_type & TDE_KEY_TYPE_WAL_ENCRYPTED && !EncryptXLog) ||
198188
(key->rel_type & TDE_KEY_TYPE_WAL_UNENCRYPTED && EncryptXLog))))
199189
{
200-
pg_tde_create_wal_key(&EncryptionKey, &GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
201-
(EncryptXLog ? TDE_KEY_TYPE_WAL_ENCRYPTED : TDE_KEY_TYPE_WAL_UNENCRYPTED));
190+
key = pg_tde_create_wal_key(&GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID),
191+
(EncryptXLog ? TDE_KEY_TYPE_WAL_ENCRYPTED : TDE_KEY_TYPE_WAL_UNENCRYPTED));
202192
}
203193
else if (key)
204194
{
205-
EncryptionKey = *key;
206-
pfree(key);
207-
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, EncryptionKey.start_lsn);
195+
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, key->start_lsn);
208196
}
209197

198+
199+
/*
200+
* The relation cache might be moved to the new address space when growing
201+
* later on. It happens when there are a lot of tde_heap relations during
202+
* the recovery. So copy the key to the own locked mem. We could rewrite
203+
* the EncryptionKey pointer after the grow, but it'd be more complicated
204+
* and error-prone. We can sacrifice a couple of Kb.
205+
*
206+
* TODO: genralize and use `tde_alloc_safe_mem` instead?
207+
*/
208+
if (key)
209+
{
210+
long pageSize = sysconf(_SC_PAGESIZE);
211+
212+
EncryptionKey = (InternalKey *) MemoryContextAllocAligned(TopMemoryContext, pageSize, pageSize, MCXT_ALLOC_ZERO);
213+
memcpy(EncryptionKey, key, sizeof(InternalKey));
214+
}
215+
}
216+
217+
#endif /* !FRONTEND */
218+
219+
void
220+
TDEXLogInit(void)
221+
{
222+
#ifndef FRONTEND
223+
tde_init_wal_encryption_key();
210224
pg_tde_set_db_file_path(GLOBAL_SPACE_RLOCATOR(XLOG_TDE_OID).dbOid, EncryptionState->db_map_path);
211225

212226
#endif
@@ -224,15 +238,14 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
224238
*
225239
* This func called with WALWriteLock held, so no need in any extra sync.
226240
*/
227-
if (EncryptionKey.rel_type & TDE_KEY_TYPE_GLOBAL &&
228-
pg_atomic_read_u64(&EncryptionState->enc_key_lsn) == 0)
241+
if (EncryptionKey && pg_atomic_read_u64(&EncryptionState->enc_key_lsn) == 0)
229242
{
230243
XLogRecPtr lsn;
231244

232245
XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, lsn);
233246

234247
pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_map_path);
235-
EncryptionKey.start_lsn = lsn;
248+
EncryptionKey->start_lsn = lsn;
236249
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, lsn);
237250
}
238251

contrib/pg_tde/src/include/access/pg_tde_tdemap.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
100100
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
101101

102102
extern InternalKey *pg_tde_create_smgr_key(const RelFileLocatorBackend *newrlocator);
103-
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, uint32 flags);
103+
extern InternalKey *pg_tde_create_wal_key(const RelFileLocator *newrlocator, uint32 flags);
104104
extern void pg_tde_free_key_map_entry(const RelFileLocator *rlocator, off_t offset);
105105
extern void pg_tde_write_key_map_entry_redo(const TDEMapEntry *write_map_entry, TDESignedPrincipalKeyInfo *signed_key_info);
106106

contrib/pg_tde/src/include/access/pg_tde_xlog_encrypt.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@
1313

1414
extern Size TDEXLogEncryptStateSize(void);
1515
extern void TDEXLogShmemInit(void);
16-
extern void TDEXLogSmgrInit(void);
16+
extern void TDEXLogInit(void);
1717

1818
#endif /* PG_TDE_XLOGENCRYPT_H */

contrib/pg_tde/src/pg_tde.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ tde_shmem_startup(void)
9292
AesInit();
9393

9494
TDEXLogShmemInit();
95-
TDEXLogSmgrInit();
95+
TDEXLogInit();
9696
}
9797

9898
void

contrib/pg_tde/src/smgr/pg_tde_smgr.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ typedef struct TDESMgrRelationData
2121
struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1];
2222

2323
bool encrypted_relation;
24-
InternalKey relKey;
24+
InternalKey *relKey;
2525
} TDESMgrRelationData;
2626

2727
typedef TDESMgrRelationData *TDESMgrRelation;
@@ -81,7 +81,7 @@ tde_mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
8181
const void **buffers, BlockNumber nblocks, bool skipFsync)
8282
{
8383
TDESMgrRelation tdereln = (TDESMgrRelation) reln;
84-
InternalKey *int_key = &tdereln->relKey;
84+
InternalKey *int_key = tdereln->relKey;
8585

8686
if (!tdereln->encrypted_relation)
8787
{
@@ -120,7 +120,7 @@ tde_mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
120120
const void *buffer, bool skipFsync)
121121
{
122122
TDESMgrRelation tdereln = (TDESMgrRelation) reln;
123-
InternalKey *int_key = &tdereln->relKey;
123+
InternalKey *int_key = tdereln->relKey;
124124

125125
if (!tdereln->encrypted_relation)
126126
{
@@ -149,7 +149,7 @@ tde_mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
149149
void **buffers, BlockNumber nblocks)
150150
{
151151
TDESMgrRelation tdereln = (TDESMgrRelation) reln;
152-
InternalKey *int_key = &tdereln->relKey;
152+
InternalKey *int_key = tdereln->relKey;
153153

154154
mdreadv(reln, forknum, blocknum, buffers, nblocks);
155155

@@ -236,7 +236,7 @@ tde_mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool
236236
if (key)
237237
{
238238
tdereln->encrypted_relation = true;
239-
tdereln->relKey = *key;
239+
tdereln->relKey = key;
240240
}
241241
else
242242
{
@@ -257,7 +257,7 @@ tde_mdopen(SMgrRelation reln)
257257
if (key)
258258
{
259259
tdereln->encrypted_relation = true;
260-
tdereln->relKey = *key;
260+
tdereln->relKey = key;
261261
}
262262
else
263263
{

contrib/pg_tde/t/009_wal_encrypt.pl

+20-1
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,28 @@
9191

9292
$stdout = $node->safe_psql('postgres', "SELECT pg_drop_replication_slot('tde_slot');", extra_params => ['-a']);
9393
PGTDE::append_to_file($stdout);
94+
PGTDE::append_to_file("-- server restart with wal encryption and recovery with a lot of tde_heap relations");
95+
$node->safe_psql('postgres',
96+
q{
97+
SELECT pg_tde_add_key_provider_file('file-keyring-010-2','/tmp/pg_tde_test_keyring010_2.per');
98+
SELECT pg_tde_set_principal_key('test-db-principal-key','file-keyring-010-2');
99+
100+
do $$
101+
DECLARE idx integer;
102+
begin
103+
for idx in 0..700 loop
104+
EXECUTE format('CREATE TABLE t%s (c1 int) USING tde_heap', idx);
105+
end loop;
106+
end; $$;
107+
});
108+
109+
$rt_value = $node->restart();
110+
ok($rt_value == 1, "Restart Server");
111+
112+
# TODO: add WAL content testing after the wal rework
94113

95114
# DROP EXTENSION
96-
$stdout = $node->safe_psql('postgres', 'DROP EXTENSION pg_tde;', extra_params => ['-a']);
115+
$stdout = $node->safe_psql('postgres', 'DROP EXTENSION pg_tde CASCADE;', extra_params => ['-a']);
97116
PGTDE::append_to_file($stdout);
98117
# Stop the server
99118
$node->stop();

contrib/pg_tde/t/expected/009_wal_encrypt.out

+2-1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,5 @@ table public.test_wal: INSERT: id[integer]:6 k[integer]:6
3838
COMMIT 742
3939
SELECT pg_drop_replication_slot('tde_slot');
4040

41-
DROP EXTENSION pg_tde;
41+
-- server restart with wal encryption and recovery with a lot of tde_heap relations
42+
DROP EXTENSION pg_tde CASCADE;

src/bin/pg_waldump/pg_waldump.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1156,7 +1156,7 @@ main(int argc, char **argv)
11561156
if (kringdir != NULL)
11571157
{
11581158
pg_tde_fe_init(kringdir);
1159-
TDEXLogSmgrInit();
1159+
TDEXLogInit();
11601160
}
11611161
#endif
11621162

0 commit comments

Comments
 (0)