Skip to content

Commit 88de2ff

Browse files
committed
TLI
1 parent 54d131a commit 88de2ff

File tree

8 files changed

+131
-25
lines changed

8 files changed

+131
-25
lines changed

contrib/pg_tde/src/access/pg_tde_tdemap.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader)
4545

4646
#define MaxXLogRecPtr (~(XLogRecPtr)0)
47+
#define MaxTimeLineID (~(TimeLineID)0)
4748

4849
typedef struct TDEFileHeader
4950
{
@@ -369,13 +370,19 @@ pg_tde_delete_principal_key(Oid dbOid)
369370
* needs keyfile_path
370371
*/
371372
void
372-
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
373+
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path)
373374
{
374375
LWLock *lock_pk = tde_lwlock_enc_keys();
375376
int fd;
376377
off_t read_pos,
377378
write_pos,
378379
last_key_idx;
380+
struct
381+
{
382+
XLogRecPtr start_lsn;
383+
TimeLineID tli;
384+
} lsn_tli;
385+
379386

380387
LWLockAcquire(lock_pk, LW_EXCLUSIVE);
381388

@@ -384,7 +391,10 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
384391
last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1;
385392
write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * MAP_ENTRY_SIZE) + offsetof(TDEMapEntry, enc_key) + offsetof(InternalKey, start_lsn);
386393

387-
if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
394+
lsn_tli.start_lsn = lsn;
395+
lsn_tli.tli = tli;
396+
397+
if (pg_pwrite(fd, &lsn_tli, sizeof(lsn_tli), write_pos) != sizeof(lsn_tli))
388398
{
389399
ereport(ERROR,
390400
errcode_for_file_access(),
@@ -408,7 +418,7 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
408418
errmsg("could not read previous WAL key: %m"));
409419
}
410420

411-
if (prev_map_entry.enc_key.start_lsn >= lsn)
421+
if (prev_map_entry.enc_key.start_lsn >= lsn && prev_map_entry.enc_key.tli >= tli)
412422
{
413423
prev_map_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID;
414424

@@ -1071,6 +1081,7 @@ pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
10711081
WALKeyCacheRec *wal_rec;
10721082
InternalKey stub_key = {
10731083
.start_lsn = InvalidXLogRecPtr,
1084+
.tli = 0,
10741085
};
10751086

10761087
wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr);
@@ -1132,8 +1143,10 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
11321143
MemoryContextSwitchTo(oldCtx);
11331144
#endif
11341145

1146+
wal_rec->start_tli = key->tli;
11351147
wal_rec->start_lsn = start_lsn;
11361148
wal_rec->end_lsn = MaxXLogRecPtr;
1149+
wal_rec->end_tli = MaxTimeLineID;
11371150
wal_rec->key = *key;
11381151
wal_rec->crypt_ctx = NULL;
11391152
if (!tde_wal_key_last_rec)
@@ -1145,6 +1158,7 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
11451158
{
11461159
tde_wal_key_last_rec->next = wal_rec;
11471160
tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn;
1161+
tde_wal_key_last_rec->end_tli = wal_rec->start_tli;
11481162
tde_wal_key_last_rec = wal_rec;
11491163
}
11501164

contrib/pg_tde/src/access/pg_tde_xlog_smgr.c

Lines changed: 54 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ typedef struct EncryptionStateData
6767
{
6868
char db_map_path[MAXPGPATH];
6969
pg_atomic_uint64 enc_key_lsn; /* to sync with readers */
70+
pg_atomic_uint64 enc_key_tli; /* to sync with readers */
7071
} EncryptionStateData;
7172

7273
static EncryptionStateData *EncryptionState = NULL;
@@ -85,6 +86,18 @@ TDEXLogSetEncKeyLsn(XLogRecPtr start_lsn)
8586
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, start_lsn);
8687
}
8788

89+
static TimeLineID
90+
TDEXLogGetEncKeyTli()
91+
{
92+
return (TimeLineID) pg_atomic_read_u64(&EncryptionState->enc_key_tli);
93+
}
94+
95+
static void
96+
TDEXLogSetEncKeyTli(TimeLineID tli)
97+
{
98+
pg_atomic_write_u64(&EncryptionState->enc_key_tli, tli);
99+
}
100+
88101
static Size TDEXLogEncryptBuffSize(void);
89102

90103
static int XLOGChooseNumBuffers(void);
@@ -159,6 +172,7 @@ TDEXLogShmemInit(void)
159172
}
160173

161174
pg_atomic_init_u64(&EncryptionState->enc_key_lsn, 0);
175+
pg_atomic_init_u64(&EncryptionState->enc_key_tli, 0);
162176

163177
elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", TDEXLogEncryptStateSize());
164178
}
@@ -169,6 +183,7 @@ typedef struct EncryptionStateData
169183
{
170184
char db_map_path[MAXPGPATH];
171185
XLogRecPtr enc_key_lsn; /* to sync with reader */
186+
XLogRecPtr enc_key_tli; /* to sync with reader */
172187
} EncryptionStateData;
173188

174189
static EncryptionStateData EncryptionStateD = {0};
@@ -186,7 +201,19 @@ TDEXLogGetEncKeyLsn()
186201
static void
187202
TDEXLogSetEncKeyLsn(XLogRecPtr start_lsn)
188203
{
189-
EncryptionState->enc_key_lsn = EncryptionKey.start_lsn;
204+
EncryptionState->enc_key_lsn = start_lsn;
205+
}
206+
207+
static TimeLineID
208+
TDEXLogGetEncKeyTli()
209+
{
210+
return (TimeLineID) EncryptionState->enc_key_tli;
211+
}
212+
213+
static void
214+
TDEXLogSetEncKeyTli(TimeLineID tli)
215+
{
216+
EncryptionState->enc_key_lsn = tli;
190217
}
191218

192219
#endif /* FRONTEND */
@@ -221,6 +248,7 @@ TDEXLogSmgrInitWrite(bool encrypt_xlog)
221248
{
222249
EncryptionKey = *key;
223250
TDEXLogSetEncKeyLsn(EncryptionKey.start_lsn);
251+
TDEXLogSetEncKeyTli(EncryptionKey.tli);
224252
}
225253

226254
if (key)
@@ -245,8 +273,8 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
245273
#endif
246274

247275
#ifdef TDE_XLOG_DEBUG
248-
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X",
249-
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn));
276+
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX] tli %u, seg: %X/%X, key_start: %u_%X/%X",
277+
count, offset, offset, tli, LSN_FORMAT_ARGS(segno), key->tli, LSN_FORMAT_ARGS(key->start_lsn));
250278
#endif
251279

252280
CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
@@ -272,9 +300,11 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
272300

273301
XLogSegNoOffsetToRecPtr(segno, offset, segSize, lsn);
274302

275-
pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_map_path);
303+
pg_tde_wal_last_key_set_lsn(lsn, tli, EncryptionState->db_map_path);
276304
EncryptionKey.start_lsn = lsn;
305+
EncryptionKey.tli = tli;
277306
TDEXLogSetEncKeyLsn(lsn);
307+
TDEXLogSetEncKeyTli(tli);
278308
}
279309

280310
if (EncryptionKey.type == TDE_KEY_TYPE_WAL_ENCRYPTED)
@@ -293,8 +323,8 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
293323
ssize_t readsz;
294324

295325
#ifdef TDE_XLOG_DEBUG
296-
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X",
297-
count, offset, offset, LSN_FORMAT_ARGS(segno));
326+
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], tli: %u, seg: %X/%X",
327+
count, offset, offset, tli, LSN_FORMAT_ARGS(segno));
298328
#endif
299329

300330
readsz = pg_pread(fd, buf, count, offset);
@@ -318,6 +348,8 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
318348
XLogRecPtr write_key_lsn;
319349
XLogRecPtr data_start;
320350
XLogRecPtr data_end;
351+
KeyTliLsn data_start_t = {.tli = tli};
352+
KeyTliLsn data_end_t = {.tli = tli};
321353

322354
if (!keys)
323355
{
@@ -330,11 +362,14 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
330362
if (!XLogRecPtrIsInvalid(write_key_lsn))
331363
{
332364
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
365+
KeyTliLsn last_key_time = {.tli = last_key->start_tli, .lsn = last_key->start_lsn};
366+
KeyTliLsn write_key_time = {.tli = TDEXLogGetEncKeyTli(), .lsn = write_key_lsn};
333367

334368
Assert(last_key);
335369

336370
/* write has generated a new key, need to fetch it */
337-
if (last_key->start_lsn < write_key_lsn)
371+
if (key_tli_lsn_cmp(last_key_time, write_key_time) == -1)
372+
// if (last_key->start_lsn < write_key_lsn)
338373
{
339374
pg_tde_fetch_wal_keys(write_key_lsn);
340375

@@ -346,16 +381,22 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
346381
XLogSegNoOffsetToRecPtr(segno, offset, segSize, data_start);
347382
XLogSegNoOffsetToRecPtr(segno, offset + count, segSize, data_end);
348383

384+
data_start_t.lsn = data_start;
385+
data_end_t.lsn = data_end;
386+
349387
/*
350388
* TODO: this is higly ineffective. We should get rid of linked list and
351389
* search from the last key as this is what the walsender is useing.
352390
*/
353391
for (WALKeyCacheRec *curr_key = keys; curr_key != NULL; curr_key = curr_key->next)
354392
{
393+
KeyTliLsn key_start_t = {.lsn = curr_key->start_lsn, .tli = curr_key->start_tli};
394+
KeyTliLsn key_end_t = {.lsn = curr_key->end_lsn, .tli = curr_key->end_tli};
395+
355396
#ifdef TDE_XLOG_DEBUG
356-
elog(DEBUG1, "WAL key %X/%X-%X/%X, encrypted: %s",
357-
LSN_FORMAT_ARGS(curr_key->start_lsn),
358-
LSN_FORMAT_ARGS(curr_key->end_lsn),
397+
elog(DEBUG1, "WAL key %u_%X/%X - %u_%X/%X, encrypted: %s",
398+
curr_key->start_tli, LSN_FORMAT_ARGS(curr_key->start_lsn),
399+
curr_key->end_tli, LSN_FORMAT_ARGS(curr_key->end_lsn),
359400
curr_key->key.type == TDE_KEY_TYPE_WAL_ENCRYPTED ? "yes" : "no");
360401
#endif
361402

@@ -366,7 +407,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
366407
* Check if the key's range overlaps with the buffer's and decypt
367408
* the part that does.
368409
*/
369-
if (data_start < curr_key->end_lsn && data_end > curr_key->start_lsn)
410+
if (key_tli_lsn_cmp(data_start_t, key_end_t) == -1 && key_tli_lsn_cmp(data_end_t, key_start_t) == 1)
370411
{
371412
char iv_prefix[16];
372413
off_t dec_off = XLogSegmentOffset(Max(data_start, curr_key->start_lsn), segSize);
@@ -387,8 +428,8 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
387428
dec_sz = dec_end - dec_off;
388429

389430
#ifdef TDE_XLOG_DEBUG
390-
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X",
391-
dec_off, dec_off - offset, dec_sz, LSN_FORMAT_ARGS(curr_key->key->start_lsn));
431+
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu] tli %u, sz: %lu | key %u_%X/%X",
432+
dec_off, dec_off - offset, tli, dec_sz, curr_key->key.tli, LSN_FORMAT_ARGS(curr_key->start_lsn));
392433
#endif
393434
pg_tde_stream_crypt(iv_prefix, dec_off, dec_buf, dec_sz, dec_buf,
394435
&curr_key->key, &curr_key->crypt_ctx);

contrib/pg_tde/src/include/access/pg_tde_tdemap.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,33 @@ typedef struct InternalKey
2525
uint32 type;
2626

2727
XLogRecPtr start_lsn;
28+
TimeLineID tli;
2829
} InternalKey;
2930

31+
typedef struct KeyTliLsn
32+
{
33+
TimeLineID tli;
34+
XLogRecPtr lsn;
35+
} KeyTliLsn;
36+
37+
static inline int
38+
key_tli_lsn_cmp(KeyTliLsn t1, KeyTliLsn t2)
39+
{
40+
if (t1.tli < t2.tli)
41+
return -1;
42+
43+
if (t1.tli > t2.tli)
44+
return 1;
45+
46+
if (t1.lsn < t2.lsn)
47+
return -1;
48+
49+
if (t1.lsn > t2.lsn)
50+
return 1;
51+
52+
return 0;
53+
}
54+
3055
#define MAP_ENTRY_IV_SIZE 16
3156
#define MAP_ENTRY_AEAD_TAG_SIZE 16
3257

@@ -62,6 +87,8 @@ typedef struct WALKeyCacheRec
6287
{
6388
XLogRecPtr start_lsn;
6489
XLogRecPtr end_lsn;
90+
TimeLineID start_tli;
91+
TimeLineID end_tli;
6592

6693
InternalKey key;
6794
void *crypt_ctx;
@@ -73,7 +100,7 @@ extern InternalKey *pg_tde_read_last_wal_key(void);
73100
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
74101
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
75102
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
76-
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
103+
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path);
77104
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type);
78105

79106
#define PG_TDE_MAP_FILENAME "%d_keys"

contrib/pg_tde/t/RewindTest.pm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use PostgreSQL::Test::Cluster;
4444
use PostgreSQL::Test::RecursiveCopy;
4545
use PostgreSQL::Test::Utils;
4646
use Test::More;
47+
use pgtde;
4748

4849
our @EXPORT = qw(
4950
$node_primary
@@ -199,7 +200,7 @@ sub create_standby
199200
$node_standby =
200201
PostgreSQL::Test::Cluster->new(
201202
'standby' . ($extra_name ? "_${extra_name}" : ''));
202-
$node_primary->backup('my_backup');
203+
PGTDE::backup($node_primary, 'my_backup');
203204
$node_standby->init_from_backup($node_primary, 'my_backup');
204205
my $connstr_primary = $node_primary->connstr();
205206

contrib/pg_tde/t/pgtde.pm

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,17 @@ sub compare_results
108108
return compare($expected_filename_with_path, $out_filename_with_path);
109109
}
110110

111+
sub backup
112+
{
113+
my ($node, $backup_name, %params) = @_;
114+
my $backup_dir = $node->backup_dir . '/'. $backup_name;
115+
116+
mkdir $backup_dir;
117+
118+
PostgreSQL::Test::RecursiveCopy::copypath($node->data_dir . '/pg_tde',
119+
$backup_dir . '/pg_tde');
120+
121+
$node->backup($backup_name, %params);
122+
}
123+
111124
1;

contrib/pg_tde/t/standby_source.pl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
use File::Copy;
3535
use PostgreSQL::Test::Cluster;
3636
use RewindTest;
37+
use pgtde;
3738

3839
my $tmp_folder = PostgreSQL::Test::Utils::tempdir;
3940

@@ -57,15 +58,19 @@
5758
# Set up node B and C, as cascaded standbys
5859
#
5960
# A (primary) <--- B (standby) <--- C (standby)
60-
$node_a->backup('my_backup');
61+
my $backup_name = 'my_backup';
62+
63+
PGTDE::backup(node_a, $backup_name);
6164
$node_b = PostgreSQL::Test::Cluster->new('node_b');
62-
$node_b->init_from_backup($node_a, 'my_backup', has_streaming => 1);
65+
$node_b->init_from_backup($node_a, $backup_name, has_streaming => 1);
6366
$node_b->set_standby_mode();
6467
$node_b->start;
6568

66-
$node_b->backup('my_backup');
69+
PostgreSQL::Test::RecursiveCopy::copypath($node_b->data_dir . '/pg_tde',
70+
$node_b->backup_dir . '/'. $backup_name . '/pg_tde');
71+
PGTDE::backup(node_b, $backup_name);
6772
$node_c = PostgreSQL::Test::Cluster->new('node_c');
68-
$node_c->init_from_backup($node_b, 'my_backup', has_streaming => 1);
73+
$node_c->init_from_backup($node_b, $backup_name, has_streaming => 1);
6974
$node_c->set_standby_mode();
7075
$node_c->start;
7176

src/bin/pg_basebackup/pg_basebackup.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -634,9 +634,6 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
634634
uint32 hi,
635635
lo;
636636
char statusdir[MAXPGPATH];
637-
#ifdef PERCONA_EXT
638-
char tdedir[MAXPGPATH];
639-
#endif
640637

641638
param = pg_malloc0(sizeof(logstreamer_param));
642639
param->timeline = timeline;
@@ -671,9 +668,13 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
671668
"pg_xlog" : "pg_wal");
672669

673670
#ifdef PERCONA_EXT
671+
{
672+
char tdedir[MAXPGPATH];
673+
674674
snprintf(tdedir, sizeof(tdedir), "%s/%s", basedir, PG_TDE_DATA_DIR);
675675
pg_tde_fe_init(tdedir);
676676
TDEXLogSmgrInit();
677+
}
677678
#endif
678679

679680
/* Temporary replication slots are only supported in 10 and newer */

0 commit comments

Comments
 (0)