Skip to content

Commit 2fe62ae

Browse files
committed
TLI
1 parent 9fb23a2 commit 2fe62ae

File tree

8 files changed

+105
-24
lines changed

8 files changed

+105
-24
lines changed

contrib/pg_tde/src/access/pg_tde_tdemap.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#define TDE_FILE_HEADER_SIZE sizeof(TDEFileHeader)
4545

4646
#define MaxXLogRecPtr (~(XLogRecPtr)0)
47+
#define MaxTimeLineID (~(TimeLineID)0)
4748

4849
typedef struct TDEFileHeader
4950
{
@@ -546,13 +547,19 @@ pg_tde_delete_principal_key(Oid dbOid)
546547
* needs keyfile_path
547548
*/
548549
void
549-
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
550+
pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path)
550551
{
551552
LWLock *lock_pk = tde_lwlock_enc_keys();
552553
int fd;
553554
off_t read_pos,
554555
write_pos,
555556
last_key_idx;
557+
struct
558+
{
559+
XLogRecPtr start_lsn;
560+
TimeLineID tli;
561+
} lsn_tli;
562+
556563

557564
LWLockAcquire(lock_pk, LW_EXCLUSIVE);
558565

@@ -561,7 +568,10 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
561568
last_key_idx = ((lseek(fd, 0, SEEK_END) - TDE_FILE_HEADER_SIZE) / MAP_ENTRY_SIZE) - 1;
562569
write_pos = TDE_FILE_HEADER_SIZE + (last_key_idx * MAP_ENTRY_SIZE) + offsetof(TDEMapEntry, enc_key) + offsetof(InternalKey, start_lsn);
563570

564-
if (pg_pwrite(fd, &lsn, sizeof(XLogRecPtr), write_pos) != sizeof(XLogRecPtr))
571+
lsn_tli.start_lsn = lsn;
572+
lsn_tli.tli = tli;
573+
574+
if (pg_pwrite(fd, &lsn_tli, sizeof(lsn_tli), write_pos) != sizeof(lsn_tli))
565575
{
566576
ereport(ERROR,
567577
errcode_for_file_access(),
@@ -585,7 +595,7 @@ pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path)
585595
errmsg("could not read previous WAL key: %m"));
586596
}
587597

588-
if (prev_map_entry.enc_key.start_lsn >= lsn)
598+
if (prev_map_entry.enc_key.start_lsn >= lsn && prev_map_entry.enc_key.tli >= tli)
589599
{
590600
prev_map_entry.enc_key.type = TDE_KEY_TYPE_WAL_INVALID;
591601

@@ -1072,6 +1082,7 @@ pg_tde_fetch_wal_keys(XLogRecPtr start_lsn)
10721082
WALKeyCacheRec *wal_rec;
10731083
InternalKey stub_key = {
10741084
.start_lsn = InvalidXLogRecPtr,
1085+
.tli = 0,
10751086
};
10761087

10771088
wal_rec = pg_tde_add_wal_key_to_cache(&stub_key, InvalidXLogRecPtr);
@@ -1133,8 +1144,10 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
11331144
MemoryContextSwitchTo(oldCtx);
11341145
#endif
11351146

1147+
wal_rec->start_tli = key->tli;
11361148
wal_rec->start_lsn = start_lsn;
11371149
wal_rec->end_lsn = MaxXLogRecPtr;
1150+
wal_rec->end_tli = MaxTimeLineID;
11381151
wal_rec->key = *key;
11391152
wal_rec->crypt_ctx = NULL;
11401153
if (!tde_wal_key_last_rec)
@@ -1146,6 +1159,7 @@ pg_tde_add_wal_key_to_cache(InternalKey *key, XLogRecPtr start_lsn)
11461159
{
11471160
tde_wal_key_last_rec->next = wal_rec;
11481161
tde_wal_key_last_rec->end_lsn = wal_rec->start_lsn;
1162+
tde_wal_key_last_rec->end_tli = wal_rec->start_tli;
11491163
tde_wal_key_last_rec = wal_rec;
11501164
}
11511165

contrib/pg_tde/src/access/pg_tde_xlog_smgr.c

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ typedef struct EncryptionStateData
5858
char *segBuf;
5959
char db_map_path[MAXPGPATH];
6060
pg_atomic_uint64 enc_key_lsn; /* to sync with readers */
61+
pg_atomic_uint64 enc_key_tli; /* to sync with readers */
6162
} EncryptionStateData;
6263

6364
static EncryptionStateData *EncryptionState = NULL;
@@ -145,6 +146,7 @@ TDEXLogShmemInit(void)
145146
}
146147

147148
pg_atomic_init_u64(&EncryptionState->enc_key_lsn, 0);
149+
pg_atomic_init_u64(&EncryptionState->enc_key_tli, 0);
148150

149151
elog(DEBUG1, "pg_tde: initialized encryption buffer %lu bytes", TDEXLogEncryptStateSize());
150152
}
@@ -163,8 +165,8 @@ TDEXLogWriteEncryptedPages(int fd, const void *buf, size_t count, off_t offset,
163165
Assert(count <= TDEXLogEncryptBuffSize());
164166

165167
#ifdef TDE_XLOG_DEBUG
166-
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX], seg: %X/%X, key_start_lsn: %X/%X",
167-
count, offset, offset, LSN_FORMAT_ARGS(segno), LSN_FORMAT_ARGS(key->start_lsn));
168+
elog(DEBUG1, "write encrypted WAL, size: %lu, offset: %ld [%lX] tli %u, seg: %X/%X, key_start: %u_%X/%X",
169+
count, offset, offset, tli, LSN_FORMAT_ARGS(segno), key->tli, LSN_FORMAT_ARGS(key->start_lsn));
168170
#endif
169171

170172
CalcXLogPageIVPrefix(tli, segno, key->base_iv, iv_prefix);
@@ -203,6 +205,7 @@ TDEXLogSmgrInit(void)
203205
{
204206
EncryptionKey = *key;
205207
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, EncryptionKey.start_lsn);
208+
pg_atomic_write_u64(&EncryptionState->enc_key_tli, EncryptionKey.tli);
206209
}
207210

208211
if (key)
@@ -232,9 +235,11 @@ tdeheap_xlog_seg_write(int fd, const void *buf, size_t count, off_t offset,
232235

233236
XLogSegNoOffsetToRecPtr(segno, offset, wal_segment_size, lsn);
234237

235-
pg_tde_wal_last_key_set_lsn(lsn, EncryptionState->db_map_path);
238+
pg_tde_wal_last_key_set_lsn(lsn, tli, EncryptionState->db_map_path);
236239
EncryptionKey.start_lsn = lsn;
240+
EncryptionKey.tli = tli;
237241
pg_atomic_write_u64(&EncryptionState->enc_key_lsn, lsn);
242+
pg_atomic_write_u64(&EncryptionState->enc_key_tli, tli);
238243
}
239244

240245
if (EncryptXLog)
@@ -254,8 +259,8 @@ tdeheap_xlog_seg_read(int fd, void *buf, size_t count, off_t offset,
254259
ssize_t readsz;
255260

256261
#ifdef TDE_XLOG_DEBUG
257-
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], seg: %X/%X",
258-
count, offset, offset, LSN_FORMAT_ARGS(segno));
262+
elog(DEBUG1, "read from a WAL segment, size: %lu offset: %ld [%lX], tli: %u, seg: %X/%X",
263+
count, offset, offset, tli, LSN_FORMAT_ARGS(segno));
259264
#endif
260265

261266
readsz = pg_pread(fd, buf, count, offset);
@@ -279,6 +284,8 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
279284
XLogRecPtr write_key_lsn;
280285
XLogRecPtr data_start;
281286
XLogRecPtr data_end;
287+
KeyTliLsn data_start_t = {.tli = tli};
288+
KeyTliLsn data_end_t = {.tli = tli};
282289

283290
if (!keys)
284291
{
@@ -292,11 +299,14 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
292299
if (!XLogRecPtrIsInvalid(write_key_lsn))
293300
{
294301
WALKeyCacheRec *last_key = pg_tde_get_last_wal_key();
302+
KeyTliLsn last_key_time = {.tli = last_key->start_tli, .lsn = last_key->start_lsn};
303+
KeyTliLsn write_key_time = {.tli = pg_atomic_read_u64(&EncryptionState->enc_key_tli), .lsn = write_key_lsn};
295304

296305
Assert(last_key);
297306

298307
/* write has generated a new key, need to fetch it */
299-
if (last_key->start_lsn < write_key_lsn)
308+
if (key_tli_lsn_cmp(last_key_time, write_key_time) == -1)
309+
// if (last_key->start_lsn < write_key_lsn)
300310
{
301311
pg_tde_fetch_wal_keys(write_key_lsn);
302312

@@ -309,16 +319,22 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
309319
XLogSegNoOffsetToRecPtr(segno, offset, segSize, data_start);
310320
XLogSegNoOffsetToRecPtr(segno, offset + count, segSize, data_end);
311321

322+
data_start_t.lsn = data_start;
323+
data_end_t.lsn = data_end;
324+
312325
/*
313326
* TODO: this is higly ineffective. We should get rid of linked list and
314327
* search from the last key as this is what the walsender is useing.
315328
*/
316329
for (WALKeyCacheRec *curr_key = keys; curr_key != NULL; curr_key = curr_key->next)
317330
{
331+
KeyTliLsn key_start_t = {.lsn = curr_key->start_lsn, .tli = curr_key->start_tli};
332+
KeyTliLsn key_end_t = {.lsn = curr_key->end_lsn, .tli = curr_key->end_tli};
333+
318334
#ifdef TDE_XLOG_DEBUG
319-
elog(DEBUG1, "WAL key %X/%X-%X/%X, encrypted: %s",
320-
LSN_FORMAT_ARGS(curr_key->start_lsn),
321-
LSN_FORMAT_ARGS(curr_key->end_lsn),
335+
elog(DEBUG1, "WAL key %u_%X/%X - %u_%X/%X, encrypted: %s",
336+
curr_key->start_tli, LSN_FORMAT_ARGS(curr_key->start_lsn),
337+
curr_key->end_tli, LSN_FORMAT_ARGS(curr_key->end_lsn),
322338
curr_key->key.type == TDE_KEY_TYPE_WAL_ENCRYPTED ? "yes" : "no");
323339
#endif
324340

@@ -329,7 +345,7 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
329345
* Check if the key's range overlaps with the buffer's and decypt
330346
* the part that does.
331347
*/
332-
if (data_start < curr_key->end_lsn && data_end > curr_key->start_lsn)
348+
if (key_tli_lsn_cmp(data_start_t, key_end_t) == -1 && key_tli_lsn_cmp(data_end_t, key_start_t) == 1)
333349
{
334350
char iv_prefix[16];
335351
off_t dec_off = XLogSegmentOffset(Max(data_start, curr_key->start_lsn), segSize);
@@ -350,8 +366,8 @@ TDEXLogCryptBuffer(void *buf, size_t count, off_t offset,
350366
dec_sz = dec_end - dec_off;
351367

352368
#ifdef TDE_XLOG_DEBUG
353-
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu], sz: %lu | key %X/%X",
354-
dec_off, dec_off - offset, dec_sz, LSN_FORMAT_ARGS(curr_key->key->start_lsn));
369+
elog(DEBUG1, "decrypt WAL, dec_off: %lu [buff_off %lu] tli %u, sz: %lu | key %u_%X/%X",
370+
dec_off, dec_off - offset, tli, dec_sz, curr_key->key.tli, LSN_FORMAT_ARGS(curr_key->start_lsn));
355371
#endif
356372
pg_tde_stream_crypt(iv_prefix, dec_off, dec_buf, dec_sz, dec_buf,
357373
&curr_key->key, &curr_key->crypt_ctx);

contrib/pg_tde/src/include/access/pg_tde_tdemap.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,33 @@ typedef struct InternalKey
2525
uint32 type;
2626

2727
XLogRecPtr start_lsn;
28+
TimeLineID tli;
2829
} InternalKey;
2930

31+
typedef struct KeyTliLsn
32+
{
33+
TimeLineID tli;
34+
XLogRecPtr lsn;
35+
} KeyTliLsn;
36+
37+
static inline int
38+
key_tli_lsn_cmp(KeyTliLsn t1, KeyTliLsn t2)
39+
{
40+
if (t1.tli < t2.tli)
41+
return -1;
42+
43+
if (t1.tli > t2.tli)
44+
return 1;
45+
46+
if (t1.lsn < t2.lsn)
47+
return -1;
48+
49+
if (t1.lsn > t2.lsn)
50+
return 1;
51+
52+
return 0;
53+
}
54+
3055
#define MAP_ENTRY_IV_SIZE 16
3156
#define MAP_ENTRY_AEAD_TAG_SIZE 16
3257

@@ -62,6 +87,8 @@ typedef struct WALKeyCacheRec
6287
{
6388
XLogRecPtr start_lsn;
6489
XLogRecPtr end_lsn;
90+
TimeLineID start_tli;
91+
TimeLineID end_tli;
6592

6693
InternalKey key;
6794
void *crypt_ctx;
@@ -73,7 +100,7 @@ extern InternalKey *pg_tde_read_last_wal_key(void);
73100
extern WALKeyCacheRec *pg_tde_get_last_wal_key(void);
74101
extern WALKeyCacheRec *pg_tde_fetch_wal_keys(XLogRecPtr start_lsn);
75102
extern WALKeyCacheRec *pg_tde_get_wal_cache_keys(void);
76-
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, const char *keyfile_path);
103+
extern void pg_tde_wal_last_key_set_lsn(XLogRecPtr lsn, TimeLineID tli, const char *keyfile_path);
77104
extern void pg_tde_create_wal_key(InternalKey *rel_key_data, const RelFileLocator *newrlocator, TDEMapEntryType entry_type);
78105

79106
#define PG_TDE_MAP_FILENAME "%d_keys"

contrib/pg_tde/t/RewindTest.pm

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use PostgreSQL::Test::Cluster;
4444
use PostgreSQL::Test::RecursiveCopy;
4545
use PostgreSQL::Test::Utils;
4646
use Test::More;
47+
use pgtde;
4748

4849
our @EXPORT = qw(
4950
$node_primary
@@ -199,7 +200,7 @@ sub create_standby
199200
$node_standby =
200201
PostgreSQL::Test::Cluster->new(
201202
'standby' . ($extra_name ? "_${extra_name}" : ''));
202-
$node_primary->backup('my_backup');
203+
PGTDE::backup($node_primary, 'my_backup');
203204
$node_standby->init_from_backup($node_primary, 'my_backup');
204205
my $connstr_primary = $node_primary->connstr();
205206

contrib/pg_tde/t/pgtde.pm

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,4 +108,17 @@ sub compare_results
108108
return compare($expected_filename_with_path, $out_filename_with_path);
109109
}
110110

111+
sub backup
112+
{
113+
my ($node, $backup_name, %params) = @_;
114+
my $backup_dir = $node->backup_dir . '/'. $backup_name;
115+
116+
mkdir $backup_dir;
117+
118+
PostgreSQL::Test::RecursiveCopy::copypath($node->data_dir . '/pg_tde',
119+
$backup_dir . '/pg_tde');
120+
121+
$node->backup($backup_name, %params);
122+
}
123+
111124
1;

contrib/pg_tde/t/standby_source.pl

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
use File::Copy;
3535
use PostgreSQL::Test::Cluster;
3636
use RewindTest;
37+
use pgtde;
3738

3839
my $tmp_folder = PostgreSQL::Test::Utils::tempdir;
3940

@@ -57,15 +58,19 @@
5758
# Set up node B and C, as cascaded standbys
5859
#
5960
# A (primary) <--- B (standby) <--- C (standby)
60-
$node_a->backup('my_backup');
61+
my $backup_name = 'my_backup';
62+
63+
PGTDE::backup(node_a, $backup_name);
6164
$node_b = PostgreSQL::Test::Cluster->new('node_b');
62-
$node_b->init_from_backup($node_a, 'my_backup', has_streaming => 1);
65+
$node_b->init_from_backup($node_a, $backup_name, has_streaming => 1);
6366
$node_b->set_standby_mode();
6467
$node_b->start;
6568

66-
$node_b->backup('my_backup');
69+
PostgreSQL::Test::RecursiveCopy::copypath($node_b->data_dir . '/pg_tde',
70+
$node_b->backup_dir . '/'. $backup_name . '/pg_tde');
71+
PGTDE::backup(node_b, $backup_name);
6772
$node_c = PostgreSQL::Test::Cluster->new('node_c');
68-
$node_c->init_from_backup($node_b, 'my_backup', has_streaming => 1);
73+
$node_c->init_from_backup($node_b, $backup_name, has_streaming => 1);
6974
$node_c->set_standby_mode();
7075
$node_c->start;
7176

src/bin/pg_basebackup/pg_basebackup.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -634,9 +634,6 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
634634
uint32 hi,
635635
lo;
636636
char statusdir[MAXPGPATH];
637-
#ifdef PERCONA_EXT
638-
char tdedir[MAXPGPATH];
639-
#endif
640637

641638
param = pg_malloc0(sizeof(logstreamer_param));
642639
param->timeline = timeline;
@@ -671,9 +668,13 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier,
671668
"pg_xlog" : "pg_wal");
672669

673670
#ifdef PERCONA_EXT
671+
{
672+
char tdedir[MAXPGPATH];
673+
674674
snprintf(tdedir, sizeof(tdedir), "%s/%s", basedir, PG_TDE_DATA_DIR);
675675
pg_tde_fe_init(tdedir);
676676
TDEXLogSmgrInit();
677+
}
677678
#endif
678679

679680
/* Temporary replication slots are only supported in 10 and newer */

src/bin/pg_basebackup/receivelog.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,7 +1050,9 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
10501050
int bytes_left;
10511051
int bytes_written;
10521052
int hdr_len;
1053+
#ifdef PERCONA_EXT
10531054
XLogSegNo segno;
1055+
#endif
10541056

10551057
/*
10561058
* Once we've decided we don't want to receive any more, just ignore any
@@ -1078,7 +1080,9 @@ ProcessXLogDataMsg(PGconn *conn, StreamCtl *stream, char *copybuf, int len,
10781080
/* Extract WAL location for this block */
10791081
xlogoff = XLogSegmentOffset(*blockpos, WalSegSz);
10801082

1083+
#ifdef PERCONA_EXT
10811084
XLByteToSeg(*blockpos, segno, WalSegSz);
1085+
#endif
10821086

10831087
/*
10841088
* Verify that the initial location in the stream matches where we think

0 commit comments

Comments
 (0)