Skip to content

Commit 4d3c4ba

Browse files
committed
pg_rewind: Re-encrypt fsm/vm forks for partial writes
We rewrite the internal key for a relation when partially re-encrypting its blocks. That makes the relation's FSM and VM forks unreadable, as they are still encrypted with the old key. To fix that, we re-encrypt those forks with the proper key after we finish processing the main fork file. Because pg_rewind processes files in the order of operation types (see file_action_t), and whole-file copies occur before any partial writes, we assume that such fork files are already in the target datadir and rewrite them in place.
1 parent 4d886d6 commit 4d3c4ba

6 files changed

Lines changed: 148 additions & 6 deletions

File tree

fetools/pg18/pg_rewind/libpq_source.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,7 +625,7 @@ process_queued_fetch_requests(libpq_source *src)
625625
{
626626
unsigned char *data = (unsigned char *) chunk + BLCKSZ * i;
627627

628-
encrypt_block(data, chunkoff + BLCKSZ * i);
628+
encrypt_block(data, chunkoff + BLCKSZ * i, MAIN_FORKNUM);
629629
}
630630
}
631631
write_target_range(chunk, chunkoff, chunksize);

fetools/pg18/pg_rewind/local_source.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ local_queue_fetch_range(rewind_source *source, const char *path, off_t off,
177177
pg_fatal("unexpected EOF while reading file \"%s\"", srcpath);
178178

179179
/* Re-encrypt blocks with a proper key if needed. */
180-
encrypt_block((unsigned char *) buf.data, begin);
180+
encrypt_block((unsigned char *) buf.data, begin, MAIN_FORKNUM);
181181

182182
write_target_range(buf.data, begin, readlen);
183183
begin += readlen;

fetools/pg18/pg_rewind/tde_ops.c

Lines changed: 70 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,79 @@ static current_file_data current_tde_file =
3535
static char tde_tmp_scource[MAXPGPATH] = "/tmp/pg_tde_rewindXXXXXX";
3636
static bool source_has_tde = false;
3737

38+
static void
39+
recrypt_fork(ForkNumber fork)
40+
{
41+
int srcfd;
42+
int trgfd;
43+
char srcpath[MAXPGPATH];
44+
PGIOAlignedBlock buf;
45+
size_t written_len;
46+
RelPathStr rp = relpathperm(current_tde_file.rlocator, fork);
47+
48+
snprintf(srcpath, sizeof(srcpath), "%s/%s", datadir_target, rp.str);
49+
50+
/* check if fork exists, nothing to do if it does not */
51+
if (access(srcpath, F_OK) != 0)
52+
return;
53+
54+
srcfd = open(srcpath, O_RDONLY | PG_BINARY, 0);
55+
if (srcfd < 0)
56+
{
57+
/*
58+
* Server can recover from wrecked VM/FSM, hence only warnings here
59+
* and in the rest of the function (except for read errors, which are fatal)
60+
*/
61+
pg_log_warning("could not open file for reading \"%s\": %m", srcpath);
62+
return;
63+
}
64+
65+
trgfd = open(srcpath, O_WRONLY | PG_BINARY, 0);
66+
if (trgfd < 0)
67+
{
68+
pg_log_warning("could not open file for writing \"%s\": %m", srcpath);
69+
close(srcfd);
70+
return;
71+
}
72+
73+
written_len = 0;
74+
for (;;)
75+
{
76+
ssize_t read_len;
77+
78+
read_len = read(srcfd, buf.data, sizeof(buf));
79+
80+
if (read_len < 0)
81+
pg_fatal("could not read file \"%s\": %m", srcpath);
82+
else if (read_len == 0)
83+
break; /* EOF reached */
84+
85+
encrypt_block((unsigned char *) buf.data, written_len, fork);
86+
87+
if (write(trgfd, buf.data, read_len) != read_len)
88+
{
89+
pg_log_warning("could not write block to fork file \"%s\": %m", srcpath);
90+
break;
91+
}
92+
written_len += read_len;
93+
}
94+
95+
close(srcfd);
96+
close(trgfd);
97+
}
98+
99+
38100
void
39101
flush_current_key(void)
40102
{
41103
if (current_tde_file.source_key == NULL)
42104
return;
43105

106+
pg_log_debug("ensure forks encryption for \"%s\"", current_tde_file.path);
107+
108+
recrypt_fork(FSM_FORKNUM);
109+
recrypt_fork(VISIBILITYMAP_FORKNUM);
110+
44111
pg_log_debug("update internal key for \"%s\"", current_tde_file.path);
45112
pg_tde_set_data_dir(tde_tmp_scource);
46113
pg_tde_save_smgr_key(current_tde_file.rlocator, current_tde_file.target_key, true);
@@ -96,7 +163,7 @@ ensure_tde_keys(const char *relpath)
96163
}
97164

98165
void
99-
encrypt_block(unsigned char *buf, off_t file_offset)
166+
encrypt_block(unsigned char *buf, off_t file_offset, ForkNumber fork)
100167
{
101168
BlockNumber blkno;
102169

@@ -109,8 +176,8 @@ encrypt_block(unsigned char *buf, off_t file_offset)
109176
blkno = file_offset / BLCKSZ + current_tde_file.segNo * RELSEG_SIZE;
110177

111178
pg_log_debug("re-encrypt block in %s, offset: %ld, blockNum: %u", current_tde_file.path, (long) file_offset, blkno);
112-
tde_decrypt_smgr_block(current_tde_file.source_key, MAIN_FORKNUM, blkno, buf, buf);
113-
tde_encrypt_smgr_block(current_tde_file.target_key, MAIN_FORKNUM, blkno, buf, buf);
179+
tde_decrypt_smgr_block(current_tde_file.source_key, fork, blkno, buf, buf);
180+
tde_encrypt_smgr_block(current_tde_file.target_key, fork, blkno, buf, buf);
114181
}
115182

116183

fetools/pg18/pg_rewind/tde_ops.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
#ifndef PG_REWIND_TDE_FILE_H
22
#define PG_REWIND_TDE_FILE_H
33

4+
#include "common/relpath.h"
5+
46
extern void flush_current_key(void);
57
extern void ensure_tde_keys(const char *relpath);
6-
extern void encrypt_block(unsigned char *buf, off_t file_offset);
8+
extern void encrypt_block(unsigned char *buf, off_t file_offset, ForkNumber fork);
79

810
extern void destroy_tde_tmp_dir(void);
911
extern void write_tmp_source_file(const char *fname, char *buf, size_t size);

meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ tap_tests = [
320320
't/pg_rewind_basic.pl',
321321
't/pg_rewind_databases.pl',
322322
't/pg_rewind_enc_copy_blocks.pl',
323+
't/pg_rewind_enc_fsm.pl',
323324
't/pg_rewind_enc_unchanged_rel.pl',
324325
't/pg_rewind_extrafiles.pl',
325326
't/pg_rewind_growing_files.pl',

t/pg_rewind_enc_fsm.pl

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
2+
# Copyright (c) 2021-2024, PostgreSQL Global Development Group
3+
4+
use strict;
5+
use warnings FATAL => 'all';
6+
use PostgreSQL::Test::Utils;
7+
use Test::More;
8+
9+
use FindBin;
10+
use lib $FindBin::RealBin;
11+
12+
use RewindTest;
13+
14+
sub run_test
15+
{
16+
my $test_mode = shift;
17+
my $extra_name = shift;
18+
my $extra_conf = shift;
19+
20+
my $cluster_name = $test_mode;
21+
22+
$cluster_name = $cluster_name . $extra_name if defined $extra_name;
23+
24+
RewindTest::setup_cluster($cluster_name, [], $extra_conf);
25+
RewindTest::start_primary();
26+
RewindTest::create_standby($cluster_name);
27+
28+
primary_psql(
29+
"CREATE TABLE tbl1 (id INTEGER GENERATED ALWAYS AS IDENTITY PRIMARY KEY, f1 TEXT) USING tde_heap"
30+
);
31+
primary_psql(
32+
"INSERT INTO tbl1 (f1) SELECT repeat('abcdeF', 1000) FROM generate_series(1, 1000)"
33+
);
34+
primary_psql("CHECKPOINT");
35+
36+
RewindTest::promote_standby();
37+
38+
# Trigger updated blocks in FSM
39+
standby_psql(
40+
"DELETE FROM tbl1 WHERE id % 15 = 0;"
41+
);
42+
standby_psql(
43+
"INSERT INTO tbl1 (f1) SELECT repeat('ghijk', 100) FROM generate_series(1, 1000)"
44+
);
45+
46+
47+
RewindTest::run_pg_rewind($test_mode);
48+
49+
ok( !$RewindTest::node_primary->log_contains(
50+
'; zeroing out page'
51+
),
52+
'verify there are no corrupted _fsm relations');
53+
54+
check_query(
55+
'SELECT count(*) FROM tbl1',
56+
qq(1934
57+
),
58+
'check table');
59+
60+
RewindTest::clean_rewind_test();
61+
return;
62+
}
63+
64+
# Run the test in all modes (local, remote, archive), then once more with a non-default cipher
65+
run_test('local');
66+
run_test('remote');
67+
run_test('archive');
68+
69+
my @conf_params = ("pg_tde.cipher = 'aes_256'");
70+
run_test('local', "_aes_256", \@conf_params);
71+
72+
done_testing();

0 commit comments

Comments
 (0)