Skip to content

Commit ef0b6fb

Browse files
avaminglituhaihe
authored andcommitted
Fix SIGSEGV in fsm_extend when vacuuming tables in non-default tablespace
The commit df1e2ff ("Prevent CREATE TABLE from using dangling tablespace") added a call to TablespaceLockTuple() inside TablespaceCreateDbspace() for every non-default tablespace, including the common path where the per-database directory already exists. That lock acquisition calls LockSharedObject(), which calls AcceptInvalidationMessages(), allowing pending sinval messages to be processed at an unexpected point deep inside smgrcreate(). This creates a crash window during VACUUM of a heap table (including auxiliary tables such as the aoseg table of an AO relation) that lives in a non-default tablespace and has never been vacuumed before (so no FSM or VM fork exists yet): lazy_scan_heap -> visibilitymap_pin -> vm_readbuf -> vm_extend smgrcreate(VM fork) + CacheInvalidateSmgr() <- queues SHAREDINVALSMGR_ID -> RecordPageWithFreeSpace -> fsm_readbuf -> fsm_extend smgrcreate(FSM fork) -> mdcreate -> TablespaceCreateDbspace [CBDB-specific for non-default tablespaces] -> TablespaceLockTuple -> LockSharedObject -> AcceptInvalidationMessages() -> smgrclosenode() -> rel->rd_smgr = NULL rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = ... -> SIGSEGV (NULL dereference) at freespace.c:637 Fix: add RelationOpenSmgr(rel) after smgrcreate() in both fsm_extend() (freespace.c) and vm_extend() (visibilitymap.c). RelationOpenSmgr is a no-op when rd_smgr is still valid, and re-opens the smgr handle if the sinval handler has closed it. The identical guard already exists earlier in both functions for the LockRelationForExtension path. Add a regression test (vacuum_fsm_nondefault_tablespace) covering both a plain heap table and an AO table in a non-default tablespace.
1 parent 90f9dc3 commit ef0b6fb

File tree

7 files changed

+143
-0
lines changed

7 files changed

+143
-0
lines changed

src/backend/access/heap/visibilitymap.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,14 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
645645
!smgrexists(rel->rd_smgr, VISIBILITYMAP_FORKNUM))
646646
smgrcreate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, false);
647647

648+
/*
649+
* Might have to re-open if smgrcreate triggered AcceptInvalidationMessages
650+
* (via TablespaceCreateDbspace -> LockSharedObject for non-default
651+
* tablespaces), which may have processed a pending SHAREDINVALSMGR_ID
652+
* message and closed our smgr entry.
653+
*/
654+
RelationOpenSmgr(rel);
655+
648656
/* Invalidate cache so that smgrnblocks() asks the kernel. */
649657
rel->rd_smgr->smgr_cached_nblocks[VISIBILITYMAP_FORKNUM] = InvalidBlockNumber;
650658
vm_nblocks_now = smgrnblocks(rel->rd_smgr, VISIBILITYMAP_FORKNUM);

src/backend/storage/freespace/freespace.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,14 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
633633
!smgrexists(rel->rd_smgr, FSM_FORKNUM))
634634
smgrcreate(rel->rd_smgr, FSM_FORKNUM, false);
635635

636+
/*
637+
* Might have to re-open if smgrcreate triggered AcceptInvalidationMessages
638+
* (via TablespaceCreateDbspace -> LockSharedObject for non-default
639+
* tablespaces), which may have processed a pending SHAREDINVALSMGR_ID
640+
* message and closed our smgr entry.
641+
*/
642+
RelationOpenSmgr(rel);
643+
636644
/* Invalidate cache so that smgrnblocks() asks the kernel. */
637645
rel->rd_smgr->smgr_cached_nblocks[FSM_FORKNUM] = InvalidBlockNumber;
638646
fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);

src/test/regress/expected/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,4 @@
7070
/ao_unique_index_partition.out
7171
/bfv_copy.out
7272
/copy_encoding_error.out
73+
/vacuum_fsm_nondefault_tablespace.out

src/test/regress/greenplum_schedule

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ test: instr_in_shmem_verify
167167
# hold locks.
168168
test: partition_locking
169169
test: vacuum_gp
170+
test: vacuum_fsm_nondefault_tablespace
170171
test: resource_queue_stat
171172
# background analyze may affect pgstat
172173
test: pg_stat
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
-- Test: VACUUM on a table in a non-default tablespace does not crash on first run.
2+
--
3+
-- Bug: SIGSEGV in fsm_extend() (freespace.c:637) when vacuuming a heap table
4+
-- (or an AO table's aoseg auxiliary table) that resides in a non-default
5+
-- tablespace for the very first time.
6+
--
7+
-- Root cause: commit "Prevent CREATE TABLE from using dangling tablespace"
8+
-- added TablespaceLockTuple() in TablespaceCreateDbspace() for non-default
9+
-- tablespaces. That call reaches AcceptInvalidationMessages() via
10+
-- LockSharedObject(), which processes a pending SHAREDINVALSMGR_ID message
11+
-- that vm_extend() had queued via CacheInvalidateSmgr(), nullifying
12+
-- rel->rd_smgr before fsm_extend() dereferences it at freespace.c:637.
13+
--
14+
-- Fix: added RelationOpenSmgr(rel) after smgrcreate() in both fsm_extend()
15+
-- (freespace.c) and vm_extend() (visibilitymap.c) so that rd_smgr is
16+
-- re-opened if the sinval handler closed it.
17+
18+
CREATE TABLESPACE fsm_ts_test LOCATION '@testtablespace@';
19+
20+
-- Case 1: plain heap table in non-default tablespace, first VACUUM.
21+
-- Before the fix this crashed with SIGSEGV at freespace.c:637:
22+
-- vm_extend() -> CacheInvalidateSmgr (queues SHAREDINVALSMGR_ID)
23+
-- fsm_extend() -> smgrcreate -> TablespaceCreateDbspace
24+
-- -> TablespaceLockTuple -> AcceptInvalidationMessages
25+
-- -> processes SHAREDINVALSMGR_ID -> rel->rd_smgr = NULL
26+
-- -> rel->rd_smgr->smgr_cached_nblocks[...] = ... SIGSEGV
27+
CREATE TABLE fsm_ts_heap (id int, val text)
28+
TABLESPACE fsm_ts_test
29+
DISTRIBUTED BY (id);
30+
INSERT INTO fsm_ts_heap SELECT i, repeat('x', 80) FROM generate_series(1, 500) i;
31+
VACUUM ANALYZE fsm_ts_heap;
32+
SELECT count(*) FROM fsm_ts_heap;
33+
-- Second VACUUM must also succeed (FSM/VM now exist, different code path).
34+
VACUUM ANALYZE fsm_ts_heap;
35+
SELECT count(*) FROM fsm_ts_heap;
36+
DROP TABLE fsm_ts_heap;
37+
38+
-- Case 2: AO table in non-default tablespace.
39+
-- The crash occurs inside the recursive vacuum of the aoseg auxiliary table
40+
-- (which is also a heap table stored in the same non-default tablespace).
41+
CREATE TABLE fsm_ts_ao (id int, val text)
42+
USING ao_row
43+
TABLESPACE fsm_ts_test
44+
DISTRIBUTED BY (id);
45+
INSERT INTO fsm_ts_ao SELECT i, repeat('y', 80) FROM generate_series(1, 500) i;
46+
VACUUM ANALYZE fsm_ts_ao;
47+
SELECT count(*) FROM fsm_ts_ao;
48+
-- Second VACUUM must also succeed.
49+
VACUUM ANALYZE fsm_ts_ao;
50+
SELECT count(*) FROM fsm_ts_ao;
51+
DROP TABLE fsm_ts_ao;
52+
53+
-- Cleanup.
54+
DROP TABLESPACE fsm_ts_test;
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
-- Test: VACUUM on a table in a non-default tablespace does not crash on first run.
2+
--
3+
-- Bug: SIGSEGV in fsm_extend() (freespace.c:637) when vacuuming a heap table
4+
-- (or an AO table's aoseg auxiliary table) that resides in a non-default
5+
-- tablespace for the very first time.
6+
--
7+
-- Root cause: commit "Prevent CREATE TABLE from using dangling tablespace"
8+
-- added TablespaceLockTuple() in TablespaceCreateDbspace() for non-default
9+
-- tablespaces. That call reaches AcceptInvalidationMessages() via
10+
-- LockSharedObject(), which processes a pending SHAREDINVALSMGR_ID message
11+
-- that vm_extend() had queued via CacheInvalidateSmgr(), nullifying
12+
-- rel->rd_smgr before fsm_extend() dereferences it at freespace.c:637.
13+
--
14+
-- Fix: added RelationOpenSmgr(rel) after smgrcreate() in both fsm_extend()
15+
-- (freespace.c) and vm_extend() (visibilitymap.c) so that rd_smgr is
16+
-- re-opened if the sinval handler closed it.
17+
CREATE TABLESPACE fsm_ts_test LOCATION '@testtablespace@';
18+
-- Case 1: plain heap table in non-default tablespace, first VACUUM.
19+
-- Before the fix this crashed with SIGSEGV at freespace.c:637:
20+
-- vm_extend() -> CacheInvalidateSmgr (queues SHAREDINVALSMGR_ID)
21+
-- fsm_extend() -> smgrcreate -> TablespaceCreateDbspace
22+
-- -> TablespaceLockTuple -> AcceptInvalidationMessages
23+
-- -> processes SHAREDINVALSMGR_ID -> rel->rd_smgr = NULL
24+
-- -> rel->rd_smgr->smgr_cached_nblocks[...] = ... SIGSEGV
25+
CREATE TABLE fsm_ts_heap (id int, val text)
26+
TABLESPACE fsm_ts_test
27+
DISTRIBUTED BY (id);
28+
INSERT INTO fsm_ts_heap SELECT i, repeat('x', 80) FROM generate_series(1, 500) i;
29+
VACUUM ANALYZE fsm_ts_heap;
30+
SELECT count(*) FROM fsm_ts_heap;
31+
count
32+
-------
33+
500
34+
(1 row)
35+
36+
-- Second VACUUM must also succeed (FSM/VM now exist, different code path).
37+
VACUUM ANALYZE fsm_ts_heap;
38+
SELECT count(*) FROM fsm_ts_heap;
39+
count
40+
-------
41+
500
42+
(1 row)
43+
44+
DROP TABLE fsm_ts_heap;
45+
-- Case 2: AO table in non-default tablespace.
46+
-- The crash occurs inside the recursive vacuum of the aoseg auxiliary table
47+
-- (which is also a heap table stored in the same non-default tablespace).
48+
CREATE TABLE fsm_ts_ao (id int, val text)
49+
USING ao_row
50+
TABLESPACE fsm_ts_test
51+
DISTRIBUTED BY (id);
52+
INSERT INTO fsm_ts_ao SELECT i, repeat('y', 80) FROM generate_series(1, 500) i;
53+
VACUUM ANALYZE fsm_ts_ao;
54+
SELECT count(*) FROM fsm_ts_ao;
55+
count
56+
-------
57+
500
58+
(1 row)
59+
60+
-- Second VACUUM must also succeed.
61+
VACUUM ANALYZE fsm_ts_ao;
62+
SELECT count(*) FROM fsm_ts_ao;
63+
count
64+
-------
65+
500
66+
(1 row)
67+
68+
DROP TABLE fsm_ts_ao;
69+
-- Cleanup.
70+
DROP TABLESPACE fsm_ts_test;

src/test/regress/sql/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@
6464
/ao_unique_index_partition.sql
6565
/bfv_copy.sql
6666
/copy_encoding_error.sql
67+
/vacuum_fsm_nondefault_tablespace.sql

0 commit comments

Comments
 (0)