Skip to content

Commit f9ca4fa

Browse files
authored
SIGSEGV in getCdbComponentInfo() when standby coordinator is on dedicated host (apache#1702)
* Fix null dereference on a dedicated hot standby coordinator. getCdbComponentInfo() populates hostPrimaryCountHash with primary hosts only. When IS_HOT_STANDBY_QD() is true, mirror and standby hosts are also looked up in the hash, but the lookup returns NULL for dedicated standby nodes that host no primary segments. Relax Assert(found) to permit the hot-standby case and read the segment count through a null-safe check (defaulting to zero) to prevent the SIGSEGV.
1 parent 980ed21 commit f9ca4fa

3 files changed

Lines changed: 21 additions & 9 deletions

File tree

src/backend/cdb/cdbutil.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -593,8 +593,13 @@ getCdbComponentInfo(void)
593593
continue;
594594

595595
hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found);
596-
Assert(found);
597-
cdbInfo->hostPrimaryCount = hsEntry->segmentCount;
596+
Assert(found || IS_HOT_STANDBY_QD());
597+
/*
598+
* Standby and mirror entries can legitimately live on hosts that do not
599+
* own any primary segments. In that case the lookup is absent and the
600+
* count should be treated as zero instead of dereferencing a NULL entry.
601+
*/
602+
cdbInfo->hostPrimaryCount = found ? hsEntry->segmentCount : 0;
598603
}
599604

600605
for (i = 0; i < component_databases->total_entry_dbs; i++)
@@ -605,8 +610,13 @@ getCdbComponentInfo(void)
605610
continue;
606611

607612
hsEntry = (HostPrimaryCountEntry *) hash_search(hostPrimaryCountHash, cdbInfo->config->hostname, HASH_FIND, &found);
608-
Assert(found);
609-
cdbInfo->hostPrimaryCount = hsEntry->segmentCount;
613+
Assert(found || IS_HOT_STANDBY_QD());
614+
/*
615+
* Standby and mirror entries can legitimately live on hosts that do not
616+
* own any primary segments. In that case the lookup is absent and the
617+
* count should be treated as zero instead of dereferencing a NULL entry.
618+
*/
619+
cdbInfo->hostPrimaryCount = found ? hsEntry->segmentCount : 0;
610620
}
611621

612622
hash_destroy(hostPrimaryCountHash);

src/test/regress/expected/vacuum_gp.out

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,8 @@ create table relcache_leak_in_motion(v1 int);
446446
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'v1' as the Apache Cloudberry data distribution key for this table.
447447
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
448448
insert into relcache_leak_in_motion values(generate_series(0, 10000));
449+
BEGIN;
450+
SET LOCAL synchronous_commit = local;
449451
SELECT gp_inject_fault('interconnect_stop_recv_chunk', 'interrupt', dbid)
450452
FROM gp_segment_configuration WHERE content = -1 and role='p';
451453
gp_inject_fault
@@ -457,11 +459,8 @@ analyze relcache_leak_in_motion;
457459
ERROR: canceling statement due to user request
458460
SELECT gp_inject_fault('interconnect_stop_recv_chunk', 'reset', dbid)
459461
FROM gp_segment_configuration WHERE content = -1 and role='p';
460-
gp_inject_fault
461-
-----------------
462-
Success:
463-
(1 row)
464-
462+
ERROR: current transaction is aborted, commands ignored until end of transaction block
463+
COMMIT;
465464
-- start_ignore
466465
drop table if exists relcache_leak_in_motion;
467466
-- end_ignore

src/test/regress/sql/vacuum_gp.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,11 +298,14 @@ drop table if exists relcache_leak_in_motion;
298298
-- end_ignore
299299
create table relcache_leak_in_motion(v1 int);
300300
insert into relcache_leak_in_motion values(generate_series(0, 10000));
301+
BEGIN;
302+
SET LOCAL synchronous_commit = local;
301303
SELECT gp_inject_fault('interconnect_stop_recv_chunk', 'interrupt', dbid)
302304
FROM gp_segment_configuration WHERE content = -1 and role='p';
303305
analyze relcache_leak_in_motion;
304306
SELECT gp_inject_fault('interconnect_stop_recv_chunk', 'reset', dbid)
305307
FROM gp_segment_configuration WHERE content = -1 and role='p';
308+
COMMIT;
306309
-- start_ignore
307310
drop table if exists relcache_leak_in_motion;
308311
-- end_ignore

0 commit comments

Comments
 (0)