Skip to content

Commit 9e1a4f3

Browse files
committed
Track PQresult allocations in server side (#1804)
When querying pg_locks (or using `pg_lock_status()`), approximately 75% of backend memory allocations for resulting tuples weren't registered with Vmtracker or Resource Group Control. This memory would also leak if the query was cancelled or failed. This happened because `CdbDispatchCommand()`, which is used by `pg_locks`, internally uses libpq to obtain the results, which were allocated as PQresult structures with bare `malloc()`, even on the server side. This patch fixes both untracked memory issues by enforcing Vmtracker routines for `PGresult` allocations on the server-side. Please note that the original error from ADBDEV-7145, which is an `AbortTransaction()` recurstion due to an OOM error while cancelling a query, is fixed only in the next commit. Ticket: ADBDEV-7691
1 parent 7bdf316 commit 9e1a4f3

8 files changed

Lines changed: 272 additions & 19 deletions

File tree

src/backend/cdb/dispatcher/cdbdisp_async.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,8 @@ checkDispatchResult(CdbDispatcherState *ds, int timeout_sec)
685685
handlePollSuccess(pParms, fds);
686686
}
687687

688+
SIMPLE_FAULT_INJECTOR("check_dispatch_result_end");
689+
688690
pfree(fds);
689691
}
690692

src/interfaces/libpq/fe-exec.c

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ PQmakeEmptyPGresult(PGconn *conn, ExecStatusType status)
151151
{
152152
PGresult *result;
153153

154-
result = (PGresult *) malloc(sizeof(PGresult));
154+
result = (PGresult *) pqPalloc(sizeof(PGresult));
155155
if (!result)
156156
return NULL;
157157

@@ -407,7 +407,7 @@ dupEvents(PGEvent *events, int count)
407407
if (!events || count <= 0)
408408
return NULL;
409409

410-
newEvents = (PGEvent *) malloc(count * sizeof(PGEvent));
410+
newEvents = (PGEvent *) pqPalloc(count * sizeof(PGEvent));
411411
if (!newEvents)
412412
return NULL;
413413

@@ -417,12 +417,12 @@ dupEvents(PGEvent *events, int count)
417417
newEvents[i].passThrough = events[i].passThrough;
418418
newEvents[i].data = NULL;
419419
newEvents[i].resultInitialized = FALSE;
420-
newEvents[i].name = strdup(events[i].name);
420+
newEvents[i].name = pqPstrdup(events[i].name);
421421
if (!newEvents[i].name)
422422
{
423423
while (--i >= 0)
424-
free(newEvents[i].name);
425-
free(newEvents);
424+
pqPfree(newEvents[i].name);
425+
pqPfree(newEvents);
426426
return NULL;
427427
}
428428
}
@@ -585,7 +585,7 @@ pqResultAlloc(PGresult *res, size_t nBytes, bool isBinary)
585585
*/
586586
if (nBytes >= PGRESULT_SEP_ALLOC_THRESHOLD)
587587
{
588-
block = (PGresult_data *) malloc(nBytes + PGRESULT_BLOCK_OVERHEAD);
588+
block = (PGresult_data *) pqPalloc(nBytes + PGRESULT_BLOCK_OVERHEAD);
589589
if (!block)
590590
return NULL;
591591
space = block->space + PGRESULT_BLOCK_OVERHEAD;
@@ -609,7 +609,7 @@ pqResultAlloc(PGresult *res, size_t nBytes, bool isBinary)
609609
}
610610

611611
/* Otherwise, start a new block. */
612-
block = (PGresult_data *) malloc(PGRESULT_DATA_BLOCKSIZE);
612+
block = (PGresult_data *) pqPalloc(PGRESULT_DATA_BLOCKSIZE);
613613
if (!block)
614614
return NULL;
615615
block->next = res->curBlock;
@@ -709,18 +709,18 @@ PQclear(PGresult *res)
709709
}
710710

711711
if (res->events)
712-
free(res->events);
712+
pqPfree(res->events);
713713

714714
/* Free all the subsidiary blocks */
715715
while ((block = res->curBlock) != NULL)
716716
{
717717
res->curBlock = block->next;
718-
free(block);
718+
pqPfree(block);
719719
}
720720

721721
/* Free the top-level tuple pointer array */
722722
if (res->tuples)
723-
free(res->tuples);
723+
pqPfree(res->tuples);
724724

725725
/* zero out the pointer fields to catch programming errors */
726726
res->attDescs = NULL;
@@ -732,20 +732,20 @@ PQclear(PGresult *res)
732732
/* res->curBlock was zeroed out earlier */
733733

734734
if (res->extras)
735-
free(res->extras);
735+
pqPfree(res->extras);
736736
res->extraslen = 0;
737737
res->extras = NULL;
738738

739739
if (res->aotupcounts)
740-
free(res->aotupcounts);
740+
pqPfree(res->aotupcounts);
741741
res->naotupcounts = 0;
742742

743743
if (res->waitGxids)
744-
free(res->waitGxids);
744+
pqPfree(res->waitGxids);
745745
res->waitGxids = NULL;
746746
res->nWaits = 0;
747747
/* Free the PGresult structure itself */
748-
free(res);
748+
pqPfree(res);
749749
}
750750

751751
/*
@@ -951,10 +951,10 @@ pqAddTuple(PGresult *res, PGresAttValue *tup, const char **errmsgp)
951951

952952
if (res->tuples == NULL)
953953
newTuples = (PGresAttValue **)
954-
malloc(newSize * sizeof(PGresAttValue *));
954+
pqPalloc(newSize * sizeof(PGresAttValue *));
955955
else
956956
newTuples = (PGresAttValue **)
957-
realloc(res->tuples, newSize * sizeof(PGresAttValue *));
957+
pqRepalloc(res->tuples, newSize * sizeof(PGresAttValue *));
958958
if (!newTuples)
959959
return FALSE; /* malloc or realloc failed */
960960
res->tupArrSize = newSize;

src/interfaces/libpq/fe-protocol3.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ pqParseInput3(PGconn *conn)
267267

268268
/* now just loop through */
269269
conn->result->aotupcounts =
270-
malloc(sizeof(PQaoRelTupCount) * conn->result->naotupcounts);
270+
pqPalloc(sizeof(PQaoRelTupCount) * conn->result->naotupcounts);
271271
ao = conn->result->aotupcounts;
272272
for (i = 0; i < conn->result->naotupcounts; i++)
273273
{
@@ -529,7 +529,7 @@ pqParseInput3(PGconn *conn)
529529

530530
if (pqGetInt(&conn->result->extraslen, 4, conn))
531531
return;
532-
conn->result->extras = malloc(conn->result->extraslen);
532+
conn->result->extras = pqPalloc(conn->result->extraslen);
533533
if (pqGetnchar((char *)conn->result->extras, conn->result->extraslen, conn))
534534
return;
535535
conn->asyncStatus = PGASYNC_READY;
@@ -555,7 +555,7 @@ pqParseInput3(PGconn *conn)
555555
{
556556
if (conn->result->waitGxids == NULL)
557557
conn->result->waitGxids =
558-
malloc(sizeof(int) * conn->result->nWaits);
558+
pqPalloc(sizeof(int) * conn->result->nWaits);
559559
for (i = 0; i < conn->result->nWaits; i++)
560560
{
561561
int gxid;

src/interfaces/libpq/libpq-int.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,34 @@ typedef struct
8383
#endif
8484
#endif /* USE_SSL */
8585

86+
/*
87+
* Definitions meant to lessen the malloc() usage for CDB routines in
88+
* server-sided code for struct PGresult allocations.
89+
*/
90+
#ifndef FRONTEND
91+
#include "postgres.h"
92+
#include "utils/memutils.h"
93+
94+
/*
95+
* CurTransactionContext's lifetime lasts until the end of the current query,
96+
* which does the job well for backends. Auxiliary processes do not set
97+
* transaction contexts, so we have to use TopMemoryContext to achieve a
98+
* lifetime equivalent to malloc().
99+
*/
100+
#define PQ_PALLOC_CONTEXT \
101+
((CurTransactionContext != NULL) ? CurTransactionContext : TopMemoryContext)
102+
103+
#define pqPalloc(sz) MemoryContextAlloc(PQ_PALLOC_CONTEXT, sz)
104+
#define pqPstrdup(x) MemoryContextStrdup(PQ_PALLOC_CONTEXT, x)
105+
#define pqRepalloc(x, sz) repalloc(x, sz)
106+
#define pqPfree(x) pfree(x)
107+
#else
108+
#define pqPalloc(sz) malloc(sz)
109+
#define pqPstrdup(x) strdup(x)
110+
#define pqRepalloc(x, sz) realloc(x, sz)
111+
#define pqPfree(x) free(x)
112+
#endif
113+
86114
/*
87115
* POSTGRES backend dependent Constants.
88116
*/
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
-- start_matchsubs
2+
-- m/ERROR: Out of memory.*/
3+
-- s/ERROR: Out of memory.*/ERROR: Out of memory/
4+
-- m/ERROR: Canceling query because of high VMEM usage.*/
5+
-- s/ERROR: Canceling query because of high VMEM usage.*/ERROR: Out of memory/
6+
-- end_matchsubs
7+
8+
-- start_ignore
9+
CREATE EXTENSION gp_inject_fault;
10+
DROP FUNCTION gp_mock_cdbdispatchcommand(amount int);
11+
DROP ROLE role_oom_test;
12+
DROP RESOURCE GROUP rg_oom_test;
13+
-- end_ignore
14+
CREATE RESOURCE GROUP rg_oom_test
15+
WITH (cpu_rate_limit=20, memory_limit=20, memory_shared_quota=100);
16+
CREATE ROLE role_oom_test RESOURCE GROUP rg_oom_test;
17+
CREATE OR REPLACE FUNCTION gp_mock_cdbdispatchcommand(amount int)
18+
RETURNS SETOF text AS '@abs_srcdir@/../regress/regress.so',
19+
'gp_mock_cdbdispatchcommand' LANGUAGE C;
20+
21+
1: SET ROLE TO role_oom_test;
22+
23+
-- Freeze coordinator's session after it reads results from segments.
24+
SELECT gp_inject_fault('check_dispatch_result_end', 'suspend', dbid)
25+
FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
26+
27+
-- Send the heavy query.
28+
1&: SELECT count(*) FROM gp_mock_cdbdispatchcommand(1000000);
29+
30+
-- Wait until we receive everything.
31+
SELECT gp_wait_until_triggered_fault('check_dispatch_result_end', 1, dbid)
32+
FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
33+
34+
-- The query should've used ~135 MB of memory. Allow 15 MB error.
35+
WITH r AS (
36+
SELECT (memory_usage->'-1'->'used')::text::int AS mb
37+
FROM gp_toolkit.gp_resgroup_status WHERE rsgname = 'rg_oom_test'
38+
)
39+
SELECT r.mb < 150 AS "under 150 MB", r.mb > 120 AS "above 120 MB"
40+
FROM r;
41+
42+
SELECT gp_inject_fault('check_dispatch_result_end', 'reset', dbid) FROM
43+
gp_segment_configuration WHERE role = 'p' AND content = -1;
44+
45+
1<:
46+
47+
DROP FUNCTION gp_mock_cdbdispatchcommand(amount int);
48+
DROP ROLE role_oom_test;
49+
DROP RESOURCE GROUP rg_oom_test;

src/test/isolation2/isolation2_resgroup_schedule

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,6 @@ test: resgroup/resgroup_large_group_id
6161

6262
test: resgroup/resgroup_startup_memory
6363

64+
test: resgroup/resgroup_oom
65+
6466
test: resgroup/disable_resgroup
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
-- start_matchsubs
2+
-- m/ERROR: Out of memory.*/
3+
-- s/ERROR: Out of memory.*/ERROR: Out of memory/
4+
-- m/ERROR: Canceling query because of high VMEM usage.*/
5+
-- s/ERROR: Canceling query because of high VMEM usage.*/ERROR: Out of memory/
6+
-- end_matchsubs
7+
8+
CREATE RESOURCE GROUP rg_oom_test WITH (cpu_rate_limit=20, memory_limit=20, memory_shared_quota=100);
9+
CREATE
10+
CREATE ROLE role_oom_test RESOURCE GROUP rg_oom_test;
11+
CREATE
12+
CREATE OR REPLACE FUNCTION gp_mock_cdbdispatchcommand(amount int) RETURNS SETOF text AS '@abs_srcdir@/../regress/regress.so', 'gp_mock_cdbdispatchcommand' LANGUAGE C;
13+
CREATE
14+
15+
1: SET ROLE TO role_oom_test;
16+
SET
17+
18+
-- Freeze coordinator's session after it reads results from segments.
19+
SELECT gp_inject_fault('check_dispatch_result_end', 'suspend', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
20+
gp_inject_fault
21+
-----------------
22+
Success:
23+
(1 row)
24+
25+
-- Send the heavy query.
26+
1&: SELECT count(*) FROM gp_mock_cdbdispatchcommand(1000000); <waiting ...>
27+
28+
-- Wait until we receive everything.
29+
SELECT gp_wait_until_triggered_fault('check_dispatch_result_end', 1, dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
30+
gp_wait_until_triggered_fault
31+
-------------------------------
32+
Success:
33+
(1 row)
34+
35+
-- The query should've used ~135 MB of memory. Allow 15 MB error.
36+
WITH r AS ( SELECT (memory_usage->'-1'->'used')::text::int AS mb FROM gp_toolkit.gp_resgroup_status WHERE rsgname = 'rg_oom_test' ) SELECT r.mb < 150 AS "under 150 MB", r.mb > 120 AS "above 120 MB" FROM r;
37+
under 150 MB | above 120 MB
38+
--------------+--------------
39+
t | t
40+
(1 row)
41+
42+
SELECT gp_inject_fault('check_dispatch_result_end', 'reset', dbid) FROM gp_segment_configuration WHERE role = 'p' AND content = -1;
43+
gp_inject_fault
44+
-----------------
45+
Success:
46+
(1 row)
47+
48+
1<: <... completed>
49+
count
50+
---------
51+
4000000
52+
(1 row)
53+
54+
DROP FUNCTION gp_mock_cdbdispatchcommand(amount int);
55+
DROP
56+
DROP ROLE role_oom_test;
57+
DROP
58+
DROP RESOURCE GROUP rg_oom_test;
59+
DROP

0 commit comments

Comments
 (0)