Skip to content

Commit 0fac296

Browse files
committed
orca: fallback to Postgres optimizer on cross-slice replicated CTE Consumer.
Inspired by greengage 51fe92e: before Expr->DXL translation, walk the physical tree and track which slice each CTE Producer and Consumer lives on. If a Consumer is on a different slice than its Producer and the Producer's distribution is replicated, force a fallback to the Postgres optimizer. The replicated filter is essential: ordinary cross-slice CTE plans (non-replicated Producer with Gather/Redistribute Consumer) are a normal ORCA pattern and must not trigger fallback. 51fe92e doesn't trigger when a CTE over a replicated table is referenced from a scalar subquery, so the query hangs. This commit replaces the single-point check with a whole-tree walker that catches both cases. Tests: bring the original qp_orca_fallback case from 51fe92e and add a scalar-subquery reproducer in shared_scan guarded by statement_timeout. (cherry picked from commit open-gpdb/gpdb@3a9aebf)
1 parent 980ed21 commit 0fac296

9 files changed

Lines changed: 411 additions & 0 deletions

File tree

src/backend/gporca/libgpopt/include/gpopt/base/CUtils.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1026,6 +1026,21 @@ class CUtils
10261026
static CTableDescriptorHashSet *RemoveDuplicateMdids(
10271027
CMemoryPool *mp, CTableDescriptorHashSet *tabdescs);
10281028

1029+
// hash set from CTE ids
1030+
typedef CHashSet<ULONG, gpos::HashValue<ULONG>, gpos::Equals<ULONG>,
1031+
CleanupDelete<ULONG> >
1032+
UlongCteIdHashSet;
1033+
1034+
static void CollectConsumersAndProducers(CMemoryPool *mp,
1035+
CExpression *pexpr,
1036+
ULongPtrArray *cteConsumers,
1037+
UlongCteIdHashSet *cteProducerSet);
1038+
1039+
static BOOL hasUnpairedCTEConsumer(CMemoryPool *mp, CExpression *pexpr);
1040+
1041+
static BOOL FHasCrossSliceReplicatedCTEConsumer(CMemoryPool *mp,
1042+
CExpression *pexpr);
1043+
10291044
static CExpression *ReplaceColrefWithProjectExpr(CMemoryPool *mp,
10301045
CExpression *pexpr,
10311046
CColRef *pcolref,

src/backend/gporca/libgpopt/src/base/CUtils.cpp

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,180 @@ CUtils::FHasCTEAnchor(CExpression *pexpr)
978978
return false;
979979
}
980980

981+
// return CTEConsumers' and a set of CTEProducers' CTE ids in the given subtree
982+
void
983+
CUtils::CollectConsumersAndProducers(CMemoryPool *mp, CExpression *pexpr,
984+
ULongPtrArray *cteConsumers,
985+
UlongCteIdHashSet *cteProducerSet)
986+
{
987+
COperator *pop = pexpr->Pop();
988+
989+
if (COperator::EopPhysicalCTEConsumer == pop->Eopid())
990+
{
991+
cteConsumers->Append(GPOS_NEW(mp) ULONG(
992+
CPhysicalCTEConsumer::PopConvert(pop)->UlCTEId()));
993+
}
994+
else if (COperator::EopPhysicalCTEProducer == pop->Eopid())
995+
{
996+
cteProducerSet->Insert(GPOS_NEW(mp) ULONG(
997+
CPhysicalCTEProducer::PopConvert(pop)->UlCTEId()));
998+
}
999+
1000+
for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
1001+
{
1002+
CExpression *pexprChild = (*pexpr)[ul];
1003+
1004+
if (!pexprChild->Pop()->FScalar())
1005+
{
1006+
CollectConsumersAndProducers(mp, pexprChild, cteConsumers,
1007+
cteProducerSet);
1008+
}
1009+
}
1010+
}
1011+
1012+
BOOL
1013+
CUtils::hasUnpairedCTEConsumer(CMemoryPool *mp, CExpression *pexpr)
1014+
{
1015+
BOOL hasUnpairedConsumer = false;
1016+
1017+
ULongPtrArray *cteConsumers = GPOS_NEW(mp) ULongPtrArray(mp);
1018+
UlongCteIdHashSet *cteProducerSet = GPOS_NEW(mp) UlongCteIdHashSet(mp);
1019+
1020+
CollectConsumersAndProducers(mp, pexpr, cteConsumers, cteProducerSet);
1021+
1022+
// check if every consumer's producer is in ProducerSet
1023+
for (ULONG ul = 0; ul < cteConsumers->Size(); ul++)
1024+
{
1025+
if (!cteProducerSet->Contains((*cteConsumers)[ul]))
1026+
{
1027+
hasUnpairedConsumer = true;
1028+
break;
1029+
}
1030+
}
1031+
cteConsumers->Release();
1032+
cteProducerSet->Release();
1033+
1034+
return hasUnpairedConsumer;
1035+
}
1036+
1037+
// True if the distribution is replicated-like.
1038+
static BOOL
1039+
FReplicatedLikeDistribution(CDistributionSpec::EDistributionType edt)
1040+
{
1041+
return (CDistributionSpec::EdtStrictReplicated == edt ||
1042+
CDistributionSpec::EdtTaintedReplicated == edt ||
1043+
CDistributionSpec::EdtUniversal == edt);
1044+
}
1045+
1046+
struct SCTEInfo
1047+
{
1048+
ULONG cteId;
1049+
ULONG sliceId;
1050+
1051+
SCTEInfo(ULONG cte_id, ULONG slice_id) : cteId(cte_id), sliceId(slice_id)
1052+
{
1053+
}
1054+
};
1055+
1056+
typedef CDynamicPtrArray<SCTEInfo, CleanupDelete<SCTEInfo> > CTEInfoArray;
1057+
1058+
// Walk the physical tree, recording the slice id of every replicated
1059+
// CTE Producer and every CTE Consumer. Slices are delimited by Motion
1060+
// nodes: each non-scalar child of a Motion lives in a fresh slice --
1061+
// same motId-stack idea as in apply_shareinput_xslice.
1062+
static void
1063+
CollectCTESlices(CMemoryPool *mp, CExpression *pexpr, ULONG curSlice,
1064+
ULONG *pNextSlice, CTEInfoArray *prodInfos,
1065+
CTEInfoArray *consInfos)
1066+
{
1067+
COperator *pop = pexpr->Pop();
1068+
1069+
if (COperator::EopPhysicalCTEProducer == pop->Eopid())
1070+
{
1071+
// Producer's distribution comes from its only child -- inspect
1072+
// it there. Skip non-replicated Producers; they cannot trigger
1073+
// the cross-slice issue we are checking for.
1074+
GPOS_ASSERT(1 == pexpr->Arity());
1075+
CExpression *pexprChild = (*pexpr)[0];
1076+
CDrvdPropPlan *pdpplan =
1077+
CDrvdPropPlan::Pdpplan(pexprChild->PdpDerive());
1078+
1079+
if (FReplicatedLikeDistribution(pdpplan->Pds()->Edt()))
1080+
{
1081+
prodInfos->Append(GPOS_NEW(mp) SCTEInfo(
1082+
CPhysicalCTEProducer::PopConvert(pop)->UlCTEId(), curSlice));
1083+
}
1084+
}
1085+
else if (COperator::EopPhysicalCTEConsumer == pop->Eopid())
1086+
{
1087+
// Consumer is a leaf -- record (cteId, curSlice) and let the
1088+
// caller decide later, once the whole tree has been walked.
1089+
consInfos->Append(GPOS_NEW(mp) SCTEInfo(
1090+
CPhysicalCTEConsumer::PopConvert(pop)->UlCTEId(), curSlice));
1091+
}
1092+
1093+
BOOL isMotion = CUtils::FPhysicalMotion(pop);
1094+
1095+
for (ULONG ul = 0; ul < pexpr->Arity(); ul++)
1096+
{
1097+
CExpression *pexprChild = (*pexpr)[ul];
1098+
1099+
if (pexprChild->Pop()->FScalar())
1100+
{
1101+
continue;
1102+
}
1103+
1104+
ULONG childSlice = curSlice;
1105+
if (isMotion)
1106+
{
1107+
(*pNextSlice)++;
1108+
childSlice = *pNextSlice;
1109+
}
1110+
1111+
CollectCTESlices(mp, pexprChild, childSlice, pNextSlice, prodInfos,
1112+
consInfos);
1113+
}
1114+
}
1115+
1116+
BOOL
1117+
CUtils::FHasCrossSliceReplicatedCTEConsumer(CMemoryPool *mp, CExpression *pexpr)
1118+
{
1119+
if (NULL == pexpr)
1120+
{
1121+
return false;
1122+
}
1123+
1124+
CTEInfoArray *prodInfos = GPOS_NEW(mp) CTEInfoArray(mp);
1125+
CTEInfoArray *consInfos = GPOS_NEW(mp) CTEInfoArray(mp);
1126+
ULONG nextSlice = 0;
1127+
1128+
CollectCTESlices(mp, pexpr, 0 /*curSlice*/, &nextSlice, prodInfos,
1129+
consInfos);
1130+
1131+
BOOL cross = false;
1132+
1133+
for (ULONG ic = 0; ic < consInfos->Size(); ic++)
1134+
{
1135+
SCTEInfo *cons = (*consInfos)[ic];
1136+
1137+
for (ULONG ip = 0; ip < prodInfos->Size(); ip++)
1138+
{
1139+
SCTEInfo *prod = (*prodInfos)[ip];
1140+
if (prod->cteId == cons->cteId && prod->sliceId != cons->sliceId)
1141+
{
1142+
cross = true;
1143+
goto lExit;
1144+
}
1145+
}
1146+
}
1147+
lExit:
1148+
1149+
prodInfos->Release();
1150+
consInfos->Release();
1151+
1152+
return cross;
1153+
}
1154+
9811155
//---------------------------------------------------------------------------
9821156
// @class:
9831157
// CUtils::FHasSubqueryOrApply

src/backend/gporca/libgpopt/src/translate/CTranslatorExprToDXL.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,20 @@ CTranslatorExprToDXL::PdxlnTranslate(CExpression *pexpr,
265265

266266
GPOS_ASSERT(nullptr == m_pdpplan);
267267

268+
// Walk the physical tree and detect a CTE Consumer placed on a
269+
// different slice than its Producer when the Producer's output is
270+
// replicated-like (StrictReplicated/TaintedReplicated/Universal).
271+
// Fall back to the Postgres optimizer if it is detected because
272+
// it breaks Producer-Consumer locality and can hang the
273+
// query at execution.
274+
if (CUtils::FHasCrossSliceReplicatedCTEConsumer(m_mp, pexpr))
275+
{
276+
GPOS_RAISE(
277+
gpdxl::ExmaDXL, gpdxl::ExmiExpr2DXLUnsupportedFeature,
278+
GPOS_WSZ_LIT(
279+
"CTE Consumer placed on a different slice than its replicated Producer"));
280+
}
281+
268282
m_pdpplan = CDrvdPropPlan::Pdpplan(pexpr->PdpDerive());
269283
m_pdpplan->AddRef();
270284

@@ -4250,6 +4264,10 @@ CTranslatorExprToDXL::BuildScalarSubplans(
42504264
{
42514265
const ULONG size = pdrgpcrInner->Size();
42524266

4267+
// Fallback to Postgres optimizer if the SubPlan's inner expression contains a
4268+
// CTE Consumer whose Producer lives outside this subtree. Such a Consumer
4269+
// would become a cross-slice Shared Scan reader without a local Producer,
4270+
// which can hang the query or fail at execution time.
42534271
CDXLNodeArray *pdrgpdxlnInner = GPOS_NEW(m_mp) CDXLNodeArray(m_mp);
42544272
for (ULONG ul = 0; ul < size; ul++)
42554273
{

src/test/regress/expected/qp_orca_fallback.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,6 +293,52 @@ SELECT * FROM jsonb_array_elements('["b", "a"]'::jsonb) WITH ORDINALITY;
293293
"a" | 2
294294
(2 rows)
295295

296+
-- ORCA should fallback in case there are any CTE Consumers beneath a duplicate-hazard motion
297+
-- start_ignore
298+
DROP TABLE IF EXISTS tbl1, tbl2;
299+
-- end_ignore
300+
CREATE TABLE tbl2 (
301+
id numeric NULL,
302+
refrcode varchar(255) NULL,
303+
referenceid numeric NULL
304+
)
305+
DISTRIBUTED REPLICATED;
306+
CREATE TABLE tbl1 (
307+
id bigserial NOT NULL,
308+
iscalctrg varchar(15) NOT NULL,
309+
iscalcdetail varchar(15) NULL
310+
)
311+
DISTRIBUTED REPLICATED;
312+
EXPLAIN WITH
313+
t1 AS (SELECT * FROM tbl1),
314+
t2 AS (SELECT id, refrcode FROM tbl2 WHERE REFERENCEID = 101991)
315+
SELECT p.*FROM t1 p
316+
JOIN t2 r
317+
ON p.isCalcTRG = r.RefrCode
318+
JOIN t2 r1
319+
ON p.isCalcDetail = r1.RefrCode
320+
LIMIT 1;
321+
QUERY PLAN
322+
---------------------------------------------------------------------------------------------
323+
Limit (cost=0.34..83.24 rows=1 width=104)
324+
-> Gather Motion 1:1 (slice1; segments: 1) (cost=0.34..581.83 rows=8 width=104)
325+
-> Hash Join (cost=0.34..581.69 rows=8 width=104)
326+
Hash Cond: ((tbl1.iscalcdetail)::text = (r1.refrcode)::text)
327+
-> Hash Join (cost=0.17..580.97 rows=130 width=104)
328+
Hash Cond: ((tbl1.iscalctrg)::text = (r.refrcode)::text)
329+
-> Seq Scan on tbl1 (cost=0.00..344.00 rows=24400 width=104)
330+
-> Hash (cost=0.11..0.11 rows=2 width=516)
331+
-> Subquery Scan on r (cost=0.00..0.11 rows=6 width=516)
332+
-> Seq Scan on tbl2 (cost=0.00..166.25 rows=6 width=548)
333+
Filter: (referenceid = '101991'::numeric)
334+
-> Hash (cost=0.11..0.11 rows=2 width=516)
335+
-> Subquery Scan on r1 (cost=0.00..0.11 rows=6 width=516)
336+
-> Seq Scan on tbl2 tbl2_1 (cost=0.00..166.25 rows=6 width=548)
337+
Filter: (referenceid = '101991'::numeric)
338+
Optimizer: Postgres query optimizer
339+
(16 rows)
340+
341+
DROP TABLE tbl1, tbl2;
296342
-- start_ignore
297343
-- FIXME: gpcheckcat fails due to mismatching distribution policy if this table isn't dropped
298344
-- Keep this table around once this is fixed

src/test/regress/expected/qp_orca_fallback_optimizer.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,54 @@ DETAIL: Falling back to Postgres-based planner because GPORCA does not support
353353
"a" | 2
354354
(2 rows)
355355

356+
-- ORCA should fallback in case there are any CTE Consumers beneath a duplicate-hazard motion
357+
-- start_ignore
358+
DROP TABLE IF EXISTS tbl1, tbl2;
359+
-- end_ignore
360+
CREATE TABLE tbl2 (
361+
id numeric NULL,
362+
refrcode varchar(255) NULL,
363+
referenceid numeric NULL
364+
)
365+
DISTRIBUTED REPLICATED;
366+
CREATE TABLE tbl1 (
367+
id bigserial NOT NULL,
368+
iscalctrg varchar(15) NOT NULL,
369+
iscalcdetail varchar(15) NULL
370+
)
371+
DISTRIBUTED REPLICATED;
372+
EXPLAIN WITH
373+
t1 AS (SELECT * FROM tbl1),
374+
t2 AS (SELECT id, refrcode FROM tbl2 WHERE REFERENCEID = 101991)
375+
SELECT p.*FROM t1 p
376+
JOIN t2 r
377+
ON p.isCalcTRG = r.RefrCode
378+
JOIN t2 r1
379+
ON p.isCalcDetail = r1.RefrCode
380+
LIMIT 1;
381+
INFO: GPORCA failed to produce a plan, falling back to planner
382+
DETAIL: Feature not supported: CTE Consumer placed on a different slice than its replicated Producer
383+
QUERY PLAN
384+
---------------------------------------------------------------------------------------------
385+
Limit (cost=0.34..83.24 rows=1 width=104)
386+
-> Gather Motion 1:1 (slice1; segments: 1) (cost=0.34..581.83 rows=8 width=104)
387+
-> Hash Join (cost=0.34..581.69 rows=8 width=104)
388+
Hash Cond: ((tbl1.iscalcdetail)::text = (r1.refrcode)::text)
389+
-> Hash Join (cost=0.17..580.97 rows=130 width=104)
390+
Hash Cond: ((tbl1.iscalctrg)::text = (r.refrcode)::text)
391+
-> Seq Scan on tbl1 (cost=0.00..344.00 rows=24400 width=104)
392+
-> Hash (cost=0.11..0.11 rows=2 width=516)
393+
-> Subquery Scan on r (cost=0.00..0.11 rows=6 width=516)
394+
-> Seq Scan on tbl2 (cost=0.00..166.25 rows=6 width=548)
395+
Filter: (referenceid = '101991'::numeric)
396+
-> Hash (cost=0.11..0.11 rows=2 width=516)
397+
-> Subquery Scan on r1 (cost=0.00..0.11 rows=6 width=516)
398+
-> Seq Scan on tbl2 tbl2_1 (cost=0.00..166.25 rows=6 width=548)
399+
Filter: (referenceid = '101991'::numeric)
400+
Optimizer: Postgres query optimizer
401+
(16 rows)
402+
403+
DROP TABLE tbl1, tbl2;
356404
-- start_ignore
357405
-- FIXME: gpcheckcat fails due to mismatching distribution policy if this table isn't dropped
358406
-- Keep this table around once this is fixed

src/test/regress/expected/shared_scan.out

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,3 +234,31 @@ where
234234
Optimizer: Postgres query optimizer
235235
(37 rows)
236236

237+
-- ORCA should fallback when a CTE over a replicated table is referenced
238+
-- from multiple scalar subqueries.
239+
-- ss_t1 needs enough rows (40000) to push ORCA to the cross-slice plan;
240+
-- with fewer rows the bug does not manifest and the test would silently
241+
-- pass even without the fix.
242+
CREATE TABLE ss_t1 AS
243+
SELECT generate_series(1, 40000) id
244+
DISTRIBUTED BY (id);
245+
CREATE TABLE ss_t2 AS
246+
SELECT * FROM (VALUES (1, 10), (2, 20)) AS v(id, v)
247+
DISTRIBUTED REPLICATED;
248+
ANALYZE ss_t1;
249+
ANALYZE ss_t2;
250+
SET statement_timeout = '15s';
251+
WITH
252+
cte1 AS (SELECT v FROM ss_t2 WHERE id = 1),
253+
cte2 AS (SELECT v FROM ss_t2 WHERE id = 2)
254+
SELECT (SELECT v FROM cte1) + (SELECT v FROM cte2) +
255+
(SELECT v FROM cte1) + (SELECT v FROM cte2) AS result
256+
FROM ss_t1
257+
LIMIT 1;
258+
result
259+
--------
260+
60
261+
(1 row)
262+
263+
RESET statement_timeout;
264+
DROP TABLE ss_t1, ss_t2;

0 commit comments

Comments
 (0)