Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions gpopt/translate/CTranslatorQueryToDXL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3269,11 +3269,12 @@ CTranslatorQueryToDXL::TranslateFromClauseToDXL(Node *node)

/* forceDistRandom is GPDB-only */

if (rte->lateral)
{
GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
GPOS_WSZ_LIT("LATERAL"));
}
// LATERAL is supported. Outer references from a LATERAL RTE's inner
// query resolve against the parent's CMappingVarColId (set up by the
// translator ctor) via varlevelsup; the optimizer side requires the
// commutativity guard in CXformInnerJoinCommutativity and the
// outer-refs-as-PARAM_EXEC plumbing in PdxlnNLJoin / PdxlnHashJoin to
// execute correlated joins correctly.

if (rte->funcordinality)
{
Expand Down
8 changes: 8 additions & 0 deletions libgpopt/include/gpopt/xforms/CJoinOrderDPv2.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,14 @@ class CJoinOrderDPv2 : public CJoinOrder,
// outer references, if any
CColRefSet *m_outer_refs;

// LATERAL-style sibling requirements: m_atom_sibling_required[i] is the
// bitset of atom indices whose output columns atom i references. An atom
// can only appear in a join subset that also contains all of its
// requirements; otherwise the subset is unexecutable (the atom's outer
// refs would be unbound). Populated in the constructor and consulted in
// GetJoinExpr.
CBitSetArray *m_atom_sibling_required;

CMemoryPool *m_mp;

SLevelInfo *
Expand Down
16 changes: 11 additions & 5 deletions libgpopt/include/gpopt/xforms/CXformApply2Join.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ class CXformApply2Join : public CXformExploration
return;
}

TApply *popApply = TApply::PopConvert(pexprApply->Pop());
CColRefArray *colref_array = popApply->PdrgPcrInner();
if (nullptr == colref_array)
{
// Apply created from LATERAL (or any non-scalar-subquery source)
// has no inner scalar colref; the correlated-apply form built
// here is scalar-subquery-shaped and does not apply. Skip.
return;
}
GPOS_ASSERT(1 == colref_array->Size());

CExpression *pexprInner = (*pexprApply)[1];
CExpression *pexprOuter = (*pexprApply)[0];
CExpression *pexprScalar = (*pexprApply)[2];
Expand All @@ -77,11 +88,6 @@ class CXformApply2Join : public CXformExploration
pexprScalar->AddRef();
CExpression *pexprResult = nullptr;

TApply *popApply = TApply::PopConvert(pexprApply->Pop());
CColRefArray *colref_array = popApply->PdrgPcrInner();
GPOS_ASSERT(nullptr != colref_array);
GPOS_ASSERT(1 == colref_array->Size());

colref_array->AddRef();
COperator::EOperatorId eopidSubq = popApply->EopidOriginSubq();
COperator::EOperatorId op_id = pexprApply->Pop()->Eopid();
Expand Down
7 changes: 1 addition & 6 deletions libgpopt/include/gpopt/xforms/CXformInnerJoinCommutativity.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,7 @@ class CXformInnerJoinCommutativity : public CXformExploration
BOOL FCompatible(CXform::EXformId exfid) override;

// compute xform promise for a given expression handle
EXformPromise
Exfp(CExpressionHandle & // exprhdl
) const override
{
return CXform::ExfpHigh;
}
EXformPromise Exfp(CExpressionHandle &exprhdl) const override;

// actual transform
void Transform(CXformContext *pxfctxt, CXformResult *pxfres,
Expand Down
7 changes: 4 additions & 3 deletions libgpopt/src/base/CUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4928,7 +4928,8 @@ CUtils::PexprMatchEqualityOrINDF(
CExpression *
CUtils::MakeJoinWithoutInferredPreds(CMemoryPool *mp, CExpression *join_expr)
{
GPOS_ASSERT(COperator::EopLogicalInnerJoin == join_expr->Pop()->Eopid());
GPOS_ASSERT(COperator::EopLogicalInnerJoin == join_expr->Pop()->Eopid() ||
COperator::EopLogicalLeftOuterJoin == join_expr->Pop()->Eopid());

CExpressionHandle expression_handle(mp);
expression_handle.Attach(join_expr);
Expand Down Expand Up @@ -5047,11 +5048,11 @@ CUtils::Equals(const IMDId *mdid, const IMDId *other_mdid)
}

// operators from which the inferred predicates can be removed
// NB: currently, only inner join is included, but we can add more later.
BOOL
CUtils::CanRemoveInferredPredicates(COperator::EOperatorId op_id)
{
return op_id == COperator::EopLogicalInnerJoin;
return op_id == COperator::EopLogicalInnerJoin ||
op_id == COperator::EopLogicalLeftOuterJoin;
}

CExpressionArrays *
Expand Down
17 changes: 17 additions & 0 deletions libgpopt/src/operators/CExpressionPreprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,23 @@ CExpressionPreprocessor::PexprPruneUnusedComputedColsRecursive(
CExpressionArray *pdrgpexpr = GPOS_NEW(mp) CExpressionArray(mp);
const ULONG ulChildren = pexpr->Arity();

// Sibling-correlated references: a relational child (e.g. the inner side
// of an Apply or a LATERAL-style Join) may reference columns produced by
// another relational child. Those references look like outer references
// to the inner child but are satisfied by the sibling, so they must be
// kept in pcrsReqd to prevent pruning the producing sibling's project
// list. Without this, derived-table computed columns (LATERAL inner ref
// to `a.val*2 AS dv`) get dropped from the outer side and leave dangling
// CScalarIdent in the inner predicate.
for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild = (*pexpr)[ul];
if (pexprChild->Pop()->FLogical())
{
pcrsReqd->Include(pexprChild->DeriveOuterReferences());
}
}

for (ULONG ul = 0; ul < ulChildren; ul++)
{
CExpression *pexprChild =
Expand Down
8 changes: 8 additions & 0 deletions libgpopt/src/operators/CLogicalInnerApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ CLogicalInnerApply::PopCopyWithRemappedColumns(CMemoryPool *mp,
UlongToColRefMap *colref_mapping,
BOOL must_exist)
{
if (nullptr == m_pdrgpcrInner)
{
// LATERAL-derived Apply has no inner scalar colref. The 2-arg ctor
// asserts pdrgpcrInner is non-null+non-empty, so use the 1-arg
// form which preserves the nullptr.
return GPOS_NEW(mp) CLogicalInnerApply(mp);
}

CColRefArray *pdrgpcrInner =
CUtils::PdrgpcrRemap(mp, m_pdrgpcrInner, colref_mapping, must_exist);

Expand Down
7 changes: 7 additions & 0 deletions libgpopt/src/operators/CLogicalLeftOuterApply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,13 @@ COperator *
CLogicalLeftOuterApply::PopCopyWithRemappedColumns(
CMemoryPool *mp, UlongToColRefMap *colref_mapping, BOOL must_exist)
{
if (nullptr == m_pdrgpcrInner)
{
// LATERAL-derived Apply: no inner scalar colref. Use the 1-arg ctor
// (the 2-arg form asserts pdrgpcrInner is non-null+non-empty).
return GPOS_NEW(mp) CLogicalLeftOuterApply(mp);
}

CColRefArray *pdrgpcrInner =
CUtils::PdrgpcrRemap(mp, m_pdrgpcrInner, colref_mapping, must_exist);

Expand Down
38 changes: 38 additions & 0 deletions libgpopt/src/translate/CTranslatorDXLToExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
#include "gpopt/operators/CLogicalForeignGet.h"
#include "gpopt/operators/CLogicalGbAgg.h"
#include "gpopt/operators/CLogicalGet.h"
#include "gpopt/operators/CLogicalInnerApply.h"
#include "gpopt/operators/CLogicalInsert.h"
#include "gpopt/operators/CLogicalLeftOuterApply.h"
#include "gpopt/operators/CLogicalIntersect.h"
#include "gpopt/operators/CLogicalIntersectAll.h"
#include "gpopt/operators/CLogicalLimit.h"
Expand Down Expand Up @@ -2094,6 +2096,42 @@ CTranslatorDXLToExpr::PexprLogicalJoin(const CDXLNode *dxlnode)
// get the scalar condition and then translate it
CDXLNode *pdxlnCond = (*dxlnode)[ulChildCount - 1];
CExpression *pexprCond = PexprScalar(pdxlnCond);

// LATERAL fast-path: for a two-child inner or left outer join whose right
// child's top operator is a CLogicalSelect with outer references (anywhere
// in that subtree, not only in the Select's own predicate) into the left
// child's output columns, rebuild the join as a CLogicalInnerApply /
// CLogicalLeftOuterApply. The standard CXformInnerApply2InnerJoin /
// CXformLeftOuterApply2LeftOuterJoin path then pulls the correlated equi
// predicate out of the Select and lowers the result to a plain
// CLogicalInnerJoin / CLogicalLeftOuterJoin, which downstream costing can
// then implement as a HashJoin (matching PG's lateral pullup).
// Other LATERAL shapes (TVF args, ConstTableGet, Limit, Sort) stay on
// the plain-Join path and rely on the commutativity guard.
if ((EdxljtInner == join_type || EdxljtLeft == join_type) &&
2 == pdrgpexprChildren->Size())
{
CExpression *pexprLeft = (*pdrgpexprChildren)[0];
CExpression *pexprRight = (*pdrgpexprChildren)[1];
if (COperator::EopLogicalSelect == pexprRight->Pop()->Eopid())
{
CColRefSet *pcrsRightOuterRefs =
pexprRight->DeriveOuterReferences();
CColRefSet *pcrsLeftOutput = pexprLeft->DeriveOutputColumns();
if (!pcrsRightOuterRefs->IsDisjoint(pcrsLeftOutput))
{
pexprLeft->AddRef();
pexprRight->AddRef();
pdrgpexprChildren->Release();
if (EdxljtInner == join_type)
{
return CUtils::PexprLogicalApply<CLogicalInnerApply>(
m_mp, pexprLeft, pexprRight, pexprCond);
}
return CUtils::PexprLogicalApply<CLogicalLeftOuterApply>(
m_mp, pexprLeft, pexprRight, pexprCond);
}
}
}

pdrgpexprChildren->Append(pexprCond);

return CUtils::PexprLogicalJoin(m_mp, join_type, pdrgpexprChildren);
Expand Down
57 changes: 41 additions & 16 deletions libgpopt/src/translate/CTranslatorExprToDXL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4513,22 +4513,11 @@ CTranslatorExprToDXL::PdxlnNLJoin(CExpression *pexprInnerNLJ,
CExpression *pexprScalar = (*pexprInnerNLJ)[2];


#ifdef GPOS_DEBUG
// Allow outer refs in inner child when:
// (a) it's an index NLJ (outer refs are explicit and expected), or
// (b) inner child is PartitionSelector for DPE NLJ (outer refs are
// partition-key predicates referencing the probe child).
GPOS_ASSERT_IMP(
COperator::EopPhysicalInnerIndexNLJoin != pop->Eopid() &&
COperator::EopPhysicalLeftOuterIndexNLJoin != pop->Eopid() &&
COperator::EopPhysicalLeftSemiIndexNLJoin != pop->Eopid() &&
COperator::EopPhysicalLeftAntiSemiIndexNLJoin != pop->Eopid() &&
COperator::EopPhysicalPartitionSelector !=
pexprInnerChild->Pop()->Eopid(),
pexprInnerChild->DeriveOuterReferences()->IsDisjoint(
pexprOuterChild->DeriveOutputColumns()) &&
"detected outer references in NL inner child");
#endif // GPOS_DEBUG
// Outer refs in NL inner child are legal when handled below: (a) explicit
// index NLJ flavors, (b) DPE PartitionSelector, or (c) general LATERAL /
// correlated NL where the inner side references outer's output columns —
// in all three cases the refs are bound to PARAM_EXEC slots via the
// nest-params machinery in PdxlnNLJoin.

EdxlJoinType join_type = EdxljtSentinel;
BOOL is_index_nlj = false;
Expand Down Expand Up @@ -4591,6 +4580,42 @@ CTranslatorExprToDXL::PdxlnNLJoin(CExpression *pexprInnerNLJ,
GPOS_ASSERT(!"Invalid join type");
}

// General correlated NLJ (LATERAL, decorrelated subquery, or any case where
// ORCA picked plain CPhysicalInnerNLJoin / LeftOuterNLJoin with outer
// references in the inner subtree pointing back at the outer child's
// output columns). Treat as an index NLJ so the refs are bound via
// PARAM_EXEC nest params, matching the IndexNLJ code path below.
if (!is_index_nlj &&
COperator::EopPhysicalPartitionSelector !=
pexprInnerChild->Pop()->Eopid())
{
CColRefSet *pcrsInnerOuterRefs =
pexprInnerChild->DeriveOuterReferences();
CColRefSet *pcrsOuterOutput = pexprOuterChild->DeriveOutputColumns();
if (!pcrsInnerOuterRefs->IsDisjoint(pcrsOuterOutput))
{
CColRefSet *pcrsIntersect =
GPOS_NEW(m_mp) CColRefSet(m_mp, *pcrsInnerOuterRefs);
pcrsIntersect->Intersection(pcrsOuterOutput);
outer_refs = pcrsIntersect->Pdrgpcr(m_mp);
pcrsIntersect->Release();
is_index_nlj = true;
// Pre-populate the ident map so PdxlnScalar resolves these outer
// refs while emitting scalar DXL inside the inner subtree.
for (ULONG ul = 0; ul < outer_refs->Size(); ul++)
{
CColRef *pcr = (*outer_refs)[ul];
if (nullptr == m_phmcrdxlnIndexLookup->Find(pcr))
{
CDXLNode *dxlnode = CTranslatorExprToDXLUtils::PdxlnIdent(
m_mp, m_phmcrdxln, m_phmcrdxlnIndexLookup,
m_phmcrulPartColId, pcr);
m_phmcrdxlnIndexLookup->Insert(pcr, dxlnode);
}
}
}
}

// DPE NLJ: inner child is PartitionSelector wrapping AppendTableScan.
// Treat this like an index NLJ so outer (probe) column refs in the
// PartitionSelector filter are passed as PARAM_EXEC to the inner side,
Expand Down
74 changes: 73 additions & 1 deletion libgpopt/src/xforms/CJoinOrderDPv2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ CJoinOrderDPv2::CJoinOrderDPv2(CMemoryPool *mp,
m_child_pred_indexes(childPredIndexes),
m_non_inner_join_dependencies(nullptr),
m_cross_prod_penalty(GPOPT_DPV2_CROSS_JOIN_DEFAULT_PENALTY),
m_outer_refs(outerRefs)
m_outer_refs(outerRefs),
m_atom_sibling_required(nullptr)
{
m_join_levels = GPOS_NEW(mp) DPv2Levels(mp, m_ulComps + 1);
// populate levels array with n+1 levels for an n-way join
Expand Down Expand Up @@ -136,6 +137,48 @@ CJoinOrderDPv2::CJoinOrderDPv2(CMemoryPool *mp,
}
}
PopulateExpressionToEdgeMapIfNeeded();

// Precompute per-atom sibling requirements. Outer references of atom i
// that are in m_outer_refs propagate up to the parent of the NAryJoin and
// impose no constraint here; the remaining ones must be supplied by
// another atom of the same NAryJoin. If atom j's output columns include
// such a column, j is recorded as required whenever atom i appears in a
// join subset. This keeps join enumeration from forming subsets like
// {x, lateral_ref_to_y} that would leave the LATERAL's outer reference
// unbound when the partial join expression is evaluated.
m_atom_sibling_required = GPOS_NEW(mp) CBitSetArray(mp, m_ulComps);
for (ULONG i = 0; i < m_ulComps; i++)
{
m_atom_sibling_required->Append(GPOS_NEW(mp) CBitSet(mp));
}
for (ULONG i = 0; i < m_ulComps; i++)
{
CExpression *pexpr_i = m_rgpcomp[i]->m_pexpr;
CColRefSet *outer_refs_i = pexpr_i->DeriveOuterReferences();
if (outer_refs_i->IsDisjoint(m_outer_refs) &&
0 == outer_refs_i->Size())
{
continue;
}
CColRefSet *sibling_refs = GPOS_NEW(mp) CColRefSet(mp, *outer_refs_i);
sibling_refs->Difference(m_outer_refs);
if (0 < sibling_refs->Size())
{
for (ULONG j = 0; j < m_ulComps; j++)
{
if (i == j)
{
continue;
}
CColRefSet *output_j =
m_rgpcomp[j]->m_pexpr->DeriveOutputColumns();
if (!sibling_refs->IsDisjoint(output_j))
{
(*m_atom_sibling_required)[i]->ExchangeSet(j);
}
}
}
sibling_refs->Release();
}
}


Expand All @@ -159,6 +202,7 @@ CJoinOrderDPv2::~CJoinOrderDPv2()
m_top_k_expressions->Release();
m_top_k_part_expressions->Release();
m_join_levels->Release();
CRefCount::SafeRelease(m_atom_sibling_required);
m_on_pred_conjuncts->Release();
m_outer_refs->Release();
}
Expand Down Expand Up @@ -339,6 +383,34 @@ CJoinOrderDPv2::GetJoinExpr(const SGroupAndExpression &left_child_expr,
SGroupInfo *right_group_info = right_child_expr.m_group_info;
SExpressionInfo *right_expr_info = right_child_expr.GetExprInfo();

// LATERAL sibling visibility: every atom in the combined subset must have
// all of its required-sibling atoms already present. Otherwise the
// combined expression has an atom whose outer references reach an atom
// that's not yet in the join, and the resulting partial plan would leave
// those refs unbound at execution time.
if (nullptr != m_atom_sibling_required)
{
CBitSet *combined_atoms =
GPOS_NEW(m_mp) CBitSet(m_mp, *left_group_info->m_atoms);
combined_atoms->Union(right_group_info->m_atoms);
CBitSetIter iter(*combined_atoms);
BOOL valid = true;
while (valid && iter.Advance())
{
ULONG atom_id = iter.Bit();
CBitSet *required = (*m_atom_sibling_required)[atom_id];
if (0 < required->Size() && !combined_atoms->ContainsAll(required))
{
valid = false;
}
}
combined_atoms->Release();
if (!valid)
{
return nullptr;
}
}

CExpression *scalar_expr = nullptr;
CBitSet *required_on_left = nullptr;
BOOL isLOJ =
Expand Down
Loading
Loading