diff --git a/src/backend/optimizer/plan/planshare.c b/src/backend/optimizer/plan/planshare.c index f60f45f933e..f3b716364ca 100644 --- a/src/backend/optimizer/plan/planshare.c +++ b/src/backend/optimizer/plan/planshare.c @@ -52,6 +52,9 @@ make_shareinputscan(PlannerInfo *root, Plan *inputplan) sisc->scan.plan.plan_rows = inputplan->plan_rows; sisc->scan.plan.plan_width = inputplan->plan_width; + sisc->scan.plan.locustype = inputplan->locustype; + sisc->scan.plan.parallel = 0; /* No parallel ShareInputScan */ + return sisc; } diff --git a/src/test/regress/expected/cbdb_parallel.out b/src/test/regress/expected/cbdb_parallel.out index 942705e7471..da3216896ff 100644 --- a/src/test/regress/expected/cbdb_parallel.out +++ b/src/test/regress/expected/cbdb_parallel.out @@ -3516,7 +3516,59 @@ WHERE e.salary > ( David (2 rows) - +-- +-- Test https://github.com/apache/cloudberry/issues/1376 +-- +create table t1(a int, b int); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +create table t2 (like t1); +NOTICE: table doesn't have 'DISTRIBUTED BY' clause, defaulting to distribution columns from LIKE table +set gp_cte_sharing = on; +explain(locus, costs off) with x as + (select a, count(*) as b from t1 group by a union all + select a, count(*) as b from t2 group by a) + select count(*) from x a join x b on a.a = b.b; + QUERY PLAN +------------------------------------------------------------------------ + Finalize Aggregate + Locus: Entry + -> Gather Motion 3:1 (slice1; segments: 3) + Locus: Entry + -> Partial Aggregate + Locus: Hashed + -> Hash Join + Locus: Hashed + Hash Cond: (b.b = a.a) + -> Redistribute Motion 3:3 (slice2; segments: 3) + Locus: Hashed + Hash Key: b.b + -> Subquery Scan on b + Locus: Strewn + -> Shared Scan (share slice:id 2:0) + Locus: Hashed + -> Hash + Locus: Hashed + -> Subquery Scan on a + Locus: Hashed + -> Shared Scan (share slice:id 1:0) + Locus: Hashed + -> Append + Locus: Hashed + -> HashAggregate + Locus: Hashed + Group Key: t1.a + -> Seq Scan on t1 + Locus: Hashed + -> HashAggregate + Locus: Hashed + Group Key: t2.a + -> Seq Scan on t2 + Locus: Hashed + Optimizer: Postgres query optimizer +(35 rows) + +reset gp_cte_sharing; reset enable_parallel; reset min_parallel_table_scan_size; -- start_ignore diff --git a/src/test/regress/sql/cbdb_parallel.sql b/src/test/regress/sql/cbdb_parallel.sql index 0ee6f72cb2a..f9d01dd8a00 100644 --- a/src/test/regress/sql/cbdb_parallel.sql +++ b/src/test/regress/sql/cbdb_parallel.sql @@ -1132,7 +1132,20 @@ WHERE e.salary > ( SELECT AVG(salary) FROM employees WHERE department_id = e.department_id); - + +-- +-- Test https://github.com/apache/cloudberry/issues/1376 +-- +create table t1(a int, b int); +create table t2 (like t1); +set gp_cte_sharing = on; + +explain(locus, costs off) with x as + (select a, count(*) as b from t1 group by a union all + select a, count(*) as b from t2 group by a) + select count(*) from x a join x b on a.a = b.b; + +reset gp_cte_sharing; reset enable_parallel; reset min_parallel_table_scan_size;