Skip to content

Commit 5f33f44

Browse files
author
kongfanshen
committed
backport: Fix right-semi-joins in HashJoin rescans (PG18 5668a857d)
Cherry-pick of upstream PostgreSQL commit 5668a857d (Richard Guo). When ExecReScanHashJoin resets a HashJoin node for rescan, a single-batch join with no inner parameter changes reuses the existing hash table. Join types that depend on inner-tuple match flags must reset those flags before the table is reused. The original aa86129e1 patch missed JOIN_RIGHT_SEMI in that check (HJ_FILL_INNER is false for right-semi joins), so match flags left over from the previous scan could produce incorrect results on rescan. Reset the match flags for JOIN_RIGHT_SEMI as well. Cherry picked from commit 5668a857de4f3f12066b2bbc626b77be4fc95ee5.
1 parent 804c0c4 commit 5f33f44

3 files changed

Lines changed: 97 additions & 3 deletions

File tree

src/backend/executor/nodeHashjoin.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1801,10 +1801,11 @@ ExecReScanHashJoin(HashJoinState *node)
18011801
/*
18021802
* Okay to reuse the hash table; needn't rescan inner, either.
18031803
*
1804-
* However, if it's a right/right-anti/full join, we'd better
1805-
* reset the inner-tuple match flags contained in the table.
1804+
* However, if it's a right/right-anti/right-semi/full join, we'd
1805+
* better reset the inner-tuple match flags contained in the
1806+
* table.
18061807
*/
1807-
if (HJ_FILL_INNER(node))
1808+
if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
18081809
ExecHashTableResetMatchFlags(node->hj_HashTable);
18091810

18101811
/*

src/test/regress/expected/join.out

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3100,6 +3100,69 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
31003100
reset enable_hashjoin;
31013101
reset enable_nestloop;
31023102
--
3103+
-- regression test for bug with hash-right-semi join
3104+
--
3105+
create temp table tbl_rs(a int, b int);
3106+
insert into tbl_rs select i, i from generate_series(1,10)i;
3107+
analyze tbl_rs;
3108+
set enable_nestloop to off;
3109+
set enable_hashagg to off;
3110+
-- ensure we get a hash right semi join with SubPlan in hash clauses
3111+
explain (costs off)
3112+
select * from tbl_rs t1
3113+
where (select a from tbl_rs t2
3114+
where exists (select 1 from
3115+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3116+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3117+
order by a limit 1) >= 0;
3118+
QUERY PLAN
3119+
--------------------------------------------------------------------------------------
3120+
Seq Scan on tbl_rs t1
3121+
Filter: ((SubPlan 3) >= 0)
3122+
SubPlan 3
3123+
-> Limit
3124+
InitPlan 2
3125+
-> Hash Right Semi Join
3126+
Hash Cond: (((t1.a = 1)) = (ANY (t4.b = (hashed SubPlan 1).col1)))
3127+
-> Append
3128+
-> Seq Scan on tbl_rs t5
3129+
-> Result
3130+
-> Hash
3131+
-> Seq Scan on tbl_rs t4
3132+
Filter: (a = 1)
3133+
SubPlan 1
3134+
-> Seq Scan on tbl_rs t3
3135+
-> Sort
3136+
Sort Key: t2.a
3137+
-> Result
3138+
One-Time Filter: (InitPlan 2).col1
3139+
-> Seq Scan on tbl_rs t2
3140+
(20 rows)
3141+
3142+
-- and check we get the expected results
3143+
select * from tbl_rs t1
3144+
where (select a from tbl_rs t2
3145+
where exists (select 1 from
3146+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3147+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3148+
order by a limit 1) >= 0;
3149+
a | b
3150+
----+----
3151+
1 | 1
3152+
2 | 2
3153+
3 | 3
3154+
4 | 4
3155+
5 | 5
3156+
6 | 6
3157+
7 | 7
3158+
8 | 8
3159+
9 | 9
3160+
10 | 10
3161+
(10 rows)
3162+
3163+
reset enable_nestloop;
3164+
reset enable_hashagg;
3165+
--
31033166
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
31043167
--
31053168
-- start_ignore

src/test/regress/sql/join.sql

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,36 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
731731
reset enable_hashjoin;
732732
reset enable_nestloop;
733733

734+
--
735+
-- regression test for bug with hash-right-semi join
736+
--
737+
create temp table tbl_rs(a int, b int);
738+
insert into tbl_rs select i, i from generate_series(1,10)i;
739+
analyze tbl_rs;
740+
741+
set enable_nestloop to off;
742+
set enable_hashagg to off;
743+
744+
-- ensure we get a hash right semi join with SubPlan in hash clauses
745+
explain (costs off)
746+
select * from tbl_rs t1
747+
where (select a from tbl_rs t2
748+
where exists (select 1 from
749+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
750+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
751+
order by a limit 1) >= 0;
752+
753+
-- and check we get the expected results
754+
select * from tbl_rs t1
755+
where (select a from tbl_rs t2
756+
where exists (select 1 from
757+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
758+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
759+
order by a limit 1) >= 0;
760+
761+
reset enable_nestloop;
762+
reset enable_hashagg;
763+
734764
--
735765
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
736766
--

0 commit comments

Comments
 (0)