Skip to content

Commit e810621

Browse files
Richard Guo authored and kongfanshen committed
Fix right-semi-joins in HashJoin rescans
When resetting a HashJoin node for rescans, if it is a single-batch join and there are no parameter changes for the inner subnode, we can just reuse the existing hash table without rebuilding it. However, for join types that depend on the inner-tuple match flags in the hash table, we need to reset these match flags to avoid incorrect results. This applies to right, right-anti, right-semi, and full joins.

When I introduced "Right Semi Join" plan shapes in aa86129, I failed to reset the match flags in the hash table for right-semi joins in rescans. This oversight has been shown to produce incorrect results. This patch fixes it.

Author: Richard Guo
Discussion: https://postgr.es/m/CAMbWs4-nQF9io2WL2SkD0eXvfPdyBc9Q=hRwfQHCGV2usa0jyA@mail.gmail.com

(cherry picked from commit 5668a85)
1 parent 4c87ddf commit e810621

3 files changed

Lines changed: 97 additions & 3 deletions

File tree

src/backend/executor/nodeHashjoin.c

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1801,10 +1801,11 @@ ExecReScanHashJoin(HashJoinState *node)
18011801
/*
18021802
* Okay to reuse the hash table; needn't rescan inner, either.
18031803
*
1804-
* However, if it's a right/right-anti/full join, we'd better
1805-
* reset the inner-tuple match flags contained in the table.
1804+
* However, if it's a right/right-anti/right-semi/full join, we'd
1805+
* better reset the inner-tuple match flags contained in the
1806+
* table.
18061807
*/
1807-
if (HJ_FILL_INNER(node))
1808+
if (HJ_FILL_INNER(node) || node->js.jointype == JOIN_RIGHT_SEMI)
18081809
ExecHashTableResetMatchFlags(node->hj_HashTable);
18091810

18101811
/*

src/test/regress/expected/join.out

Lines changed: 63 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3100,6 +3100,69 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
31003100
reset enable_hashjoin;
31013101
reset enable_nestloop;
31023102
--
3103+
-- regression test for bug with hash-right-semi join
3104+
--
3105+
create temp table tbl_rs(a int, b int);
3106+
insert into tbl_rs select i, i from generate_series(1,10)i;
3107+
analyze tbl_rs;
3108+
set enable_nestloop to off;
3109+
set enable_hashagg to off;
3110+
-- ensure we get a hash right semi join with SubPlan in hash clauses
3111+
explain (costs off)
3112+
select * from tbl_rs t1
3113+
where (select a from tbl_rs t2
3114+
where exists (select 1 from
3115+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3116+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3117+
order by a limit 1) >= 0;
3118+
QUERY PLAN
3119+
--------------------------------------------------------------------------------------
3120+
Seq Scan on tbl_rs t1
3121+
Filter: ((SubPlan 3) >= 0)
3122+
SubPlan 3
3123+
-> Limit
3124+
InitPlan 2
3125+
-> Hash Right Semi Join
3126+
Hash Cond: (((t1.a = 1)) = (ANY (t4.b = (hashed SubPlan 1).col1)))
3127+
-> Append
3128+
-> Seq Scan on tbl_rs t5
3129+
-> Result
3130+
-> Hash
3131+
-> Seq Scan on tbl_rs t4
3132+
Filter: (a = 1)
3133+
SubPlan 1
3134+
-> Seq Scan on tbl_rs t3
3135+
-> Sort
3136+
Sort Key: t2.a
3137+
-> Result
3138+
One-Time Filter: (InitPlan 2).col1
3139+
-> Seq Scan on tbl_rs t2
3140+
(20 rows)
3141+
3142+
-- and check we get the expected results
3143+
select * from tbl_rs t1
3144+
where (select a from tbl_rs t2
3145+
where exists (select 1 from
3146+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
3147+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
3148+
order by a limit 1) >= 0;
3149+
a | b
3150+
----+----
3151+
1 | 1
3152+
2 | 2
3153+
3 | 3
3154+
4 | 4
3155+
5 | 5
3156+
6 | 6
3157+
7 | 7
3158+
8 | 8
3159+
9 | 9
3160+
10 | 10
3161+
(10 rows)
3162+
3163+
reset enable_nestloop;
3164+
reset enable_hashagg;
3165+
--
31033166
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
31043167
--
31053168
-- start_ignore

src/test/regress/sql/join.sql

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -731,6 +731,36 @@ where not exists (select 1 from tbl_ra t2 where t2.b = t1.a) and t1.b < 2;
731731
reset enable_hashjoin;
732732
reset enable_nestloop;
733733

734+
--
735+
-- regression test for bug with hash-right-semi join
736+
--
737+
create temp table tbl_rs(a int, b int);
738+
insert into tbl_rs select i, i from generate_series(1,10)i;
739+
analyze tbl_rs;
740+
741+
set enable_nestloop to off;
742+
set enable_hashagg to off;
743+
744+
-- ensure we get a hash right semi join with SubPlan in hash clauses
745+
explain (costs off)
746+
select * from tbl_rs t1
747+
where (select a from tbl_rs t2
748+
where exists (select 1 from
749+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
750+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
751+
order by a limit 1) >= 0;
752+
753+
-- and check we get the expected results
754+
select * from tbl_rs t1
755+
where (select a from tbl_rs t2
756+
where exists (select 1 from
757+
(select (b in (select b from tbl_rs t3)) as c from tbl_rs t4 where t4.a = 1) s
758+
where c in (select t1.a = 1 from tbl_rs t5 union all select true))
759+
order by a limit 1) >= 0;
760+
761+
reset enable_nestloop;
762+
reset enable_hashagg;
763+
734764
--
735765
-- regression test for bug #13908 (hash join with skew tuples & nbatch increase)
736766
--

0 commit comments

Comments
 (0)