Skip to content

Commit 8f978ca

Browse files
committed
Refactor statement splitter to a simplified stack-based architecture (fixes #845)
1 parent 897eb2d commit 8f978ca

4 files changed

Lines changed: 179 additions & 34 deletions

File tree

CHANGELOG

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
Development Version
22
-------------------
33

4-
Nothing yet.
4+
Bug Fixes
5+
6+
* Fix statement splitting (issue845).
57

68

79
Release 0.5.5 (Dec 19, 2025)

sqlparse/engine/statement_splitter.py

Lines changed: 85 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@ def __init__(self):
1616

1717
def _reset(self):
1818
"""Set the filter attributes to its default values"""
19-
self._in_declare = False
20-
self._in_case = False
19+
self._block_stack = []
20+
self._parenthesis_level = 0
21+
self._unconfirmed_start = None
2122
self._is_create = False
22-
self._begin_depth = 0
2323
self._seen_begin = False
2424

2525
self.consume_ws = False
@@ -29,37 +29,44 @@ def _reset(self):
2929
def _change_splitlevel(self, ttype, value):
3030
"""Get the new split level (increase, decrease or remain equal)"""
3131

32+
# Semicolon resets unconfirmed loop starters
33+
if ttype is T.Punctuation and value == ';':
34+
self._unconfirmed_start = None
35+
3236
# parenthesis increase/decrease a level
3337
if ttype is T.Punctuation and value == '(':
38+
self._parenthesis_level += 1
3439
return 1
3540
elif ttype is T.Punctuation and value == ')':
41+
self._parenthesis_level = max(0, self._parenthesis_level - 1)
3642
return -1
3743
elif ttype not in T.Keyword: # if normal token return
3844
return 0
3945

4046
# Everything after here is ttype = T.Keyword
41-
# Also to note, once entered an If statement you are done and basically
42-
# returning
4347
unified = value.upper()
4448

45-
# three keywords begin with CREATE, but only one of them is DDL
4649
# DDL Create though can contain more words such as "or replace"
4750
if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
4851
self._is_create = True
4952
return 0
5053

51-
# can have nested declare inside of being...
52-
if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
53-
self._in_declare = True
54+
# Handle DECLARE block start (only for CREATE statements)
55+
if unified == 'DECLARE' and self._is_create and not self._block_stack:
56+
self._block_stack.append('DECLARE')
5457
return 1
5558

59+
# Handle BEGIN block start
5660
if unified == 'BEGIN':
57-
self._begin_depth += 1
5861
self._seen_begin = True
59-
if self._is_create:
60-
# FIXME(andi): This makes no sense. ## this comment neither
62+
# Transition DECLARE to BEGIN if present
63+
if self._block_stack and self._block_stack[-1] == 'DECLARE':
64+
self._block_stack.pop()
65+
self._block_stack.append('BEGIN')
66+
return 0
67+
else:
68+
self._block_stack.append('BEGIN')
6169
return 1
62-
return 0
6370

6471
# Issue826: If we see a transaction keyword after BEGIN,
6572
# it's a transaction statement, not a block.
@@ -68,28 +75,72 @@ def _change_splitlevel(self, ttype, value):
6875
unified in ('TRANSACTION', 'WORK', 'TRAN',
6976
'DISTRIBUTED', 'DEFERRED',
7077
'IMMEDIATE', 'EXCLUSIVE'):
71-
self._begin_depth = max(0, self._begin_depth - 1)
7278
self._seen_begin = False
79+
if self._block_stack and self._block_stack[-1] == 'BEGIN':
80+
self._block_stack.pop()
81+
return -1
7382
return 0
7483

75-
# BEGIN and CASE/WHEN both end with END
76-
if unified == 'END':
77-
if not self._in_case:
78-
self._begin_depth = max(0, self._begin_depth - 1)
79-
else:
80-
self._in_case = False
81-
return -1
82-
83-
if (unified in ('IF', 'FOR', 'WHILE', 'CASE')
84-
and self._is_create and self._begin_depth > 0):
85-
if unified == 'CASE':
86-
self._in_case = True
87-
return 1
84+
# Inside a block, check for nested loop or control structures
85+
if 'BEGIN' in self._block_stack:
86+
if unified == 'FOR':
87+
self._unconfirmed_start = 'FOR'
88+
return 0
89+
elif unified == 'WHILE':
90+
self._unconfirmed_start = 'WHILE'
91+
return 0
92+
elif unified == 'LOOP':
93+
if self._unconfirmed_start in ('FOR', 'WHILE'):
94+
self._block_stack.append(self._unconfirmed_start)
95+
self._unconfirmed_start = None
96+
return 1
97+
else:
98+
self._block_stack.append('LOOP')
99+
return 1
100+
elif unified == 'DO':
101+
if self._unconfirmed_start in ('FOR', 'WHILE'):
102+
self._block_stack.append(self._unconfirmed_start)
103+
self._unconfirmed_start = None
104+
return 1
105+
elif unified == 'IF':
106+
self._block_stack.append('IF')
107+
return 1
108+
elif unified == 'CASE':
109+
self._block_stack.append('CASE')
110+
return 1
88111

89-
if unified in ('END IF', 'END FOR', 'END WHILE'):
90-
return -1
112+
# Handle closing keywords
113+
if unified == 'END IF':
114+
if self._block_stack and self._block_stack[-1] == 'IF':
115+
self._block_stack.pop()
116+
return -1
117+
elif unified == 'END FOR':
118+
if self._block_stack and self._block_stack[-1] == 'FOR':
119+
self._block_stack.pop()
120+
return -1
121+
elif unified == 'END WHILE':
122+
if self._block_stack and self._block_stack[-1] == 'WHILE':
123+
self._block_stack.pop()
124+
return -1
125+
elif unified == 'END LOOP':
126+
if self._block_stack and self._block_stack[-1] in ('LOOP', 'FOR', 'WHILE'):
127+
self._block_stack.pop()
128+
return -1
129+
elif unified == 'END CASE':
130+
if self._block_stack and self._block_stack[-1] == 'CASE':
131+
self._block_stack.pop()
132+
return -1
133+
elif unified == 'END':
134+
if self._block_stack:
135+
if self._block_stack[-1] in ('CASE', 'BEGIN'):
136+
self._block_stack.pop()
137+
return -1
138+
else:
139+
self._block_stack.pop()
140+
return -1
141+
else:
142+
return -1
91143

92-
# Default
93144
return 0
94145

95146
def process(self, stream):
@@ -125,10 +176,12 @@ def process(self, stream):
125176
# If we just saw BEGIN; then this is a transaction BEGIN,
126177
# not a BEGIN...END block, so pop it from the block stack and lower the level
127178
if self._seen_begin:
128-
self._begin_depth = max(0, self._begin_depth - 1)
179+
if self._block_stack and self._block_stack[-1] == 'BEGIN':
180+
self._block_stack.pop()
181+
self.level = max(0, self.level - 1)
129182
self._seen_begin = False
130183
# Split on semicolon if not inside a BEGIN...END block
131-
if self.level <= 0 and self._begin_depth == 0:
184+
if self.level <= 0 and 'BEGIN' not in self._block_stack:
132185
self.consume_ws = True
133186
elif ttype is T.Keyword and value.split()[0] == 'GO':
134187
self.consume_ws = True

sqlparse/keywords.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
(r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name),
7070
(r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?'
7171
r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword),
72-
(r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword),
72+
(r'END(\s+IF|\s+LOOP|\s+WHILE|\s+FOR|\s+CASE)?\b', tokens.Keyword),
7373
(r'IF\s+(NOT\s+)?EXISTS\b', tokens.Keyword),
7474
(r'NOT\s+NULL\b', tokens.Keyword),
7575
(r'(ASC|DESC)(\s+NULLS\s+(FIRST|LAST))?\b', tokens.Keyword.Order),

tests/test_split.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,3 +284,93 @@ def test_split_begin_transaction_formatted(): # issue826
284284
assert stmts[1].startswith('DELETE')
285285
assert stmts[2].startswith('INSERT')
286286
assert stmts[3] == 'END\nTRANSACTION;'
287+
288+
289+
def test_split_anonymous_begin_end_for(): # issue845 Case 1
290+
sql = """
291+
BEGIN
292+
SELECT 1;
293+
FOR R DO
294+
SELECT 1;
295+
END FOR;
296+
END;
297+
"""
298+
stmts = sqlparse.split(sql)
299+
assert len(stmts) == 1
300+
assert "END FOR;" in stmts[0]
301+
302+
303+
def test_split_anonymous_begin_end_case_inline(): # issue845 Case 2
304+
sql = """
305+
BEGIN
306+
SELECT 1;
307+
IF 1 THEN
308+
SELECT CASE WHEN 1 THEN 2 ELSE 3 END AS COUNT;
309+
ELSE
310+
SELECT 2;
311+
END IF;
312+
END;
313+
"""
314+
stmts = sqlparse.split(sql)
315+
assert len(stmts) == 1
316+
assert "END AS COUNT;" in stmts[0]
317+
318+
319+
def test_split_for_update_in_begin_end():
320+
# Verify that FOR UPDATE / FOR SHARE inside a BEGIN ... END block do not break level balancing
321+
sql = """
322+
BEGIN
323+
SELECT * FROM foo FOR UPDATE;
324+
SELECT * FROM bar FOR SHARE;
325+
END;
326+
SELECT 3;
327+
"""
328+
stmts = sqlparse.split(sql)
329+
assert len(stmts) == 2
330+
assert "SELECT 3;" in stmts[1]
331+
332+
333+
def test_split_multiple_for_loops_in_begin_end():
334+
# Verify that multiple sequential loops inside a BEGIN ... END block balance correctly
335+
sql = """
336+
BEGIN
337+
FOR x IN select_query LOOP
338+
SELECT 1;
339+
END LOOP;
340+
FOR y IN select_query LOOP
341+
SELECT 2;
342+
END LOOP;
343+
END;
344+
SELECT 3;
345+
"""
346+
stmts = sqlparse.split(sql)
347+
assert len(stmts) == 2
348+
assert "SELECT 3;" in stmts[1]
349+
350+
351+
def test_split_procedural_case_end_case():
352+
# Verify that CASE closed by END CASE inside a BEGIN block balances correctly
353+
sql = """
354+
BEGIN
355+
CASE val
356+
WHEN 1 THEN SELECT 'one';
357+
WHEN 2 THEN SELECT 'two';
358+
ELSE SELECT 'other';
359+
END CASE;
360+
END;
361+
SELECT 3;
362+
"""
363+
stmts = sqlparse.split(sql)
364+
assert len(stmts) == 2
365+
assert "SELECT 3;" in stmts[1]
366+
367+
368+
def test_split_standalone_for_update():
369+
# Verify that standalone FOR UPDATE statements split correctly
370+
sql = "SELECT * FROM foo FOR UPDATE; SELECT 3;"
371+
stmts = sqlparse.split(sql)
372+
assert len(stmts) == 2
373+
assert stmts[0] == "SELECT * FROM foo FOR UPDATE;"
374+
assert stmts[1] == "SELECT 3;"
375+
376+

0 commit comments

Comments
 (0)