Skip to content

Commit d189802

Browse files
declan-scaleclaude
andcommitted
fix(migrations): tighten transaction-nesting + add in-band-backfill rule
Address greptile review on PR #225: - transaction-nesting now scopes per call site (indent-based span detection) rather than short-circuiting on a file-level autocommit_block presence check. A migration that wraps one CREATE INDEX CONCURRENTLY in autocommit_block() but leaves a sibling call outside no longer slips past the linter. - in-band-backfill rule restored: flags op.execute() calls whose SQL contains UPDATE / DELETE FROM. The PR description claimed this rule existed but the implementation didn't; now it does, and it's added to _RULES. - Drop unused ENV_PY constant. Adds three unit tests: mixed concurrent-index scenario, UPDATE backfill, and DELETE FROM backfill. SELECT remains unflagged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent cb37cc2 commit d189802

2 files changed

Lines changed: 167 additions & 18 deletions

File tree

agentex/scripts/ci_tools/migration_lint.py

Lines changed: 93 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@
8080

8181
REPO_ROOT = Path(__file__).resolve().parents[2]
8282
MIGRATIONS_DIR = REPO_ROOT / "database" / "migrations" / "alembic" / "versions"
83-
ENV_PY = REPO_ROOT / "database" / "migrations" / "alembic" / "env.py"
8483
ESCAPE_HATCH_ENV_VAR = "MIGRATION_UNSAFE_ACK"
8584

8685

@@ -127,6 +126,11 @@ def format(self) -> str:
127126
r"(lock_timeout|statement_timeout|idle_in_transaction_session_timeout)\b",
128127
re.IGNORECASE,
129128
)
129+
_DATA_BACKFILL = re.compile(
130+
r"\b(?:UPDATE\s+\w|DELETE\s+FROM\b)",
131+
re.IGNORECASE,
132+
)
133+
_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(")
130134

131135

132136
def _slice_call(source: str, start: int) -> str:
@@ -281,24 +285,67 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]:
281285
)
282286

283287

288+
def _autocommit_spans(source: str) -> list[tuple[int, int]]:
289+
"""Return (start, end) byte-offset ranges enclosed by ``with ... autocommit_block():`` blocks.
290+
291+
Computed from indentation: a ``with`` line introducing ``autocommit_block()``
292+
opens a block; the block extends as long as subsequent non-blank lines are
293+
indented strictly more than the opener.
294+
"""
295+
lines = source.splitlines(keepends=True)
296+
line_starts: list[int] = []
297+
pos = 0
298+
for ln in lines:
299+
line_starts.append(pos)
300+
pos += len(ln)
301+
end_of_file = pos
302+
303+
spans: list[tuple[int, int]] = []
304+
for i, line in enumerate(lines):
305+
if not _AUTOCOMMIT_OPENER.search(line):
306+
continue
307+
opener_indent = len(line) - len(line.lstrip(" \t"))
308+
body_start = line_starts[i + 1] if i + 1 < len(lines) else end_of_file
309+
body_end = body_start
310+
for j in range(i + 1, len(lines)):
311+
li = lines[j]
312+
if not li.strip():
313+
body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file
314+
continue
315+
indent = len(li) - len(li.lstrip(" \t"))
316+
if indent <= opener_indent:
317+
break
318+
body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file
319+
spans.append((body_start, body_end))
320+
return spans
321+
322+
284323
def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]:
285-
"""Flag postgresql_concurrently=True used outside an autocommit_block."""
286-
if "postgresql_concurrently=True" not in source:
287-
return
288-
if _AUTOCOMMIT_BLOCK.search(source) is None:
289-
first = re.search(r"postgresql_concurrently\s*=\s*True", source)
290-
line = _line_of(source, first.start()) if first else 1
291-
if not _has_noqa(source, line):
292-
yield Finding(
293-
path=path,
294-
line=line,
295-
rule="transaction-nesting",
296-
message=(
297-
"postgresql_concurrently=True must run outside the "
298-
"Alembic transaction. Wrap the call in "
299-
"`with op.get_context().autocommit_block():`."
300-
),
301-
)
324+
"""Flag each ``postgresql_concurrently=True`` call site that is not inside an
325+
``autocommit_block``.
326+
327+
Scans every concurrent-index occurrence individually rather than just
328+
asking whether ``autocommit_block`` appears anywhere in the file — a
329+
migration with two concurrent indexes where only one is wrapped would
330+
otherwise pass the linter and fail at runtime.
331+
"""
332+
spans = _autocommit_spans(source)
333+
for match in re.finditer(r"postgresql_concurrently\s*=\s*True", source):
334+
line = _line_of(source, match.start())
335+
if _has_noqa(source, line):
336+
continue
337+
if any(start <= match.start() < end for start, end in spans):
338+
continue
339+
yield Finding(
340+
path=path,
341+
line=line,
342+
rule="transaction-nesting",
343+
message=(
344+
"postgresql_concurrently=True must run inside "
345+
"`with op.get_context().autocommit_block():` — "
346+
"CREATE INDEX CONCURRENTLY cannot run inside a transaction."
347+
),
348+
)
302349

303350

304351
def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
@@ -334,12 +381,40 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
334381
)
335382

336383

384+
def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]:
385+
"""Flag ``op.execute(...)`` calls whose SQL contains an UPDATE or DELETE FROM.
386+
387+
Data backfills run inside the migration transaction, hold row locks for
388+
its full duration, and prevent autovacuum from reclaiming dead tuples.
389+
They belong in an out-of-band operator runbook, not in the migration.
390+
"""
391+
for match in _OP_EXECUTE.finditer(source):
392+
line = _line_of(source, match.start())
393+
if _has_noqa(source, line):
394+
continue
395+
call = _slice_call(source, match.start())
396+
if _DATA_BACKFILL.search(call):
397+
yield Finding(
398+
path=path,
399+
line=line,
400+
rule="in-band-backfill",
401+
message=(
402+
"op.execute() containing UPDATE / DELETE FROM holds row "
403+
"locks for the entire migration transaction and prevents "
404+
"autovacuum from cleaning up. Move data backfills to an "
405+
"out-of-band operator runbook and keep the migration "
406+
"schema-only."
407+
),
408+
)
409+
410+
337411
_RULES = (
338412
_check_prefer_robust_stmts,
339413
_check_disallowed_unique_constraint,
340414
_check_adding_required_field,
341415
_check_transaction_nesting,
342416
_check_no_timeout_overrides,
417+
_check_in_band_backfill,
343418
)
344419

345420

agentex/tests/unit/scripts/test_migration_lint.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,80 @@ def upgrade():
211211
assert any(f.rule == "transaction-nesting" for f in findings)
212212

213213

214+
def test_concurrently_mixed_one_outside_one_inside_flags_only_outside(
215+
tmp_path: Path,
216+
) -> None:
217+
path = _write(
218+
tmp_path,
219+
"""
220+
from alembic import op
221+
def upgrade():
222+
op.create_index(
223+
"ix_foo_a", "foo", ["a"], postgresql_concurrently=True
224+
)
225+
with op.get_context().autocommit_block():
226+
op.create_index(
227+
"ix_foo_b", "foo", ["b"], postgresql_concurrently=True
228+
)
229+
""",
230+
)
231+
findings = [
232+
f for f in migration_lint.lint_file(path) if f.rule == "transaction-nesting"
233+
]
234+
assert len(findings) == 1, findings
235+
# The flagged occurrence is the one outside the autocommit_block — i.e.
236+
# the kwarg site of the first op.create_index, which is the lower-numbered
237+
# of the two postgresql_concurrently=True positions in the source.
238+
source_lines = path.read_text().splitlines()
239+
flagged_line = source_lines[findings[0].line - 1]
240+
assert "postgresql_concurrently=True" in flagged_line
241+
inside_lines = [
242+
i
243+
for i, line in enumerate(source_lines, start=1)
244+
if "postgresql_concurrently=True" in line
245+
]
246+
assert findings[0].line == min(inside_lines)
247+
248+
249+
def test_in_band_update_backfill_flagged(tmp_path: Path) -> None:
250+
path = _write(
251+
tmp_path,
252+
"""
253+
from alembic import op
254+
def upgrade():
255+
op.execute("UPDATE foo SET x = 1 WHERE y IS NULL")
256+
""",
257+
)
258+
findings = migration_lint.lint_file(path)
259+
assert any(f.rule == "in-band-backfill" for f in findings)
260+
261+
262+
def test_in_band_delete_backfill_flagged(tmp_path: Path) -> None:
263+
path = _write(
264+
tmp_path,
265+
"""
266+
from alembic import op
267+
def upgrade():
268+
op.execute("DELETE FROM foo WHERE created_at < now()")
269+
""",
270+
)
271+
findings = migration_lint.lint_file(path)
272+
assert any(f.rule == "in-band-backfill" for f in findings)
273+
274+
275+
def test_in_band_backfill_select_not_flagged(tmp_path: Path) -> None:
276+
path = _write(
277+
tmp_path,
278+
"""
279+
from alembic import op
280+
def upgrade():
281+
op.execute("SELECT * FROM foo")
282+
""",
283+
)
284+
findings = migration_lint.lint_file(path)
285+
assert all(f.rule != "in-band-backfill" for f in findings)
286+
287+
214288
def test_set_lock_timeout_flagged(tmp_path: Path) -> None:
215289
path = _write(
216290
tmp_path,

0 commit comments

Comments
 (0)