Skip to content

Commit 3308632

Browse files
declan-scaleclaude
andcommitted
fix(migrations): converge linter with sgp variant
Address two P1 Greptile findings on PR #225 and bring the agentex linter into parity with the egp-api-backend version (scaleapi#142283): - Issue 1: `op.execute("CREATE INDEX CONCURRENTLY ...")` outside an autocommit_block was silently accepted (the raw-SQL prefer-robust-stmts check excludes CONCURRENTLY because it's the safe form). Add a second pass in `_check_transaction_nesting` that catches this shape so it fails lint instead of failing at runtime with "CREATE INDEX CONCURRENTLY cannot run inside a transaction block". - Issue 2: indentation-based `_autocommit_spans` truncated the block on any non-blank line at column 0, so a triple-quoted SQL string starting at the margin would push trailing `postgresql_concurrently=True` out of the span and produce a false `transaction-nesting` finding on correctly-written migrations. Switch to AST-based span detection, which also handles the PEP 617 parenthesized `with` form for free. Parity with sgp linter: - `SET statement_timeout` is now exempted inside autocommit_block (the documented escape valve for long CIC builds), with `RESET` still flagged because RESET reverts to the role/server default rather than restoring the runner's 30s ceiling. - `_changed_migrations` resolves paths via `git rev-parse --show-toplevel` instead of `REPO_ROOT.parent` so it keeps working if the package is ever moved within the repo. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 02fb253 commit 3308632

2 files changed

Lines changed: 285 additions & 27 deletions

File tree

agentex/scripts/ci_tools/migration_lint.py

Lines changed: 119 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,16 @@
3939
- ``no-timeout-overrides`` (custom): forbids ``SET lock_timeout``,
4040
``SET statement_timeout``, ``SET idle_in_transaction_session_timeout``, and
4141
``RESET`` of those, so a migration cannot quietly disable the runtime
42-
guardrails.
42+
guardrails. Narrow exception: ``SET statement_timeout`` is permitted
43+
*inside* an ``autocommit_block`` span — that is the supported escape valve
44+
for long ``CREATE INDEX CONCURRENTLY`` builds, since the default 30s
45+
session-level ``statement_timeout`` would otherwise abort the build and
46+
leave an INVALID index behind. ``RESET statement_timeout`` is **not**
47+
exempted: ``RESET`` falls back to the database / role / server default
48+
(typically ``0``, i.e. no timeout), which strips the runner's 30s ceiling
49+
for every later migration in the batch instead of restoring it. Authors
50+
must restore the ceiling with an explicit ``SET statement_timeout = '30s'``
51+
at the end of the block.
4352
- ``in-band-backfill``: ``op.execute("UPDATE ...")`` or ``DELETE FROM`` inside
4453
a migration holds row locks for the entire transaction and prevents
4554
autovacuum from cleaning up. Move data backfills to an out-of-band
@@ -77,6 +86,7 @@
7786
from __future__ import annotations
7887

7988
import argparse
89+
import ast
8090
import os
8191
import re
8292
import subprocess
@@ -148,7 +158,10 @@ def format(self) -> str:
148158
r"\bADD\s+CONSTRAINT\b(?:[^;]*?\bFOREIGN\s+KEY\b)(?![^;]*?\bNOT\s+VALID\b)",
149159
re.IGNORECASE | re.DOTALL,
150160
)
151-
_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(")
161+
_RAW_CONCURRENT_INDEX = re.compile(
162+
r"\bCREATE\s+(?:UNIQUE\s+)?INDEX\s+CONCURRENTLY\b",
163+
re.IGNORECASE,
164+
)
152165

153166

154167
def _slice_call(source: str, start: int) -> str:
@@ -349,38 +362,63 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]:
349362
)
350363

351364

365+
def _is_autocommit_call(node: ast.expr) -> bool:
366+
"""Return True if ``node`` is a call to ``autocommit_block``.
367+
368+
Matches both attribute access (``op.get_context().autocommit_block()``)
369+
and bare name access (``autocommit_block()``) — the call site is the
370+
relevant signal, not the receiver chain.
371+
"""
372+
if not isinstance(node, ast.Call):
373+
return False
374+
func = node.func
375+
if isinstance(func, ast.Attribute) and func.attr == "autocommit_block":
376+
return True
377+
if isinstance(func, ast.Name) and func.id == "autocommit_block":
378+
return True
379+
return False
380+
381+
352382
def _autocommit_spans(source: str) -> list[tuple[int, int]]:
353383
"""Return (start, end) byte-offset ranges enclosed by ``with ... autocommit_block():`` blocks.
354384
355-
Computed from indentation: a ``with`` line introducing ``autocommit_block()``
356-
opens a block; the block extends as long as subsequent non-blank lines are
357-
indented strictly more than the opener.
385+
Uses the AST so the body boundary is the parser's view of the block —
386+
not an indentation heuristic that breaks on triple-quoted strings whose
387+
content starts at column 0 (e.g. ``op.execute(\"\"\"\\nCREATE TABLE...\"\"\")``)
388+
and would otherwise terminate the span early. Also handles the PEP 617
389+
parenthesized ``with`` form correctly.
358390
"""
359-
lines = source.splitlines(keepends=True)
391+
try:
392+
tree = ast.parse(source)
393+
except SyntaxError:
394+
return []
395+
360396
line_starts: list[int] = []
361397
pos = 0
362-
for ln in lines:
398+
for ln in source.splitlines(keepends=True):
363399
line_starts.append(pos)
364400
pos += len(ln)
365-
end_of_file = pos
401+
line_starts.append(pos) # sentinel: end-of-file offset
366402

367403
spans: list[tuple[int, int]] = []
368-
for i, line in enumerate(lines):
369-
if not _AUTOCOMMIT_OPENER.search(line):
404+
for node in ast.walk(tree):
405+
if not isinstance(node, ast.With | ast.AsyncWith):
370406
continue
371-
opener_indent = len(line) - len(line.lstrip(" \t"))
372-
body_start = line_starts[i + 1] if i + 1 < len(lines) else end_of_file
373-
body_end = body_start
374-
for j in range(i + 1, len(lines)):
375-
li = lines[j]
376-
if not li.strip():
377-
body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file
378-
continue
379-
indent = len(li) - len(li.lstrip(" \t"))
380-
if indent <= opener_indent:
381-
break
382-
body_end = line_starts[j + 1] if j + 1 < len(lines) else end_of_file
383-
spans.append((body_start, body_end))
407+
if not any(_is_autocommit_call(item.context_expr) for item in node.items):
408+
continue
409+
if not node.body:
410+
continue
411+
first = node.body[0]
412+
last = node.body[-1]
413+
start_lineno = first.lineno # 1-based
414+
end_lineno = getattr(last, "end_lineno", last.lineno) or last.lineno
415+
start_offset = (
416+
line_starts[start_lineno - 1]
417+
if start_lineno - 1 < len(line_starts)
418+
else pos
419+
)
420+
end_offset = line_starts[end_lineno] if end_lineno < len(line_starts) else pos
421+
spans.append((start_offset, end_offset))
384422
return spans
385423

386424

@@ -411,14 +449,57 @@ def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]:
411449
),
412450
)
413451

452+
# Raw-SQL escape hatch: ``op.execute("CREATE INDEX CONCURRENTLY ...")``
453+
# outside an autocommit_block also fails at runtime. The
454+
# ``prefer-robust-stmts`` raw-SQL check correctly excludes CONCURRENTLY
455+
# (since CONCURRENTLY is the *safe* form), so we'd otherwise miss it
456+
# entirely.
457+
for match in _OP_EXECUTE.finditer(source):
458+
line = _line_of(source, match.start())
459+
if _has_noqa(source, line):
460+
continue
461+
call = _slice_call(source, match.start())
462+
if not _RAW_CONCURRENT_INDEX.search(call):
463+
continue
464+
if any(start <= match.start() < end for start, end in spans):
465+
continue
466+
yield Finding(
467+
path=path,
468+
line=line,
469+
rule="transaction-nesting",
470+
message=(
471+
'op.execute("CREATE INDEX CONCURRENTLY ...") must run inside '
472+
"`with op.get_context().autocommit_block():` — "
473+
"CREATE INDEX CONCURRENTLY cannot run inside a transaction."
474+
),
475+
)
476+
414477

415478
def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
479+
spans = _autocommit_spans(source)
480+
481+
def _inside_autocommit_block(offset: int) -> bool:
482+
return any(start <= offset < end for start, end in spans)
483+
416484
for match in _SET_TIMEOUT.finditer(source):
417485
line = _line_of(source, match.start())
418486
if _has_noqa(source, line):
419487
continue
420488
if _is_in_python_comment(source, match.start()):
421489
continue
490+
# `statement_timeout` may be overridden inside an autocommit_block —
491+
# that is the supported escape valve for long CREATE INDEX
492+
# CONCURRENTLY builds (the default 30s session ceiling applies inside
493+
# autocommit blocks too, since `autocommit_block` only changes the
494+
# transaction isolation, not session-level GUCs). Authors must
495+
# restore the ceiling with an explicit `SET statement_timeout = '30s'`
496+
# at the end of the block — RESET is intentionally not exempted
497+
# because it falls back to the database / role default (typically 0).
498+
timeout_name = (match.group(2) or "").lower()
499+
if timeout_name == "statement_timeout" and _inside_autocommit_block(
500+
match.start()
501+
):
502+
continue
422503
yield Finding(
423504
path=path,
424505
line=line,
@@ -427,7 +508,9 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
427508
"Migrations must not SET lock_timeout / statement_timeout / "
428509
"idle_in_transaction_session_timeout — those are configured "
429510
"by the migration runner. Apply the migration-unsafe-ack PR "
430-
"label if a maintenance window genuinely requires it."
511+
"label if a maintenance window genuinely requires it. "
512+
"(Exception: SET statement_timeout is permitted inside an "
513+
"autocommit_block for long CREATE INDEX CONCURRENTLY builds.)"
431514
),
432515
)
433516

@@ -444,7 +527,11 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
444527
message=(
445528
"Migrations must not RESET lock_timeout / statement_timeout / "
446529
"idle_in_transaction_session_timeout — runtime guardrails "
447-
"must remain in effect for the whole migration."
530+
"must remain in effect for the whole migration. RESET falls "
531+
"back to the database / role / server default (typically 0, "
532+
"i.e. no timeout), so it does not restore the runner's 30s "
533+
"ceiling. Inside an autocommit_block, restore the ceiling "
534+
"with an explicit `SET statement_timeout = '30s'` instead."
448535
),
449536
)
450537

@@ -517,12 +604,17 @@ def _changed_migrations(base: str) -> list[Path]:
517604
# Fall back to two-dot diff if the merge base cannot be computed
518605
# (e.g. shallow clone in CI without that history).
519606
out = _git("diff", "--name-only", "--diff-filter=AM", base)
607+
# `git diff --name-only` returns paths relative to the actual git
608+
# top-level, not REPO_ROOT (which is the package root). Resolve the
609+
# toplevel explicitly so this keeps working if the package is moved
610+
# within the repo (instead of silently producing nonexistent
611+
# candidates and linting nothing).
612+
git_toplevel = Path(_git("rev-parse", "--show-toplevel").strip())
520613
paths: list[Path] = []
521-
repo_root_parent = REPO_ROOT.parent
522614
for line in out.splitlines():
523615
if not line.strip():
524616
continue
525-
candidate = (repo_root_parent / line).resolve()
617+
candidate = (git_toplevel / line).resolve()
526618
if not candidate.exists():
527619
continue
528620
try:

0 commit comments

Comments
 (0)