Skip to content

Commit 02fb253

Browse files
declan-scaleclaude
andcommitted
fix(migrations): address greptile P1/P2 review on linter
- prefer-robust-stmts now also inspects raw SQL inside op.execute(...): flags op.execute("CREATE INDEX ...") without CONCURRENTLY and op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") without NOT VALID. Closes the documented escape hatch where copying SQL from a DBA runbook into op.execute bypassed every helper-level check (P1). - in-band-backfill no longer false-positives on INSERT ... ON CONFLICT DO UPDATE SET upserts (legitimate schema-init shape). Tightened _DATA_BACKFILL with a (?!SET\b) negative lookahead (P2). - no-timeout-overrides skips matches that fall after a # on the same line so a Python comment quoting an old SET command is no longer flagged (P2). - Module docstring's Rules section now lists in-band-backfill alongside the other five (developer searching the docstring for a CI failure rule will find it). Adds five unit tests: raw CREATE INDEX in op.execute, raw CREATE INDEX CONCURRENTLY passes, raw ADD CONSTRAINT FK in op.execute, raw FK with NOT VALID passes, upsert clause not flagged, and SET in Python comment not flagged. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent c1f2f2e commit 02fb253

2 files changed

Lines changed: 165 additions & 2 deletions

File tree

agentex/scripts/ci_tools/migration_lint.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
- ``prefer-robust-stmts``: ``op.create_index`` without
2323
``postgresql_concurrently=True`` and ``op.create_foreign_key`` without
2424
``postgresql_not_valid=True`` on populated tables block writers for the
25-
duration of the operation.
25+
duration of the operation. Also catches the raw-SQL escape hatches
26+
``op.execute("CREATE INDEX ...")`` (without ``CONCURRENTLY``) and
27+
``op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...")``
28+
(without ``NOT VALID``).
2629
- ``disallowed-unique-constraint``: ``op.create_unique_constraint`` builds the
2730
supporting index while blocking writes; create the index concurrently first
2831
and attach the constraint with ``USING INDEX``.
@@ -37,6 +40,10 @@
3740
``SET statement_timeout``, ``SET idle_in_transaction_session_timeout``, and
3841
``RESET`` of those, so a migration cannot quietly disable the runtime
3942
guardrails.
43+
- ``in-band-backfill``: ``op.execute("UPDATE ...")`` or ``DELETE FROM`` inside
44+
a migration holds row locks for the entire transaction and prevents
45+
autovacuum from cleaning up. Move data backfills to an out-of-band
46+
operator runbook and keep migrations schema-only.
4047
4148
Escape hatch
4249
------------
@@ -127,9 +134,20 @@ def format(self) -> str:
127134
re.IGNORECASE,
128135
)
129136
_DATA_BACKFILL = re.compile(
130-
r"\b(?:UPDATE\s+\w|DELETE\s+FROM\b)",
137+
# ``UPDATE\s+(?!SET\b)\w`` skips the ``UPDATE SET`` clause that appears
138+
# inside ``INSERT ... ON CONFLICT DO UPDATE SET`` upserts (a legitimate
139+
# schema-init pattern that should not flag in-band-backfill).
140+
r"\b(?:UPDATE\s+(?!SET\b)\w|DELETE\s+FROM\b)",
131141
re.IGNORECASE,
132142
)
143+
_RAW_BLOCKING_INDEX = re.compile(
144+
r"\bCREATE\s+(?:UNIQUE\s+)?INDEX\b(?!\s+CONCURRENTLY)",
145+
re.IGNORECASE,
146+
)
147+
_RAW_VALIDATING_FK = re.compile(
148+
r"\bADD\s+CONSTRAINT\b(?:[^;]*?\bFOREIGN\s+KEY\b)(?![^;]*?\bNOT\s+VALID\b)",
149+
re.IGNORECASE | re.DOTALL,
150+
)
133151
_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(")
134152

135153

@@ -164,6 +182,19 @@ def _has_noqa(source: str, line: int) -> bool:
164182
return False
165183

166184

185+
def _is_in_python_comment(source: str, pos: int) -> bool:
186+
"""Return True if ``pos`` falls after a ``#`` on the same source line.
187+
188+
Naive — does not account for ``#`` appearing inside a string literal — but
189+
Alembic migrations rarely embed ``#`` in SQL, and any false negative here
190+
just leaves the previous behavior unchanged. Used to keep regex-level
191+
rules from flagging text inside Python comments (e.g. a comment referencing
192+
a forbidden ``SET lock_timeout`` for documentation purposes).
193+
"""
194+
line_start = source.rfind("\n", 0, pos) + 1
195+
return "#" in source[line_start:pos]
196+
197+
167198
def _tables_created(source: str) -> set[str]:
168199
"""Return the set of table names created via ``op.create_table`` in this
169200
migration.
@@ -242,6 +273,39 @@ def _check_prefer_robust_stmts(path: Path, source: str) -> Iterable[Finding]:
242273
),
243274
)
244275

276+
# Catch the raw-SQL escape hatches: developers copying ``CREATE INDEX`` /
277+
# ``ADD CONSTRAINT FOREIGN KEY`` from a DBA runbook into ``op.execute(...)``
278+
# would otherwise bypass every helper-level check above.
279+
for match in _OP_EXECUTE.finditer(source):
280+
line = _line_of(source, match.start())
281+
if _has_noqa(source, line):
282+
continue
283+
call = _slice_call(source, match.start())
284+
if _RAW_BLOCKING_INDEX.search(call):
285+
yield Finding(
286+
path=path,
287+
line=line,
288+
rule="prefer-robust-stmts",
289+
message=(
290+
'op.execute("CREATE INDEX ...") without CONCURRENTLY '
291+
"takes ShareLock for the whole build and blocks writes. "
292+
"Use CREATE INDEX CONCURRENTLY inside an "
293+
"op.get_context().autocommit_block()."
294+
),
295+
)
296+
if _RAW_VALIDATING_FK.search(call):
297+
yield Finding(
298+
path=path,
299+
line=line,
300+
rule="prefer-robust-stmts",
301+
message=(
302+
'op.execute("ALTER TABLE ... ADD CONSTRAINT ... FOREIGN KEY ...") '
303+
"without NOT VALID takes ShareRowExclusiveLock and "
304+
"full-scans the child table to validate. Add NOT VALID, "
305+
"then VALIDATE CONSTRAINT in a follow-up migration."
306+
),
307+
)
308+
245309

246310
def _check_disallowed_unique_constraint(path: Path, source: str) -> Iterable[Finding]:
247311
fresh_tables = _tables_created(source)
@@ -353,6 +417,8 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
353417
line = _line_of(source, match.start())
354418
if _has_noqa(source, line):
355419
continue
420+
if _is_in_python_comment(source, match.start()):
421+
continue
356422
yield Finding(
357423
path=path,
358424
line=line,
@@ -369,6 +435,8 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]:
369435
line = _line_of(source, match.start())
370436
if _has_noqa(source, line):
371437
continue
438+
if _is_in_python_comment(source, match.start()):
439+
continue
372440
yield Finding(
373441
path=path,
374442
line=line,

agentex/tests/unit/scripts/test_migration_lint.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,101 @@ def upgrade():
272272
assert any(f.rule == "in-band-backfill" for f in findings)
273273

274274

275+
def test_in_band_backfill_upsert_not_flagged(tmp_path: Path) -> None:
276+
"""ON CONFLICT DO UPDATE SET upserts are legitimate schema-init shape."""
277+
path = _write(
278+
tmp_path,
279+
"""
280+
from alembic import op
281+
def upgrade():
282+
op.execute(
283+
"INSERT INTO foo (id, name) VALUES (1, 'x') "
284+
"ON CONFLICT (id) DO UPDATE SET name = EXCLUDED.name"
285+
)
286+
""",
287+
)
288+
findings = migration_lint.lint_file(path)
289+
assert all(f.rule != "in-band-backfill" for f in findings)
290+
291+
292+
def test_raw_sql_create_index_in_op_execute_flagged(tmp_path: Path) -> None:
293+
path = _write(
294+
tmp_path,
295+
"""
296+
from alembic import op
297+
def upgrade():
298+
op.execute("CREATE INDEX idx_foo_bar ON foo (bar)")
299+
""",
300+
)
301+
findings = migration_lint.lint_file(path)
302+
assert any(f.rule == "prefer-robust-stmts" for f in findings)
303+
304+
305+
def test_raw_sql_create_index_concurrently_in_op_execute_passes(tmp_path: Path) -> None:
306+
path = _write(
307+
tmp_path,
308+
"""
309+
from alembic import op
310+
def upgrade():
311+
with op.get_context().autocommit_block():
312+
op.execute("CREATE INDEX CONCURRENTLY idx_foo_bar ON foo (bar)")
313+
""",
314+
)
315+
findings = [
316+
f for f in migration_lint.lint_file(path) if f.rule == "prefer-robust-stmts"
317+
]
318+
assert findings == []
319+
320+
321+
def test_raw_sql_add_fk_without_not_valid_flagged(tmp_path: Path) -> None:
322+
path = _write(
323+
tmp_path,
324+
"""
325+
from alembic import op
326+
def upgrade():
327+
op.execute(
328+
"ALTER TABLE foo ADD CONSTRAINT fk_foo_bar "
329+
"FOREIGN KEY (bar_id) REFERENCES bar (id)"
330+
)
331+
""",
332+
)
333+
findings = migration_lint.lint_file(path)
334+
assert any(f.rule == "prefer-robust-stmts" for f in findings)
335+
336+
337+
def test_raw_sql_add_fk_with_not_valid_passes(tmp_path: Path) -> None:
338+
path = _write(
339+
tmp_path,
340+
"""
341+
from alembic import op
342+
def upgrade():
343+
op.execute(
344+
"ALTER TABLE foo ADD CONSTRAINT fk_foo_bar "
345+
"FOREIGN KEY (bar_id) REFERENCES bar (id) NOT VALID"
346+
)
347+
""",
348+
)
349+
findings = [
350+
f for f in migration_lint.lint_file(path) if f.rule == "prefer-robust-stmts"
351+
]
352+
assert findings == []
353+
354+
355+
def test_set_timeout_in_python_comment_not_flagged(tmp_path: Path) -> None:
356+
"""A Python comment mentioning a forbidden SET shouldn't flag."""
357+
path = _write(
358+
tmp_path,
359+
"""
360+
from alembic import op
361+
def upgrade():
362+
# Previously we used: SET lock_timeout = '5s'
363+
op.execute("SELECT 1")
364+
""",
365+
)
366+
findings = migration_lint.lint_file(path)
367+
assert all(f.rule != "no-timeout-overrides" for f in findings)
368+
369+
275370
def test_in_band_backfill_select_not_flagged(tmp_path: Path) -> None:
276371
path = _write(
277372
tmp_path,

0 commit comments

Comments
 (0)