|
80 | 80 |
|
81 | 81 | REPO_ROOT = Path(__file__).resolve().parents[2] |
82 | 82 | MIGRATIONS_DIR = REPO_ROOT / "database" / "migrations" / "alembic" / "versions" |
83 | | -ENV_PY = REPO_ROOT / "database" / "migrations" / "alembic" / "env.py" |
84 | 83 | ESCAPE_HATCH_ENV_VAR = "MIGRATION_UNSAFE_ACK" |
85 | 84 |
|
86 | 85 |
|
@@ -127,6 +126,11 @@ def format(self) -> str: |
127 | 126 | r"(lock_timeout|statement_timeout|idle_in_transaction_session_timeout)\b", |
128 | 127 | re.IGNORECASE, |
129 | 128 | ) |
| 129 | +_DATA_BACKFILL = re.compile( |
| 130 | + r"\b(?:UPDATE\s+\w|DELETE\s+FROM\b)", |
| 131 | + re.IGNORECASE, |
| 132 | +) |
| 133 | +_AUTOCOMMIT_OPENER = re.compile(r"\bwith\b[^#\n]*\bautocommit_block\s*\(") |
130 | 134 |
|
131 | 135 |
|
132 | 136 | def _slice_call(source: str, start: int) -> str: |
@@ -281,24 +285,67 @@ def _check_adding_required_field(path: Path, source: str) -> Iterable[Finding]: |
281 | 285 | ) |
282 | 286 |
|
283 | 287 |
|
def _autocommit_spans(source: str) -> list[tuple[int, int]]:
    """Return (start, end) character-offset ranges covered by ``autocommit_block()`` bodies.

    Each range is half-open and indexes into ``source``. It starts at the
    line following a ``with ... autocommit_block(`` header and extends to the
    end of the last subsequent line that is blank or indented strictly more
    than the header.

    Detection is textual and indentation-based:

    * A header only counts when ``with`` (optionally ``async with``) is the
      first thing on its line, so a commented-out header does not open a span.
    * A comment-only line at or below the header's indentation does not close
      the block; Python ignores such lines when working out block structure.
    * Limitations: the header must fit on one line, a body written on the
      header line itself is not covered, and a line of a multi-line string
      literal at or below the header's indentation ends the span early.
    """
    lines = source.splitlines(keepends=True)

    # offsets[k] is the offset at which line k starts. The extra trailing
    # entry equals len(source), so offsets[k + 1] is always "end of line k".
    offsets = [0]
    for text in lines:
        offsets.append(offsets[-1] + len(text))

    spans: list[tuple[int, int]] = []
    for i, line in enumerate(lines):
        opener = _AUTOCOMMIT_OPENER.search(line)
        if opener is None:
            continue
        # Anything other than indentation (or ``async``) before ``with`` means
        # the text sits in a comment or trailing expression, not a real header.
        if line[: opener.start()].strip() not in ("", "async"):
            continue

        opener_indent = len(line) - len(line.lstrip(" \t"))
        body_start = body_end = offsets[i + 1]
        for j in range(i + 1, len(lines)):
            text = lines[j]
            stripped = text.strip()
            if not stripped:
                # Blank lines never terminate a block.
                body_end = offsets[j + 1]
                continue
            indent = len(text) - len(text.lstrip(" \t"))
            if indent <= opener_indent:
                if stripped.startswith("#"):
                    # A dedented comment is not a dedent; keep scanning but
                    # do not extend the span on its account.
                    continue
                break
            body_end = offsets[j + 1]
        spans.append((body_start, body_end))
    return spans
| 321 | + |
| 322 | + |
def _check_transaction_nesting(path: Path, source: str) -> Iterable[Finding]:
    """Yield a finding for every ``postgresql_concurrently=True`` outside an
    ``autocommit_block``.

    Every occurrence is judged by its own offset against the ranges reported
    by ``_autocommit_spans``, so a wrapped index elsewhere in the file does
    not excuse an unwrapped one. Occurrences on a line accepted by
    ``_has_noqa`` are skipped.
    """
    protected = _autocommit_spans(source)

    def _is_wrapped(offset: int) -> bool:
        # True when ``offset`` falls inside any half-open autocommit range.
        for lo, hi in protected:
            if lo <= offset < hi:
                return True
        return False

    concurrent_kwarg = re.compile(r"postgresql_concurrently\s*=\s*True")
    for hit in concurrent_kwarg.finditer(source):
        offset = hit.start()
        lineno = _line_of(source, offset)
        # Suppression is consulted first, then containment.
        if _has_noqa(source, lineno) or _is_wrapped(offset):
            continue
        yield Finding(
            path=path,
            line=lineno,
            rule="transaction-nesting",
            message=(
                "postgresql_concurrently=True must run inside "
                "`with op.get_context().autocommit_block():` — "
                "CREATE INDEX CONCURRENTLY cannot run inside a transaction."
            ),
        )
302 | 349 |
|
303 | 350 |
|
304 | 351 | def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: |
@@ -334,12 +381,40 @@ def _check_no_timeout_overrides(path: Path, source: str) -> Iterable[Finding]: |
334 | 381 | ) |
335 | 382 |
|
336 | 383 |
|
def _check_in_band_backfill(path: Path, source: str) -> Iterable[Finding]:
    """Yield a finding for each ``op.execute(...)`` call whose text matches
    ``_DATA_BACKFILL`` (an UPDATE or DELETE FROM).

    Rationale carried by the rule: a backfill executed in the migration holds
    row locks for the whole migration transaction and keeps autovacuum from
    reclaiming dead tuples, so it belongs in an out-of-band operator runbook.
    Calls that start on a line accepted by ``_has_noqa`` are skipped.
    """
    for call_site in _OP_EXECUTE.finditer(source):
        offset = call_site.start()
        lineno = _line_of(source, offset)
        if _has_noqa(source, lineno):
            continue
        # Only the text of this one call is inspected, not the whole file.
        if _DATA_BACKFILL.search(_slice_call(source, offset)) is None:
            continue
        yield Finding(
            path=path,
            line=lineno,
            rule="in-band-backfill",
            message=(
                "op.execute() containing UPDATE / DELETE FROM holds row "
                "locks for the entire migration transaction and prevents "
                "autovacuum from cleaning up. Move data backfills to an "
                "out-of-band operator runbook and keep the migration "
                "schema-only."
            ),
        )
| 409 | + |
| 410 | + |
337 | 411 | _RULES = ( |
338 | 412 | _check_prefer_robust_stmts, |
339 | 413 | _check_disallowed_unique_constraint, |
340 | 414 | _check_adding_required_field, |
341 | 415 | _check_transaction_nesting, |
342 | 416 | _check_no_timeout_overrides, |
| 417 | + _check_in_band_backfill, |
343 | 418 | ) |
344 | 419 |
|
345 | 420 |
|
|
0 commit comments