diff --git a/.github/workflows/auto-tag.yml b/.github/workflows/auto-tag.yml deleted file mode 100644 index 1357274..0000000 --- a/.github/workflows/auto-tag.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: Auto Tag and Release - -on: - push: - branches: [main] - -jobs: - auto-tag: - runs-on: ubuntu-latest - permissions: - contents: write - outputs: - tag: ${{ steps.tag.outputs.tag }} - created: ${{ steps.tag.outputs.created }} - - steps: - - name: Checkout code - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Create tag if needed - id: tag - run: | - VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/') - TAG="v${VERSION}" - - echo "Detected version: $VERSION" - echo "Tag: $TAG" - echo "tag=$TAG" >> "$GITHUB_OUTPUT" - - if git rev-parse "$TAG" >/dev/null 2>&1; then - echo "Tag $TAG already exists. Nothing to do." - echo "created=false" >> "$GITHUB_OUTPUT" - exit 0 - fi - - echo "Creating and pushing tag $TAG" - git tag "$TAG" - git push origin "$TAG" - echo "created=true" >> "$GITHUB_OUTPUT" - - release: - needs: auto-tag - if: needs.auto-tag.outputs.created == 'true' - runs-on: ubuntu-latest - permissions: - contents: write - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Create GitHub Release - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - gh release create "${{ needs.auto-tag.outputs.tag }}" \ - --title "Release ${{ needs.auto-tag.outputs.tag }}" \ - --generate-notes diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index db8ba7d..bfe31fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,9 +20,9 @@ jobs: python-version: '3.12' - name: Install Poetry - uses: snok/install-poetry@v1 + uses: snok/install-poetry@76e04a911780d5b312d89783f7b1cd627778900a # v1.4.1 with: - version: latest + version: "2.1.3" virtualenvs-create: true virtualenvs-in-project: true @@ -32,5 +32,77 @@ jobs: - name: Run pre-commit hooks run: poetry run pre-commit run --all-files + - name: Check version consistency + run: | + PYPROJECT_VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/' || true) + INIT_VERSION=$(grep '^__version__ = ' squawk_alembic/__init__.py | sed 's/__version__ = "\(.*\)"/\1/' || true) + if [ -z "$PYPROJECT_VERSION" ] || [ -z "$INIT_VERSION" ]; then + echo "::error::Could not parse version from pyproject.toml or __init__.py" + exit 1 + fi + if [ "$PYPROJECT_VERSION" != "$INIT_VERSION" ]; then + echo "::error::Version mismatch: pyproject.toml ($PYPROJECT_VERSION) != __init__.py ($INIT_VERSION)" + echo "Run 'make bump VERSION=x.y.z' to update both files." + exit 1 + fi + echo "Versions match: $PYPROJECT_VERSION" + - name: Run tests run: poetry run pytest tests/ -v + + auto-tag: + needs: lint-and-test + if: github.event_name == 'push' + runs-on: ubuntu-latest + permissions: + contents: write + outputs: + tag: ${{ steps.tag.outputs.tag }} + created: ${{ steps.tag.outputs.created }} + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Create tag if needed + id: tag + run: | + VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/') + TAG="v${VERSION}" + + echo "Detected version: $VERSION" + echo "Tag: $TAG" + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + + if git rev-parse "$TAG" >/dev/null 2>&1; then + echo "Tag $TAG already exists. Nothing to do." + echo "created=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + + echo "Creating and pushing tag $TAG" + git tag "$TAG" + git push origin "$TAG" + echo "created=true" >> "$GITHUB_OUTPUT" + + release: + needs: auto-tag + if: needs.auto-tag.outputs.created == 'true' + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Create GitHub Release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TAG: ${{ needs.auto-tag.outputs.tag }} + run: | + gh release create "$TAG" \ + --title "Release $TAG" \ + --generate-notes diff --git a/.github/workflows/version-check.yml b/.github/workflows/version-check.yml deleted file mode 100644 index a3922e6..0000000 --- a/.github/workflows/version-check.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Version Check - -on: - pull_request: - branches: [main] - -jobs: - check-version-sync: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Check version consistency - run: | - PYPROJECT_VERSION=$(grep '^version = ' pyproject.toml | sed 's/version = "\(.*\)"/\1/') - INIT_VERSION=$(grep '^__version__ = ' squawk_alembic/__init__.py | sed 's/__version__ = "\(.*\)"/\1/') - - echo "pyproject.toml version: $PYPROJECT_VERSION" - echo "__init__.py version: $INIT_VERSION" - - if [ "$PYPROJECT_VERSION" != "$INIT_VERSION" ]; then - echo "::error::Version mismatch: pyproject.toml ($PYPROJECT_VERSION) != __init__.py ($INIT_VERSION)" - echo "Run 'make bump VERSION=x.y.z' to update both files." - exit 1 - fi - - echo "Versions match." diff --git a/.gitignore b/.gitignore index 2d1b0c7..8064b42 100644 --- a/.gitignore +++ b/.gitignore @@ -1,179 +1,32 @@ -# Byte-compiled / optimized / DLL files +# Byte-compiled / optimized __pycache__/ *.py[cod] *$py.class -# C extensions -*.so - # Distribution / packaging -.Python build/ -develop-eggs/ dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ *.egg-info/ -.installed.cfg *.egg -MANIFEST -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec +# Virtual environments +.venv/ -# Installer logs -pip-log.txt -pip-delete-this-directory.txt +# Linting +.ruff_cache/ -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ +# Test / coverage .pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# poetry -# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. -# This is especially recommended for binary packages to ensure reproducibility, and is more -# commonly ignored for libraries. -# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control -#poetry.lock - -# pdm -# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. -#pdm.lock -# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it -# in version control. -# https://pdm.fming.dev/#use-with-ide -.pdm.toml - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ +.coverage +htmlcov/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. +# Editors / OS .idea/ - +.vscode/ .DS_Store -.vscode -.python-version - -compare_output/ -export/backup/ -node_modules/ +# Environment +.env -.serverless/ - -# Local files with credentials/secrets -local_files/ - -# Claude skills/agents/config -.claude/ -CLAUDE.md -.mcp.json +# Python version +.python-version diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4e11afb..7d15c11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,9 +30,9 @@ repos: hooks: - id: poetry-check - repo: https://github.com/facebook/pyrefly-pre-commit - rev: 0.0.1 + rev: 0.53.0 hooks: - - id: pyrefly-typecheck-system + - id: pyrefly-check name: Pyrefly (type checking) pass_filenames: false - always_run: true + language: system diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e9ae36e --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,40 @@ +# Contributing + +We welcome contributions to squawk-pre-commit. Here's how to get started. + +## Development Setup + +1. Clone the repo and install dependencies: + +```bash +git clone https://github.com/kintsugi-tax/squawk-pre-commit.git +cd squawk-pre-commit +poetry install +``` + +2. Install pre-commit hooks: + +```bash +pre-commit install +``` + +3. Run the test suite: + +```bash +poetry run pytest tests/ -v +``` + +## Submitting Changes + +1. Fork the repo and create a branch from `main` +2. Make your changes and add tests where appropriate +3. Ensure all tests pass and pre-commit hooks are clean +4. Open a pull request against `main` + +## Reporting Issues + +Open an issue on [GitHub](https://github.com/kintsugi-tax/squawk-pre-commit/issues) with a clear description of the problem and steps to reproduce. + +## License + +By contributing, you agree that your contributions will be licensed under the [MIT License](LICENSE). diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fc74eea --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Kintsugi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 3939d71..c3b0ef0 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,76 @@ -# Kintsugi Squawk +# squawk-pre-commit A [pre-commit](https://pre-commit.com/) hook that lints SQL in [Alembic](https://alembic.sqlalchemy.org/) migrations using [squawk](https://squawkhq.com/), a PostgreSQL migration linter. Squawk operates on raw SQL files, but Alembic migrations are Python. This hook bridges the gap by generating DDL via `alembic upgrade --sql` (offline mode) and passing the complete SQL output to squawk for analysis. This captures all SQL statements a migration produces, including ORM operations like `op.create_index()`, `op.create_table()`, and `op.alter_column()`. -The hook also checks that concurrent index operations (`CONCURRENTLY` in `op.execute()` or `postgresql_concurrently=True` in `op.create_index()` / `op.drop_index()`) are wrapped in `autocommit_block()`. - ## Usage Add the following to your `.pre-commit-config.yaml`: ```yaml repos: - - repo: https://github.com/kintsugi-tax/kintsugi-squawk - rev: v0.2.0 + - repo: https://github.com/kintsugi-tax/squawk-pre-commit + rev: v0.3.0 hooks: - id: squawk-alembic ``` No additional configuration is required. The hook auto-detects your migrations directory by reading `script_location` from `alembic.ini`. The consumer's `alembic` must be available on PATH (the hook calls it via subprocess). +### Pinning the squawk version + +The hook depends on `squawk-cli >= 2.0`. To pin a specific squawk version (matching your local dev dependency, for example), use `additional_dependencies`: + +```yaml +repos: + - repo: https://github.com/kintsugi-tax/squawk-pre-commit + rev: v0.3.0 + hooks: + - id: squawk-alembic + additional_dependencies: ["squawk-cli==2.41.0"] +``` + +This overrides the default version and ensures pre-commit uses the exact squawk release you specify. + +### Only lint new migrations + +To skip migrations that already exist on a branch (useful for repos with existing violations you can't fix immediately), pass `--diff-branch`: + +```yaml +repos: + - repo: https://github.com/kintsugi-tax/squawk-pre-commit + rev: v0.3.0 + hooks: + - id: squawk-alembic + args: [--diff-branch, main] +``` + +With this flag, the hook checks whether each migration file exists on the specified branch. Files that already exist are skipped. New files (not yet on the branch) are linted. This makes `pre-commit run --all-files` safe to run in repos where older migrations would fail linting. + +The value must be a ref that resolves locally via `git rev-parse --verify`. In CI environments with shallow checkouts or no local tracking branch, use the remote form: + +```yaml + args: [--diff-branch, origin/main] +``` + ## How It Works When pre-commit runs, the hook: 1. Parses `alembic.ini` to find the migrations `versions/` directory 2. Filters staged files to only those under that directory -3. Checks for concurrent operations outside `autocommit_block()` -4. Runs `alembic upgrade --sql` to generate the complete DDL for each migration -5. Pipes the generated SQL to squawk for linting +3. Runs `alembic upgrade --sql` to generate the complete DDL for each migration +4. Pipes the generated SQL to squawk for linting Merge migrations (where `down_revision` is a tuple) are skipped since they produce no DDL. +## Known Limitations + +* The hook runs `alembic upgrade --sql`, which executes your project's `env.py` in offline mode. No database connection is made, but the Python code in `env.py` does run. +* If `DATABASE_URL` is not set, the hook provides a dummy fallback (`postgresql://localhost/lint`) so alembic's offline mode can generate SQL without a real connection string. +* If `alembic upgrade --sql` fails for a migration (e.g. due to missing dependencies or env configuration), the hook prints the error to stderr and fails the run. + ## Squawk Configuration Squawk reads its configuration from `.squawk.toml` in the consumer repo root. See the [squawk docs](https://squawkhq.com/docs/configuration/) for available options. @@ -40,7 +79,7 @@ Squawk reads its configuration from `.squawk.toml` in the consumer repo root. Se **Prerequisites:** -* Python (version 3.12) +* Python 3.10+ (3.12 recommended for development) * Poetry * squawk-cli (`pip install squawk-cli`) @@ -51,9 +90,11 @@ Squawk reads its configuration from `.squawk.toml` in the consumer repo root. Se 3. Install the pre-commit hooks: `pre-commit install` 4. Run tests: `poetry run pytest tests/ -v` +Some integration tests in `tests/test_squawk_config.py` require `squawk` on PATH and are automatically skipped if it is not installed. + To test the hook against a consumer repo locally: ```bash cd /path/to/consumer-repo -pre-commit try-repo /path/to/kintsugi-squawk squawk-alembic --all-files +pre-commit try-repo /path/to/squawk-pre-commit squawk-alembic --all-files ``` diff --git a/poetry.lock b/poetry.lock index bcbab05..dc10bd1 100644 --- a/poetry.lock +++ b/poetry.lock @@ -753,4 +753,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.1" python-versions = ">=3.10" -content-hash = "4380964a98b1aeeaa126dbb3b2e8c7d08f7ffc68b3bceba04acd52e7b35c48f3" +content-hash = "571a388b10319527ae4d2cf3e415e7adb49268c9cb92e103ed02b7903613ca9a" diff --git a/pyproject.toml b/pyproject.toml index 40b9361..0ca6c55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,13 +4,29 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "squawk-alembic" -version = "0.2.0" +version = "0.3.0" description = "Pre-commit hook to lint Alembic migration SQL with squawk" packages = [{include = "squawk_alembic"}] +readme = "README.md" +homepage = "https://www.trykintsugi.com/" +repository = "https://github.com/kintsugi-tax/squawk-pre-commit" +authors = ["Kintsugi "] +license = "MIT" +keywords = ["pre-commit", "squawk", "alembic", "postgresql", "migration", "linter"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Software Development :: Quality Assurance", + "Topic :: Database", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] [tool.poetry.dependencies] python = ">=3.10" -squawk-cli = "*" +squawk-cli = ">=2.0" [tool.poetry.group.dev.dependencies] alembic = "*" diff --git a/squawk_alembic/__init__.py b/squawk_alembic/__init__.py index d3ec452..493f741 100644 --- a/squawk_alembic/__init__.py +++ b/squawk_alembic/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/squawk_alembic/hook.py b/squawk_alembic/hook.py index ad64075..92a6785 100644 --- a/squawk_alembic/hook.py +++ b/squawk_alembic/hook.py @@ -1,13 +1,17 @@ """Pre-commit hook that generates DDL via alembic upgrade --sql and lints with squawk.""" -import ast -import configparser +import argparse import os +import re import subprocess import sys import tempfile +from ast import Assign, Constant, Name, Tuple, iter_child_nodes, parse +from configparser import ConfigParser, NoOptionError, NoSectionError from pathlib import Path +_BRANCH_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9._/\-]*$") + def find_migrations_path(): """Auto-detect the alembic migrations versions directory from alembic.ini.""" @@ -15,12 +19,12 @@ def find_migrations_path(): if not config_path.exists(): return None - config = configparser.ConfigParser() + config = ConfigParser() config.read(config_path) try: script_location = config.get("alembic", "script_location") - except (configparser.NoSectionError, configparser.NoOptionError): + except (NoSectionError, NoOptionError): return None script_location = script_location.removeprefix("./") @@ -45,35 +49,33 @@ def extract_revision_info(filepath): """Parse a migration file to extract revision and down_revision from module-level assignments.""" with open(filepath) as f: try: - tree = ast.parse(f.read()) + tree = parse(f.read()) except SyntaxError: return None revision = None down_revision = None - for node in ast.iter_child_nodes(tree): - if not isinstance(node, ast.Assign): + for node in iter_child_nodes(tree): + if not isinstance(node, Assign): continue - if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name): + if len(node.targets) != 1 or not isinstance(node.targets[0], Name): continue name = node.targets[0].id if name == "revision": - if isinstance(node.value, ast.Constant) and isinstance( - node.value.value, str - ): + if isinstance(node.value, Constant) and isinstance(node.value.value, str): revision = node.value.value elif name == "down_revision": - if isinstance(node.value, ast.Constant): + if isinstance(node.value, Constant): if isinstance(node.value.value, str): down_revision = node.value.value elif node.value.value is None: down_revision = None - elif isinstance(node.value, ast.Tuple): + elif isinstance(node.value, Tuple): values = [] for elt in node.value.elts: - if isinstance(elt, ast.Constant) and isinstance(elt.value, str): + if isinstance(elt, Constant) and isinstance(elt.value, str): values.append(elt.value) down_revision = tuple(values) @@ -86,8 +88,16 @@ def extract_revision_info(filepath): ) +class GenerateSqlError(Exception): + """Raised when alembic upgrade --sql fails.""" + + def generate_sql(filepath): - """Run alembic upgrade --sql to generate the complete DDL for a migration.""" + """Run alembic upgrade --sql to generate the complete DDL for a migration. + + Returns the SQL string, or None if the file should be skipped (merge migration, + unparseable revision). Raises GenerateSqlError if alembic fails. + """ info = extract_revision_info(filepath) if info is None: return None @@ -109,132 +119,98 @@ def generate_sql(filepath): text=True, env=env, ) - except FileNotFoundError: + except FileNotFoundError as exc: + raise GenerateSqlError( + "squawk-alembic: alembic not found. Ensure alembic is installed in your environment." + ) from exc + + if result.returncode != 0: + raise GenerateSqlError( + f"squawk-alembic: alembic upgrade --sql failed for {filepath}:\n{result.stderr}" + ) + + return result.stdout + + +def validate_branch(branch): + """Validate that a branch name is safe and exists in git.""" + if not _BRANCH_RE.match(branch) or ".." in branch: print( - "squawk-alembic: alembic not found. Ensure alembic is installed in your environment.", + f"squawk-alembic: invalid branch name: {branch!r}", file=sys.stderr, ) - return None - + return False + try: + result = subprocess.run( + ["git", "rev-parse", "--verify", branch], + capture_output=True, + ) + except FileNotFoundError: + print("squawk-alembic: git not found", file=sys.stderr) + return False if result.returncode != 0: print( - f"squawk-alembic: alembic upgrade --sql failed for {filepath}:\n{result.stderr}", + f"squawk-alembic: branch '{branch}' not found in git", file=sys.stderr, ) - return None + return False + return True - return result.stdout - -def check_autocommit_blocks(filepath): - """Check that CONCURRENTLY operations are inside autocommit blocks.""" - with open(filepath) as f: - try: - tree = ast.parse(f.read()) - except SyntaxError: - return [] - - checker = _AutocommitChecker() - checker.visit(tree) - return checker.warnings - - -def _has_concurrent_kwarg(node): - """Check if an AST Call node has postgresql_concurrently=True.""" - for kw in node.keywords: - if ( - kw.arg == "postgresql_concurrently" - and isinstance(kw.value, ast.Constant) - and kw.value.value is True - ): - return True - return False - - -class _AutocommitChecker(ast.NodeVisitor): - def __init__(self): - self.warnings = [] - self._in_autocommit = False - - def visit_With(self, node): - is_autocommit = any( - isinstance(item.context_expr, ast.Call) - and isinstance(item.context_expr.func, ast.Attribute) - and item.context_expr.func.attr == "autocommit_block" - for item in node.items +def file_exists_on_branch(filepath, branch): + """Check if a file exists on the given git branch.""" + try: + result = subprocess.run( + ["git", "cat-file", "-e", f"{branch}:{filepath}"], + capture_output=True, ) - if is_autocommit: - old = self._in_autocommit - self._in_autocommit = True - self.generic_visit(node) - self._in_autocommit = old - else: - self.generic_visit(node) - - def visit_Call(self, node): - if ( - isinstance(node.func, ast.Attribute) - and isinstance(node.func.value, ast.Name) - and node.func.value.id == "op" - ): - # op.execute("...CONCURRENTLY...") - if node.func.attr == "execute" and node.args: - sql = _extract_string(node.args[0]) - if sql and "concurrently" in sql.lower() and not self._in_autocommit: - self.warnings.append(node.lineno) - - # op.create_index(..., postgresql_concurrently=True) - # op.drop_index(..., postgresql_concurrently=True) - if node.func.attr in ("create_index", "drop_index"): - if _has_concurrent_kwarg(node) and not self._in_autocommit: - self.warnings.append(node.lineno) - - self.generic_visit(node) - - -def _extract_string(node): - """Extract a string value from an AST node, handling common wrappers.""" - if isinstance(node, ast.Constant) and isinstance(node.value, str): - return node.value - - if isinstance(node, ast.Call) and node.args: - if isinstance(node.func, ast.Attribute) and node.func.attr == "text": - return _extract_string(node.args[0]) - if isinstance(node.func, ast.Name) and node.func.id == "text": - return _extract_string(node.args[0]) - - return None + except FileNotFoundError: + return False + return result.returncode == 0 def main(): - files = sys.argv[1:] - if not files: + parser = argparse.ArgumentParser() + parser.add_argument( + "--diff-branch", + default=None, + help="Only lint migration files that don't exist on this branch.", + ) + parser.add_argument("files", nargs="*") + args = parser.parse_args() + + if not args.files: return 0 + if args.diff_branch and not validate_branch(args.diff_branch): + return 1 + migrations_path = find_migrations_path() if not migrations_path: print( "squawk-alembic: could not find alembic.ini or parse script_location", file=sys.stderr, ) - return 0 + return 1 exit_code = 0 - for filepath in files: + for filepath in args.files: try: Path(filepath).relative_to(migrations_path) except ValueError: continue - autocommit_warnings = check_autocommit_blocks(filepath) - for lineno in autocommit_warnings: - print( - f"{filepath}:{lineno}: CONCURRENTLY operation outside autocommit_block()" - ) + if args.diff_branch and file_exists_on_branch(filepath, args.diff_branch): + continue + + try: + sql = generate_sql(filepath) + except GenerateSqlError as exc: + print(str(exc), file=sys.stderr) exit_code = 1 + continue - sql = generate_sql(filepath) if not sql: continue @@ -244,7 +220,7 @@ def main(): try: result = subprocess.run( - ["squawk", tmp_path], + ["squawk", "--assume-in-transaction", tmp_path], capture_output=True, text=True, ) diff --git a/tests/test_autocommit_check.py b/tests/test_autocommit_check.py deleted file mode 100644 index 2738a27..0000000 --- a/tests/test_autocommit_check.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Tests for CONCURRENTLY outside autocommit_block detection.""" - -import textwrap - -import pytest - -from squawk_alembic.hook import check_autocommit_blocks - - -@pytest.fixture() -def migration_file(tmp_path): - """Write a migration file and return its path.""" - - def _write(source): - path = tmp_path / "migration.py" - path.write_text(textwrap.dedent(source)) - return str(path) - - return _write - - -# --- op.execute with CONCURRENTLY inside autocommit --- - - -def test_create_index_execute_in_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - with op.get_context().autocommit_block(): - op.execute("CREATE INDEX CONCURRENTLY ix_foo ON bar (baz)") - """) - assert check_autocommit_blocks(path) == [] - - -def test_drop_index_execute_in_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - with op.get_context().autocommit_block(): - op.execute("DROP INDEX CONCURRENTLY ix_foo") - """) - assert check_autocommit_blocks(path) == [] - - -def test_multiple_execute_ops_in_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - with op.get_context().autocommit_block(): - op.execute("DROP INDEX CONCURRENTLY IF EXISTS ix_old") - op.execute("CREATE INDEX CONCURRENTLY ix_new ON bar (baz)") - """) - assert check_autocommit_blocks(path) == [] - - -# --- op.execute with CONCURRENTLY outside autocommit --- - - -def test_create_index_execute_no_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("CREATE INDEX CONCURRENTLY ix_foo ON bar (baz)") - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_drop_index_execute_no_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("DROP INDEX CONCURRENTLY ix_foo") - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_execute_case_insensitive(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("create index concurrently ix_foo on bar (baz)") - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_execute_mixed_inside_and_outside(migration_file): - """One op inside autocommit, one outside. Only the outside one should warn.""" - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("CREATE INDEX CONCURRENTLY ix_bad ON bar (baz)") - with op.get_context().autocommit_block(): - op.execute("CREATE INDEX CONCURRENTLY ix_good ON bar (qux)") - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_execute_warning_includes_line_number(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("CREATE INDEX CONCURRENTLY ix_foo ON bar (baz)") - """) - warnings = check_autocommit_blocks(path) - assert len(warnings) == 1 - assert isinstance(warnings[0], int) - - -# --- op.create_index / op.drop_index with postgresql_concurrently --- - - -def test_create_index_concurrent_inside_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - with op.get_context().autocommit_block(): - op.create_index("ix_foo", "bar", ["baz"], postgresql_concurrently=True) - """) - assert check_autocommit_blocks(path) == [] - - -def test_create_index_concurrent_outside_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.create_index("ix_foo", "bar", ["baz"], postgresql_concurrently=True) - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_create_index_without_concurrent_kwarg(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.create_index("ix_foo", "bar", ["baz"]) - """) - assert check_autocommit_blocks(path) == [] - - -def test_drop_index_concurrent_outside_autocommit(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.drop_index("ix_foo", postgresql_concurrently=True) - """) - assert len(check_autocommit_blocks(path)) == 1 - - -def test_create_index_concurrent_mixed(migration_file): - """Only ops outside autocommit should warn.""" - path = migration_file(""" - from alembic import op - - def upgrade(): - op.create_index("ix_bad", "bar", ["baz"], postgresql_concurrently=True) - with op.get_context().autocommit_block(): - op.create_index("ix_good", "bar", ["qux"], postgresql_concurrently=True) - """) - assert len(check_autocommit_blocks(path)) == 1 - - -# --- No false positives --- - - -def test_non_concurrent_index(migration_file): - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("CREATE INDEX ix_foo ON bar (baz)") - """) - assert check_autocommit_blocks(path) == [] - - -def test_no_execute_calls(migration_file): - path = migration_file(""" - from alembic import op - import sqlalchemy as sa - - def upgrade(): - op.add_column('users', sa.Column('email', sa.String(255))) - """) - assert check_autocommit_blocks(path) == [] - - -def test_concurrently_in_non_concurrent_string(migration_file): - """Only flag actual CONCURRENTLY SQL, not unrelated strings.""" - path = migration_file(""" - from alembic import op - - def upgrade(): - op.execute("SET lock_timeout = '10s'") - """) - assert check_autocommit_blocks(path) == [] - - -def test_syntax_error(migration_file): - path = migration_file(""" - this is not valid python {{{ - """) - assert check_autocommit_blocks(path) == [] diff --git a/tests/test_find_migrations_path.py b/tests/test_find_migrations_path.py index 80b496f..b9f560f 100644 --- a/tests/test_find_migrations_path.py +++ b/tests/test_find_migrations_path.py @@ -1,11 +1,11 @@ """Tests for alembic.ini auto-detection.""" -import pytest +from pytest import fixture from squawk_alembic.hook import find_migrations_path -@pytest.fixture() +@fixture() def repo(tmp_path, monkeypatch): """Set up a fake repo directory and chdir into it.""" monkeypatch.chdir(tmp_path) diff --git a/tests/test_main.py b/tests/test_main.py index 3b466e8..8fae3b5 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -3,12 +3,12 @@ import textwrap from unittest.mock import patch -import pytest +from pytest import fixture from squawk_alembic.hook import main -@pytest.fixture() +@fixture() def repo(tmp_path, monkeypatch): """Set up a fake repo with alembic config and a versions directory.""" monkeypatch.chdir(tmp_path) @@ -30,12 +30,21 @@ def make_result(returncode=0, stdout="", stderr=""): )() -def fake_subprocess(alembic_result=None, squawk_result=None): +def fake_subprocess( + alembic_result=None, + squawk_result=None, + git_exists_on_branch=False, + git_branch_valid=True, +): """Return a side_effect function that dispatches based on the command.""" alembic_res = alembic_result or make_result(stdout="CREATE TABLE foo (id int);\n") squawk_res = squawk_result or make_result() def side_effect(cmd, **kwargs): + if cmd[0] == "git": + if "rev-parse" in cmd: + return make_result(returncode=0 if git_branch_valid else 1) + return make_result(returncode=0 if git_exists_on_branch else 1) if cmd[0] == "alembic": return alembic_res if cmd[0] == "squawk": @@ -50,10 +59,12 @@ def test_no_files(repo): assert main() == 0 -def test_no_alembic_ini(tmp_path, monkeypatch): +def test_no_alembic_ini(tmp_path, monkeypatch, capsys): monkeypatch.chdir(tmp_path) with patch("sys.argv", ["squawk-alembic", "some_file.py"]): - assert main() == 0 + assert main() == 1 + captured = capsys.readouterr() + assert "could not find alembic.ini" in captured.err def test_file_outside_migrations_skipped(repo): @@ -88,6 +99,7 @@ def upgrade(): assert "def456:abc123" in alembic_call squawk_call = mock_run.call_args_list[1][0][0] assert squawk_call[0] == "squawk" + assert "--assume-in-transaction" in squawk_call def test_squawk_failure(repo, capsys): @@ -118,7 +130,7 @@ def upgrade(): assert "some squawk warning" in captured.out -def test_alembic_failure_skips_file(repo, capsys): +def test_alembic_failure_fails_run(repo, capsys): path = write_migration( repo, "004_alembic_fail.py", @@ -141,7 +153,7 @@ def upgrade(): ), ), ): - assert main() == 0 + assert main() == 1 captured = capsys.readouterr() assert "alembic upgrade --sql failed" in captured.err @@ -160,11 +172,17 @@ def upgrade(): op.execute("CREATE TABLE foo (id int)") """, ) + + def alembic_not_found(cmd, **kwargs): + if cmd[0] == "alembic": + raise FileNotFoundError + raise ValueError(f"unexpected command: {cmd}") + with ( patch("sys.argv", ["squawk-alembic", path]), - patch("subprocess.run", side_effect=FileNotFoundError), + patch("subprocess.run", side_effect=alembic_not_found), ): - assert main() == 0 + assert main() == 1 captured = capsys.readouterr() assert "alembic not found" in captured.err @@ -212,3 +230,155 @@ def upgrade(): assert main() == 0 alembic_call = mock_run.call_args_list[0][0][0] assert "base:first001" in alembic_call + + +def test_diff_branch_skips_existing_file(repo): + path = write_migration( + repo, + "008_existing.py", + """ + revision = 'exists01' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", "--diff-branch", "main", path]), + patch( + "subprocess.run", + side_effect=fake_subprocess(git_exists_on_branch=True), + ) as mock_run, + ): + assert main() == 0 + # git rev-parse (validation) + git cat-file (exists check), no alembic or squawk + assert mock_run.call_count == 2 + assert mock_run.call_args_list[0][0][0][0] == "git" + assert mock_run.call_args_list[1][0][0][0] == "git" + + +def test_diff_branch_lints_new_file(repo): + path = write_migration( + repo, + "009_new.py", + """ + revision = 'new001' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", "--diff-branch", "main", path]), + patch( + "subprocess.run", + side_effect=fake_subprocess(git_exists_on_branch=False), + ) as mock_run, + ): + assert main() == 0 + # git rev-parse + git cat-file + alembic + squawk = 4 calls + assert mock_run.call_count == 4 + + +def test_diff_branch_nonexistent_branch_errors(repo, capsys): + path = write_migration( + repo, + "011_nonexistent.py", + """ + revision = 'non001' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", "--diff-branch", "nonexistent", path]), + patch( + "subprocess.run", + side_effect=fake_subprocess(git_branch_valid=False), + ) as mock_run, + ): + assert main() == 1 + # Only the git rev-parse validation call, then early exit + assert mock_run.call_count == 1 + captured = capsys.readouterr() + assert "not found in git" in captured.err + + +def test_diff_branch_traversal_rejected(repo, capsys): + path = write_migration( + repo, + "013_traversal.py", + """ + revision = 'trv001' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", "--diff-branch", "refs/../main", path]), + patch("subprocess.run") as mock_run, + ): + assert main() == 1 + mock_run.assert_not_called() + captured = capsys.readouterr() + assert "invalid branch name" in captured.err + + +def test_diff_branch_missing_git_binary(repo, capsys): + path = write_migration( + repo, + "014_no_git.py", + """ + revision = 'git001' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", "--diff-branch", "main", path]), + patch("subprocess.run", side_effect=FileNotFoundError), + ): + assert main() == 1 + captured = capsys.readouterr() + assert "git not found" in captured.err + + +def test_without_diff_branch_lints_all(repo): + path = write_migration( + repo, + "010_all.py", + """ + revision = 'all001' + down_revision = 'prev001' + + from alembic import op + + def upgrade(): + op.execute("CREATE TABLE foo (id int)") + """, + ) + with ( + patch("sys.argv", ["squawk-alembic", path]), + patch("subprocess.run", side_effect=fake_subprocess()) as mock_run, + ): + assert main() == 0 + # No git call, just alembic + squawk = 2 calls + assert mock_run.call_count == 2 diff --git a/tests/test_revision_info.py b/tests/test_revision_info.py index 2ed96e1..d808196 100644 --- a/tests/test_revision_info.py +++ b/tests/test_revision_info.py @@ -2,12 +2,12 @@ import textwrap -import pytest +from pytest import fixture from squawk_alembic.hook import extract_revision_info -@pytest.fixture() +@fixture() def migration_file(tmp_path): """Write a migration file and return its path.""" diff --git a/tests/test_squawk_config.py b/tests/test_squawk_config.py new file mode 100644 index 0000000..0d36fb2 --- /dev/null +++ b/tests/test_squawk_config.py @@ -0,0 +1,47 @@ +"""Integration tests verifying squawk behavior with and without .squawk.toml configuration.""" + +import shutil +import subprocess + +from pytest import fixture, mark + +pytestmark = mark.skipif(shutil.which("squawk") is None, reason="squawk not installed") + + +SQL = "ALTER TABLE foo ADD COLUMN bar text;\n" + + +@fixture() +def sql_file(tmp_path): + path = tmp_path / "migration.sql" + path.write_text(SQL) + return path + + +def test_without_config_flags_prefer_robust_stmts(tmp_path, sql_file): + """Without assume_in_transaction, squawk flags prefer-robust-stmts.""" + result = subprocess.run( + ["squawk", str(sql_file)], + capture_output=True, + text=True, + cwd=tmp_path, + ) + assert result.returncode != 0 + assert "prefer-robust-stmts" in result.stdout + + +def test_with_assume_in_transaction_suppresses_prefer_robust_stmts(tmp_path, sql_file): + """With assume_in_transaction = true, squawk suppresses prefer-robust-stmts.""" + # require-timeout-settings fires independently of assume_in_transaction, + # so we exclude it here to isolate the prefer-robust-stmts behavior. + (tmp_path / ".squawk.toml").write_text( + 'assume_in_transaction = true\nexcluded_rules = ["require-timeout-settings"]\n' + ) + result = subprocess.run( + ["squawk", str(sql_file)], + capture_output=True, + text=True, + cwd=tmp_path, + ) + assert result.returncode == 0 + assert "prefer-robust-stmts" not in result.stdout