diff --git a/.github/workflows/docs-validate.yml b/.github/workflows/docs-validate.yml
new file mode 100644
index 000000000..c9e02ffe8
--- /dev/null
+++ b/.github/workflows/docs-validate.yml
@@ -0,0 +1,45 @@
+name: Docs Validate  # runs the Mintlify validate and broken-links commands inside docs/
+
+on:
+  pull_request:
+    branches:
+      - "develop"
+    paths:
+      - ".github/workflows/docs-validate.yml"
+      - "docs/**"
+      - "rust-rewrite/docs-site-strategy.md"
+      - "rust-rewrite/docs-site-vercel-plan.md"
+  push:
+    branches:
+      - "develop"
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/docs-validate.yml"
+      - "docs/**"
+      - "rust-rewrite/docs-site-strategy.md"
+      - "rust-rewrite/docs-site-vercel-plan.md"
+  workflow_dispatch:  # also runnable manually
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  mintlify:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+
+      - name: Validate Mintlify build
+        working-directory: docs
+        run: npx --yes mintlify@latest validate  # @latest: the CLI version is not pinned
+
+      - name: Check Mintlify links
+        working-directory: docs
+        run: npx --yes mintlify@latest broken-links  # @latest: the CLI version is not pinned
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8173a1dd3..5015364e0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -73,6 +73,17 @@ jobs:
         env:
           CIBW_BUILD: "*cp3${{ matrix.python }}*"
 
+      - name: Smoke test built wheel through uvx
+        env:
+          PYTHON_VERSION: 3.${{ matrix.python }}  # matrix.python is the minor version, as in CIBW_BUILD
+        run: |
+          WHEEL="$(ls -t wheelhouse/graph_sitter-*.whl | head -n 1)"
+          if [ -z "$WHEEL" ]; then
+            echo "No graph-sitter wheel found in wheelhouse" >&2
+            exit 1
+          fi
+          rust-rewrite/tools/check_wheel_rust_backend.sh --wheel "$WHEEL"
+
+
       - uses: actions/upload-artifact@v4
         with:
           name: wheels-${{ matrix.os }}-3.${{ matrix.python }}
diff --git a/.github/workflows/rust-rewrite-cli-smoke.yml b/.github/workflows/rust-rewrite-cli-smoke.yml
new file mode 100644
index 000000000..c2f50a4d2
--- /dev/null
+++ b/.github/workflows/rust-rewrite-cli-smoke.yml
@@ -0,0 +1,49 @@
+name: Rust Rewrite CLI Smoke  # runs rust-rewrite/tools/check_cli_smoke.sh when CLI inputs change
+
+on:
+  pull_request:
+    branches:
+      - "develop"
+    paths:
+      - ".github/workflows/rust-rewrite-cli-smoke.yml"
+      - "pyproject.toml"
+      - "rust-rewrite/tools/check_cli_smoke.sh"
+      - "src/graph_sitter/cli/**"
+      - "tests/unit/cli/**"
+  push:
+    branches:
+      - "develop"
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/rust-rewrite-cli-smoke.yml"
+      - "pyproject.toml"
+      - "rust-rewrite/tools/check_cli_smoke.sh"
+      - "src/graph_sitter/cli/**"
+      - "tests/unit/cli/**"
+  workflow_dispatch:  # also runnable manually
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  cli-smoke:
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5.4
+        with:
+          enable-cache: true
+          prune-cache: false
+          python-version: "3.13"
+          version: "0.9.1"  # pinned uv release
+          cache-suffix: rust-rewrite-cli-smoke
+
+      - name: Install Python dependencies
+        run: uv sync --frozen  # install from the lockfile without updating it
+
+      - name: Run CLI smoke checks
+        run: rust-rewrite/tools/check_cli_smoke.sh
diff --git a/.github/workflows/rust-rewrite-extension.yml b/.github/workflows/rust-rewrite-extension.yml
new file mode 100644
index 000000000..acbf42bb0
--- /dev/null
+++ b/.github/workflows/rust-rewrite-extension.yml
@@ -0,0 +1,72 @@
+name: Rust Rewrite Extension Builds  # builds and smoke tests the extension per OS and Python
+
+on:
+  pull_request:
+    branches:
+      - "develop"
+    paths:
+      - ".github/workflows/rust-rewrite-extension.yml"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+      - "rust-rewrite/tools/check_extension_build.sh"
+      - "rust-rewrite/tools/check_wheel_rust_backend.sh"
+      - "pyproject.toml"
+  push:
+    branches:
+      - "develop"
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/rust-rewrite-extension.yml"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+      - "rust-rewrite/tools/check_extension_build.sh"
+      - "rust-rewrite/tools/check_wheel_rust_backend.sh"
+      - "pyproject.toml"
+  workflow_dispatch:  # also runnable manually
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  extension-build:
+    name: Python ${{ matrix.python-version }} ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 10
+    strategy:
+      fail-fast: false  # keep the remaining matrix jobs running when one fails
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        python-version: ["3.12", "3.13"]  # quoted so YAML keeps them as strings
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5.4
+        with:
+          enable-cache: true
+          prune-cache: false
+          python-version: ${{ matrix.python-version }}
+          version: "0.9.1"  # pinned uv release
+          cache-suffix: rust-rewrite-extension-${{ matrix.python-version }}  # one uv cache per Python
+
+      - name: Install Python dependencies
+        run: uv sync --frozen  # install from the lockfile without updating it
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust builds
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: rust-rewrite-extension-${{ matrix.os }}-${{ matrix.python-version }}
+
+      - name: Build and smoke test PyO3 extension
+        run: rust-rewrite/tools/check_extension_build.sh
+
+      - name: Build wheel and smoke test Rust backend through uvx
+        env:
+          PYTHON_VERSION: ${{ matrix.python-version }}
+        run: rust-rewrite/tools/check_wheel_rust_backend.sh
diff --git a/.github/workflows/rust-rewrite-fast.yml b/.github/workflows/rust-rewrite-fast.yml
new file mode 100644
index 000000000..5acedadf0
--- /dev/null
+++ b/.github/workflows/rust-rewrite-fast.yml
@@ -0,0 +1,85 @@
+name: Rust Rewrite Fast Checks  # runs rust-rewrite/tools/check_fast.sh on engine-related changes
+
+on:
+  pull_request:
+    branches:
+      - "develop"
+    paths:
+      - ".github/workflows/rust-rewrite-fast.yml"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+      - "rust-rewrite/golden/**"
+      - "rust-rewrite/p0-parity-coverage.json"
+      - "rust-rewrite/supported-subset.json"
+      - "rust-rewrite/tools/**"
+      - "src/graph_sitter/codebase/**"
+      - "src/graph_sitter/configs/models/codebase.py"
+      - "src/graph_sitter/core/codebase.py"
+      - "src/graph_sitter/core/file.py"
+      - "src/graph_sitter/core/import_resolution.py"
+      - "src/graph_sitter/core/symbol.py"
+      - "src/graph_sitter/python/**"
+      - "src/graph_sitter/typescript/**"
+      - "tests/integration/rust_rewrite/**"
+      - "tests/unit/sdk/codebase/test_rust_backend.py"
+      - "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py"
+  push:
+    branches:
+      - "develop"
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/rust-rewrite-fast.yml"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+      - "rust-rewrite/golden/**"
+      - "rust-rewrite/p0-parity-coverage.json"
+      - "rust-rewrite/supported-subset.json"
+      - "rust-rewrite/tools/**"
+      - "src/graph_sitter/codebase/**"
+      - "src/graph_sitter/configs/models/codebase.py"
+      - "src/graph_sitter/core/codebase.py"
+      - "src/graph_sitter/core/file.py"
+      - "src/graph_sitter/core/import_resolution.py"
+      - "src/graph_sitter/core/symbol.py"
+      - "src/graph_sitter/python/**"
+      - "src/graph_sitter/typescript/**"
+      - "tests/integration/rust_rewrite/**"
+      - "tests/unit/sdk/codebase/test_rust_backend.py"
+      - "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py"
+  workflow_dispatch:  # also runnable manually
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  fast-checks:
+    runs-on: ubuntu-latest
+    timeout-minutes: 10
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5.4
+        with:
+          enable-cache: true
+          prune-cache: false
+          python-version: "3.13"
+          version: "0.9.1"  # pinned uv release
+          cache-suffix: rust-rewrite-fast
+
+      - name: Install Python dependencies
+        run: uv sync --frozen  # install from the lockfile without updating it
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust builds
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: rust-rewrite-fast
+
+      - name: Run fast Rust rewrite checks
+        run: rust-rewrite/tools/check_fast.sh
diff --git a/.github/workflows/rust-rewrite-large-repos.yml b/.github/workflows/rust-rewrite-large-repos.yml
new file mode 100644
index 000000000..9e6c2f9b4
--- /dev/null
+++ b/.github/workflows/rust-rewrite-large-repos.yml
@@ -0,0 +1,87 @@
+name: Rust Rewrite Large Repo Checks  # scheduled/manual checks against pinned repositories
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 9 * * *"  # daily at 09:00 UTC
+  push:
+    branches:
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/rust-rewrite-large-repos.yml"
+      - "Cargo.lock"
+      - "Cargo.toml"
+      - "crates/**"
+      - "rust-rewrite/golden/**"
+      - "rust-rewrite/tools/benchmark_pinned_python_repo.py"
+      - "rust-rewrite/tools/benchmark_pinned_typescript_repo.py"
+      - "rust-rewrite/tools/check_pinned_large_repos.sh"
+      - "rust-rewrite/tools/check_pinned_codemods.py"
+      - "rust-rewrite/tools/check_pinned_python_codebase.py"
+      - "rust-rewrite/tools/check_pinned_semantic_parity.py"
+      - "rust-rewrite/tools/check_pinned_typescript_codebase.py"
+      - "rust-rewrite/tools/check_rollout_readiness.py"
+      - "rust-rewrite/tools/snapshot_pinned_python_repo.py"
+      - "rust-rewrite/tools/snapshot_pinned_typescript_repo.py"
+      - "src/graph_sitter/codebase/**"
+      - "src/graph_sitter/configs/models/codebase.py"
+      - "src/graph_sitter/core/codebase.py"
+      - "src/graph_sitter/core/file.py"
+      - "src/graph_sitter/core/import_resolution.py"
+      - "src/graph_sitter/core/symbol.py"
+      - "src/graph_sitter/python/**"
+      - "src/graph_sitter/typescript/**"
+      - "tests/integration/rust_rewrite/**"
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  pinned-large-repos:
+    runs-on: ubuntu-latest
+    timeout-minutes: 45
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5.4
+        with:
+          enable-cache: true
+          prune-cache: false
+          python-version: "3.13"
+          version: "0.9.1"  # pinned uv release
+          cache-suffix: rust-rewrite-large-repos
+
+      - name: Install Python dependencies
+        run: uv sync --frozen  # install from the lockfile without updating it
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache Rust builds
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: rust-rewrite-large-repos
+
+      - name: Cache pinned repository checkouts
+        uses: actions/cache@v4
+        with:
+          path: ${{ runner.temp }}/graph-sitter-pinned-repos
+          key: rust-rewrite-pinned-repos-v1  # fixed key; change the suffix to start from an empty cache
+
+      - name: Run pinned large-repo checks
+        env:
+          GRAPH_SITTER_PINNED_CACHE_DIR: ${{ runner.temp }}/graph-sitter-pinned-repos  # same path as the cache step
+          GRAPH_SITTER_PINNED_EXTENSION_DIR: ${{ runner.temp }}/graph_sitter_py_large_repo_checks
+          GRAPH_SITTER_PINNED_OUTPUT_DIR: ${{ github.workspace }}/rust-rewrite/reports  # uploaded by the next step
+          GRAPH_SITTER_PINNED_TIMEOUT: "900"  # NOTE(review): presumably seconds - confirm in the script
+        run: rust-rewrite/tools/check_pinned_large_repos.sh
+
+      - name: Upload large-repo reports
+        if: always()  # upload whatever reports exist even if the checks failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: rust-rewrite-large-repo-reports
+          path: rust-rewrite/reports/*.json
+          if-no-files-found: ignore  # an empty reports directory does not fail this step
diff --git a/.github/workflows/site-build.yml b/.github/workflows/site-build.yml
new file mode 100644
index 000000000..a887b996f
--- /dev/null
+++ b/.github/workflows/site-build.yml
@@ -0,0 +1,43 @@
+name: Site Build  # installs and builds the app under site/
+
+on:
+  pull_request:
+    branches:
+      - "develop"
+    paths:
+      - ".github/workflows/site-build.yml"
+      - "site/**"
+  push:
+    branches:
+      - "develop"
+      - "rust-rewrite"
+    paths:
+      - ".github/workflows/site-build.yml"
+      - "site/**"
+  workflow_dispatch:  # also runnable manually
+
+permissions:
+  contents: read  # the job only needs to read the checkout
+
+jobs:
+  next-build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 8
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: "22"
+          cache: "npm"
+          cache-dependency-path: site/package-lock.json  # the lockfile lives under site/
+
+      - name: Install dependencies
+        working-directory: site
+        run: npm ci  # clean install from package-lock.json
+
+      - name: Build landing site
+        working-directory: site
+        run: npm run build
diff --git a/.gitignore b/.gitignore
index 8b55b255c..7744fa04c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,7 @@ alembic_versions_backup
 **/*.c
 **/build/
 **/dist/
+target/
 **/*.so
 **/.diffs/**
 **/.coverage*
@@ -66,6 +67,7 @@ graph-sitter-types/typings/**
 coverage.json
 tests/integration/verified_codemods/codemod_data/repo_commits.json
 .benchmarks/*
+rust-rewrite/reports/
 
 # SWE Bench results
 results.*.json
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 000000000..33757e1a4
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,8 @@
+# Agent Notes
+
+## Frontend
+
+- The Vercel app lives in `site/`.
+- Use Aura Dark from `daltonmenezes/aura-theme` for frontend dark mode and code surfaces. Core palette: background `#15141b`, foreground `#edecee`, muted `#6d6d6d`, purple `#a277ff`, green `#61ffca`, orange `#ffca85`, pink `#f694ff`, blue `#82e2ff`, red `#ff6767`. +- Keep docs syntax highlighting aligned with the Aura Dark palette. +- Use Node 22 for local site commands: `PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" npm --prefix site run build`. diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 000000000..b4a2b3cea --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,383 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "cc" +version = "1.2.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f" +dependencies = [ + "find-msvc-tools", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = 
"graph-sitter-engine" +version = "0.1.0" +dependencies = [ + "serde", + "serde_json", + "tree-sitter", + "tree-sitter-python", + "tree-sitter-typescript", +] + +[[package]] +name = "graph-sitter-py" +version = "0.1.0" +dependencies = [ + "graph-sitter-engine", + "pyo3", + "serde_json", +] + +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + +[[package]] +name = "itoa" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" + +[[package]] +name = "libc" +version = "0.2.186" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" + +[[package]] +name = "memchr" +version = "2.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4" + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + 
+[[package]] +name = "once_cell" +version = "1.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9" +dependencies = [ + "indexmap", + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "shlex" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba" + +[[package]] +name = "streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2231b7c3057d5e4ad0156fb3dc807d900806020c5ffa3ee6ff2c8c76fb8520" + +[[package]] +name = "syn" +version = "2.0.118" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9ae57f904213ebb649ce6895b8a66c66f0203b9319718f69a5612a065b1422" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tree-sitter" +version = "0.26.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dab76d0b724ba557954125188cf0633a1ca43199ced82d95c7b9c32cc3de1f3" +dependencies = [ + "cc", + "regex", + "regex-syntax", + "serde_json", + "streaming-iterator", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-language" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "009994f150cc0cd50ff54917d5bc8bffe8cad10ca10d81c34da2ec421ae61782" + +[[package]] +name = "tree-sitter-python" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6bf85fd39652e740bf60f46f4cda9492c3a9ad75880575bf14960f775cb74a1c" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5f76ed8d947a75cc446d5fccd8b602ebf0cde64ccf2ffa434d873d7a575eff" +dependencies = [ + "cc", + "tree-sitter-language", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 000000000..ff8f4bebc --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,21 @@ +[workspace] +members = [ + "crates/graph-sitter-engine", + "crates/graph-sitter-py", +] +resolver = "2" + +[workspace.package] +version = "0.1.0" +edition = "2021" +license = "Apache-2.0" +repository = "https://github.com/codegen-sh/graph-sitter" + +[workspace.dependencies] +graph-sitter-engine = { path = "crates/graph-sitter-engine" } +pyo3 = "0.22" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +tree-sitter = "0.26" +tree-sitter-python = "0.25" +tree-sitter-typescript = "0.23.2" diff --git a/crates/graph-sitter-engine/Cargo.toml b/crates/graph-sitter-engine/Cargo.toml new file mode 100644 index 000000000..50f1ff8f4 --- /dev/null +++ b/crates/graph-sitter-engine/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "graph-sitter-engine" +description = "Core Rust engine skeleton for graph-sitter" +version.workspace = true +edition.workspace = true 
+license.workspace = true
+repository.workspace = true
+
+[lib]
+name = "graph_sitter_engine"
+path = "src/lib.rs"
+
+[dependencies]
+serde.workspace = true
+serde_json.workspace = true
+tree-sitter.workspace = true
+tree-sitter-python.workspace = true
+tree-sitter-typescript.workspace = true
diff --git a/crates/graph-sitter-engine/examples/index_python.rs b/crates/graph-sitter-engine/examples/index_python.rs
new file mode 100644
index 000000000..b9f422cde
--- /dev/null
+++ b/crates/graph-sitter-engine/examples/index_python.rs
@@ -0,0 +1,58 @@
+use graph_sitter_engine::index_python_path;
+use std::env;
+use std::error::Error;
+use std::time::Instant;
+
+/// Indexes the Python repository named by the first CLI argument, then prints the
+/// elapsed wall-clock time and the index summary: a single JSON object when
+/// `--json` appears among the remaining arguments, plain text lines otherwise.
+///
+/// Exits with status 2 when the repository path is missing; an indexing failure is
+/// returned from `main` as the error.
+fn main() -> Result<(), Box<dyn Error>> {
+    let mut args = env::args().skip(1);
+    let Some(repo_path) = args.next() else {
+        eprintln!(
+            "usage: cargo run -p graph-sitter-engine --example index_python -- <repo_path> [--json]"
+        );
+        std::process::exit(2);
+    };
+    // Only the arguments after the repository path are scanned for the flag.
+    let json = args.any(|arg| arg == "--json");
+
+    let started = Instant::now();
+    let index = index_python_path(&repo_path)?;
+    let elapsed = started.elapsed();
+    let summary = index.summary();
+
+    if json {
+        println!(
+            "{}",
+            serde_json::json!({
+                "repo_path": repo_path,
+                "wall_seconds": elapsed.as_secs_f64(),
+                "summary": summary,
+            })
+        );
+    } else {
+        println!("repo: {repo_path}");
+        println!("wall: {:.6}s", elapsed.as_secs_f64());
+        println!(
+            "index: files={} symbols={} classes={} functions={} global_variables={} imports={} import_resolutions={} references={} dependencies={} bytes={} lines={} files_with_errors={}",
+            summary.files,
+            summary.symbols,
+            summary.classes,
+            summary.functions,
+            summary.global_variables,
+            summary.imports,
+            summary.import_resolutions,
+            summary.references,
+            summary.dependencies,
+            summary.bytes,
+            summary.lines,
+            summary.files_with_errors
+        );
+    }
+
+    Ok(())
+}
diff --git a/crates/graph-sitter-engine/src/lib.rs b/crates/graph-sitter-engine/src/lib.rs
new file mode 100644
index 000000000..cbf5d2ab4
--- /dev/null
+++ 
b/crates/graph-sitter-engine/src/lib.rs @@ -0,0 +1,9500 @@ +#![forbid(unsafe_code)] + +use serde::{Serialize, Serializer}; +use std::borrow::Borrow; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::fmt; +use std::fs; +use std::io; +use std::ops::Deref; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tree_sitter::{Node, Parser, Range, Tree}; + +const ENABLED_FEATURES: &[&str] = &["skeleton", "python-index", "typescript-index"]; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct InternedString(Arc); + +impl InternedString { + pub fn ptr_eq(&self, other: &Self) -> bool { + Arc::ptr_eq(&self.0, &other.0) + } +} + +impl From<&str> for InternedString { + fn from(value: &str) -> Self { + Self(Arc::from(value)) + } +} + +impl From for InternedString { + fn from(value: String) -> Self { + Self(Arc::from(value)) + } +} + +impl AsRef for InternedString { + fn as_ref(&self) -> &str { + &self.0 + } +} + +impl Borrow for InternedString { + fn borrow(&self) -> &str { + self.as_ref() + } +} + +impl Deref for InternedString { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_ref() + } +} + +impl fmt::Display for InternedString { + fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result { + formatter.write_str(self.as_ref()) + } +} + +impl PartialEq<&str> for InternedString { + fn eq(&self, other: &&str) -> bool { + self.as_ref() == *other + } +} + +impl PartialEq for &str { + fn eq(&self, other: &InternedString) -> bool { + *self == other.as_ref() + } +} + +impl Serialize for InternedString { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.serialize_str(self.as_ref()) + } +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct StringInterner { + values: HashSet, +} + +impl StringInterner { + pub fn intern(&mut self, value: impl AsRef) -> InternedString { + let value = value.as_ref(); + if let Some(existing) = self.values.get(value) { + return existing.clone(); 
+ } + let interned = InternedString::from(value); + self.values.insert(interned.clone()); + interned + } + + pub fn len(&self) -> usize { + self.values.len() + } + + pub fn clear(&mut self) { + self.values = HashSet::new(); + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct EngineInfo { + version: &'static str, + enabled_features: &'static [&'static str], +} + +impl EngineInfo { + pub fn version(&self) -> &'static str { + self.version + } + + pub fn enabled_features(&self) -> &'static [&'static str] { + self.enabled_features + } +} + +#[derive(Debug, Default, Clone, Copy)] +pub struct Engine; + +impl Engine { + pub fn new() -> Self { + Self + } + + pub fn debug_info(&self) -> EngineInfo { + debug_info() + } + + pub fn version(&self) -> &'static str { + engine_version() + } + + pub fn enabled_features(&self) -> &'static [&'static str] { + ENABLED_FEATURES + } + + pub fn index_python_path( + &self, + repo_path: impl AsRef, + ) -> Result { + PythonIndexer::new()?.index_path(repo_path) + } + + pub fn index_python_paths( + &self, + repo_path: impl AsRef, + file_paths: I, + ) -> Result + where + I: IntoIterator, + P: AsRef, + { + PythonIndexer::new()?.index_paths(repo_path, file_paths) + } + + pub fn index_typescript_path( + &self, + repo_path: impl AsRef, + ) -> Result { + TypeScriptIndexer::new()?.index_path(repo_path) + } + + pub fn index_typescript_paths( + &self, + repo_path: impl AsRef, + file_paths: I, + ) -> Result + where + I: IntoIterator, + P: AsRef, + { + TypeScriptIndexer::new()?.index_paths(repo_path, file_paths) + } +} + +pub fn engine_version() -> &'static str { + env!("CARGO_PKG_VERSION") +} + +pub fn debug_info() -> EngineInfo { + EngineInfo { + version: engine_version(), + enabled_features: ENABLED_FEATURES, + } +} + +pub fn index_python_path(repo_path: impl AsRef) -> Result { + Engine::new().index_python_path(repo_path) +} + +pub fn index_python_paths( + repo_path: impl AsRef, + file_paths: I, +) -> Result +where + I: IntoIterator, + 
P: AsRef, +{ + Engine::new().index_python_paths(repo_path, file_paths) +} + +pub fn index_typescript_path(repo_path: impl AsRef) -> Result { + Engine::new().index_typescript_path(repo_path) +} + +pub fn index_typescript_paths( + repo_path: impl AsRef, + file_paths: I, +) -> Result +where + I: IntoIterator, + P: AsRef, +{ + Engine::new().index_typescript_paths(repo_path, file_paths) +} + +#[derive(Debug)] +pub enum IndexError { + Io { path: PathBuf, source: io::Error }, + ParseFailed { path: PathBuf }, + Language(tree_sitter::LanguageError), +} + +impl fmt::Display for IndexError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io { path, source } => write!(f, "failed to read {}: {source}", path.display()), + Self::ParseFailed { path } => { + write!(f, "tree-sitter failed to parse {}", path.display()) + } + Self::Language(source) => { + write!(f, "failed to load tree-sitter Python language: {source}") + } + } + } +} + +impl std::error::Error for IndexError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + Self::Io { source, .. } => Some(source), + Self::Language(source) => Some(source), + Self::ParseFailed { .. 
} => None, + } + } +} + +impl From for IndexError { + fn from(value: tree_sitter::LanguageError) -> Self { + Self::Language(value) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct PythonIndex { + pub files: Vec, + pub symbols: Vec, + pub imports: Vec, + pub import_resolutions: Vec, + pub external_modules: Vec, + pub references: Vec, + pub external_references: Vec, + pub dependencies: Vec, + #[serde(skip)] + pub all_exports_by_file: HashMap>, + #[serde(skip)] + pub strings: StringInterner, +} + +impl PythonIndex { + fn intern(&mut self, value: impl AsRef) -> InternedString { + self.strings.intern(value) + } + + fn finish(mut self) -> Self { + self.all_exports_by_file.clear(); + self.strings.clear(); + self + } + + pub fn summary(&self) -> IndexSummary { + IndexSummary { + files: self.files.len(), + symbols: self.symbols.len(), + classes: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::Class) + .count(), + functions: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::Function) + .count(), + global_variables: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::GlobalVariable) + .count(), + imports: self.imports.len(), + import_resolutions: self.import_resolutions.len(), + external_modules: self.external_modules.len(), + exports: 0, + references: self.references.len(), + external_references: self.external_references.len(), + dependencies: self.dependencies.len(), + subclass_edges: 0, + bytes: self.files.iter().map(|file| file.byte_len).sum(), + lines: self.files.iter().map(|file| file.line_count).sum(), + files_with_errors: self.files.iter().filter(|file| file.has_error).count(), + } + } + + pub fn debug_graph_dump(&self) -> GraphDebugDump { + let mut nodes = Vec::new(); + let mut edges = Vec::new(); + append_common_debug_graph( + &mut nodes, + &mut edges, + &self.files, + &self.symbols, + &self.imports, + &self.import_resolutions, + &self.external_modules, + &self.references, + 
&self.external_references, + &self.dependencies, + ); + GraphDebugDump { nodes, edges } + } + + pub fn debug_graph_json(&self) -> Result { + serde_json::to_string(&self.debug_graph_dump()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct TypeScriptIndex { + pub files: Vec, + pub symbols: Vec, + pub imports: Vec, + pub import_resolutions: Vec, + pub external_modules: Vec, + pub exports: Vec, + pub references: Vec, + pub external_references: Vec, + pub function_calls: Vec, + pub promise_chains: Vec, + pub dependencies: Vec, + pub subclass_edges: Vec, + #[serde(skip)] + pub strings: StringInterner, +} + +impl TypeScriptIndex { + fn intern(&mut self, value: impl AsRef) -> InternedString { + self.strings.intern(value) + } + + fn finish(mut self) -> Self { + self.strings.clear(); + self + } + + pub fn summary(&self) -> IndexSummary { + IndexSummary { + files: self.files.len(), + symbols: self.symbols.len(), + classes: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::Class) + .count(), + functions: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::Function) + .count(), + global_variables: self + .symbols + .iter() + .filter(|symbol| symbol.kind == SymbolKind::GlobalVariable) + .count(), + imports: self.imports.len(), + import_resolutions: self.import_resolutions.len(), + external_modules: self.external_modules.len(), + exports: self.exports.len(), + references: self.references.len(), + external_references: self.external_references.len(), + dependencies: self.dependencies.len(), + subclass_edges: self.subclass_edges.len(), + bytes: self.files.iter().map(|file| file.byte_len).sum(), + lines: self.files.iter().map(|file| file.line_count).sum(), + files_with_errors: self.files.iter().filter(|file| file.has_error).count(), + } + } + + pub fn debug_graph_dump(&self) -> GraphDebugDump { + let mut nodes = Vec::new(); + let mut edges = Vec::new(); + append_common_debug_graph( + &mut nodes, + &mut edges, + 
/// Aggregate counts returned by `PythonIndex::summary` and
/// `TypeScriptIndex::summary`.
///
/// Every field is either the length of one of the index's record lists or a
/// total computed over its `files` / `symbols`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct IndexSummary {
    /// Number of file records.
    pub files: usize,
    /// Number of symbol records of any kind.
    pub symbols: usize,
    /// Symbols whose kind is `SymbolKind::Class`.
    pub classes: usize,
    /// Symbols whose kind is `SymbolKind::Function`.
    pub functions: usize,
    /// Symbols whose kind is `SymbolKind::GlobalVariable`.
    pub global_variables: usize,
    /// Number of import records.
    pub imports: usize,
    /// Number of import-resolution records.
    pub import_resolutions: usize,
    /// Number of external-module records.
    pub external_modules: usize,
    /// Number of export records; the Python summary always reports `0`.
    pub exports: usize,
    /// Number of reference records.
    pub references: usize,
    /// Number of external-reference records.
    pub external_references: usize,
    /// Number of dependency records.
    pub dependencies: usize,
    /// Number of subclass records; the Python summary always reports `0`.
    pub subclass_edges: usize,
    /// Sum of `byte_len` over all file records.
    pub bytes: usize,
    /// Sum of `line_count` over all file records.
    pub lines: usize,
    /// Number of file records with `has_error` set.
    pub files_with_errors: usize,
}
struct ExternalModuleRecord { + pub id: u32, + pub import_id: u32, + pub file_id: u32, + pub module: Option, + pub name: InternedString, + pub alias: Option, + pub range: SourceRange, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ExportKind { + Named, + Default, + Wildcard, + Namespace, + ExportEquals, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct ExportRecord { + pub id: u32, + pub file_id: u32, + pub kind: ExportKind, + pub name: Option, + pub local_name: Option, + pub source_module: Option, + pub symbol_id: Option, + pub import_id: Option, + pub range: SourceRange, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct ImportResolutionRecord { + pub id: u32, + pub import_id: u32, + pub source_file_id: u32, + pub target_file_id: u32, + pub target_symbol_id: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct ReferenceRecord { + pub id: u32, + pub source_file_id: u32, + pub source_symbol_id: Option, + pub target_symbol_id: u32, + pub import_id: Option, + pub name: InternedString, + pub range: SourceRange, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct ExternalReferenceRecord { + pub id: u32, + pub source_file_id: u32, + pub source_symbol_id: Option, + pub import_id: u32, + pub name: InternedString, + pub range: SourceRange, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct FunctionCallRecord { + pub id: u32, + pub source_file_id: u32, + pub source_symbol_id: Option, + pub target_symbol_id: Option, + pub import_id: Option, + pub name: InternedString, + pub range: SourceRange, + pub name_range: SourceRange, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub struct PromiseChainRecord { + pub id: u32, + pub source_file_id: u32, + pub source_symbol_id: Option, + pub stage_names: Vec, + pub range: SourceRange, + pub base_range: SourceRange, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub 
/// Byte span plus row/column endpoints of a piece of source text.
///
/// The `From` conversion defined next to this type fills every field directly
/// from a `Range` value (presumably `tree_sitter::Range` - confirm against the
/// file's imports): byte offsets come from `start_byte` / `end_byte`, rows and
/// columns from `start_point` / `end_point`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize)]
pub struct SourceRange {
    /// Copied from the range's `start_byte`.
    pub start_byte: usize,
    /// Copied from the range's `end_byte`.
    pub end_byte: usize,
    /// Copied from the range's `start_point.row`.
    pub start_row: usize,
    /// Copied from the range's `start_point.column`.
    pub start_column: usize,
    /// Copied from the range's `end_point.row`.
    pub end_row: usize,
    /// Copied from the range's `end_point.column`.
    pub end_column: usize,
}
dependency_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub subclass_id: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub range: Option, + #[serde(skip_serializing_if = "Vec::is_empty")] + pub reference_ids: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub reference_count: Option, +} + +fn append_common_debug_graph( + nodes: &mut Vec, + edges: &mut Vec, + files: &[FileRecord], + symbols: &[SymbolRecord], + imports: &[ImportRecord], + import_resolutions: &[ImportResolutionRecord], + external_modules: &[ExternalModuleRecord], + references: &[ReferenceRecord], + external_references: &[ExternalReferenceRecord], + dependencies: &[DependencyRecord], +) { + for file in files { + nodes.push(GraphDebugNode { + id: file_debug_id(file.id), + node_type: "file", + record_id: file.id, + file_id: Some(file.id), + name: file.module_name.as_ref().map(|name| name.to_string()), + path: Some(file.path.to_string()), + range: Some(file.root_range), + }); + } + + for symbol in symbols { + nodes.push(GraphDebugNode { + id: symbol_debug_id(symbol.id), + node_type: "symbol", + record_id: symbol.id, + file_id: Some(symbol.file_id), + name: Some(symbol.name.to_string()), + path: None, + range: Some(symbol.range), + }); + + let mut file_edge = debug_edge( + "contains_symbol", + file_debug_id(symbol.file_id), + symbol_debug_id(symbol.id), + ); + file_edge.name = Some(symbol.name.to_string()); + file_edge.range = Some(symbol.range); + edges.push(file_edge); + + if let Some(parent_symbol_id) = symbol.parent_symbol_id { + let mut parent_edge = debug_edge( + "parent_symbol", + symbol_debug_id(parent_symbol_id), + symbol_debug_id(symbol.id), + ); + parent_edge.name = Some(symbol.name.to_string()); + parent_edge.range = Some(symbol.range); + edges.push(parent_edge); + } + } + + for import in imports { + nodes.push(GraphDebugNode { + id: import_debug_id(import.id), + node_type: 
"import", + record_id: import.id, + file_id: Some(import.file_id), + name: import_debug_name(import), + path: None, + range: Some(import.range), + }); + + let mut file_edge = debug_edge( + "contains_import", + file_debug_id(import.file_id), + import_debug_id(import.id), + ); + file_edge.import_id = Some(import.id); + file_edge.name = import_debug_name(import); + file_edge.range = Some(import.range); + edges.push(file_edge); + } + + let mut external_module_id_by_import_id = BTreeMap::new(); + for external_module in external_modules { + external_module_id_by_import_id.insert(external_module.import_id, external_module.id); + nodes.push(GraphDebugNode { + id: external_module_debug_id(external_module.id), + node_type: "external_module", + record_id: external_module.id, + file_id: Some(external_module.file_id), + name: Some(external_module.name.to_string()), + path: None, + range: Some(external_module.range), + }); + + let mut file_edge = debug_edge( + "contains_external_module", + file_debug_id(external_module.file_id), + external_module_debug_id(external_module.id), + ); + file_edge.import_id = Some(external_module.import_id); + file_edge.name = Some(external_module.name.to_string()); + file_edge.range = Some(external_module.range); + edges.push(file_edge); + } + + for resolution in import_resolutions { + let target = resolution + .target_symbol_id + .map(symbol_debug_id) + .unwrap_or_else(|| file_debug_id(resolution.target_file_id)); + let mut edge = debug_edge( + "import_resolution", + import_debug_id(resolution.import_id), + target, + ); + edge.import_id = Some(resolution.import_id); + edges.push(edge); + } + + for reference in references { + let mut edge = debug_edge( + "reference", + source_debug_id(reference.source_symbol_id, reference.source_file_id), + symbol_debug_id(reference.target_symbol_id), + ); + edge.import_id = reference.import_id; + edge.reference_id = Some(reference.id); + edge.name = Some(reference.name.to_string()); + edge.range = 
Some(reference.range); + edges.push(edge); + } + + for reference in external_references { + let target = external_module_id_by_import_id + .get(&reference.import_id) + .copied() + .map(external_module_debug_id) + .unwrap_or_else(|| import_debug_id(reference.import_id)); + let mut edge = debug_edge( + "external_reference", + source_debug_id(reference.source_symbol_id, reference.source_file_id), + target, + ); + edge.import_id = Some(reference.import_id); + edge.reference_id = Some(reference.id); + edge.name = Some(reference.name.to_string()); + edge.range = Some(reference.range); + edges.push(edge); + } + + for dependency in dependencies { + let mut edge = debug_edge( + "dependency", + symbol_debug_id(dependency.source_symbol_id), + symbol_debug_id(dependency.target_symbol_id), + ); + edge.dependency_id = Some(dependency.id); + edge.reference_ids = dependency.reference_ids.clone(); + edge.reference_count = Some(dependency.reference_count); + edges.push(edge); + } +} + +fn debug_edge(edge_type: &'static str, source: String, target: String) -> GraphDebugEdge { + GraphDebugEdge { + edge_type, + source, + target, + import_id: None, + export_id: None, + reference_id: None, + dependency_id: None, + subclass_id: None, + name: None, + range: None, + reference_ids: Vec::new(), + reference_count: None, + } +} + +fn source_debug_id(symbol_id: Option, file_id: u32) -> String { + symbol_id + .map(symbol_debug_id) + .unwrap_or_else(|| file_debug_id(file_id)) +} + +fn file_debug_id(id: u32) -> String { + format!("file:{id}") +} + +fn symbol_debug_id(id: u32) -> String { + format!("symbol:{id}") +} + +fn import_debug_id(id: u32) -> String { + format!("import:{id}") +} + +fn external_module_debug_id(id: u32) -> String { + format!("external_module:{id}") +} + +fn export_debug_id(id: u32) -> String { + format!("export:{id}") +} + +fn import_debug_name(import: &ImportRecord) -> Option { + import + .alias + .as_ref() + .or(import.name.as_ref()) + .or(import.module.as_ref()) + 
.map(|value| value.to_string()) +} + +fn export_debug_name(export: &ExportRecord) -> Option { + export + .name + .as_ref() + .or(export.local_name.as_ref()) + .or(export.source_module.as_ref()) + .map(|value| value.to_string()) +} + +struct PythonIndexer { + parser: Parser, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct ReferenceCandidate { + source_file_id: u32, + source_symbol_id: Option, + name: String, + qualifier: Option, + range: SourceRange, + is_subclass: bool, + call_range: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct LocalBindingScope { + source_symbol_id: u32, + range: SourceRange, + names: HashSet, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +struct IndexedLocalSymbol { + id: u32, + name_range: SourceRange, +} + +#[derive(Debug, Clone, Default)] +struct IndexedLocalSymbols { + parent_symbol_by_id: HashMap, + symbols_by_parent_and_name: HashMap<(u32, String), Vec>, +} + +impl IndexedLocalSymbols { + fn from_symbols<'a>(symbols: impl IntoIterator) -> Self { + let mut index = Self::default(); + for symbol in symbols { + if let Some(parent_symbol_id) = symbol.parent_symbol_id { + index + .parent_symbol_by_id + .insert(symbol.id, parent_symbol_id); + index + .symbols_by_parent_and_name + .entry((parent_symbol_id, symbol.name.to_string())) + .or_default() + .push(IndexedLocalSymbol { + id: symbol.id, + name_range: symbol.name_range, + }); + } + } + for symbols in index.symbols_by_parent_and_name.values_mut() { + symbols.sort_by_key(|symbol| symbol.name_range.start_byte); + } + index + } +} + +type ExportedSymbolsByFile = HashMap>; + +impl PythonIndexer { + fn new() -> Result { + let mut parser = Parser::new(); + parser.set_language(&tree_sitter_python::LANGUAGE.into())?; + Ok(Self { parser }) + } + + fn index_path(mut self, repo_path: impl AsRef) -> Result { + let repo_path = repo_path.as_ref(); + let mut paths = Vec::new(); + collect_python_files(repo_path, &mut paths)?; + self.index_absolute_paths(repo_path, paths) + } + + fn 
index_paths( + mut self, + repo_path: impl AsRef, + file_paths: I, + ) -> Result + where + I: IntoIterator, + P: AsRef, + { + let repo_path = repo_path.as_ref(); + let paths = file_paths + .into_iter() + .map(|path| { + let path = path.as_ref(); + if path.is_absolute() { + path.to_path_buf() + } else { + repo_path.join(path) + } + }) + .collect(); + self.index_absolute_paths(repo_path, paths) + } + + fn index_absolute_paths( + &mut self, + repo_path: &Path, + mut paths: Vec, + ) -> Result { + let mut index = PythonIndex { + files: Vec::new(), + symbols: Vec::new(), + imports: Vec::new(), + import_resolutions: Vec::new(), + external_modules: Vec::new(), + references: Vec::new(), + external_references: Vec::new(), + dependencies: Vec::new(), + all_exports_by_file: HashMap::new(), + strings: StringInterner::default(), + }; + let mut reference_candidates = Vec::new(); + paths.sort(); + + for path in paths { + let file_id = index.files.len() as u32; + let (content, byte_len, content_hash) = read_source_lossy(&path)?; + let tree = self + .parser + .parse(&content, None) + .ok_or_else(|| IndexError::ParseFailed { path: path.clone() })?; + let root = tree.root_node(); + let relative_path = path + .strip_prefix(repo_path) + .unwrap_or(path.as_path()) + .to_string_lossy() + .replace('\\', "/"); + + let module_name = python_module_name(&relative_path).map(|name| index.intern(name)); + let relative_path = index.intern(relative_path); + index.files.push(FileRecord { + id: file_id, + module_name, + path: relative_path, + language: FileLanguage::Python, + content_hash, + byte_len, + line_count: line_count(&content), + has_error: root.has_error(), + root_range: root.range().into(), + }); + extract_python_file( + file_id, + &content, + &tree, + &mut index, + &mut reference_candidates, + ); + } + + resolve_python_imports(&mut index); + resolve_python_references(&mut index, reference_candidates); + build_python_dependencies(&mut index); + Ok(index.finish()) + } +} + +struct 
TypeScriptIndexer { + typescript_parser: Parser, + tsx_parser: Parser, +} + +impl TypeScriptIndexer { + fn new() -> Result { + let mut typescript_parser = Parser::new(); + typescript_parser.set_language(&tree_sitter_typescript::LANGUAGE_TYPESCRIPT.into())?; + let mut tsx_parser = Parser::new(); + tsx_parser.set_language(&tree_sitter_typescript::LANGUAGE_TSX.into())?; + Ok(Self { + typescript_parser, + tsx_parser, + }) + } + + fn index_path(mut self, repo_path: impl AsRef) -> Result { + let repo_path = repo_path.as_ref(); + let mut paths = Vec::new(); + collect_typescript_files(repo_path, &mut paths)?; + self.index_absolute_paths(repo_path, paths) + } + + fn index_paths( + mut self, + repo_path: impl AsRef, + file_paths: I, + ) -> Result + where + I: IntoIterator, + P: AsRef, + { + let repo_path = repo_path.as_ref(); + let paths = file_paths + .into_iter() + .filter_map(|path| { + let path = path.as_ref(); + let absolute_path = if path.is_absolute() { + path.to_path_buf() + } else { + repo_path.join(path) + }; + is_typescript_like_file(&absolute_path).then_some(absolute_path) + }) + .collect(); + self.index_absolute_paths(repo_path, paths) + } + + fn index_absolute_paths( + &mut self, + repo_path: &Path, + mut paths: Vec, + ) -> Result { + let mut index = TypeScriptIndex { + files: Vec::new(), + symbols: Vec::new(), + imports: Vec::new(), + import_resolutions: Vec::new(), + external_modules: Vec::new(), + exports: Vec::new(), + references: Vec::new(), + external_references: Vec::new(), + function_calls: Vec::new(), + promise_chains: Vec::new(), + dependencies: Vec::new(), + subclass_edges: Vec::new(), + strings: StringInterner::default(), + }; + let mut reference_candidates = Vec::new(); + paths.sort(); + let ts_configs = collect_typescript_configs(repo_path); + + for path in paths { + let file_id = index.files.len() as u32; + let (content, byte_len, content_hash) = read_source_lossy(&path)?; + let language = file_language_for_typescript_path(&path); + let tree = 
self.parse_typescript_tree(language, &content, &path)?; + let root = tree.root_node(); + let relative_path = path + .strip_prefix(repo_path) + .unwrap_or(path.as_path()) + .to_string_lossy() + .replace('\\', "/"); + + let relative_path = index.intern(relative_path); + index.files.push(FileRecord { + id: file_id, + module_name: None, + language, + content_hash, + path: relative_path, + byte_len, + line_count: line_count(&content), + has_error: root.has_error(), + root_range: root.range().into(), + }); + extract_typescript_file( + file_id, + &content, + &tree, + &mut index, + &mut reference_candidates, + ); + } + + resolve_typescript_imports(&mut index, &ts_configs); + resolve_typescript_references(&mut index, reference_candidates); + build_typescript_dependencies(&mut index); + Ok(index.finish()) + } + + fn parse_typescript_tree( + &mut self, + language: FileLanguage, + content: &str, + path: &Path, + ) -> Result { + let primary_is_tsx = matches!(language, FileLanguage::Tsx | FileLanguage::Jsx); + let primary_tree = if primary_is_tsx { + self.tsx_parser.parse(content, None) + } else { + self.typescript_parser.parse(content, None) + } + .ok_or_else(|| IndexError::ParseFailed { + path: path.to_path_buf(), + })?; + if !primary_tree.root_node().has_error() { + return Ok(primary_tree); + } + + let fallback_tree = if primary_is_tsx { + self.typescript_parser.parse(content, None) + } else { + self.tsx_parser.parse(content, None) + } + .ok_or_else(|| IndexError::ParseFailed { + path: path.to_path_buf(), + })?; + + if syntax_error_count(fallback_tree.root_node()) + < syntax_error_count(primary_tree.root_node()) + { + return Ok(fallback_tree); + } + Ok(primary_tree) + } +} + +#[derive(Debug, Clone)] +struct TypeScriptConfig { + dir: String, + base_url: Option, + path_base: String, + paths: Vec, +} + +#[derive(Debug, Clone)] +struct TypeScriptPathMapping { + pattern_prefix: String, + pattern_suffix: String, + pattern_has_wildcard: bool, + target_prefix: String, + 
/// Splits a tsconfig `paths` pattern (or target) around its first `*`.
///
/// Returns `(prefix, suffix, has_wildcard)`. Without a `*` the whole input is
/// the prefix, the suffix is empty and the flag is `false`. Only the first
/// `*` is treated as the wildcard; any later `*` stays in the suffix.
fn split_typescript_path_pattern(pattern: &str) -> (String, String, bool) {
    match pattern.find('*') {
        Some(star) => {
            let before = pattern[..star].to_string();
            // `*` is a single byte, so the suffix starts one byte later.
            let after = pattern[star + 1..].to_string();
            (before, after, true)
        }
        None => (pattern.to_string(), String::new(), false),
    }
}
entry.file_type().map_err(|source| IndexError::Io { + path: path.clone(), + source, + })?; + if file_type.is_dir() { + if should_skip_dir(&path) { + continue; + } + collect_typescript_config_files(&path, out)?; + } else if file_type.is_file() + && path.file_name().and_then(|name| name.to_str()) == Some("tsconfig.json") + { + out.push(path); + } + } + Ok(()) +} + +fn parse_typescript_config(repo_path: &Path, path: &Path) -> Option { + let source = fs::read_to_string(path).ok()?; + let json_source = strip_jsonc_comments_and_trailing_commas(&source); + let json: serde_json::Value = serde_json::from_str(&json_source).ok()?; + let compiler_options = json.get("compilerOptions")?.as_object()?; + let dir = path + .parent() + .unwrap_or(repo_path) + .strip_prefix(repo_path) + .unwrap_or_else(|_| Path::new("")) + .to_string_lossy() + .replace('\\', "/"); + let base_url = compiler_options + .get("baseUrl") + .and_then(|value| value.as_str()) + .and_then(|value| normalize_typescript_config_path(&dir, value)); + let path_base = base_url.clone().unwrap_or_else(|| dir.clone()); + + let mut paths = Vec::new(); + if let Some(paths_object) = compiler_options + .get("paths") + .and_then(|value| value.as_object()) + { + for (pattern, targets) in paths_object { + if let Some(target) = targets.as_str() { + paths.push(TypeScriptPathMapping::from_pattern(pattern, target)); + continue; + } + if let Some(targets) = targets.as_array() { + for target in targets.iter().filter_map(|target| target.as_str()) { + paths.push(TypeScriptPathMapping::from_pattern(pattern, target)); + } + } + } + } + paths.sort_by(|left, right| { + right + .specificity() + .cmp(&left.specificity()) + .then_with(|| left.pattern_has_wildcard.cmp(&right.pattern_has_wildcard)) + }); + + Some(TypeScriptConfig { + dir, + base_url, + path_base, + paths, + }) +} + +fn strip_jsonc_comments_and_trailing_commas(source: &str) -> String { + strip_json_trailing_commas(&strip_json_comments(source)) +} + +fn 
/// Removes commas that directly precede a closing `}` or `]`.
///
/// Whitespace between the comma and the closing bracket is allowed and is
/// kept in the output. Text inside double-quoted strings is copied verbatim;
/// a backslash inside a string protects the following character, so an
/// escaped quote does not end the string.
fn strip_json_trailing_commas(source: &str) -> String {
    let mut cleaned = String::with_capacity(source.len());
    let mut inside_string = false;
    let mut after_backslash = false;

    for (offset, current) in source.char_indices() {
        if inside_string {
            cleaned.push(current);
            if after_backslash {
                after_backslash = false;
            } else {
                match current {
                    '\\' => after_backslash = true,
                    '"' => inside_string = false,
                    _ => {}
                }
            }
            continue;
        }

        match current {
            '"' => inside_string = true,
            ',' => {
                // `,` is one byte wide, so the remainder starts at offset + 1.
                let remainder = source[offset + 1..].trim_start();
                if remainder.starts_with('}') || remainder.starts_with(']') {
                    // Trailing comma: drop it and keep everything after it.
                    continue;
                }
            }
            _ => {}
        }
        cleaned.push(current);
    }

    cleaned
}
/// Hashes `bytes` with 64-bit FNV-1a and renders the result as 16 lowercase
/// hexadecimal digits.
///
/// Each byte is XOR-ed into the state, which is then multiplied by the prime
/// with wrapping arithmetic. Empty input yields the offset basis itself.
fn stable_content_hash(bytes: &[u8]) -> String {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    let digest = bytes.iter().fold(OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    });
    format!("{digest:016x}")
}
/// Reports whether `path` has one of the extensions handled by the
/// TypeScript indexer: `js`, `jsx`, `ts` or `tsx`.
///
/// The comparison is case-sensitive. Paths with no extension, or with an
/// extension that is not valid UTF-8, are rejected.
fn is_typescript_like_file(path: &Path) -> bool {
    const ACCEPTED: [&str; 4] = ["js", "jsx", "ts", "tsx"];

    let Some(extension) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    ACCEPTED.contains(&extension)
}
Node<'_>, + parent_symbol_id: Option, + index: &mut TypeScriptIndex, +) -> Option { + let kind = match node.kind() { + "function_declaration" | "generator_function_declaration" => SymbolKind::Function, + "class_declaration" | "abstract_class_declaration" => SymbolKind::Class, + "interface_declaration" => SymbolKind::Interface, + "type_alias_declaration" => SymbolKind::TypeAlias, + "enum_declaration" => SymbolKind::Enum, + "internal_module" => SymbolKind::Namespace, + _ => return None, + }; + let symbol_id = + push_typescript_named_symbol(file_id, source, node, node, kind, parent_symbol_id, index)?; + if kind == SymbolKind::Namespace { + extract_typescript_namespace_members(file_id, source, node, symbol_id, index); + } + Some(symbol_id) +} + +fn push_typescript_named_symbol( + file_id: u32, + source: &str, + declaration: Node<'_>, + name_owner: Node<'_>, + kind: SymbolKind, + parent_symbol_id: Option, + index: &mut TypeScriptIndex, +) -> Option { + let Some(name_node) = name_owner.child_by_field_name("name") else { + return None; + }; + let Ok(name) = name_node.utf8_text(source.as_bytes()) else { + return None; + }; + let symbol_id = index.symbols.len() as u32; + let name = index.intern(name); + index.symbols.push(SymbolRecord { + id: symbol_id, + file_id, + parent_symbol_id, + is_top_level: parent_symbol_id.is_none(), + name, + kind, + range: declaration.range().into(), + name_range: name_node.range().into(), + }); + Some(symbol_id) +} + +fn push_typescript_variable_symbols( + file_id: u32, + source: &str, + declaration: Node<'_>, + index: &mut TypeScriptIndex, +) -> Vec { + push_typescript_variable_symbols_with_parent(file_id, source, declaration, None, index) +} + +fn push_typescript_variable_symbols_with_parent( + file_id: u32, + source: &str, + declaration: Node<'_>, + parent_symbol_id: Option, + index: &mut TypeScriptIndex, +) -> Vec { + let mut symbol_ids = Vec::new(); + let mut cursor = declaration.walk(); + for declarator in declaration + 
.named_children(&mut cursor) + .filter(|child| child.kind() == "variable_declarator") + { + let kind = declarator + .child_by_field_name("value") + .filter(|value| typescript_value_is_function(*value)) + .map_or(SymbolKind::GlobalVariable, |_| SymbolKind::Function); + if let Some(name_node) = declarator.child_by_field_name("name") { + let mut targets = Vec::new(); + collect_typescript_binding_targets(name_node, &mut targets); + for target in targets { + if let Ok(name) = target.utf8_text(source.as_bytes()) { + let symbol_id = index.symbols.len() as u32; + let name = index.intern(name); + index.symbols.push(SymbolRecord { + id: symbol_id, + file_id, + parent_symbol_id, + is_top_level: parent_symbol_id.is_none(), + name, + kind, + range: declaration.range().into(), + name_range: target.range().into(), + }); + symbol_ids.push(symbol_id); + } + } + } + } + symbol_ids +} + +fn extract_typescript_namespace_members( + file_id: u32, + source: &str, + namespace: Node<'_>, + namespace_symbol_id: u32, + index: &mut TypeScriptIndex, +) { + let Some(body) = namespace.child_by_field_name("body") else { + return; + }; + let mut cursor = body.walk(); + for child in body.named_children(&mut cursor) { + let declaration = if child.kind() == "export_statement" { + child.child_by_field_name("declaration") + } else { + Some(child) + }; + let Some(declaration) = declaration else { + continue; + }; + match declaration.kind() { + "function_declaration" + | "generator_function_declaration" + | "class_declaration" + | "abstract_class_declaration" + | "interface_declaration" + | "type_alias_declaration" + | "enum_declaration" + | "internal_module" => { + push_typescript_symbol_with_parent( + file_id, + source, + declaration, + Some(namespace_symbol_id), + index, + ); + } + "lexical_declaration" | "variable_declaration" => { + push_typescript_variable_symbols_with_parent( + file_id, + source, + declaration, + Some(namespace_symbol_id), + index, + ); + } + _ => {} + } + } +} + +fn 
extract_typescript_nested_local_symbols(
    file_id: u32,
    source: &str,
    root: Node<'_>,
    index: &mut TypeScriptIndex,
) {
    // Snapshot (id, range) of the symbols already recorded for this file that
    // may own nested locals. `GlobalVariable` symbols only qualify when they
    // are top-level. The snapshot is taken up front because the walk below
    // pushes new symbols into `index.symbols`.
    let owner_symbol_ranges = index
        .symbols
        .iter()
        .filter(|symbol| {
            symbol.file_id == file_id
                && matches!(
                    symbol.kind,
                    SymbolKind::Class
                        | SymbolKind::Function
                        | SymbolKind::GlobalVariable
                        | SymbolKind::Interface
                        | SymbolKind::TypeAlias
                        | SymbolKind::Enum
                        | SymbolKind::Namespace
                )
                && (symbol.kind != SymbolKind::GlobalVariable || symbol.is_top_level)
        })
        .map(|symbol| (symbol.id, symbol.range))
        .collect::<Vec<_>>();
    extract_typescript_nested_local_symbols_from_node(
        file_id,
        source,
        root,
        index,
        &owner_symbol_ranges,
        None,
        0,
    );
}

/// Recursive worker for `extract_typescript_nested_local_symbols`.
///
/// A node whose range equals an owner symbol's range makes that symbol the
/// current owner and resets `nested_function_depth` to 0. Otherwise, entering
/// a function-like node while inside an owner increments the depth. Variable
/// declarators and binding assignments are recorded as local symbols of the
/// current owner only while the depth is greater than zero.
fn extract_typescript_nested_local_symbols_from_node(
    file_id: u32,
    source: &str,
    node: Node<'_>,
    index: &mut TypeScriptIndex,
    owner_symbol_ranges: &[(u32, SourceRange)],
    current_symbol_id: Option<u32>,
    nested_function_depth: usize,
) {
    let node_range = SourceRange::from(node.range());
    let mut current_symbol_id = current_symbol_id;
    let mut nested_function_depth = nested_function_depth;
    if let Some((symbol_id, _)) = owner_symbol_ranges
        .iter()
        .find(|(_, range)| *range == node_range)
    {
        current_symbol_id = Some(*symbol_id);
        nested_function_depth = 0;
    } else if current_symbol_id.is_some() && is_typescript_function_like(node) {
        nested_function_depth += 1;
    }

    if let Some(parent_symbol_id) = current_symbol_id {
        if nested_function_depth > 0 {
            match node.kind() {
                "variable_declarator" => {
                    push_typescript_local_variable_symbol_targets(
                        file_id,
                        source,
                        parent_symbol_id,
                        node,
                        node.child_by_field_name("name"),
                        index,
                    );
                }
                "assignment_expression" | "augmented_assignment_expression" => {
                    if let Some(left) = node.child_by_field_name("left") {
                        if typescript_assignment_left_can_bind(left) {
                            push_typescript_local_variable_symbol_targets(
                                file_id,
                                source,
                                parent_symbol_id,
                                node,
                                Some(left),
                                index,
                            );
                        }
                    }
                }
                _ => {}
            }
        }
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        extract_typescript_nested_local_symbols_from_node(
            file_id,
            source,
            child,
            index,
            owner_symbol_ranges,
            current_symbol_id,
            nested_function_depth,
        );
    }
}

/// Returns `true` when the left-hand side of an assignment is an identifier
/// or a destructuring pattern, i.e. one of the node kinds listed below.
fn typescript_assignment_left_can_bind(node: Node<'_>) -> bool {
    matches!(
        node.kind(),
        "identifier"
            | "shorthand_property_identifier_pattern"
            | "object_pattern"
            | "array_pattern"
            | "pair_pattern"
            | "assignment_pattern"
            | "object_assignment_pattern"
    )
}

/// Records every binding target under `binding_root` as a non-top-level
/// `GlobalVariable` symbol owned by `parent_symbol_id`.
///
/// Returns the new symbol ids in push order, or an empty vector when
/// `binding_root` is `None`. Targets whose text is not valid UTF-8 are
/// skipped. Each record's `range` is that of `declaration`.
fn push_typescript_local_variable_symbol_targets(
    file_id: u32,
    source: &str,
    parent_symbol_id: u32,
    declaration: Node<'_>,
    binding_root: Option<Node<'_>>,
    index: &mut TypeScriptIndex,
) -> Vec<u32> {
    let mut symbol_ids = Vec::new();
    let Some(binding_root) = binding_root else {
        return symbol_ids;
    };
    let mut targets = Vec::new();
    collect_typescript_binding_targets(binding_root, &mut targets);
    for target in targets {
        let Ok(name) = target.utf8_text(source.as_bytes()) else {
            continue;
        };
        let symbol_id = index.symbols.len() as u32;
        let name = index.intern(name);
        index.symbols.push(SymbolRecord {
            id: symbol_id,
            file_id,
            parent_symbol_id: Some(parent_symbol_id),
            is_top_level: false,
            name,
            kind: SymbolKind::GlobalVariable,
            range: declaration.range().into(),
            name_range: target.range().into(),
        });
        symbol_ids.push(symbol_id);
    }
    symbol_ids
}

/// Returns `true` for arrow functions, function expressions and generator
/// functions, looking through any number of enclosing parentheses.
fn typescript_value_is_function(node: Node<'_>) -> bool {
    match node.kind() {
        "arrow_function" | "function_expression" | "generator_function" => true,
        "parenthesized_expression" => {
            first_named_child(node).is_some_and(typescript_value_is_function)
        }
        _ => false,
    }
}

/// Appends to `out` the leaf nodes that name a binding within `node`.
///
/// Identifier-like leaves are pushed directly. Declarators, pair patterns and
/// default-value patterns are followed through their `name` / `value` / `left`
/// field respectively. Container kinds (parameter lists, declarations, object
/// and array patterns, ...) are searched through all named children. Any
/// other node kind contributes nothing.
fn collect_typescript_binding_targets<'tree>(node: Node<'tree>, out: &mut Vec<Node<'tree>>) {
    match node.kind() {
        "identifier"
        | "type_identifier"
        | "shorthand_property_identifier_pattern"
        | "property_identifier" => out.push(node),
        "variable_declarator" => {
            if let Some(name) = node.child_by_field_name("name") {
                collect_typescript_binding_targets(name, out);
            }
        }
        "pair_pattern" => {
            if let Some(value) = node.child_by_field_name("value") {
                collect_typescript_binding_targets(value, out);
            }
        }
        "assignment_pattern" | "object_assignment_pattern" => {
            if let Some(left) = node.child_by_field_name("left") {
                collect_typescript_binding_targets(left, out);
            }
        }
        "formal_parameters"
        | "lexical_declaration"
        | "optional_parameter"
        | "parameters"
        | "required_parameter"
        | "rest_pattern"
        | "variable_declaration"
        | "object_pattern"
        | "array_pattern" => {
            let mut cursor = node.walk();
            for child in node.named_children(&mut cursor) {
                collect_typescript_binding_targets(child, out);
            }
        }
        _ => {}
    }
}

/// Records the imports introduced by one `import` statement.
///
/// Nothing is recorded when the module string cannot be read. A statement
/// without an import clause, or whose clause produced no import, is recorded
/// as a single `SideEffect` import. Otherwise one record is pushed per
/// default binding, named specifier and namespace binding.
fn push_typescript_import_statement(
    file_id: u32,
    source: &str,
    node: Node<'_>,
    index: &mut TypeScriptIndex,
) {
    let module = node
        .child_by_field_name("source")
        .and_then(|source_node| typescript_string_literal_value(node_text(source, source_node)));
    let Some(module) = module else {
        return;
    };
    let Some(import_clause) = first_child_of_kind(node, &["import_clause"]) else {
        push_typescript_import(
            file_id,
            ImportKind::SideEffect,
            Some(module),
            None,
            None,
            node.range().into(),
            index,
        );
        return;
    };

    let mut emitted = false;
    let mut cursor = import_clause.walk();
    for child in import_clause.named_children(&mut cursor) {
        match child.kind() {
            "identifier" => {
                // Default import: the local binding is both name and alias.
                let name = node_text(source, child).to_owned();
                push_typescript_import(
                    file_id,
                    ImportKind::DefaultImport,
                    Some(module.clone()),
                    Some(name.clone()),
                    Some(name),
                    child.range().into(),
                    index,
                );
                emitted = true;
            }
            "named_imports" => {
                push_typescript_named_imports(file_id, source, child, &module, index);
                // Marked as emitted even if the braces held no specifier.
                emitted = true;
            }
            "namespace_import" => {
                if let Some(alias) = first_identifier_child(child) {
                    let alias = node_text(source, alias).to_owned();
                    push_typescript_import(
                        file_id,
                        ImportKind::NamespaceImport,
                        Some(module.clone()),
                        Some("*".to_owned()),
                        Some(alias),
                        child.range().into(),
                        index,
                    );
                    emitted = true;
                }
            }
            _ => {}
        }
    }

    if !emitted {
        push_typescript_import(
            file_id,
            ImportKind::SideEffect,
            Some(module),
            None,
            None,
            import_clause.range().into(),
            index,
        );
    }
}

/// Records one `NamedImport` per `import_specifier` inside `named_imports`.
///
/// The alias defaults to the imported name when the specifier has no `alias`
/// field. Specifiers without a `name` field are skipped.
fn push_typescript_named_imports(
    file_id: u32,
    source: &str,
    named_imports: Node<'_>,
    module: &str,
    index: &mut TypeScriptIndex,
) {
    let mut cursor = named_imports.walk();
    for specifier in named_imports
        .named_children(&mut cursor)
        .filter(|child| child.kind() == "import_specifier")
    {
        let Some(name_node) = specifier.child_by_field_name("name") else {
            continue;
        };
        let name = node_text(source, name_node).to_owned();
        let alias = specifier
            .child_by_field_name("alias")
            .map(|alias| node_text(source, alias).to_owned())
            .unwrap_or_else(|| name.clone());
        push_typescript_import(
            file_id,
            ImportKind::NamedImport,
            Some(module.to_owned()),
            Some(name),
            Some(alias),
            specifier.range().into(),
            index,
        );
    }
}

/// Records a `DynamicImport` for each binding of a variable declarator whose
/// initialiser contains a `require(...)` / `import(...)` call with a string
/// argument (as found by `find_typescript_dynamic_import_module`).
///
/// The bound name is used as both `name` and `alias`; the record's range is
/// the declarator's range.
fn push_typescript_dynamic_imports(
    file_id: u32,
    source: &str,
    declaration: Node<'_>,
    index: &mut TypeScriptIndex,
) {
    let mut cursor = declaration.walk();
    for declarator in declaration
        .named_children(&mut cursor)
        .filter(|child| child.kind() == "variable_declarator")
    {
        let Some(value) = declarator.child_by_field_name("value") else {
            continue;
        };
        let Some(module) = find_typescript_dynamic_import_module(source, value) else {
            continue;
        };
        if let Some(name_node) = declarator.child_by_field_name("name") {
            let mut targets = Vec::new();
            collect_typescript_binding_targets(name_node, &mut targets);
            for target in targets {
                let local_name = node_text(source, target).to_owned();
                push_typescript_import(
                    file_id,
                    ImportKind::DynamicImport,
                    Some(module.clone()),
                    Some(local_name.clone()),
                    Some(local_name),
                    declarator.range().into(),
                    index,
                );
            }
        }
    }
}

/// Depth-first search for a call whose callee text is exactly `require` or
/// `import`, returning the value of its first `string` argument.
///
/// When the matching call has a string argument, the result of
/// `typescript_string_literal_value` is returned as-is (even if `None`);
/// otherwise the search continues into the node's named children.
fn find_typescript_dynamic_import_module(source: &str, node: Node<'_>) -> Option<String> {
    if node.kind() == "call_expression" {
        if let Some(function) = node.child_by_field_name("function") {
            let function_text = node_text(source, function);
            if matches!(function_text, "require" | "import") {
                if let Some(arguments) = node.child_by_field_name("arguments") {
                    let mut cursor = arguments.walk();
                    for child in arguments.named_children(&mut cursor) {
                        if child.kind() == "string" {
                            return typescript_string_literal_value(node_text(source, child));
                        }
                    }
                }
            }
        }
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        if let Some(module) = find_typescript_dynamic_import_module(source, child) {
            return Some(module);
        }
    }
    None
}

/// Records the exports (and any implied re-export imports) of one `export`
/// statement.
///
/// The forms are tried in this order and the first match wins:
/// 1. `export <declaration>` - symbols are recorded, then one export each;
/// 2. `export { ... } [from "m"]`;
/// 3. `export * as ns from "m"`;
/// 4. `export * from "m"`;
/// 5. `export default <value>`;
/// 6. `export = <identifier>` (detected from the statement text).
fn push_typescript_export_statement(
    file_id: u32,
    source: &str,
    node: Node<'_>,
    index: &mut TypeScriptIndex,
) {
    let source_module = node
        .child_by_field_name("source")
        .and_then(|source_node| typescript_string_literal_value(node_text(source, source_node)));

    if let Some(declaration) = node.child_by_field_name("declaration") {
        let symbol_ids = push_typescript_export_declaration(file_id, source, declaration, index);
        let is_default = has_direct_child_kind(node, "default");
        for symbol_id in symbol_ids {
            let symbol = &index.symbols[symbol_id as usize];
            let symbol_name = symbol.name.to_string();
            push_typescript_export(
                file_id,
                if is_default {
                    ExportKind::Default
                } else {
                    ExportKind::Named
                },
                Some(if is_default {
                    "default".to_owned()
                } else {
                    symbol_name.clone()
                }),
                Some(symbol_name),
                None,
                Some(symbol_id),
                None,
                node.range().into(),
                index,
            );
        }
        return;
    }

    if let Some(export_clause) = first_child_of_kind(node, &["export_clause"]) {
        push_typescript_export_clause(file_id, source, node, export_clause, source_module, index);
        return;
    }

    if let Some(namespace_export) = first_child_of_kind(node, &["namespace_export"]) {
        let name = first_identifier_child(namespace_export)
            .map(|identifier| node_text(source, identifier).to_owned());
        // An import record is only created when both module and name are known.
        let import_id = source_module.as_ref().and_then(|module| {
            name.as_ref().map(|name| {
                push_typescript_import(
                    file_id,
                    ImportKind::NamespaceImport,
                    Some(module.clone()),
                    Some("*".to_owned()),
                    Some(name.clone()),
                    namespace_export.range().into(),
                    index,
                )
            })
        });
        push_typescript_export(
            file_id,
            ExportKind::Namespace,
            name.clone(),
            name,
            source_module,
            None,
            import_id,
            node.range().into(),
            index,
        );
        return;
    }

    if has_direct_child_kind(node, "*") {
        let import_id = source_module.as_ref().map(|module| {
            push_typescript_import(
                file_id,
                ImportKind::NamespaceImport,
                Some(module.clone()),
                Some("*".to_owned()),
                None,
                node.range().into(),
                index,
            )
        });
        push_typescript_export(
            file_id,
            ExportKind::Wildcard,
            None,
            None,
            source_module,
            None,
            import_id,
            node.range().into(),
            index,
        );
        return;
    }

    if has_direct_child_kind(node, "default") {
        if let Some(value) = node.child_by_field_name("value") {
            // The local name is the full source text of the exported value.
            push_typescript_export(
                file_id,
                ExportKind::Default,
                Some("default".to_owned()),
                Some(node_text(source, value).to_owned()),
                None,
                None,
                None,
                node.range().into(),
                index,
            );
        }
        return;
    }

    if node_text(source, node).trim_start().starts_with("export =") {
        let local_name =
            last_identifier_child(node).map(|identifier| node_text(source, identifier).to_owned());
        push_typescript_export(
            file_id,
            ExportKind::ExportEquals,
            local_name.clone(),
            local_name,
            None,
            None,
            None,
            node.range().into(),
            index,
        );
    }
}

/// Records the symbol(s) declared by the `declaration` of an export statement
/// and returns their ids (empty for node kinds that are not handled).
fn push_typescript_export_declaration(
    file_id: u32,
    source: &str,
    declaration: Node<'_>,
    index: &mut TypeScriptIndex,
) -> Vec<u32> {
    match declaration.kind() {
        "function_declaration"
        | "generator_function_declaration"
        | "class_declaration"
        | "abstract_class_declaration"
        | "interface_declaration"
        | "type_alias_declaration"
        | "enum_declaration"
        | "internal_module" => push_typescript_symbol(file_id, source, declaration, index)
            .into_iter()
            .collect(),
        "lexical_declaration" | "variable_declaration" => {
            push_typescript_variable_symbols(file_id, source, declaration, index)
        }
        _ => Vec::new(),
    }
}

/// Records one export per `export_specifier` of an `export { ... }` clause.
///
/// When `source_module` is present (a re-export), a `NamedImport` is recorded
/// first and linked through `import_id`. A specifier exported under the name
/// `default` is recorded as `ExportKind::Default`. Every export record carries
/// the range of the whole export statement.
fn push_typescript_export_clause(
    file_id: u32,
    source: &str,
    export_statement: Node<'_>,
    export_clause: Node<'_>,
    source_module: Option<String>,
    index: &mut TypeScriptIndex,
) {
    let mut cursor = export_clause.walk();
    for specifier in export_clause
        .named_children(&mut cursor)
        .filter(|child| child.kind() == "export_specifier")
    {
        let Some(name_node) = specifier.child_by_field_name("name") else {
            continue;
        };
        let local_name = node_text(source, name_node).to_owned();
        let exported_name = specifier
            .child_by_field_name("alias")
            .map(|alias| node_text(source, alias).to_owned())
            .unwrap_or_else(|| local_name.clone());
        let import_id = source_module.as_ref().map(|module| {
            push_typescript_import(
                file_id,
                ImportKind::NamedImport,
                Some(module.clone()),
                Some(local_name.clone()),
                Some(exported_name.clone()),
                specifier.range().into(),
                index,
            )
        });
        push_typescript_export(
            file_id,
            if exported_name == "default" {
                ExportKind::Default
            } else {
                ExportKind::Named
            },
            Some(exported_name),
            Some(local_name),
            source_module.clone(),
            None,
            import_id,
            export_statement.range().into(),
            index,
        );
    }
}

/// Interns the strings, appends an `ImportRecord` to `index.imports` and
/// returns its id (the record's position in that vector).
fn push_typescript_import(
    file_id: u32,
    kind: ImportKind,
    module: Option<String>,
    name: Option<String>,
    alias: Option<String>,
    range: SourceRange,
    index: &mut TypeScriptIndex,
) -> u32 {
    let import_id = index.imports.len() as u32;
    let module = module.map(|value| index.intern(value));
    let name = name.map(|value| index.intern(value));
    let alias = alias.map(|value| index.intern(value));
    index.imports.push(ImportRecord {
        id: import_id,
        file_id,
        kind,
        module,
        name,
        alias,
        range,
    });
    import_id
}

/// Interns the strings and appends an `ExportRecord` to `index.exports`; the
/// record's id is its position in that vector.
fn push_typescript_export(
    file_id: u32,
    kind: ExportKind,
    name: Option<String>,
    local_name: Option<String>,
    source_module: Option<String>,
    symbol_id: Option<u32>,
    import_id: Option<u32>,
    range: SourceRange,
    index: &mut TypeScriptIndex,
) {
    let name = name.map(|value| index.intern(value));
    let local_name = local_name.map(|value| index.intern(value));
    let source_module = source_module.map(|value| index.intern(value));
    index.exports.push(ExportRecord {
        id: index.exports.len() as u32,
        file_id,
        kind,
        name,
        local_name,
        source_module,
        symbol_id,
        import_id,
        range,
    });
}

/// Collects reference candidates for one file into `out`.
///
/// Lookup tables are built from the file's recorded symbols (ranges, name
/// ranges to exclude, local bindings). Three collectors then run in order:
/// identifiers, type references and heritage clauses.
fn collect_typescript_reference_candidates(
    file_id: u32,
    source: &str,
    root: Node<'_>,
    index: &TypeScriptIndex,
    out: &mut Vec<ReferenceCandidate>,
) {
    let symbol_ranges = index
        .symbols
        .iter()
        .filter(|symbol| symbol.file_id == file_id)
        .map(|symbol| (symbol.id, symbol.range))
        .collect::<Vec<_>>();
    // Name ranges of declarations; an identifier at one of these positions is
    // the definition itself and must not count as a reference.
    let excluded_ranges = index
        .symbols
        .iter()
        .filter(|symbol| symbol.file_id == file_id)
        .map(|symbol| symbol.name_range)
        .collect::<Vec<_>>();
    let indexed_local_symbols = IndexedLocalSymbols::from_symbols(
        index
            .symbols
            .iter()
            .filter(|symbol| symbol.file_id == file_id),
    );
    let (local_bindings_by_symbol_id, local_binding_scopes) =
        collect_typescript_local_bindings(file_id, source, root, index, &symbol_ranges);

    collect_typescript_identifier_candidates(
        file_id,
        source,
        root,
        &symbol_ranges,
        &local_bindings_by_symbol_id,
        &local_binding_scopes,
        &excluded_ranges,
        &indexed_local_symbols,
        out,
    );
    collect_typescript_type_reference_candidates(
        file_id,
        source,
        root,
        &symbol_ranges,
        &local_bindings_by_symbol_id,
        &local_binding_scopes,
        &excluded_ranges,
        &indexed_local_symbols,
        out,
    );
    collect_typescript_heritage_reference_candidates(
        file_id,
        source,
        root,
        &symbol_ranges,
        &local_bindings_by_symbol_id,
        &local_binding_scopes,
        &excluded_ranges,
        &indexed_local_symbols,
        out,
    );
}

/// Records every promise chain found in the file into `index.promise_chains`.
fn collect_typescript_promise_chains(
    file_id: u32,
    source: &str,
    root: Node<'_>,
    index: &mut TypeScriptIndex,
) {
    let symbol_ranges = index
        .symbols
        .iter()
        .filter(|symbol| symbol.file_id == file_id)
        .map(|symbol| (symbol.id, symbol.range))
        .collect::<Vec<_>>();
    collect_typescript_promise_chains_from_node(file_id, source, root, &symbol_ranges, index);
}

/// Recursive worker for `collect_typescript_promise_chains`.
///
/// A chain is recorded at its outermost call only: calls that are themselves
/// the receiver of a further `then`/`catch`/`finally` call are skipped. The
/// chain must contain at least one `then` stage to be recorded.
fn collect_typescript_promise_chains_from_node(
    file_id: u32,
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    index: &mut TypeScriptIndex,
) {
    if node.kind() == "call_expression" && !typescript_is_nested_promise_stage_call(source, node) {
        if let Some((base_range, stage_names)) = typescript_promise_chain_components(source, node) {
            if stage_names.iter().any(|stage| stage == "then") {
                let range = SourceRange::from(node.range());
                let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range);
                let stage_names = stage_names
                    .into_iter()
                    .map(|stage| index.intern(stage))
                    .collect::<Vec<_>>();
                index.promise_chains.push(PromiseChainRecord {
                    id: index.promise_chains.len() as u32,
                    source_file_id: file_id,
                    source_symbol_id,
                    stage_names,
                    range,
                    base_range,
                });
            }
        }
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        collect_typescript_promise_chains_from_node(file_id, source, child, symbol_ranges, index);
    }
}

/// Decomposes `base.then(..).catch(..)`-style calls into the range of the base
/// expression and the stage names in source order.
///
/// Returns `None` unless `node` is a call on a member expression whose
/// property is `then`, `catch` or `finally`. The receiver is decomposed
/// recursively; when it is not itself such a call, its range becomes the base.
fn typescript_promise_chain_components(
    source: &str,
    node: Node<'_>,
) -> Option<(SourceRange, Vec<String>)> {
    if node.kind() != "call_expression" {
        return None;
    }
    let function = node.child_by_field_name("function")?;
    if function.kind() != "member_expression" {
        return None;
    }
    let property = function.child_by_field_name("property")?;
    let stage_name = typescript_promise_stage_name(source, property)?;
    let object = function.child_by_field_name("object")?;
    let (base_range, mut stage_names) = typescript_promise_chain_components(source, object)
        .unwrap_or_else(|| (SourceRange::from(object.range()), Vec::new()));
    stage_names.push(stage_name);
    Some((base_range, stage_names))
}

/// Returns `true` when `node` is the receiver (`object`) of a member
/// expression naming a promise stage, and that member expression is itself
/// directly inside a call - i.e. `node` is an inner link of a longer chain.
fn typescript_is_nested_promise_stage_call(source: &str, node: Node<'_>) -> bool {
    let Some(parent) = node.parent() else {
        return false;
    };
    if parent.kind() != "member_expression" {
        return false;
    }
    let Some(object) = parent.child_by_field_name("object") else {
        return false;
    };
    if SourceRange::from(object.range()) != SourceRange::from(node.range()) {
        return false;
    }
    let Some(property) = parent.child_by_field_name("property") else {
        return false;
    };
    if typescript_promise_stage_name(source, property).is_none() {
        return false;
    }
    parent
        .parent()
        .is_some_and(|grandparent| grandparent.kind() == "call_expression")
}

/// Returns the text of `node` when it is an identifier-like node spelling
/// `then`, `catch` or `finally`; `None` otherwise.
fn typescript_promise_stage_name(source: &str, node: Node<'_>) -> Option<String> {
    if !matches!(
        node.kind(),
        "identifier" | "property_identifier" | "private_property_identifier"
    ) {
        return None;
    }
    let name = node.utf8_text(source.as_bytes()).ok()?;
    matches!(name, "then" | "catch" | "finally").then(|| name.to_owned())
}

/// Builds the two local-binding tables used for shadowing checks.
///
/// The first maps each symbol id in the file to the names bound anywhere in
/// its range (non-lexical declarators and the symbol's own parameters), minus
/// the symbol's own name; every id in `symbol_ranges` gets an entry, possibly
/// empty. The second lists range-limited scopes for lexical declarations,
/// nested declarations, `for ... in` bindings, catch parameters and
/// function-expression parameters.
fn collect_typescript_local_bindings(
    file_id: u32,
    source: &str,
    root: Node<'_>,
    index: &TypeScriptIndex,
    symbol_ranges: &[(u32, SourceRange)],
) -> (HashMap<u32, HashSet<String>>, Vec<LocalBindingScope>) {
    let mut bindings = HashMap::new();
    let mut scoped_bindings = Vec::new();
    let file_symbols = index
        .symbols
        .iter()
        .filter(|symbol| symbol.file_id == file_id)
        .collect::<Vec<_>>();
    for symbol in file_symbols {
        let mut names = HashSet::new();
        collect_typescript_local_bindings_for_symbol(source, root, symbol.range, &mut names);
        names.remove(symbol.name.as_ref());
        if !names.is_empty() {
            bindings.insert(symbol.id, names);
        }
    }

    for (symbol_id, _) in symbol_ranges {
        bindings.entry(*symbol_id).or_default();
    }
    collect_typescript_scoped_local_bindings_from_node(
        source,
        root,
        symbol_ranges,
        &mut scoped_bindings,
    );
    (bindings, scoped_bindings)
}

/// Collects into `out` the names bound for the whole of `symbol_range`.
///
/// Subtrees that do not overlap `symbol_range` and `import` statements are
/// pruned. Declarators count only when fully inside the range and not part of
/// a `lexical_declaration`; parameters count only when their nearest
/// function-like ancestor spans exactly `symbol_range`.
fn collect_typescript_local_bindings_for_symbol(
    source: &str,
    node: Node<'_>,
    symbol_range: SourceRange,
    out: &mut HashSet<String>,
) {
    let node_range = node.range().into();
    if !ranges_overlap(symbol_range, node_range) {
        return;
    }

    match node.kind() {
        "import_statement" => return,
        "variable_declarator" => {
            if contains_range(symbol_range, node_range)
                && !typescript_variable_declarator_is_lexical(node)
            {
                if let Some(name_node) = node.child_by_field_name("name") {
                    let mut targets = Vec::new();
                    collect_typescript_binding_targets(name_node, &mut targets);
                    for target in targets {
                        if let Ok(name) = target.utf8_text(source.as_bytes()) {
                            out.insert(name.to_owned());
                        }
                    }
                }
            }
        }
        "required_parameter" | "optional_parameter" | "rest_pattern" => {
            if contains_range(symbol_range, node_range)
                && typescript_parameter_is_symbol_wide(node, symbol_range)
            {
                // Prefer the `pattern` field, then `name`, then the first named child.
                let binding_node = node.child_by_field_name("pattern").or_else(|| {
                    node.child_by_field_name("name")
                        .or_else(|| first_named_child(node))
                });
                if let Some(binding_node) = binding_node {
                    let mut targets = Vec::new();
                    collect_typescript_binding_targets(binding_node, &mut targets);
                    for target in targets {
                        if let Ok(name) = target.utf8_text(source.as_bytes()) {
                            out.insert(name.to_owned());
                        }
                    }
                }
            }
        }
        _ => {}
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        collect_typescript_local_bindings_for_symbol(source, child, symbol_range, out);
    }
}

/// Returns `true` when the declarator's parent is a `lexical_declaration`.
fn typescript_variable_declarator_is_lexical(node: Node<'_>) -> bool {
    node.parent()
        .is_some_and(|parent| parent.kind() == "lexical_declaration")
}

/// Returns `true` when the nearest function-like node at or above `node`
/// spans exactly `symbol_range`; `false` when there is none.
fn typescript_parameter_is_symbol_wide(node: Node<'_>, symbol_range: SourceRange) -> bool {
    // The walk starts at `node` itself, so `parent` is the node under test at
    // each step rather than strictly an ancestor.
    let mut current = Some(node);
    while let Some(parent) = current {
        if is_typescript_function_like(parent) {
            return SourceRange::from(parent.range()) == symbol_range;
        }
        current = parent.parent();
    }
    false
}

/// Returns `true` for the node kinds treated as function-like in this file:
/// function and generator declarations/expressions, methods, function
/// signatures and arrow functions.
fn is_typescript_function_like(node: Node<'_>) -> bool {
    matches!(
        node.kind(),
        "function_declaration"
            | "generator_function_declaration"
            | "method_definition"
            | "function_signature"
            | "arrow_function"
            | "function_expression"
            | "generator_function"
    )
}

/// Walks the tree and appends a `LocalBindingScope` for each construct that
/// introduces names with a limited range: nested function/class declarations,
/// lexical declarations, `for ... in` bindings, catch parameters and the
/// parameters of arrow/function/generator expressions.
fn collect_typescript_scoped_local_bindings_from_node(
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    scoped_bindings: &mut Vec<LocalBindingScope>,
) {
    match node.kind() {
        "function_declaration"
        | "generator_function_declaration"
        | "class_declaration"
        | "abstract_class_declaration" => {
            push_typescript_nested_declaration_binding_scope(
                source,
                node,
                symbol_ranges,
                scoped_bindings,
            );
        }
        "lexical_declaration" => {
            push_typescript_lexical_declaration_binding_scope(
                source,
                node,
                symbol_ranges,
                scoped_bindings,
            );
        }
        "for_in_statement" => {
            if let Some(left) = node
                .child_by_field_name("left")
                .or_else(|| first_named_child(node))
            {
                // Scope is the loop body when one can be found, else the whole statement.
                let scope = node
                    .child_by_field_name("body")
                    .or_else(|| first_child_of_kind(node, &["statement_block"]))
                    .unwrap_or(node);
                push_typescript_local_binding_scope(
                    source,
                    left,
                    scope.range().into(),
                    symbol_ranges,
                    scoped_bindings,
                );
            }
        }
        "catch_clause" => {
            if let Some(parameter) = node
                .child_by_field_name("parameter")
                .or_else(|| first_named_child(node))
            {
                let scope = node
                    .child_by_field_name("body")
                    .or_else(|| first_child_of_kind(node, &["statement_block"]))
                    .unwrap_or(node);
                push_typescript_local_binding_scope(
                    source,
                    parameter,
                    scope.range().into(),
                    symbol_ranges,
                    scoped_bindings,
                );
            }
        }
        "arrow_function" | "function_expression" | "generator_function" => {
            push_typescript_function_parameter_binding_scope(
                source,
                node,
                symbol_ranges,
                scoped_bindings,
            );
        }
        _ => {}
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        collect_typescript_scoped_local_bindings_from_node(
            source,
            child,
            symbol_ranges,
            scoped_bindings,
        );
    }
}

/// Appends a scope for the names bound by a `let`/`const` declaration.
///
/// The scope starts at the first binding target and ends where the nearest
/// enclosing block-like ancestor ends (see
/// `typescript_lexical_declaration_scope_end`), or at the end of the
/// innermost enclosing symbol when no such ancestor exists. Nothing is pushed
/// when the declaration lies in no recorded symbol or binds no name.
fn push_typescript_lexical_declaration_binding_scope(
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    scoped_bindings: &mut Vec<LocalBindingScope>,
) {
    let node_range = SourceRange::from(node.range());
    let Some((source_symbol_id, symbol_range)) =
        innermost_symbol_range_for_range(symbol_ranges, node_range)
    else {
        return;
    };
    let mut targets = Vec::new();
    collect_typescript_binding_targets(node, &mut targets);
    let Some(first_target) = targets.iter().min_by_key(|target| target.start_byte()) else {
        return;
    };
    let scope_end = typescript_lexical_declaration_scope_end(node).unwrap_or(symbol_range);
    let scope_range = SourceRange {
        start_byte: first_target.start_byte(),
        end_byte: scope_end.end_byte,
        start_row: first_target.start_position().row,
        start_column: first_target.start_position().column,
        end_row: scope_end.end_row,
        end_column: scope_end.end_column,
    };
    let mut names = HashSet::new();
    for target in targets {
        if let Ok(name) = target.utf8_text(source.as_bytes()) {
            names.insert(name.to_owned());
        }
    }
    if !names.is_empty() {
        scoped_bindings.push(LocalBindingScope {
            source_symbol_id,
            range: scope_range,
            names,
        });
    }
}

/// Returns the range of the nearest ancestor that is a `for_statement`,
/// `for_in_statement`, `statement_block` or `switch_body`, if any.
fn typescript_lexical_declaration_scope_end(node: Node<'_>) -> Option<SourceRange> {
    let mut current = node.parent();
    while let Some(parent) = current {
        if matches!(
            parent.kind(),
            "for_statement" | "for_in_statement" | "statement_block" | "switch_body"
        ) {
            return Some(parent.range().into());
        }
        current = parent.parent();
    }
    None
}

/// Appends a scope for the name of a function/class declaration nested inside
/// a recorded symbol.
///
/// The scope runs from the declaration's name to the end of the innermost
/// enclosing symbol. Declarations that are themselves that symbol (identical
/// range) or that have no `name` field are skipped.
fn push_typescript_nested_declaration_binding_scope(
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    scoped_bindings: &mut Vec<LocalBindingScope>,
) {
    let node_range = SourceRange::from(node.range());
    let Some((source_symbol_id, symbol_range)) =
        innermost_symbol_range_for_range(symbol_ranges, node_range)
    else {
        return;
    };
    if node_range == symbol_range {
        return;
    }
    let Some(name) = node.child_by_field_name("name") else {
        return;
    };
    let scope_range = SourceRange {
        start_byte: name.start_byte(),
        end_byte: symbol_range.end_byte,
        start_row: name.start_position().row,
        start_column: name.start_position().column,
        end_row: symbol_range.end_row,
        end_column: symbol_range.end_column,
    };
    let mut names = HashSet::new();
    if let Ok(name) = name.utf8_text(source.as_bytes()) {
        names.insert(name.to_owned());
    }
    if !names.is_empty() {
        scoped_bindings.push(LocalBindingScope {
            source_symbol_id,
            range: scope_range,
            names,
        });
    }
}

/// Appends scopes for the parameters of an arrow/function/generator
/// expression, limited to the function body.
///
/// The parameter node is the `parameters` field, else a `formal_parameters`
/// child, else the first named child. Nothing is pushed when either the
/// parameters or the body cannot be located.
fn push_typescript_function_parameter_binding_scope(
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    scoped_bindings: &mut Vec<LocalBindingScope>,
) {
    let Some(parameters) = node
        .child_by_field_name("parameters")
        .or_else(|| first_child_of_kind(node, &["formal_parameters"]))
        .or_else(|| first_named_child(node))
    else {
        return;
    };
    let Some(body) = typescript_function_body_node(node) else {
        return;
    };
    push_typescript_local_binding_scope(
        source,
        parameters,
        body.range().into(),
        symbol_ranges,
        scoped_bindings,
    );
}

/// Locates a function's body: the `body` field, else a `statement_block`
/// child, else the last named child that is not `formal_parameters`.
fn typescript_function_body_node(node: Node<'_>) -> Option<Node<'_>> {
    node.child_by_field_name("body")
        .or_else(|| first_child_of_kind(node, &["statement_block"]))
        .or_else(|| {
            let mut cursor = node.walk();
            node.named_children(&mut cursor)
                .filter(|child| child.kind() != "formal_parameters")
                .last()
        })
}

/// Appends two scopes carrying the names bound under `binding_root`: one
/// covering `binding_root`'s own range and one covering `scope_range`.
///
/// Both are attributed to the innermost symbol containing `scope_range`;
/// nothing is pushed when there is no such symbol or no name is bound.
fn push_typescript_local_binding_scope(
    source: &str,
    binding_root: Node<'_>,
    scope_range: SourceRange,
    symbol_ranges: &[(u32, SourceRange)],
    scoped_bindings: &mut Vec<LocalBindingScope>,
) {
    let Some(source_symbol_id) = innermost_symbol_for_range(symbol_ranges, scope_range) else {
        return;
    };
    let mut targets = Vec::new();
    collect_typescript_binding_targets(binding_root, &mut targets);
    let mut names = HashSet::new();
    for target in targets {
        if let Ok(name) = target.utf8_text(source.as_bytes()) {
            names.insert(name.to_owned());
        }
    }
    if !names.is_empty() {
        scoped_bindings.push(LocalBindingScope {
            source_symbol_id,
            range: binding_root.range().into(),
            names: names.clone(),
        });
        scoped_bindings.push(LocalBindingScope {
            source_symbol_id,
            range: scope_range,
            names,
        });
    }
}

/// Walks the tree and appends a `ReferenceCandidate` for each value-level
/// identifier use that is not a declaration name and is not shadowed.
///
/// Special cases, in match order:
/// - import statements, export clauses, namespace exports and heritage
///   clauses are not descended into by this collector;
/// - `export = ...` only has a call-expression value scanned;
/// - calls delegate the callee to `push_typescript_call_reference_candidate`
///   and `collect_typescript_call_function_operands`, then scan the other
///   named children;
/// - member expressions yield a qualified candidate for the property
///   (shadowing is checked on the first dotted segment of the receiver text)
///   and then only the receiver is scanned further.
fn collect_typescript_identifier_candidates(
    file_id: u32,
    source: &str,
    node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    local_bindings_by_symbol_id: &HashMap<u32, HashSet<String>>,
    local_binding_scopes: &[LocalBindingScope],
    excluded_ranges: &[SourceRange],
    indexed_local_symbols: &IndexedLocalSymbols,
    out: &mut Vec<ReferenceCandidate>,
) {
    match node.kind() {
        "import_statement"
        | "export_clause"
        | "namespace_export"
        | "extends_clause"
        | "implements_clause"
        | "extends_type_clause" => return,
        "export_statement" if node_text(source, node).trim_start().starts_with("export =") => {
            let call_value = node
                .child_by_field_name("value")
                .filter(|value| value.kind() == "call_expression")
                .or_else(|| first_child_of_kind(node, &["call_expression"]));
            if let Some(call_value) = call_value {
                collect_typescript_identifier_candidates(
                    file_id,
                    source,
                    call_value,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }
            return;
        }
        "call_expression" => {
            let function_node = node.child_by_field_name("function");
            if let Some(function_node) = function_node {
                push_typescript_call_reference_candidate(
                    file_id,
                    source,
                    node,
                    function_node,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
                collect_typescript_call_function_operands(
                    file_id,
                    source,
                    function_node,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }

            // The callee was handled above; skip it among the children by range.
            let function_range = function_node.map(|function| SourceRange::from(function.range()));
            let mut cursor = node.walk();
            for child in node.named_children(&mut cursor) {
                if function_range.is_some_and(|range| SourceRange::from(child.range()) == range) {
                    continue;
                }
                collect_typescript_identifier_candidates(
                    file_id,
                    source,
                    child,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }
            return;
        }
        "member_expression" => {
            if let (Some(object), Some(property)) = (
                node.child_by_field_name("object"),
                node.child_by_field_name("property"),
            ) {
                let range = property.range().into();
                if matches!(property.kind(), "identifier" | "property_identifier")
                    && !range_matches_any(range, excluded_ranges)
                {
                    if let (Ok(qualifier), Ok(name)) = (
                        object.utf8_text(source.as_bytes()),
                        property.utf8_text(source.as_bytes()),
                    ) {
                        let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range);
                        if !typescript_reference_is_shadowed(
                            source_symbol_id,
                            qualifier.split('.').next().unwrap_or(qualifier),
                            range,
                            local_bindings_by_symbol_id,
                            local_binding_scopes,
                            indexed_local_symbols,
                        ) {
                            out.push(ReferenceCandidate {
                                source_file_id: file_id,
                                source_symbol_id,
                                name: name.to_owned(),
                                qualifier: Some(qualifier.to_owned()),
                                range,
                                is_subclass: false,
                                call_range: None,
                            });
                        }
                    }
                }
            }
            if let Some(object) = node.child_by_field_name("object") {
                collect_typescript_identifier_candidates(
                    file_id,
                    source,
                    object,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }
            return;
        }
        _ => {}
    }

    let range = node.range().into();
    if node.kind() == "identifier" && !range_matches_any(range, excluded_ranges) {
        if let Ok(name) = node.utf8_text(source.as_bytes()) {
            let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range);
            if !typescript_reference_is_shadowed(
                source_symbol_id,
                name,
                range,
                local_bindings_by_symbol_id,
                local_binding_scopes,
                indexed_local_symbols,
            ) {
                out.push(ReferenceCandidate {
                    source_file_id: file_id,
                    source_symbol_id,
                    name: name.to_owned(),
                    qualifier: None,
                    range,
                    is_subclass: false,
                    call_range: None,
                });
            }
        }
    }

    let mut cursor = node.walk();
    for child in node.named_children(&mut cursor) {
        collect_typescript_identifier_candidates(
            file_id,
            source,
            child,
            symbol_ranges,
            local_bindings_by_symbol_id,
            local_binding_scopes,
            excluded_ranges,
            indexed_local_symbols,
            out,
        );
    }
}

/// Collects identifier candidates from the operands of a call's callee.
///
/// For `a.b()` only the receiver `a` is scanned; for `f()()` the inner call
/// is scanned as a whole; a bare identifier callee contributes nothing here;
/// for any other callee shape every named child is scanned.
fn collect_typescript_call_function_operands(
    file_id: u32,
    source: &str,
    function_node: Node<'_>,
    symbol_ranges: &[(u32, SourceRange)],
    local_bindings_by_symbol_id: &HashMap<u32, HashSet<String>>,
    local_binding_scopes: &[LocalBindingScope],
    excluded_ranges: &[SourceRange],
    indexed_local_symbols: &IndexedLocalSymbols,
    out: &mut Vec<ReferenceCandidate>,
) {
    match function_node.kind() {
        "member_expression" => {
            if let Some(object) = function_node.child_by_field_name("object") {
                collect_typescript_identifier_candidates(
                    file_id,
                    source,
                    object,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }
        }
        "call_expression" => {
            collect_typescript_identifier_candidates(
                file_id,
                source,
                function_node,
                symbol_ranges,
                local_bindings_by_symbol_id,
                local_binding_scopes,
                excluded_ranges,
                indexed_local_symbols,
                out,
            );
        }
        "identifier" => {}
        _ => {
            let mut cursor = function_node.walk();
            for child in function_node.named_children(&mut cursor) {
                collect_typescript_identifier_candidates(
                    file_id,
                    source,
                    child,
                    symbol_ranges,
                    local_bindings_by_symbol_id,
                    local_binding_scopes,
                    excluded_ranges,
                    indexed_local_symbols,
                    out,
                );
            }
        }
    }
}

fn
push_typescript_call_reference_candidate( + file_id: u32, + source: &str, + call_node: Node<'_>, + function_node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + match function_node.kind() { + "identifier" | "property_identifier" | "private_property_identifier" => { + push_typescript_named_call_reference_candidate( + file_id, + source, + call_node.range().into(), + function_node, + None, + function_node, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } + "member_expression" => { + let (Some(object), Some(property)) = ( + function_node.child_by_field_name("object"), + function_node.child_by_field_name("property"), + ) else { + return; + }; + if !matches!( + property.kind(), + "identifier" | "property_identifier" | "private_property_identifier" + ) { + return; + } + push_typescript_named_call_reference_candidate( + file_id, + source, + call_node.range().into(), + property, + Some(object), + property, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } + _ => {} + } +} + +fn push_typescript_named_call_reference_candidate( + file_id: u32, + source: &str, + call_range: SourceRange, + name_node: Node<'_>, + qualifier_node: Option>, + shadow_name_node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + let name_range = SourceRange::from(name_node.range()); + if range_matches_any(name_range, excluded_ranges) { + return; + } + let Ok(name) = name_node.utf8_text(source.as_bytes()) else { + return; + }; + let qualifier = 
qualifier_node.and_then(|qualifier_node| { + qualifier_node + .utf8_text(source.as_bytes()) + .ok() + .map(|qualifier| qualifier.to_owned()) + }); + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, name_range); + let shadow_name = qualifier + .as_deref() + .map(|qualifier| qualifier.split('.').next().unwrap_or(qualifier)) + .unwrap_or(name); + let shadow_range = qualifier_node + .map(|qualifier_node| SourceRange::from(qualifier_node.range())) + .unwrap_or_else(|| SourceRange::from(shadow_name_node.range())); + if typescript_reference_is_shadowed( + source_symbol_id, + shadow_name, + shadow_range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + return; + } + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier, + range: name_range, + is_subclass: false, + call_range: Some(call_range), + }); +} + +fn collect_typescript_type_reference_candidates( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + match node.kind() { + "import_statement" + | "export_clause" + | "namespace_export" + | "extends_clause" + | "implements_clause" + | "extends_type_clause" => return, + "type_identifier" => { + push_typescript_type_reference_candidate( + file_id, + source, + node, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + return; + } + "nested_type_identifier" => { + push_typescript_nested_type_reference_candidate( + file_id, + source, + node, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + false, + ); + return; + } + "type_parameter" => { + let name_range = node + 
.child_by_field_name("name") + .map(|name| SourceRange::from(name.range())); + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if name_range.is_some_and(|range| SourceRange::from(child.range()) == range) { + continue; + } + collect_typescript_type_reference_candidates( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } + return; + } + _ => {} + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_typescript_type_reference_candidates( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } +} + +fn push_typescript_type_reference_candidate( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + let range = SourceRange::from(node.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = node.utf8_text(source.as_bytes()) else { + return; + }; + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + name, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: None, + range, + is_subclass: false, + call_range: None, + }); + } +} + +fn push_typescript_nested_type_reference_candidate( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + 
indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, + is_subclass: bool, +) { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let Some(module_node) = node.child_by_field_name("module") else { + return; + }; + let range = SourceRange::from(name_node.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = name_node.utf8_text(source.as_bytes()) else { + return; + }; + let Ok(module) = module_node.utf8_text(source.as_bytes()) else { + return; + }; + let qualifier = module.split('.').next().unwrap_or(module); + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + qualifier, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: Some(qualifier.to_owned()), + range, + is_subclass, + call_range: None, + }); + } +} + +fn collect_typescript_heritage_reference_candidates( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + if node.kind() == "extends_clause" { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() == "type_arguments" { + continue; + } + push_typescript_heritage_expression_reference_candidate( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + break; + } + return; + } + + if matches!(node.kind(), "implements_clause" | "extends_type_clause") { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + 
push_typescript_heritage_type_reference_candidate( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } + return; + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_typescript_heritage_reference_candidates( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } +} + +fn push_typescript_heritage_expression_reference_candidate( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + match node.kind() { + "identifier" => { + let range = SourceRange::from(node.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = node.utf8_text(source.as_bytes()) else { + return; + }; + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + name, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: None, + range, + is_subclass: true, + call_range: None, + }); + } + } + "member_expression" => { + let (Some(object), Some(property)) = ( + node.child_by_field_name("object"), + node.child_by_field_name("property"), + ) else { + return; + }; + let range = SourceRange::from(property.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = property.utf8_text(source.as_bytes()) else { + return; + }; + let Ok(qualifier) = object.utf8_text(source.as_bytes()) else { + return; + }; + let qualifier = 
qualifier.split('.').next().unwrap_or(qualifier); + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + qualifier, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: Some(qualifier.to_owned()), + range, + is_subclass: true, + call_range: None, + }); + } + } + _ => {} + } +} + +fn push_typescript_heritage_type_reference_candidate( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + indexed_local_symbols: &IndexedLocalSymbols, + out: &mut Vec, +) { + match node.kind() { + "type_identifier" => { + let range = SourceRange::from(node.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = node.utf8_text(source.as_bytes()) else { + return; + }; + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + name, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: None, + range, + is_subclass: true, + call_range: None, + }); + } + } + "generic_type" => { + if let Some(name) = node.child_by_field_name("name") { + push_typescript_heritage_type_reference_candidate( + file_id, + source, + name, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + indexed_local_symbols, + out, + ); + } + } + "nested_type_identifier" => { + let Some(name_node) = node.child_by_field_name("name") else { + return; + }; + let Some(module_node) = node.child_by_field_name("module") else { + return; 
+ }; + let range = SourceRange::from(name_node.range()); + if range_matches_any(range, excluded_ranges) { + return; + } + let Ok(name) = name_node.utf8_text(source.as_bytes()) else { + return; + }; + let Ok(module) = module_node.utf8_text(source.as_bytes()) else { + return; + }; + let qualifier = module.split('.').next().unwrap_or(module); + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !typescript_reference_is_shadowed( + source_symbol_id, + qualifier, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + indexed_local_symbols, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: Some(qualifier.to_owned()), + range, + is_subclass: true, + call_range: None, + }); + } + } + _ => {} + } +} + +fn syntax_error_count(node: Node<'_>) -> usize { + let mut count = usize::from(node.is_error() || node.is_missing()); + if !node.has_error() { + return count; + } + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + count += syntax_error_count(child); + } + count +} + +fn ranges_overlap(left: SourceRange, right: SourceRange) -> bool { + left.start_byte < right.end_byte && right.start_byte < left.end_byte +} + +fn typescript_reference_is_shadowed( + source_symbol_id: Option, + name: &str, + range: SourceRange, + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + indexed_local_symbols: &IndexedLocalSymbols, +) -> bool { + if typescript_indexed_local_symbol_for_reference( + source_symbol_id, + name, + range, + indexed_local_symbols, + ) + .is_some() + { + return false; + } + is_shadowed_local_binding( + source_symbol_id, + name, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + ) +} + +fn typescript_indexed_local_symbol_for_reference( + source_symbol_id: Option, + name: &str, + range: SourceRange, + indexed_local_symbols: &IndexedLocalSymbols, +) -> Option { + let owner_symbol_id = + 
typescript_local_scope_owner_symbol_id(source_symbol_id, indexed_local_symbols)?; + indexed_local_symbols + .symbols_by_parent_and_name + .get(&(owner_symbol_id, name.to_owned()))? + .iter() + .rev() + .find(|symbol| { + symbol.name_range.start_byte < range.start_byte + && !range_matches_any(range, &[symbol.name_range]) + }) + .map(|symbol| symbol.id) +} + +fn typescript_local_scope_owner_symbol_id( + source_symbol_id: Option, + indexed_local_symbols: &IndexedLocalSymbols, +) -> Option { + let source_symbol_id = source_symbol_id?; + indexed_local_symbols + .parent_symbol_by_id + .get(&source_symbol_id) + .copied() + .or(Some(source_symbol_id)) +} + +fn resolve_typescript_imports(index: &mut TypeScriptIndex, ts_configs: &[TypeScriptConfig]) { + let file_by_path: HashMap = index + .files + .iter() + .map(|file| (file.path.to_string(), file.id)) + .collect(); + let symbol_by_file_and_name: HashMap<(u32, String), u32> = index + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| ((symbol.file_id, symbol.name.to_string()), symbol.id)) + .collect(); + let mut resolutions = Vec::new(); + for import in &index.imports { + let Some(module) = import.module.as_deref() else { + continue; + }; + let Some(source_file) = index.files.get(import.file_id as usize) else { + continue; + }; + let target_file_id = if module.starts_with('.') { + resolve_typescript_relative_module(source_file, module, &file_by_path) + } else { + resolve_typescript_config_module(source_file, module, &file_by_path, ts_configs) + }; + let Some(target_file_id) = target_file_id else { + continue; + }; + resolutions.push(ImportResolutionRecord { + id: resolutions.len() as u32, + import_id: import.id, + source_file_id: import.file_id, + target_file_id, + target_symbol_id: None, + }); + } + resolve_typescript_import_symbols(index, &symbol_by_file_and_name, &mut resolutions); + index.import_resolutions = resolutions; + index.external_modules = build_external_modules(&index.imports, 
&index.import_resolutions); +} + +fn build_external_modules( + imports: &[ImportRecord], + import_resolutions: &[ImportResolutionRecord], +) -> Vec { + let resolved_import_ids: HashSet = import_resolutions + .iter() + .map(|resolution| resolution.import_id) + .collect(); + imports + .iter() + .filter(|import| !resolved_import_ids.contains(&import.id)) + .filter(|import| import_is_external_candidate(import)) + .filter_map(|import| { + Some(ExternalModuleRecord { + id: 0, + import_id: import.id, + file_id: import.file_id, + module: import.module.clone(), + name: external_module_name(import)?, + alias: import.alias.clone(), + range: import.range, + }) + }) + .enumerate() + .map(|(id, mut record)| { + record.id = id as u32; + record + }) + .collect() +} + +fn import_is_external_candidate(import: &ImportRecord) -> bool { + if import.kind == ImportKind::FutureImport { + return false; + } + if let Some(module) = import.module.as_deref() { + return !module.starts_with('.'); + } + import + .name + .as_deref() + .is_some_and(|name| !name.starts_with('.')) +} + +fn external_module_name(import: &ImportRecord) -> Option { + import.name.clone().or_else(|| import.module.clone()) +} + +fn typescript_local_exported_symbol_map( + index: &TypeScriptIndex, + symbol_by_file_and_name: &HashMap<(u32, String), u32>, +) -> HashMap<(u32, String), u32> { + let mut exported_symbols = HashMap::new(); + for export in &index.exports { + if export.source_module.is_some() { + continue; + } + let Some(name) = export.name.as_deref() else { + continue; + }; + let symbol_id = export.symbol_id.or_else(|| { + export.local_name.as_deref().and_then(|local_name| { + symbol_by_file_and_name + .get(&(export.file_id, local_name.to_owned())) + .copied() + }) + }); + if let Some(symbol_id) = symbol_id { + exported_symbols.insert((export.file_id, name.to_owned()), symbol_id); + if export.kind == ExportKind::ExportEquals { + exported_symbols.insert((export.file_id, "default".to_owned()), symbol_id); + } + } + } + 
exported_symbols +} + +fn resolve_typescript_import_symbols( + index: &TypeScriptIndex, + symbol_by_file_and_name: &HashMap<(u32, String), u32>, + resolutions: &mut [ImportResolutionRecord], +) { + let local_exported_symbols = + typescript_local_exported_symbol_map(index, symbol_by_file_and_name); + let import_by_id: HashMap = index + .imports + .iter() + .map(|import| (import.id, import)) + .collect(); + let mut exported_symbol_by_file_and_name = local_exported_symbols.clone(); + + for _ in 0..=index.exports.len() { + let mut changed = false; + for resolution in resolutions.iter_mut() { + let Some(import) = import_by_id.get(&resolution.import_id) else { + continue; + }; + let target_symbol_id = resolve_typescript_import_symbol( + import, + resolution.target_file_id, + &exported_symbol_by_file_and_name, + symbol_by_file_and_name, + ); + if resolution.target_symbol_id != target_symbol_id { + resolution.target_symbol_id = target_symbol_id; + changed = true; + } + } + + let next_exported_symbol_by_file_and_name = + typescript_resolved_exported_symbol_map(index, &local_exported_symbols, resolutions); + if next_exported_symbol_by_file_and_name != exported_symbol_by_file_and_name { + exported_symbol_by_file_and_name = next_exported_symbol_by_file_and_name; + changed = true; + } + if !changed { + break; + } + } +} + +fn typescript_resolved_exported_symbol_map( + index: &TypeScriptIndex, + local_exported_symbols: &HashMap<(u32, String), u32>, + resolutions: &[ImportResolutionRecord], +) -> HashMap<(u32, String), u32> { + let mut exported_symbols = local_exported_symbols.clone(); + for _ in 0..=index.exports.len() { + let next_exported_symbols = typescript_exported_symbol_map( + index, + local_exported_symbols, + &exported_symbols, + resolutions, + ); + if next_exported_symbols == exported_symbols { + break; + } + exported_symbols = next_exported_symbols; + } + exported_symbols +} + +fn typescript_exported_symbol_map( + index: &TypeScriptIndex, + local_exported_symbols: 
&HashMap<(u32, String), u32>, + previous_exported_symbols: &HashMap<(u32, String), u32>, + resolutions: &[ImportResolutionRecord], +) -> HashMap<(u32, String), u32> { + let resolution_by_import_id: HashMap = resolutions + .iter() + .map(|resolution| (resolution.import_id, resolution)) + .collect(); + let mut exported_symbols = local_exported_symbols.clone(); + + for export in &index.exports { + let Some(import_id) = export.import_id else { + continue; + }; + let Some(resolution) = resolution_by_import_id.get(&import_id) else { + continue; + }; + match export.kind { + ExportKind::Named | ExportKind::Default | ExportKind::ExportEquals => { + let Some(name) = export.name.as_deref() else { + continue; + }; + if let Some(symbol_id) = resolution.target_symbol_id { + exported_symbols.insert((export.file_id, name.to_owned()), symbol_id); + } + } + ExportKind::Wildcard => { + for ((file_id, name), symbol_id) in previous_exported_symbols { + if *file_id == resolution.target_file_id && name != "default" { + exported_symbols.insert((export.file_id, name.clone()), *symbol_id); + } + } + } + ExportKind::Namespace => {} + } + } + + exported_symbols +} + +fn typescript_namespace_export_file_map( + index: &TypeScriptIndex, + resolutions: &[ImportResolutionRecord], +) -> HashMap<(u32, String), u32> { + let resolution_by_import_id: HashMap = resolutions + .iter() + .map(|resolution| (resolution.import_id, resolution)) + .collect(); + let mut namespace_exports = HashMap::new(); + for export in &index.exports { + if export.kind != ExportKind::Namespace { + continue; + } + let Some(name) = export.name.as_deref() else { + continue; + }; + let Some(import_id) = export.import_id else { + continue; + }; + let Some(resolution) = resolution_by_import_id.get(&import_id) else { + continue; + }; + namespace_exports.insert((export.file_id, name.to_owned()), resolution.target_file_id); + } + namespace_exports +} + +fn resolve_typescript_import_symbol( + import: &ImportRecord, + target_file_id: 
u32, + exported_symbol_by_file_and_name: &HashMap<(u32, String), u32>, + symbol_by_file_and_name: &HashMap<(u32, String), u32>, +) -> Option { + let export_name = match import.kind { + ImportKind::DefaultImport => "default", + ImportKind::NamedImport => import.name.as_deref()?, + ImportKind::Import + | ImportKind::FromImport + | ImportKind::FutureImport + | ImportKind::SideEffect + | ImportKind::NamespaceImport + | ImportKind::DynamicImport => return None, + }; + exported_symbol_by_file_and_name + .get(&(target_file_id, export_name.to_owned())) + .copied() + .or_else(|| { + symbol_by_file_and_name + .get(&(target_file_id, export_name.to_owned())) + .copied() + }) +} + +fn resolve_typescript_relative_module( + source_file: &FileRecord, + module: &str, + file_by_path: &HashMap, +) -> Option { + let base = normalize_typescript_relative_module(&source_file.path, module)?; + for candidate in typescript_module_candidates(&base) { + if let Some(file_id) = file_by_path.get(candidate.as_str()).copied() { + return Some(file_id); + } + } + None +} + +fn normalize_typescript_relative_module(source_path: &str, module: &str) -> Option { + let source_dir = source_path + .rsplit_once('/') + .map(|(dir, _)| dir) + .unwrap_or(""); + let raw_path = if source_dir.is_empty() { + module.to_owned() + } else { + format!("{source_dir}/{module}") + }; + let mut parts = Vec::new(); + for part in raw_path.split('/') { + match part { + "" | "." => {} + ".." 
=> { + parts.pop()?; + } + _ => parts.push(part), + } + } + Some(parts.join("/")) +} + +fn resolve_typescript_config_module( + source_file: &FileRecord, + module: &str, + file_by_path: &HashMap, + ts_configs: &[TypeScriptConfig], +) -> Option { + let config = typescript_config_for_file(&source_file.path, ts_configs)?; + + for mapping in &config.paths { + let Some(target) = mapping.apply(module) else { + continue; + }; + let Some(base) = normalize_typescript_config_path(&config.path_base, &target) else { + continue; + }; + if let Some(file_id) = resolve_typescript_module_base(&base, file_by_path) { + return Some(file_id); + } + } + + let base_url = config.base_url.as_deref()?; + let base = normalize_typescript_config_path(base_url, module)?; + resolve_typescript_module_base(&base, file_by_path) +} + +fn resolve_typescript_module_base(base: &str, file_by_path: &HashMap) -> Option { + for candidate in typescript_module_candidates(base) { + if let Some(file_id) = file_by_path.get(candidate.as_str()).copied() { + return Some(file_id); + } + } + None +} + +fn typescript_config_for_file<'a>( + file_path: &str, + ts_configs: &'a [TypeScriptConfig], +) -> Option<&'a TypeScriptConfig> { + ts_configs + .iter() + .filter(|config| typescript_file_is_under_config(file_path, &config.dir)) + .max_by_key(|config| config.dir.len()) +} + +fn typescript_file_is_under_config(file_path: &str, config_dir: &str) -> bool { + config_dir.is_empty() + || file_path == config_dir + || file_path + .strip_prefix(config_dir) + .is_some_and(|rest| rest.starts_with('/')) +} + +fn typescript_module_candidates(base: &str) -> Vec { + const EXTENSIONS: &[&str] = &["ts", "tsx", "d.ts", "js", "jsx"]; + let mut candidates = vec![base.to_owned()]; + if !has_typescript_module_suffix(base, EXTENSIONS) { + candidates.extend( + EXTENSIONS + .iter() + .map(|extension| format!("{base}.{extension}")), + ); + } + candidates.extend( + EXTENSIONS + .iter() + .map(|extension| format!("{base}/index.{extension}")), + ); 
+ candidates +} + +fn has_typescript_module_suffix(path: &str, suffixes: &[&str]) -> bool { + suffixes.iter().any(|suffix| { + path.strip_suffix(suffix) + .is_some_and(|prefix| prefix.ends_with('.')) + }) +} + +fn resolve_typescript_references(index: &mut TypeScriptIndex, candidates: Vec) { + let mut strings = std::mem::take(&mut index.strings); + let symbol_by_file_and_name: HashMap<(u32, String), u32> = index + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| ((symbol.file_id, symbol.name.to_string()), symbol.id)) + .collect(); + let local_exported_symbols = + typescript_local_exported_symbol_map(index, &symbol_by_file_and_name); + let exported_symbol_by_file_and_name = typescript_resolved_exported_symbol_map( + index, + &local_exported_symbols, + &index.import_resolutions, + ); + let namespace_export_file_by_file_and_name = + typescript_namespace_export_file_map(index, &index.import_resolutions); + let resolution_by_import_id: HashMap = index + .import_resolutions + .iter() + .map(|resolution| (resolution.import_id, resolution)) + .collect(); + let mut imported_symbol_by_binding: HashMap<(u32, String), (u32, u32)> = HashMap::new(); + let mut imported_module_by_qualifier: HashMap<(u32, String), (u32, u32)> = HashMap::new(); + let external_import_ids: HashSet = index + .external_modules + .iter() + .map(|external_module| external_module.import_id) + .collect(); + let mut external_import_by_binding: HashMap<(u32, String), u32> = HashMap::new(); + + for import in &index.imports { + if external_import_ids.contains(&import.id) { + if let Some(binding) = typescript_import_binding_name(import) { + external_import_by_binding.insert((import.file_id, binding), import.id); + } + } + let Some(resolution) = resolution_by_import_id.get(&import.id) else { + continue; + }; + if import.kind == ImportKind::NamespaceImport { + if let Some(alias) = import.alias.as_deref() { + imported_module_by_qualifier.insert( + (import.file_id, alias.to_owned()), + 
(resolution.target_file_id, import.id), + ); + } + } else if import.kind == ImportKind::NamedImport { + if let (Some(binding), Some(name)) = ( + typescript_import_binding_name(import), + import.name.as_deref(), + ) { + if let Some(target_file_id) = namespace_export_file_by_file_and_name + .get(&(resolution.target_file_id, name.to_owned())) + .copied() + { + imported_module_by_qualifier + .insert((import.file_id, binding), (target_file_id, import.id)); + } + } + } + let Some(target_symbol_id) = resolution.target_symbol_id else { + continue; + }; + let Some(binding) = typescript_import_binding_name(import) else { + continue; + }; + imported_symbol_by_binding.insert((import.file_id, binding), (target_symbol_id, import.id)); + } + + let symbol_file_ids: HashMap = index + .symbols + .iter() + .map(|symbol| (symbol.id, symbol.file_id)) + .collect(); + let indexed_local_symbols = IndexedLocalSymbols::from_symbols(index.symbols.iter()); + let mut references = Vec::new(); + let mut external_references = Vec::new(); + let mut function_calls = Vec::new(); + let mut subclass_edges = Vec::new(); + let mut subclass_edge_pairs = HashSet::new(); + for candidate in candidates { + let resolved_target = if let Some(qualifier) = candidate.qualifier.as_ref() { + imported_module_by_qualifier + .get(&(candidate.source_file_id, qualifier.clone())) + .and_then(|(target_file_id, import_id)| { + exported_symbol_by_file_and_name + .get(&(*target_file_id, candidate.name.clone())) + .or_else(|| { + symbol_by_file_and_name.get(&(*target_file_id, candidate.name.clone())) + }) + .copied() + .map(|symbol_id| (symbol_id, Some(*import_id))) + }) + } else { + let local_target = typescript_indexed_local_symbol_for_reference( + candidate.source_symbol_id, + &candidate.name, + candidate.range, + &indexed_local_symbols, + ) + .map(|symbol_id| (symbol_id, None)); + let imported_target = imported_symbol_by_binding + .get(&(candidate.source_file_id, candidate.name.clone())) + .copied(); + let 
same_file_target = symbol_by_file_and_name + .get(&(candidate.source_file_id, candidate.name.clone())) + .copied() + .map(|symbol_id| (symbol_id, None)); + local_target.or(imported_target + .map(|(symbol_id, import_id)| (symbol_id, Some(import_id))) + .or(same_file_target)) + }; + let Some((target_symbol_id, import_id)) = resolved_target else { + let mut call_import_id = None; + if candidate.qualifier.is_none() { + if let Some(import_id) = external_import_by_binding + .get(&(candidate.source_file_id, candidate.name.clone())) + { + let name = strings.intern(&candidate.name); + call_import_id = Some(*import_id); + external_references.push(ExternalReferenceRecord { + id: external_references.len() as u32, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + import_id: *import_id, + name, + range: candidate.range, + }); + } + } else if let Some(qualifier) = candidate.qualifier.as_ref() { + call_import_id = external_import_by_binding + .get(&(candidate.source_file_id, qualifier.clone())) + .copied(); + } + if let Some(call_range) = candidate.call_range { + let name = strings.intern(&candidate.name); + function_calls.push(FunctionCallRecord { + id: function_calls.len() as u32, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + target_symbol_id: None, + import_id: call_import_id, + name, + range: call_range, + name_range: candidate.range, + }); + } + continue; + }; + if let Some(call_range) = candidate.call_range { + let name = strings.intern(&candidate.name); + function_calls.push(FunctionCallRecord { + id: function_calls.len() as u32, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + target_symbol_id: Some(target_symbol_id), + import_id, + name, + range: call_range, + name_range: candidate.range, + }); + } + if candidate.source_symbol_id == Some(target_symbol_id) { + continue; + } + + let reference_id = references.len() as u32; + let name = 
strings.intern(&candidate.name); + references.push(ReferenceRecord { + id: reference_id, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + target_symbol_id, + import_id, + name, + range: candidate.range, + }); + if candidate.is_subclass { + if let Some(source_symbol_id) = candidate.source_symbol_id { + if subclass_edge_pairs.insert((source_symbol_id, target_symbol_id)) { + let Some(source_file_id) = symbol_file_ids.get(&source_symbol_id).copied() + else { + continue; + }; + let Some(target_file_id) = symbol_file_ids.get(&target_symbol_id).copied() + else { + continue; + }; + subclass_edges.push(SubclassRecord { + id: subclass_edges.len() as u32, + source_symbol_id, + target_symbol_id, + source_file_id, + target_file_id, + reference_id, + }); + } + } + } + } + index.references = references; + index.external_references = external_references; + index.function_calls = function_calls; + index.subclass_edges = subclass_edges; + index.strings = strings; +} + +fn typescript_import_binding_name(import: &ImportRecord) -> Option { + if let Some(alias) = import.alias.as_deref() { + return Some(alias.to_owned()); + } + match import.kind { + ImportKind::DefaultImport | ImportKind::NamedImport | ImportKind::DynamicImport => { + import.name.as_ref().map(|name| name.to_string()) + } + ImportKind::NamespaceImport => import.alias.as_ref().map(|alias| alias.to_string()), + ImportKind::Import + | ImportKind::FromImport + | ImportKind::FutureImport + | ImportKind::SideEffect => None, + } +} + +fn build_typescript_dependencies(index: &mut TypeScriptIndex) { + let symbol_file_ids: HashMap = index + .symbols + .iter() + .map(|symbol| (symbol.id, symbol.file_id)) + .collect(); + let mut dependency_reference_ids: BTreeMap<(u32, u32), Vec> = BTreeMap::new(); + + for reference in &index.references { + let Some(source_symbol_id) = reference.source_symbol_id else { + continue; + }; + dependency_reference_ids + .entry((source_symbol_id, 
/// Strips the surrounding quotes from a TypeScript string literal.
///
/// Leading and trailing whitespace is ignored. The literal must open and
/// close with the same delimiter: `'`, `"` or a backtick. The text between
/// the delimiters is returned verbatim (no escape processing); anything
/// else yields `None`.
fn typescript_string_literal_value(text: &str) -> Option<String> {
    let literal = text.trim();
    ["'", "\"", "`"].iter().find_map(|delimiter| {
        let after_open = literal.strip_prefix(*delimiter)?;
        after_open.strip_suffix(*delimiter).map(str::to_owned)
    })
}
/// Reports whether a directory should be left out of the recursive file walk.
///
/// Only the final path component is inspected; it is compared against a
/// fixed list of version-control, virtual-environment, cache and build
/// directory names. Paths without a final component, or whose final
/// component is not valid UTF-8, are never skipped.
fn should_skip_dir(path: &Path) -> bool {
    const SKIPPED_DIR_NAMES: [&str; 8] = [
        ".git",
        ".hg",
        ".svn",
        ".venv",
        "venv",
        "__pycache__",
        "node_modules",
        "target",
    ];
    match path.file_name().and_then(|name| name.to_str()) {
        Some(dir_name) => SKIPPED_DIR_NAMES.contains(&dir_name),
        None => false,
    }
}
Some(definition) = + first_child_of_kind(node, &["class_definition", "function_definition"]) + { + let kind = if definition.kind() == "class_definition" { + SymbolKind::Class + } else { + SymbolKind::Function + }; + extract_symbol_tree( + file_id, + source, + definition, + node.range(), + kind, + None, + index, + excluded_name_ranges, + ); + } + } + "import_statement" => push_import_statement(file_id, source, node, index), + "import_from_statement" | "future_import_statement" => { + push_from_import_statement(file_id, source, node, index) + } + "assignment" | "annotated_assignment" => { + push_global_assignment(file_id, source, node, index, excluded_name_ranges) + } + "expression_statement" => { + if let Some(assignment) = + first_child_of_kind(node, &["assignment", "annotated_assignment"]) + { + push_global_assignment(file_id, source, assignment, index, excluded_name_ranges); + } + } + _ => {} + } +} + +fn collect_nested_python_imports( + file_id: u32, + source: &str, + node: Node<'_>, + index: &mut PythonIndex, + is_root: bool, +) { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if !is_root { + match child.kind() { + "import_statement" => { + push_import_statement(file_id, source, child, index); + continue; + } + "import_from_statement" | "future_import_statement" => { + push_from_import_statement(file_id, source, child, index); + continue; + } + _ => {} + } + } + collect_nested_python_imports(file_id, source, child, index, false); + } +} + +fn extract_symbol_tree( + file_id: u32, + source: &str, + definition: Node<'_>, + declaration_range: Range, + kind: SymbolKind, + parent_symbol_id: Option, + index: &mut PythonIndex, + excluded_name_ranges: &mut Vec, +) -> Option { + let symbol_id = push_symbol_with_range( + file_id, + source, + definition, + declaration_range, + kind, + parent_symbol_id, + index, + )?; + if let Some(name_node) = definition.child_by_field_name("name") { + 
excluded_name_ranges.push(name_node.range().into()); + } + extract_nested_symbols( + file_id, + source, + definition, + Some(symbol_id), + index, + excluded_name_ranges, + ); + Some(symbol_id) +} + +fn extract_nested_symbols( + file_id: u32, + source: &str, + node: Node<'_>, + parent_symbol_id: Option, + index: &mut PythonIndex, + excluded_name_ranges: &mut Vec, +) { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + match child.kind() { + "class_definition" => { + extract_symbol_tree( + file_id, + source, + child, + child.range(), + SymbolKind::Class, + parent_symbol_id, + index, + excluded_name_ranges, + ); + } + "function_definition" => { + extract_symbol_tree( + file_id, + source, + child, + child.range(), + SymbolKind::Function, + parent_symbol_id, + index, + excluded_name_ranges, + ); + } + "decorated_definition" => { + if let Some(definition) = + first_child_of_kind(child, &["class_definition", "function_definition"]) + { + let kind = if definition.kind() == "class_definition" { + SymbolKind::Class + } else { + SymbolKind::Function + }; + extract_symbol_tree( + file_id, + source, + definition, + child.range(), + kind, + parent_symbol_id, + index, + excluded_name_ranges, + ); + } + } + _ => extract_nested_symbols( + file_id, + source, + child, + parent_symbol_id, + index, + excluded_name_ranges, + ), + } + } +} + +fn push_symbol_with_range( + file_id: u32, + source: &str, + node: Node<'_>, + declaration_range: Range, + kind: SymbolKind, + parent_symbol_id: Option, + index: &mut PythonIndex, +) -> Option { + let Some(name_node) = node.child_by_field_name("name") else { + return None; + }; + let Ok(name) = name_node.utf8_text(source.as_bytes()) else { + return None; + }; + let symbol_id = index.symbols.len() as u32; + let name = index.intern(name); + index.symbols.push(SymbolRecord { + id: symbol_id, + file_id, + parent_symbol_id, + is_top_level: parent_symbol_id.is_none(), + name, + kind, + range: declaration_range.into(), + 
/// Extracts the text between the quotes of a static Python string literal.
///
/// Surrounding whitespace is ignored. Any run of the prefix letters
/// `r`, `b`, `u` (either case) before the opening quote is accepted and
/// discarded. An `f`/`F` prefix, or any other character before the quote,
/// makes the literal non-static and yields `None`. Triple-quoted forms are
/// tried before single-quoted ones, and the content is returned verbatim
/// (escape sequences are not decoded).
fn python_string_literal_value(text: &str) -> Option<String> {
    let trimmed = text.trim();
    // Everything before the first quote character is the literal's prefix.
    let quote_at = trimmed.find(|ch: char| ch == '\'' || ch == '"')?;
    let (prefix, quoted) = trimmed.split_at(quote_at);
    let prefix_is_static = prefix
        .chars()
        .all(|ch| matches!(ch, 'r' | 'R' | 'b' | 'B' | 'u' | 'U'));
    if !prefix_is_static {
        return None;
    }
    ["'''", "\"\"\"", "'", "\""].iter().find_map(|delimiter| {
        quoted
            .strip_prefix(*delimiter)?
            .strip_suffix(*delimiter)
            .map(str::to_owned)
    })
}
+ innermost_symbol_for_range(symbol_ranges, node.range().into()) + { + let mut targets = Vec::new(); + collect_parameter_targets(node, &mut targets); + push_local_binding_names(source, source_symbol_id, targets, bindings); + } + return; + } + "lambda" => { + push_lambda_binding_scope(source, node, symbol_ranges, scoped_bindings); + } + "list_comprehension" + | "set_comprehension" + | "dictionary_comprehension" + | "generator_expression" => { + push_comprehension_binding_scope(source, node, symbol_ranges, scoped_bindings); + } + "global_statement" => { + if let Some(source_symbol_id) = + innermost_symbol_for_range(symbol_ranges, node.range().into()) + { + for name in declaration_names(source, node) { + global_declarations + .entry(source_symbol_id) + .or_default() + .insert(name); + } + } + return; + } + "nonlocal_statement" => { + if let Some(source_symbol_id) = + innermost_symbol_for_range(symbol_ranges, node.range().into()) + { + bindings + .entry(source_symbol_id) + .or_default() + .extend(declaration_names(source, node)); + } + return; + } + "assignment" | "annotated_assignment" | "augmented_assignment" => { + if let Some(left) = node.child_by_field_name("left") { + push_local_binding_targets(source, left, symbol_ranges, bindings); + } + if let Some(right) = node.child_by_field_name("right") { + collect_local_bindings_from_node( + source, + right, + symbol_ranges, + bindings, + global_declarations, + scoped_bindings, + ); + } + return; + } + "for_statement" => { + if let Some(left) = node.child_by_field_name("left") { + push_local_binding_targets(source, left, symbol_ranges, bindings); + } + } + "with_statement" => { + if let Some(with_clause) = first_child_of_kind(node, &["with_clause"]) { + push_as_pattern_binding_targets(source, with_clause, symbol_ranges, bindings); + } + } + "except_clause" => { + if let Some(alias) = node.child_by_field_name("alias") { + push_local_binding_targets(source, alias, symbol_ranges, bindings); + } + if let Some(value) = 
node.child_by_field_name("value") { + push_as_pattern_binding_targets(source, value, symbol_ranges, bindings); + } + } + "case_clause" => { + push_match_pattern_binding_targets(source, node, symbol_ranges, bindings); + } + "import_statement" | "import_from_statement" | "future_import_statement" => { + return; + } + _ => {} + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_local_bindings_from_node( + source, + child, + symbol_ranges, + bindings, + global_declarations, + scoped_bindings, + ); + } +} + +fn declaration_names(source: &str, node: Node<'_>) -> Vec { + let mut names = Vec::new(); + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() != "identifier" { + continue; + } + if let Ok(name) = child.utf8_text(source.as_bytes()) { + names.push(name.to_owned()); + } + } + names +} + +fn push_comprehension_binding_scope( + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + scoped_bindings: &mut Vec, +) { + let Some(source_symbol_id) = innermost_symbol_for_range(symbol_ranges, node.range().into()) + else { + return; + }; + + let mut targets = Vec::new(); + collect_comprehension_targets(node, &mut targets); + let mut names = HashSet::new(); + for target in targets { + if let Ok(name) = target.utf8_text(source.as_bytes()) { + names.insert(name.to_owned()); + } + } + if !names.is_empty() { + scoped_bindings.push(LocalBindingScope { + source_symbol_id, + range: node.range().into(), + names, + }); + } +} + +fn collect_comprehension_targets<'tree>(node: Node<'tree>, out: &mut Vec>) { + if node.kind() == "for_in_clause" { + if let Some(left) = node.child_by_field_name("left") { + collect_assignment_targets(left, out); + } + return; + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_comprehension_targets(child, out); + } +} + +fn push_lambda_binding_scope( + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, 
SourceRange)], + scoped_bindings: &mut Vec, +) { + let Some(parameters) = node.child_by_field_name("parameters") else { + return; + }; + let Some(body) = node.child_by_field_name("body") else { + return; + }; + let Some(source_symbol_id) = innermost_symbol_for_range(symbol_ranges, body.range().into()) + else { + return; + }; + + let mut targets = Vec::new(); + collect_parameter_targets(parameters, &mut targets); + let mut names = HashSet::new(); + for target in targets { + if let Ok(name) = target.utf8_text(source.as_bytes()) { + names.insert(name.to_owned()); + } + } + if !names.is_empty() { + scoped_bindings.push(LocalBindingScope { + source_symbol_id, + range: body.range().into(), + names, + }); + } +} + +fn push_match_pattern_binding_targets( + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + bindings: &mut HashMap>, +) { + let mut targets = Vec::new(); + collect_case_clause_binding_targets(node, &mut targets); + for target in targets { + push_local_binding_targets(source, target, symbol_ranges, bindings); + } +} + +fn collect_case_clause_binding_targets<'tree>(node: Node<'tree>, out: &mut Vec>) { + let mut cursor = node.walk(); + for (index, child) in node.children(&mut cursor).enumerate() { + if !child.is_named() || node.field_name_for_child(index as u32).is_some() { + continue; + } + if child.kind() == "case_pattern" { + collect_match_pattern_targets(child, out); + } + } +} + +fn collect_match_pattern_targets<'tree>(node: Node<'tree>, out: &mut Vec>) { + match node.kind() { + "identifier" => out.push(node), + "dotted_name" => { + let mut cursor = node.walk(); + let identifiers: Vec<_> = node + .named_children(&mut cursor) + .filter(|child| child.kind() == "identifier") + .collect(); + if identifiers.len() == 1 { + out.push(identifiers[0]); + } + } + "dict_pattern" => { + let mut cursor = node.walk(); + for child in node.children_by_field_name("value", &mut cursor) { + collect_match_pattern_targets(child, out); + } + let mut cursor = 
node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() == "splat_pattern" { + collect_match_pattern_targets(child, out); + } + } + } + "class_pattern" => { + let mut seen_constructor = false; + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if !seen_constructor && child.kind() == "dotted_name" { + seen_constructor = true; + continue; + } + collect_match_pattern_targets(child, out); + } + } + "keyword_pattern" => { + let mut skipped_keyword = false; + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if !skipped_keyword && child.kind() == "identifier" { + skipped_keyword = true; + continue; + } + collect_match_pattern_targets(child, out); + } + } + "case_pattern" | "as_pattern" | "list_pattern" | "tuple_pattern" | "splat_pattern" + | "union_pattern" => { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_match_pattern_targets(child, out); + } + } + _ => {} + } +} + +fn push_local_binding_targets( + source: &str, + target_root: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + bindings: &mut HashMap>, +) { + if let Some(source_symbol_id) = + innermost_symbol_for_range(symbol_ranges, target_root.range().into()) + { + let mut targets = Vec::new(); + collect_assignment_targets(target_root, &mut targets); + push_local_binding_names(source, source_symbol_id, targets, bindings); + } +} + +fn push_as_pattern_binding_targets( + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + bindings: &mut HashMap>, +) { + let mut targets = Vec::new(); + collect_as_pattern_alias_targets(node, &mut targets); + for target in targets { + push_local_binding_targets(source, target, symbol_ranges, bindings); + } +} + +fn collect_as_pattern_alias_targets<'tree>(node: Node<'tree>, out: &mut Vec>) { + if node.kind() == "as_pattern" { + if let Some(alias) = node.child_by_field_name("alias") { + collect_assignment_targets(alias, out); + } + 
return; + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_as_pattern_alias_targets(child, out); + } +} + +fn collect_parameter_targets<'tree>(node: Node<'tree>, out: &mut Vec>) { + match node.kind() { + "identifier" => out.push(node), + "typed_parameter" | "default_parameter" | "typed_default_parameter" => { + if let Some(name) = node.child_by_field_name("name") { + collect_parameter_targets(name, out); + } else if let Some(first_child) = first_named_child(node) { + collect_parameter_targets(first_child, out); + } + } + "list_splat_pattern" | "dictionary_splat_pattern" => { + if let Some(first_child) = first_named_child(node) { + collect_parameter_targets(first_child, out); + } + } + "parameters" | "lambda_parameters" => { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_parameter_targets(child, out); + } + } + _ => {} + } +} + +fn push_local_binding_names( + source: &str, + source_symbol_id: u32, + targets: Vec>, + bindings: &mut HashMap>, +) { + for target in targets { + let Ok(name) = target.utf8_text(source.as_bytes()) else { + continue; + }; + bindings + .entry(source_symbol_id) + .or_default() + .insert(name.to_owned()); + } +} + +fn collect_identifier_candidates( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + out: &mut Vec, +) { + if node.kind() == "lambda_parameters" { + collect_lambda_parameter_value_identifier_candidates( + file_id, + source, + node, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + out, + ); + return; + } + + if node.kind() == "attribute" { + if let (Some(object), Some(attribute)) = ( + node.child_by_field_name("object"), + node.child_by_field_name("attribute"), + ) { + let range = attribute.range().into(); + if attribute.kind() == 
"identifier" && !range_matches_any(range, excluded_ranges) { + if let (Ok(qualifier), Ok(name)) = ( + object.utf8_text(source.as_bytes()), + attribute.utf8_text(source.as_bytes()), + ) { + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if !qualified_reference_is_shadowed( + source_symbol_id, + qualifier, + object.range().into(), + local_bindings_by_symbol_id, + local_binding_scopes, + ) { + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: Some(qualifier.to_owned()), + range, + is_subclass: false, + call_range: None, + }); + } + } + } + } + if let Some(object) = node.child_by_field_name("object") { + collect_identifier_candidates( + file_id, + source, + object, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + out, + ); + } + return; + } + + if matches!( + node.kind(), + "import_statement" + | "import_from_statement" + | "future_import_statement" + | "global_statement" + | "nonlocal_statement" + ) { + return; + } + + let range = node.range().into(); + if node.kind() == "identifier" && !range_matches_any(range, excluded_ranges) { + if let Ok(name) = node.utf8_text(source.as_bytes()) { + let source_symbol_id = innermost_symbol_for_range(symbol_ranges, range); + if is_shadowed_local_binding( + source_symbol_id, + name, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + ) { + return; + } + out.push(ReferenceCandidate { + source_file_id: file_id, + source_symbol_id, + name: name.to_owned(), + qualifier: None, + range, + is_subclass: false, + call_range: None, + }); + } + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_identifier_candidates( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + out, + ); + } +} + +fn qualified_reference_is_shadowed( + source_symbol_id: Option, + qualifier: &str, + range: 
SourceRange, + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], +) -> bool { + let binding = qualifier.split('.').next().unwrap_or(qualifier); + is_shadowed_local_binding( + source_symbol_id, + binding, + range, + local_bindings_by_symbol_id, + local_binding_scopes, + ) +} + +fn collect_lambda_parameter_value_identifier_candidates( + file_id: u32, + source: &str, + node: Node<'_>, + symbol_ranges: &[(u32, SourceRange)], + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], + excluded_ranges: &[SourceRange], + out: &mut Vec, +) { + match node.kind() { + "default_parameter" | "typed_default_parameter" => { + if let Some(value) = node.child_by_field_name("value") { + collect_identifier_candidates( + file_id, + source, + value, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + out, + ); + } + } + "lambda_parameters" => { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + collect_lambda_parameter_value_identifier_candidates( + file_id, + source, + child, + symbol_ranges, + local_bindings_by_symbol_id, + local_binding_scopes, + excluded_ranges, + out, + ); + } + } + _ => {} + } +} + +fn is_shadowed_local_binding( + source_symbol_id: Option, + name: &str, + range: SourceRange, + local_bindings_by_symbol_id: &HashMap>, + local_binding_scopes: &[LocalBindingScope], +) -> bool { + let Some(source_symbol_id) = source_symbol_id else { + return false; + }; + if local_bindings_by_symbol_id + .get(&source_symbol_id) + .is_some_and(|bindings| bindings.contains(name)) + { + return true; + } + local_binding_scopes.iter().any(|scope| { + scope.source_symbol_id == source_symbol_id + && contains_range(scope.range, range) + && scope.names.contains(name) + }) +} + +fn innermost_symbol_for_range( + symbol_ranges: &[(u32, SourceRange)], + range: SourceRange, +) -> Option { + innermost_symbol_range_for_range(symbol_ranges, range).map(|(symbol_id, 
_)| symbol_id) +} + +fn innermost_symbol_range_for_range( + symbol_ranges: &[(u32, SourceRange)], + range: SourceRange, +) -> Option<(u32, SourceRange)> { + symbol_ranges + .iter() + .filter(|(_, symbol_range)| contains_range(*symbol_range, range)) + .min_by_key(|(_, symbol_range)| symbol_range.end_byte - symbol_range.start_byte) + .map(|(symbol_id, symbol_range)| (*symbol_id, *symbol_range)) +} + +fn contains_range(container: SourceRange, range: SourceRange) -> bool { + container.start_byte <= range.start_byte && range.end_byte <= container.end_byte +} + +fn range_matches_any(range: SourceRange, others: &[SourceRange]) -> bool { + others + .iter() + .any(|other| range.start_byte == other.start_byte && range.end_byte == other.end_byte) +} + +fn push_import_statement(file_id: u32, source: &str, node: Node<'_>, index: &mut PythonIndex) { + let text = node_text(source, node); + let imports = text + .trim_start_matches("import") + .split(',') + .map(str::trim) + .filter(|part| !part.is_empty()); + + for import in imports { + let (name, alias) = split_alias(import); + let name = index.intern(name); + let alias = alias.map(|value| index.intern(value)); + index.imports.push(ImportRecord { + id: index.imports.len() as u32, + file_id, + kind: ImportKind::Import, + module: None, + name: Some(name), + alias, + range: node.range().into(), + }); + } +} + +fn push_from_import_statement(file_id: u32, source: &str, node: Node<'_>, index: &mut PythonIndex) { + let text = node_text(source, node); + let stripped = text.trim(); + let kind = if node.kind() == "future_import_statement" { + ImportKind::FutureImport + } else { + ImportKind::FromImport + }; + let Some(after_from) = stripped.strip_prefix("from ") else { + return; + }; + let Some((module, names)) = after_from.split_once(" import ") else { + return; + }; + + for import in python_from_import_names(names) { + let (name, alias) = split_alias(&import); + let module = index.intern(module.trim()); + let name = index.intern(name); + 
let alias = alias.map(|value| index.intern(value)); + index.imports.push(ImportRecord { + id: index.imports.len() as u32, + file_id, + kind, + module: Some(module), + name: Some(name), + alias, + range: node.range().into(), + }); + } +} + +fn python_from_import_names(names: &str) -> Vec { + let mut cleaned = String::with_capacity(names.len()); + for line in names.lines() { + let line = line + .split_once('#') + .map_or(line, |(before_comment, _)| before_comment); + cleaned.push_str(line); + cleaned.push('\n'); + } + + let mut trimmed = cleaned.trim(); + if let Some(without_open) = trimmed.strip_prefix('(') { + trimmed = without_open.trim(); + } + if let Some(without_close) = trimmed.strip_suffix(')') { + trimmed = without_close.trim(); + } + + trimmed + .split(',') + .map(str::trim) + .filter(|part| !part.is_empty()) + .map(ToOwned::to_owned) + .collect() +} + +fn first_child_of_kind<'tree>(node: Node<'tree>, kinds: &[&str]) -> Option> { + let mut cursor = node.walk(); + let child = node + .named_children(&mut cursor) + .find(|child| kinds.iter().any(|kind| child.kind() == *kind)); + child +} + +fn first_named_child(node: Node<'_>) -> Option> { + let mut cursor = node.walk(); + let child = node.named_children(&mut cursor).next(); + child +} + +fn split_alias(import: &str) -> (&str, Option<&str>) { + if let Some((name, alias)) = import.split_once(" as ") { + (name.trim(), Some(alias.trim())) + } else { + (import.trim(), None) + } +} + +fn node_text<'source>(source: &'source str, node: Node<'_>) -> &'source str { + &source[node.start_byte()..node.end_byte()] +} + +fn line_count(source: &str) -> usize { + if source.is_empty() { + 0 + } else { + source + .as_bytes() + .iter() + .filter(|byte| **byte == b'\n') + .count() + + usize::from(!source.ends_with('\n')) + } +} + +fn resolve_python_imports(index: &mut PythonIndex) { + let module_to_file: HashMap<&str, u32> = index + .files + .iter() + .filter_map(|file| file.module_name.as_deref().map(|module| (module, file.id))) 
+ .collect(); + let symbol_to_id: HashMap<(u32, &str), u32> = index + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| ((symbol.file_id, symbol.name.as_ref()), symbol.id)) + .collect(); + + let mut resolutions = Vec::new(); + for import in &index.imports { + let Some(source_file) = index.files.get(import.file_id as usize) else { + continue; + }; + let resolution = match import.kind { + ImportKind::Import => { + resolve_plain_import(import, &module_to_file).map(|target_file_id| { + ImportResolutionRecord { + id: resolutions.len() as u32, + import_id: import.id, + source_file_id: import.file_id, + target_file_id, + target_symbol_id: None, + } + }) + } + ImportKind::FromImport | ImportKind::FutureImport => resolve_from_import( + import, + source_file, + &module_to_file, + &symbol_to_id, + resolutions.len() as u32, + ), + ImportKind::SideEffect + | ImportKind::DefaultImport + | ImportKind::NamedImport + | ImportKind::NamespaceImport + | ImportKind::DynamicImport => None, + }; + if let Some(resolution) = resolution { + resolutions.push(resolution); + } + } + index.import_resolutions = resolutions; + resolve_python_reexport_imports(index); + index.external_modules = build_external_modules(&index.imports, &index.import_resolutions); +} + +fn resolve_python_reexport_imports(index: &mut PythonIndex) { + let import_by_id: HashMap = index + .imports + .iter() + .map(|import| (import.id, import)) + .collect(); + + for _ in 0..index.import_resolutions.len() { + let exported_symbols_by_file = python_exported_symbols_by_file(index); + + let mut changed = false; + for resolution in &mut index.import_resolutions { + if resolution.target_symbol_id.is_some() { + continue; + } + let Some(import) = import_by_id.get(&resolution.import_id) else { + continue; + }; + if import.kind != ImportKind::FromImport { + continue; + } + let Some(name) = import.name.as_deref() else { + continue; + }; + if name == "*" { + continue; + } + if let Some(target_symbol_id) = 
exported_symbols_by_file + .get(&resolution.target_file_id) + .and_then(|exports| exports.get(name)) + { + resolution.target_symbol_id = Some(*target_symbol_id); + changed = true; + } + } + + if !changed { + break; + } + } +} + +fn python_exported_symbols_by_file(index: &PythonIndex) -> ExportedSymbolsByFile { + let resolution_by_import_id: HashMap = index + .import_resolutions + .iter() + .map(|resolution| (resolution.import_id, resolution)) + .collect(); + let mut exports: ExportedSymbolsByFile = HashMap::new(); + + for symbol in index.symbols.iter().filter(|symbol| symbol.is_top_level) { + exports + .entry(symbol.file_id) + .or_default() + .insert(symbol.name.to_string(), symbol.id); + } + + for _ in 0..index.imports.len().max(1) { + let previous_exports = exports.clone(); + + for import in &index.imports { + if import.kind == ImportKind::FutureImport { + continue; + } + let Some(resolution) = resolution_by_import_id.get(&import.id) else { + continue; + }; + if is_wildcard_import(import) { + let Some(target_exports) = previous_exports.get(&resolution.target_file_id) else { + continue; + }; + let file_exports = exports.entry(import.file_id).or_default(); + for (name, target_symbol_id) in + wildcard_visible_exports(index, resolution.target_file_id, target_exports) + { + file_exports.insert(name.clone(), *target_symbol_id); + } + continue; + } + + let Some(binding) = import_binding_name(import) else { + continue; + }; + let Some(target_symbol_id) = resolution.target_symbol_id else { + continue; + }; + exports + .entry(import.file_id) + .or_default() + .insert(binding, target_symbol_id); + } + + if exports == previous_exports { + break; + } + } + + exports +} + +fn wildcard_visible_exports<'a>( + index: &'a PythonIndex, + file_id: u32, + exports: &'a BTreeMap, +) -> Vec<(&'a String, &'a u32)> { + let Some(all_exports) = index.all_exports_by_file.get(&file_id) else { + return exports.iter().collect(); + }; + all_exports + .iter() + .filter_map(|name| 
exports.get_key_value(name)) + .collect() +} + +fn resolve_python_references(index: &mut PythonIndex, candidates: Vec) { + let mut strings = std::mem::take(&mut index.strings); + let module_to_file: HashMap<&str, u32> = index + .files + .iter() + .filter_map(|file| file.module_name.as_deref().map(|module| (module, file.id))) + .collect(); + let internal_module_prefixes = internal_python_module_prefixes(&index.files); + let symbol_to_id: HashMap<(u32, &str), u32> = index + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| ((symbol.file_id, symbol.name.as_ref()), symbol.id)) + .collect(); + let resolution_by_import_id: HashMap = index + .import_resolutions + .iter() + .map(|resolution| (resolution.import_id, resolution)) + .collect(); + let exported_symbols_by_file = python_exported_symbols_by_file(index); + let mut imported_symbol_by_binding: HashMap<(u32, String), (u32, u32)> = HashMap::new(); + let mut local_imported_symbol_by_binding: HashMap<(u32, String), (u32, u32)> = HashMap::new(); + let mut imported_module_by_qualifier: HashMap<(u32, String), (u32, u32)> = HashMap::new(); + let mut imported_module_prefix_by_binding: HashMap<(u32, String), (String, u32)> = + HashMap::new(); + let external_import_ids: HashSet = index + .external_modules + .iter() + .map(|external_module| external_module.import_id) + .collect(); + let mut external_import_by_binding: HashMap<(u32, String), u32> = HashMap::new(); + let mut local_external_import_by_binding: HashMap<(u32, String), u32> = HashMap::new(); + let symbol_ranges_by_file: HashMap> = + symbol_ranges_by_file(&index.symbols); + + for import in &index.imports { + let import_source_symbol_id = symbol_ranges_by_file + .get(&import.file_id) + .and_then(|symbol_ranges| innermost_symbol_for_range(symbol_ranges, import.range)); + if external_import_ids.contains(&import.id) { + if let Some(binding) = import_binding_name(import) { + if let Some(source_symbol_id) = import_source_symbol_id { + 
local_external_import_by_binding.insert((source_symbol_id, binding), import.id); + } else { + external_import_by_binding.insert((import.file_id, binding), import.id); + } + } + } + if let Some(source_file) = index.files.get(import.file_id as usize) { + for (binding, module_prefix) in + import_module_prefix_bindings(import, source_file, &internal_module_prefixes) + { + imported_module_prefix_by_binding + .insert((import.file_id, binding), (module_prefix, import.id)); + } + } + let resolution = resolution_by_import_id.get(&import.id); + if is_wildcard_import(import) { + if let Some(resolution) = resolution { + if let Some(target_exports) = + exported_symbols_by_file.get(&resolution.target_file_id) + { + for (binding, target_symbol_id) in + wildcard_visible_exports(index, resolution.target_file_id, target_exports) + { + imported_symbol_by_binding.insert( + (import.file_id, binding.clone()), + (*target_symbol_id, import.id), + ); + } + } + } + continue; + } + let Some(resolution) = resolution else { + continue; + }; + if resolution.target_symbol_id.is_none() { + for qualifier in import_module_qualifiers(import) { + imported_module_by_qualifier.insert( + (import.file_id, qualifier), + (resolution.target_file_id, import.id), + ); + } + } + let Some(target_symbol_id) = resolution.target_symbol_id else { + continue; + }; + let Some(binding) = import_binding_name(import) else { + continue; + }; + if let Some(source_symbol_id) = import_source_symbol_id { + local_imported_symbol_by_binding + .insert((source_symbol_id, binding), (target_symbol_id, import.id)); + } else { + imported_symbol_by_binding + .insert((import.file_id, binding), (target_symbol_id, import.id)); + } + } + + let mut references = Vec::new(); + let mut external_references = Vec::new(); + for candidate in candidates { + let resolved_target = if let Some(qualifier) = candidate.qualifier.as_ref() { + imported_module_by_qualifier + .get(&(candidate.source_file_id, qualifier.clone())) + 
.and_then(|(target_file_id, import_id)| { + symbol_to_id + .get(&(*target_file_id, candidate.name.as_str())) + .copied() + .map(|symbol_id| (symbol_id, Some(*import_id))) + }) + .or_else(|| { + resolve_imported_module_attribute( + candidate.source_file_id, + qualifier, + &candidate.name, + &imported_module_prefix_by_binding, + &module_to_file, + &symbol_to_id, + ) + }) + } else { + let local_external_import_id = + candidate.source_symbol_id.and_then(|source_symbol_id| { + local_external_import_by_binding + .get(&(source_symbol_id, candidate.name.clone())) + .copied() + }); + let local_imported_target = candidate.source_symbol_id.and_then(|source_symbol_id| { + local_imported_symbol_by_binding + .get(&(source_symbol_id, candidate.name.clone())) + .copied() + }); + let imported_target = imported_symbol_by_binding + .get(&(candidate.source_file_id, candidate.name.clone())) + .copied(); + let same_file_target = symbol_to_id + .get(&(candidate.source_file_id, candidate.name.as_str())) + .copied() + .map(|symbol_id| (symbol_id, None)); + if local_external_import_id.is_some() { + None + } else { + local_imported_target + .or(imported_target) + .map(|(symbol_id, import_id)| (symbol_id, Some(import_id))) + .or(same_file_target) + } + }; + let Some((target_symbol_id, import_id)) = resolved_target else { + if candidate.qualifier.is_none() { + let local_import_id = candidate.source_symbol_id.and_then(|source_symbol_id| { + local_external_import_by_binding + .get(&(source_symbol_id, candidate.name.clone())) + .copied() + }); + if let Some(import_id) = local_import_id.or_else(|| { + external_import_by_binding + .get(&(candidate.source_file_id, candidate.name.clone())) + .copied() + }) { + let name = strings.intern(&candidate.name); + external_references.push(ExternalReferenceRecord { + id: external_references.len() as u32, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + import_id, + name, + range: candidate.range, + }); + } + } + 
continue; + }; + if candidate.source_symbol_id == Some(target_symbol_id) { + continue; + } + + let name = strings.intern(&candidate.name); + references.push(ReferenceRecord { + id: references.len() as u32, + source_file_id: candidate.source_file_id, + source_symbol_id: candidate.source_symbol_id, + target_symbol_id, + import_id, + name, + range: candidate.range, + }); + } + index.references = references; + index.external_references = external_references; + index.strings = strings; +} + +fn symbol_ranges_by_file(symbols: &[SymbolRecord]) -> HashMap> { + let mut ranges_by_file: HashMap> = HashMap::new(); + for symbol in symbols { + ranges_by_file + .entry(symbol.file_id) + .or_default() + .push((symbol.id, symbol.range)); + } + ranges_by_file +} + +fn internal_python_module_prefixes(files: &[FileRecord]) -> HashSet { + let mut prefixes = HashSet::new(); + for module in files.iter().filter_map(|file| file.module_name.as_deref()) { + let parts = module.split('.').collect::>(); + for i in 1..=parts.len() { + prefixes.insert(parts[..i].join(".")); + } + } + prefixes +} + +fn import_module_prefix_bindings( + import: &ImportRecord, + source_file: &FileRecord, + internal_module_prefixes: &HashSet, +) -> Vec<(String, String)> { + if is_wildcard_import(import) || import.kind == ImportKind::FutureImport { + return Vec::new(); + } + + let mut bindings = Vec::new(); + match import.kind { + ImportKind::Import => { + let Some(name) = import.name.as_deref() else { + return bindings; + }; + if let Some(alias) = import.alias.as_deref() { + if internal_module_prefixes.contains(name) { + bindings.push((alias.to_owned(), name.to_owned())); + } + } else if let Some(root) = name.split('.').next() { + if internal_module_prefixes.contains(root) { + bindings.push((root.to_owned(), root.to_owned())); + } + } + } + ImportKind::FromImport => { + let Some(module) = import + .module + .as_deref() + .and_then(|module| resolve_module_name(source_file, module)) + else { + return bindings; + }; + let 
Some(name) = import.name.as_deref() else { + return bindings; + }; + let binding = import.alias.as_deref().unwrap_or(name); + let module_prefix = join_module(&module, name); + if internal_module_prefixes.contains(&module_prefix) { + bindings.push((binding.to_owned(), module_prefix)); + } + } + ImportKind::FutureImport => {} + ImportKind::SideEffect + | ImportKind::DefaultImport + | ImportKind::NamedImport + | ImportKind::NamespaceImport + | ImportKind::DynamicImport => {} + } + bindings +} + +fn resolve_imported_module_attribute( + source_file_id: u32, + qualifier: &str, + name: &str, + imported_module_prefix_by_binding: &HashMap<(u32, String), (String, u32)>, + module_to_file: &HashMap<&str, u32>, + symbol_to_id: &HashMap<(u32, &str), u32>, +) -> Option<(u32, Option)> { + let (binding, suffix) = qualifier + .split_once('.') + .map_or((qualifier, None), |(binding, suffix)| { + (binding, Some(suffix)) + }); + let (module_prefix, import_id) = + imported_module_prefix_by_binding.get(&(source_file_id, binding.to_owned()))?; + let target_module = suffix.map_or_else( + || module_prefix.clone(), + |suffix| join_module(module_prefix, suffix), + ); + let target_file_id = module_to_file.get(target_module.as_str()).copied()?; + let target_symbol_id = symbol_to_id.get(&(target_file_id, name)).copied()?; + Some((target_symbol_id, Some(*import_id))) +} + +fn import_module_qualifiers(import: &ImportRecord) -> Vec { + let mut qualifiers = Vec::new(); + if let Some(alias) = import.alias.as_deref() { + qualifiers.push(alias.to_owned()); + } + match import.kind { + ImportKind::Import => { + if let Some(name) = import.name.as_deref() { + qualifiers.push(name.to_owned()); + } + } + ImportKind::FromImport | ImportKind::FutureImport => { + if import.alias.is_none() { + if let Some(name) = import.name.as_deref() { + qualifiers.push(name.to_owned()); + } + } + } + ImportKind::SideEffect + | ImportKind::DefaultImport + | ImportKind::NamedImport + | ImportKind::NamespaceImport + | 
ImportKind::DynamicImport => {} + } + qualifiers.sort(); + qualifiers.dedup(); + qualifiers +} + +fn build_python_dependencies(index: &mut PythonIndex) { + let symbol_file_ids: HashMap = index + .symbols + .iter() + .map(|symbol| (symbol.id, symbol.file_id)) + .collect(); + let mut dependency_reference_ids: BTreeMap<(u32, u32), Vec> = BTreeMap::new(); + + for reference in &index.references { + let Some(source_symbol_id) = reference.source_symbol_id else { + continue; + }; + dependency_reference_ids + .entry((source_symbol_id, reference.target_symbol_id)) + .or_default() + .push(reference.id); + } + + let dependencies = dependency_reference_ids + .into_iter() + .filter_map(|((source_symbol_id, target_symbol_id), reference_ids)| { + let source_file_id = symbol_file_ids.get(&source_symbol_id).copied()?; + let target_file_id = symbol_file_ids.get(&target_symbol_id).copied()?; + Some(DependencyRecord { + id: 0, + source_symbol_id, + target_symbol_id, + source_file_id, + target_file_id, + reference_count: reference_ids.len(), + reference_ids, + }) + }) + .enumerate() + .map(|(id, mut dependency)| { + dependency.id = id as u32; + dependency + }) + .collect(); + + index.dependencies = dependencies; +} + +fn import_binding_name(import: &ImportRecord) -> Option { + if let Some(alias) = import.alias.as_deref() { + return Some(alias.to_owned()); + } + match import.kind { + ImportKind::Import => import + .name + .as_deref() + .and_then(|name| name.split('.').next()) + .map(str::to_owned), + ImportKind::FromImport | ImportKind::FutureImport => import + .name + .as_ref() + .filter(|name| name.as_ref() != "*") + .map(|name| name.to_string()), + ImportKind::SideEffect + | ImportKind::DefaultImport + | ImportKind::NamedImport + | ImportKind::NamespaceImport + | ImportKind::DynamicImport => None, + } +} + +fn is_wildcard_import(import: &ImportRecord) -> bool { + matches!( + import.kind, + ImportKind::FromImport | ImportKind::FutureImport + ) && import.name.as_deref() == Some("*") +} 
+ +fn resolve_plain_import(import: &ImportRecord, module_to_file: &HashMap<&str, u32>) -> Option { + let name = import.name.as_deref()?; + module_to_file.get(name).copied() +} + +fn resolve_from_import( + import: &ImportRecord, + source_file: &FileRecord, + module_to_file: &HashMap<&str, u32>, + symbol_to_id: &HashMap<(u32, &str), u32>, + resolution_id: u32, +) -> Option { + let module = import.module.as_deref()?; + let resolved_module = resolve_module_name(source_file, module)?; + let import_name = import.name.as_deref(); + + if let Some(target_file_id) = module_to_file.get(resolved_module.as_str()).copied() { + let target_symbol_id = + import_name.and_then(|name| symbol_to_id.get(&(target_file_id, name)).copied()); + if target_symbol_id.is_some() || import_name == Some("*") { + return Some(ImportResolutionRecord { + id: resolution_id, + import_id: import.id, + source_file_id: import.file_id, + target_file_id, + target_symbol_id, + }); + } + } + + let import_name = import_name?; + let child_module = join_module(&resolved_module, import_name); + if let Some(target_file_id) = module_to_file.get(child_module.as_str()).copied() { + return Some(ImportResolutionRecord { + id: resolution_id, + import_id: import.id, + source_file_id: import.file_id, + target_file_id, + target_symbol_id: None, + }); + } + + module_to_file + .get(resolved_module.as_str()) + .copied() + .map(|target_file_id| ImportResolutionRecord { + id: resolution_id, + import_id: import.id, + source_file_id: import.file_id, + target_file_id, + target_symbol_id: None, + }) +} + +fn resolve_module_name(source_file: &FileRecord, raw_module: &str) -> Option { + if !raw_module.starts_with('.') { + return Some(raw_module.to_owned()); + } + + let dot_count = raw_module + .as_bytes() + .iter() + .take_while(|byte| **byte == b'.') + .count(); + let suffix = &raw_module[dot_count..]; + let mut package_parts = source_package_name(source_file) + .map(|package| { + package + .split('.') + .filter(|part| 
!part.is_empty()) + .map(str::to_owned) + .collect::>() + }) + .unwrap_or_default(); + let ascend = dot_count.saturating_sub(1); + if ascend > package_parts.len() { + return None; + } + let keep = package_parts.len() - ascend; + package_parts.truncate(keep); + if !suffix.is_empty() { + package_parts.extend( + suffix + .split('.') + .filter(|part| !part.is_empty()) + .map(str::to_owned), + ); + } + Some(package_parts.join(".")) +} + +fn source_package_name(file: &FileRecord) -> Option<&str> { + let module = file.module_name.as_deref()?; + if file.path.ends_with("/__init__.py") || file.path.as_ref() == "__init__.py" { + Some(module) + } else { + module.rsplit_once('.').map(|(package, _)| package) + } +} + +fn join_module(parent: &str, child: &str) -> String { + if parent.is_empty() { + child.to_owned() + } else { + format!("{parent}.{child}") + } +} + +fn python_module_name(path: &str) -> Option { + let without_suffix = path.strip_suffix(".py")?; + let module = without_suffix + .strip_suffix("/__init__") + .unwrap_or(without_suffix) + .split('/') + .filter(|part| !part.is_empty()) + .collect::>() + .join("."); + if module.is_empty() { + None + } else { + Some(module) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::time::{SystemTime, UNIX_EPOCH}; + + #[test] + fn debug_info_reports_version_and_python_index_feature() { + let info = Engine::new().debug_info(); + + assert_eq!(info.version(), env!("CARGO_PKG_VERSION")); + assert_eq!( + info.enabled_features(), + ["skeleton", "python-index", "typescript-index"] + ); + } + + #[test] + fn indexes_python_files_without_materializing_python_objects() { + let repo = temp_repo_path("index-python"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write( + repo.join("pkg/mod.py"), + "from __future__ import annotations\nfrom .base import Base as RenamedBase\nimport os, sys as system\n\n@decorator\nclass Service(RenamedBase):\n pass\n\ndef helper(value):\n return value\n", + ) + .unwrap(); + + let 
index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.summary().classes, 1); + assert_eq!(index.summary().functions, 1); + assert_eq!(index.summary().global_variables, 0); + assert_eq!(index.summary().imports, 4); + assert_eq!(index.summary().import_resolutions, 0); + assert_eq!(index.external_modules.len(), 2); + assert_eq!(index.summary().references, 0); + assert_eq!(index.summary().dependencies, 0); + assert_eq!(index.symbols[0].name, "Service"); + assert_eq!(index.symbols[0].parent_symbol_id, None); + assert!(index.symbols[0].is_top_level); + assert_eq!(index.symbols[1].name, "helper"); + assert_eq!(index.symbols[1].parent_symbol_id, None); + assert!(index.symbols[1].is_top_level); + assert!(index + .imports + .iter() + .any(|import| import.module.as_deref() == Some(".base"))); + assert!(index + .imports + .iter() + .any(|import| import.alias.as_deref() == Some("system"))); + assert!(index.external_modules.iter().any(|external_module| { + external_module.name == "os" && external_module.alias.is_none() + })); + assert!(index.external_modules.iter().any(|external_module| { + external_module.name == "sys" && external_module.alias.as_deref() == Some("system") + })); + } + + #[test] + fn compact_python_records_intern_repeated_strings() { + let repo = temp_repo_path("python-interned-record-strings"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write( + repo.join("pkg/a.py"), + "import requests\n\ndef fetch_a():\n return requests.get('/a')\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/b.py"), + "import requests\n\ndef fetch_b():\n return requests.post('/b')\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.strings.len(), 0); + let import_names = index + .imports + .iter() + .filter_map(|import| import.name.as_ref()) + .filter(|name| name.as_ref() == "requests") + .collect::>(); + let 
external_module_names = index + .external_modules + .iter() + .filter(|module| module.name.as_ref() == "requests") + .map(|module| &module.name) + .collect::>(); + let external_reference_names = index + .external_references + .iter() + .filter(|reference| reference.name.as_ref() == "requests") + .map(|reference| &reference.name) + .collect::>(); + + assert_eq!(import_names.len(), 2); + assert_eq!(external_module_names.len(), 2); + assert_eq!(external_reference_names.len(), 2); + assert!(import_names[0].ptr_eq(import_names[1])); + assert!(import_names[0].ptr_eq(external_module_names[0])); + assert!(import_names[0].ptr_eq(external_reference_names[0])); + } + + #[test] + fn resolves_python_external_import_references() { + let repo = temp_repo_path("python-external-import-references"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write( + repo.join("pkg/service.py"), + "import requests\n\ndef run():\n return requests.get('/health')\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.summary().imports, 1); + assert_eq!(index.external_modules.len(), 1); + assert_eq!(index.summary().references, 0); + assert_eq!(index.summary().dependencies, 0); + assert_eq!(index.external_references.len(), 1); + + let import = index + .imports + .iter() + .find(|import| import.name.as_deref() == Some("requests")) + .unwrap(); + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + let reference = &index.external_references[0]; + + assert_eq!(reference.source_symbol_id, Some(run.id)); + assert_eq!(reference.import_id, import.id); + assert_eq!(reference.name, "requests"); + assert_eq!(reference.range.start_row, 3); + assert_eq!(reference.range.start_column, 11); + } + + #[test] + fn resolves_python_function_local_external_import_references() { + let repo = temp_repo_path("python-local-external-import-references"); + 
fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write( + repo.join("pkg/service.py"), + "def load(name):\n import importlib\n return importlib.import_module(name)\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.summary().imports, 1); + assert_eq!(index.external_modules.len(), 1); + assert_eq!(index.summary().references, 0); + assert_eq!(index.summary().dependencies, 0); + assert_eq!(index.external_references.len(), 1); + + let import = index + .imports + .iter() + .find(|import| import.name.as_deref() == Some("importlib")) + .unwrap(); + let load = index + .symbols + .iter() + .find(|symbol| symbol.name == "load") + .unwrap(); + let reference = &index.external_references[0]; + + assert_eq!(reference.source_symbol_id, Some(load.id)); + assert_eq!(reference.import_id, import.id); + assert_eq!(reference.name, "importlib"); + assert_eq!(reference.range.start_row, 2); + assert_eq!(reference.range.start_column, 11); + } + + #[test] + fn indexes_only_requested_python_paths() { + let repo = temp_repo_path("index-python-paths"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/included.py"), "class Included:\n pass\n").unwrap(); + fs::write(repo.join("pkg/skipped.py"), "class Skipped:\n pass\n").unwrap(); + + let index = index_python_paths(&repo, ["pkg/included.py"]).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.files[0].path, "pkg/included.py"); + assert_eq!(index.summary().classes, 1); + assert_eq!(index.symbols[0].name, "Included"); + } + + #[test] + fn indexes_typescript_syntax_records_without_resolution() { + let repo = temp_repo_path("index-typescript"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + r#"import React, { useState as useStateAlias, type FC } from "react"; +import * as utils from "./utils"; 
+import "./setup"; + +export { helper as publicHelper } from "./utils"; +export * as allUtils from "./utils"; +export const value = 1; +export function run() {} +export default function Page() {} +interface Props {} +type Alias = string; +enum Mode { A } +namespace Inner { export const x = 1 } +const loader = await import("./loader"); +const { parse, format: fmt } = require("./format"); +const Component = () =>
; +"#, + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.summary().classes, 0); + assert_eq!(index.summary().functions, 3); + assert_eq!(index.summary().global_variables, 5); + assert_eq!(index.files[0].path, "src/app.tsx"); + assert_eq!(index.imports.len(), 10); + assert_eq!(index.exports.len(), 5); + assert_eq!(index.summary().import_resolutions, 0); + assert_eq!(index.external_modules.len(), 3); + assert_eq!(index.summary().references, 0); + assert_eq!(index.summary().dependencies, 0); + + let symbols = index + .symbols + .iter() + .map(|symbol| (symbol.name.as_ref(), symbol.kind)) + .collect::>(); + assert!(symbols.contains(&("run", SymbolKind::Function))); + assert!(symbols.contains(&("Page", SymbolKind::Function))); + assert!(symbols.contains(&("Component", SymbolKind::Function))); + assert!(symbols.contains(&("Props", SymbolKind::Interface))); + assert!(symbols.contains(&("Alias", SymbolKind::TypeAlias))); + assert!(symbols.contains(&("Mode", SymbolKind::Enum))); + assert!(symbols.contains(&("Inner", SymbolKind::Namespace))); + assert!(symbols.contains(&("x", SymbolKind::GlobalVariable))); + assert!(symbols.contains(&("value", SymbolKind::GlobalVariable))); + assert!(symbols.contains(&("loader", SymbolKind::GlobalVariable))); + assert!(symbols.contains(&("parse", SymbolKind::GlobalVariable))); + assert!(symbols.contains(&("fmt", SymbolKind::GlobalVariable))); + let inner = index + .symbols + .iter() + .find(|symbol| symbol.name == "Inner") + .unwrap(); + let x = index + .symbols + .iter() + .find(|symbol| symbol.name == "x") + .unwrap(); + assert_eq!(x.parent_symbol_id, Some(inner.id)); + assert!(!x.is_top_level); + + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::DefaultImport + && import.module.as_deref() == Some("react") + && import.name.as_deref() == Some("React") + && import.alias.as_deref() == Some("React") 
+ })); + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::NamedImport + && import.module.as_deref() == Some("react") + && import.name.as_deref() == Some("useState") + && import.alias.as_deref() == Some("useStateAlias") + })); + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::NamespaceImport + && import.module.as_deref() == Some("./utils") + && import.alias.as_deref() == Some("utils") + })); + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::SideEffect && import.module.as_deref() == Some("./setup") + })); + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::DynamicImport + && import.module.as_deref() == Some("./loader") + && import.alias.as_deref() == Some("loader") + })); + assert!(index.imports.iter().any(|import| { + import.kind == ImportKind::DynamicImport + && import.module.as_deref() == Some("./format") + && import.alias.as_deref() == Some("fmt") + })); + assert!(index.external_modules.iter().any(|external_module| { + external_module.name == "React" + && external_module.module.as_deref() == Some("react") + && external_module.alias.as_deref() == Some("React") + })); + assert!(index + .external_modules + .iter() + .any(|external_module| external_module.name == "useState")); + assert!(index + .external_modules + .iter() + .any(|external_module| external_module.name == "FC")); + + assert!(index.exports.iter().any(|export| { + export.kind == ExportKind::Named + && export.name.as_deref() == Some("publicHelper") + && export.local_name.as_deref() == Some("helper") + && export.source_module.as_deref() == Some("./utils") + && export.import_id.is_some() + })); + assert!(index.exports.iter().any(|export| { + export.kind == ExportKind::Namespace + && export.name.as_deref() == Some("allUtils") + && export.source_module.as_deref() == Some("./utils") + })); + assert!(index.exports.iter().any(|export| { + export.kind == ExportKind::Named + && export.name.as_deref() == Some("value") + 
&& export.symbol_id.is_some() + })); + assert!(index.exports.iter().any(|export| { + export.kind == ExportKind::Named + && export.name.as_deref() == Some("run") + && export.symbol_id.is_some() + })); + assert!(index.exports.iter().any(|export| { + export.kind == ExportKind::Default + && export.name.as_deref() == Some("default") + && export.local_name.as_deref() == Some("Page") + && export.symbol_id.is_some() + })); + } + + #[test] + fn indexes_only_requested_typescript_like_paths() { + let repo = temp_repo_path("index-typescript-paths"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write(repo.join("src/included.ts"), "export class Included {}\n").unwrap(); + fs::write(repo.join("src/skipped.ts"), "export class Skipped {}\n").unwrap(); + fs::write( + repo.join("src/not-ts.py"), + "class NotTypeScript:\n pass\n", + ) + .unwrap(); + + let index = index_typescript_paths(&repo, ["src/included.ts", "src/not-ts.py"]).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.files[0].path, "src/included.ts"); + assert_eq!(index.summary().classes, 1); + assert_eq!(index.symbols[0].name, "Included"); + assert_eq!(index.exports[0].name.as_deref(), Some("Included")); + } + + #[test] + fn resolves_relative_typescript_imports_to_files_and_symbols() { + let repo = temp_repo_path("resolve-typescript-imports"); + fs::create_dir_all(repo.join("src/feature")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import DefaultThing, { helper } from './utils';\n\ +import * as utils from './utils';\n\ +import { dotted } from './foo.test';\n\ +import './setup';\n\ +export { helper as publicHelper } from './utils';\n\ +export * from './feature';\n\ +", + ) + .unwrap(); + fs::write(repo.join("src/setup.ts"), "window.__ready = true;\n").unwrap(); + fs::write( + repo.join("src/utils.ts"), + "export const helper = 1;\nexport default function DefaultThing() {}\n", + ) + .unwrap(); + fs::write(repo.join("src/foo.test.ts"), "export 
const dotted = 1;\n").unwrap(); + fs::write( + repo.join("src/feature/index.ts"), + "export const feature = 1;\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 5); + assert_eq!(index.summary().imports, 7); + assert_eq!(index.summary().import_resolutions, 7); + + let utils_file_id = index + .files + .iter() + .find(|file| file.path == "src/utils.ts") + .unwrap() + .id; + let setup_file_id = index + .files + .iter() + .find(|file| file.path == "src/setup.ts") + .unwrap() + .id; + let feature_file_id = index + .files + .iter() + .find(|file| file.path == "src/feature/index.ts") + .unwrap() + .id; + let dotted_file_id = index + .files + .iter() + .find(|file| file.path == "src/foo.test.ts") + .unwrap() + .id; + let helper_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == utils_file_id && symbol.name == "helper") + .unwrap() + .id; + let default_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == utils_file_id && symbol.name == "DefaultThing") + .unwrap() + .id; + let dotted_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == dotted_file_id && symbol.name == "dotted") + .unwrap() + .id; + + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == utils_file_id + && resolution.target_symbol_id == Some(default_symbol_id) + })); + assert_eq!( + index + .import_resolutions + .iter() + .filter(|resolution| { + resolution.target_file_id == utils_file_id + && resolution.target_symbol_id == Some(helper_symbol_id) + }) + .count(), + 2 + ); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == utils_file_id && resolution.target_symbol_id.is_none() + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == setup_file_id && resolution.target_symbol_id.is_none() + })); + 
assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == feature_file_id && resolution.target_symbol_id.is_none() + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == dotted_file_id + && resolution.target_symbol_id == Some(dotted_symbol_id) + })); + } + + #[test] + fn resolves_typescript_barrel_reexports_to_symbols() { + let repo = temp_repo_path("resolve-typescript-barrel-reexports"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { publicHelper, wildcarded } from './barrel';\n\ +import { nestedHelper } from './nested';\n\ +import * as barrel from './barrel';\n\ +\n\ +export function run() {\n\ + return publicHelper() + nestedHelper() + wildcarded + barrel.wildcarded;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/barrel.ts"), + "export { helper as publicHelper } from './leaf';\n\ +export * from './extra';\n", + ) + .unwrap(); + fs::write( + repo.join("src/nested.ts"), + "export { publicHelper as nestedHelper } from './barrel';\n", + ) + .unwrap(); + fs::write( + repo.join("src/leaf.ts"), + "export function helper() { return 1; }\n", + ) + .unwrap(); + fs::write( + repo.join("src/extra.ts"), + "export const wildcarded = 2;\nexport default function hidden() { return 0; }\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 5); + assert_eq!(index.summary().imports, 7); + assert_eq!(index.summary().import_resolutions, 7); + assert_eq!(index.summary().references, 4); + assert_eq!(index.summary().dependencies, 2); + + let leaf_file_id = index + .files + .iter() + .find(|file| file.path == "src/leaf.ts") + .unwrap() + .id; + let extra_file_id = index + .files + .iter() + .find(|file| file.path == "src/extra.ts") + .unwrap() + .id; + let helper_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == leaf_file_id && 
symbol.name == "helper") + .unwrap() + .id; + let wildcarded_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == extra_file_id && symbol.name == "wildcarded") + .unwrap() + .id; + + let public_helper_import_id = index + .imports + .iter() + .find(|import| { + import.module.as_deref() == Some("./barrel") + && import.name.as_deref() == Some("publicHelper") + }) + .unwrap() + .id; + let nested_helper_import_id = index + .imports + .iter() + .find(|import| import.name.as_deref() == Some("nestedHelper")) + .unwrap() + .id; + let wildcarded_import_id = index + .imports + .iter() + .find(|import| { + import.module.as_deref() == Some("./barrel") + && import.name.as_deref() == Some("wildcarded") + }) + .unwrap() + .id; + let barrel_namespace_import_id = index + .imports + .iter() + .find(|import| { + import.kind == ImportKind::NamespaceImport + && import.alias.as_deref() == Some("barrel") + }) + .unwrap() + .id; + + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == public_helper_import_id + && resolution.target_symbol_id == Some(helper_symbol_id) + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == nested_helper_import_id + && resolution.target_symbol_id == Some(helper_symbol_id) + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == wildcarded_import_id + && resolution.target_symbol_id == Some(wildcarded_symbol_id) + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == barrel_namespace_import_id + && resolution.target_symbol_id.is_none() + })); + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == helper_symbol_id) + .count(), + 2 + ); + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == wildcarded_symbol_id) + .count(), + 2 + ); + } + + #[test] + fn 
resolves_typescript_named_namespace_reexport_member_references() { + let repo = temp_repo_path("resolve-typescript-named-namespace-reexport"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { utils } from './barrel';\n\ +\n\ +export function run() {\n\ + return utils.helper();\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/barrel.ts"), + "export * as utils from './leaf';\n", + ) + .unwrap(); + fs::write( + repo.join("src/leaf.ts"), + "export function helper() { return 1; }\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 3); + assert_eq!(index.summary().imports, 2); + assert_eq!(index.summary().import_resolutions, 2); + assert_eq!(index.summary().references, 1); + assert_eq!(index.summary().dependencies, 1); + + let app_file_id = index + .files + .iter() + .find(|file| file.path == "src/app.ts") + .unwrap() + .id; + let barrel_file_id = index + .files + .iter() + .find(|file| file.path == "src/barrel.ts") + .unwrap() + .id; + let leaf_file_id = index + .files + .iter() + .find(|file| file.path == "src/leaf.ts") + .unwrap() + .id; + let run_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "run") + .unwrap() + .id; + let helper_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == leaf_file_id && symbol.name == "helper") + .unwrap() + .id; + let utils_import_id = index + .imports + .iter() + .find(|import| { + import.file_id == app_file_id + && import.kind == ImportKind::NamedImport + && import.module.as_deref() == Some("./barrel") + && import.name.as_deref() == Some("utils") + && import.alias.as_deref() == Some("utils") + }) + .unwrap() + .id; + let barrel_namespace_import_id = index + .imports + .iter() + .find(|import| { + import.file_id == barrel_file_id + && import.kind == ImportKind::NamespaceImport + && 
import.module.as_deref() == Some("./leaf") + && import.name.as_deref() == Some("*") + && import.alias.as_deref() == Some("utils") + }) + .unwrap() + .id; + + assert!(index.exports.iter().any(|export| { + export.file_id == barrel_file_id + && export.kind == ExportKind::Namespace + && export.name.as_deref() == Some("utils") + && export.import_id == Some(barrel_namespace_import_id) + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == utils_import_id + && resolution.target_file_id == barrel_file_id + && resolution.target_symbol_id.is_none() + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == barrel_namespace_import_id + && resolution.target_file_id == leaf_file_id + && resolution.target_symbol_id.is_none() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == helper_symbol_id + && reference.import_id == Some(utils_import_id) + && reference.name == "helper" + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run_symbol_id + && dependency.target_symbol_id == helper_symbol_id + && dependency.reference_count == 1 + })); + } + + #[test] + fn extracts_typescript_namespace_member_symbols() { + let repo = temp_repo_path("extract-typescript-namespace-members"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "export namespace Math {\n\ + export function add(a: number, b: number) { return a + b; }\n\ + export interface Shape { area: number }\n\ + export type Mode = 'simple';\n\ + export enum Operation { Add }\n\ + export namespace Advanced { export const pi = 3.14; export function pow() {} }\n\ +}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let file_id = index + .files + .iter() + .find(|file| file.path == "src/app.ts") + .unwrap() + .id; + let math 
= index + .symbols + .iter() + .find(|symbol| { + symbol.file_id == file_id + && symbol.name == "Math" + && symbol.kind == SymbolKind::Namespace + && symbol.is_top_level + }) + .unwrap(); + let advanced = index + .symbols + .iter() + .find(|symbol| { + symbol.file_id == file_id + && symbol.name == "Advanced" + && symbol.kind == SymbolKind::Namespace + && symbol.parent_symbol_id == Some(math.id) + && !symbol.is_top_level + }) + .unwrap(); + let child_names = index + .symbols + .iter() + .filter(|symbol| symbol.parent_symbol_id == Some(math.id)) + .map(|symbol| (symbol.name.to_string(), symbol.kind)) + .collect::>(); + assert_eq!( + child_names, + vec![ + ("add".to_owned(), SymbolKind::Function), + ("Shape".to_owned(), SymbolKind::Interface), + ("Mode".to_owned(), SymbolKind::TypeAlias), + ("Operation".to_owned(), SymbolKind::Enum), + ("Advanced".to_owned(), SymbolKind::Namespace), + ] + ); + assert_eq!( + index + .symbols + .iter() + .filter(|symbol| symbol.parent_symbol_id == Some(advanced.id)) + .map(|symbol| (symbol.name.to_string(), symbol.kind)) + .collect::>(), + vec![ + ("pi".to_owned(), SymbolKind::GlobalVariable), + ("pow".to_owned(), SymbolKind::Function), + ] + ); + assert_eq!(index.summary().symbols, 8); + assert_eq!(index.summary().functions, 2); + assert_eq!(index.summary().global_variables, 1); + } + + #[test] + fn excludes_typescript_references_shadowed_by_scoped_loop_and_catch_bindings() { + let repo = temp_repo_path("resolve-typescript-scoped-bindings"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Imported, Other } from './values';\n\ +\n\ +export function run(items: number[]) {\n\ + for (const Imported of items) {\n\ + Imported;\n\ + }\n\ + try {\n\ + throw new Error();\n\ + } catch (Other) {\n\ + Other;\n\ + }\n\ + return Imported + Other;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/values.ts"), + "export const Imported = 1;\nexport const Other = 2;\n", + ) + .unwrap(); + + let 
index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 2); + assert_eq!(index.summary().import_resolutions, 2); + assert_eq!(index.summary().references, 2); + assert_eq!(index.summary().dependencies, 2); + + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; + let imported_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "Imported") + .unwrap() + .id; + let other_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "Other") + .unwrap() + .id; + + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == imported_symbol_id) + .count(), + 1 + ); + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == other_symbol_id) + .count(), + 1 + ); + } + + #[test] + fn scopes_typescript_nested_callback_parameter_shadows_to_callback_body() { + let repo = temp_repo_path("resolve-typescript-nested-callback-params"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Imported } from './values';\n\ +\n\ +export function run(items: number[]) {\n\ + const before = Imported;\n\ + items.map((Imported) => Imported + 1);\n\ + return Imported + before;\n\ +}\n", + ) + .unwrap(); + fs::write(repo.join("src/values.ts"), "export const Imported = 1;\n").unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 1); + assert_eq!(index.summary().import_resolutions, 1); + assert_eq!(index.summary().references, 2); + assert_eq!(index.summary().dependencies, 1); + + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; 
+ let imported_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "Imported") + .unwrap() + .id; + + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == imported_symbol_id) + .count(), + 2 + ); + } + + #[test] + fn excludes_typescript_references_shadowed_by_nested_declarations() { + let repo = temp_repo_path("resolve-typescript-nested-declaration-shadows"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Imported, Other, StillImported } from './values';\n\ +\n\ +export function run() {\n\ + function Imported() { return 1; }\n\ + class Other {}\n\ + return Imported() + new Other() + StillImported;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/values.ts"), + "export const Imported = 1;\nexport const Other = 2;\nexport const StillImported = 3;\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 3); + assert_eq!(index.summary().import_resolutions, 3); + assert_eq!(index.summary().references, 1); + assert_eq!(index.summary().dependencies, 1); + + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; + let still_imported_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "StillImported") + .unwrap() + .id; + + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == still_imported_symbol_id) + .count(), + 1 + ); + } + + #[test] + fn excludes_typescript_references_shadowed_by_destructuring_defaults() { + let repo = temp_repo_path("resolve-typescript-destructuring-default-shadows"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Foo, Bar, Baz, Quux, Inner, StillImported, 
DefaultValue } from './values';\n\ +\n\ +export function run({ Foo = DefaultValue, alias: Bar = DefaultValue }: any, [Baz = DefaultValue]: any) {\n\ + const { Quux = DefaultValue, nested: { Inner = DefaultValue } = {} } = {} as any;\n\ + return Foo + Bar + Baz + Quux + Inner + StillImported + DefaultValue;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/values.ts"), + "export const Foo = 1;\nexport const Bar = 2;\nexport const Baz = 3;\nexport const Quux = 4;\nexport const Inner = 5;\nexport const StillImported = 6;\nexport const DefaultValue = 7;\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 7); + assert_eq!(index.summary().import_resolutions, 7); + assert_eq!(index.summary().references, 7); + assert_eq!(index.summary().dependencies, 2); + + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; + let still_imported_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "StillImported") + .unwrap() + .id; + let default_value_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "DefaultValue") + .unwrap() + .id; + + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == still_imported_symbol_id) + .count(), + 1 + ); + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == default_value_symbol_id) + .count(), + 6 + ); + } + + #[test] + fn scopes_typescript_lexical_declaration_shadows_to_blocks() { + let repo = temp_repo_path("resolve-typescript-lexical-block-shadows"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Imported } from './values';\n\ +\n\ +export function run(flag: boolean) {\n\ + const before = Imported;\n\ 
+ if (flag) {\n\ + const Imported = 1;\n\ + Imported;\n\ + }\n\ + return Imported + before;\n\ +}\n\ +\n\ +export function loop() {\n\ + for (let Imported = 0; Imported < 1; Imported++) {\n\ + Imported;\n\ + }\n\ + return Imported;\n\ +}\n", + ) + .unwrap(); + fs::write(repo.join("src/values.ts"), "export const Imported = 1;\n").unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 1); + assert_eq!(index.summary().import_resolutions, 1); + assert_eq!(index.summary().references, 3); + assert_eq!(index.summary().dependencies, 2); + + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; + let imported_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "Imported") + .unwrap() + .id; + + assert_eq!( + index + .references + .iter() + .filter(|reference| reference.target_symbol_id == imported_symbol_id) + .count(), + 3 + ); + } + + #[test] + fn resolves_typescript_type_annotation_references_and_dependencies() { + let repo = temp_repo_path("resolve-typescript-type-annotation-references"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + "import type { FlightRouterState } from './types';\n\ +import { runtimeValue } from './values';\n\ +\n\ +export function AppRouterAnnouncer({ tree }: { tree: FlightRouterState }) {\n\ + return runtimeValue + tree.length;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/types.ts"), + "export interface FlightRouterState { length: number }\n", + ) + .unwrap(); + fs::write( + repo.join("src/values.ts"), + "export const runtimeValue = 1;\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 3); + assert_eq!(index.summary().imports, 2); + 
assert_eq!(index.summary().import_resolutions, 2); + assert_eq!(index.summary().references, 2); + assert_eq!(index.summary().dependencies, 2); + + let app_file_id = index + .files + .iter() + .find(|file| file.path == "src/app.tsx") + .unwrap() + .id; + let types_file_id = index + .files + .iter() + .find(|file| file.path == "src/types.ts") + .unwrap() + .id; + let values_file_id = index + .files + .iter() + .find(|file| file.path == "src/values.ts") + .unwrap() + .id; + let announcer_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "AppRouterAnnouncer") + .unwrap() + .id; + let flight_router_state_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == types_file_id && symbol.name == "FlightRouterState") + .unwrap() + .id; + let runtime_value_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == values_file_id && symbol.name == "runtimeValue") + .unwrap() + .id; + + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(announcer_symbol_id) + && reference.target_symbol_id == flight_router_state_symbol_id + && reference.name == "FlightRouterState" + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == announcer_symbol_id + && dependency.target_symbol_id == flight_router_state_symbol_id + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == announcer_symbol_id + && dependency.target_symbol_id == runtime_value_symbol_id + })); + } + + #[test] + fn resolves_typescript_nested_local_assignment_dependencies() { + let repo = temp_repo_path("resolve-typescript-nested-local-assignment-dependencies"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + "import { useEffect, useState } from 'react';\n\ +\n\ +const ANNOUNCER_TYPE = 'next-route-announcer';\n\ +\n\ +function getAnnouncerNode() {\n\ + return 
document.createElement(ANNOUNCER_TYPE);\n\ +}\n\ +\n\ +export function AppRouterAnnouncer() {\n\ + const [portalNode, setPortalNode] = useState(null);\n\ +\n\ + useEffect(() => {\n\ + const announcer = getAnnouncerNode();\n\ + setPortalNode(announcer);\n\ + return () => {\n\ + const container = document.getElementsByTagName(ANNOUNCER_TYPE)[0];\n\ + if (container?.isConnected) {\n\ + document.body.removeChild(container);\n\ + }\n\ + };\n\ + }, []);\n\ +\n\ + useEffect(() => {\n\ + let currentTitle = '';\n\ + const pageHeader = document.querySelector('h1');\n\ + if (pageHeader) {\n\ + currentTitle = pageHeader.textContent || '';\n\ + }\n\ + if (currentTitle) {\n\ + setPortalNode(currentTitle as unknown as HTMLElement);\n\ + }\n\ + }, []);\n\ +\n\ + return portalNode;\n\ +}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let symbol_id = |name: &str, top_level: Option| { + index + .symbols + .iter() + .find(|symbol| { + symbol.name == name + && top_level.is_none_or(|expected| symbol.is_top_level == expected) + }) + .unwrap_or_else(|| panic!("missing symbol {name}")) + .id + }; + let dependency_exists = |source_symbol_id: u32, target_symbol_id: u32| { + index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == source_symbol_id + && dependency.target_symbol_id == target_symbol_id + }) + }; + let dependency_exists_by_name = |source_symbol_id: u32, target_name: &str| { + index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == source_symbol_id + && index.symbols.iter().any(|symbol| { + symbol.id == dependency.target_symbol_id + && !symbol.is_top_level + && symbol.name == target_name + }) + }) + }; + + let app_router_announcer = symbol_id("AppRouterAnnouncer", Some(true)); + let announcer_type = symbol_id("ANNOUNCER_TYPE", Some(true)); + let get_announcer_node = symbol_id("getAnnouncerNode", Some(true)); + let announcer = symbol_id("announcer", Some(false)); + let 
container = symbol_id("container", Some(false)); + let current_title = symbol_id("currentTitle", Some(false)); + let page_header = symbol_id("pageHeader", Some(false)); + + for local_symbol_id in [announcer, container, current_title, page_header] { + let symbol = index + .symbols + .iter() + .find(|symbol| symbol.id == local_symbol_id) + .unwrap(); + assert_eq!(symbol.parent_symbol_id, Some(app_router_announcer)); + assert!(!symbol.is_top_level); + assert_eq!(symbol.kind, SymbolKind::GlobalVariable); + assert!( + dependency_exists_by_name(app_router_announcer, symbol.name.as_ref()), + "missing AppRouterAnnouncer dependency on {}", + symbol.name + ); + } + + assert!(!dependency_exists(app_router_announcer, announcer_type)); + assert!(!dependency_exists(app_router_announcer, get_announcer_node)); + assert!(dependency_exists(announcer, get_announcer_node)); + assert!(dependency_exists(container, announcer_type)); + } + + #[test] + fn resolves_typescript_tsconfig_path_aliases() { + let repo = temp_repo_path("resolve-typescript-tsconfig-paths"); + fs::create_dir_all(repo.join("src/lib")).unwrap(); + fs::create_dir_all(repo.join("src/lib/special")).unwrap(); + fs::create_dir_all(repo.join("src/special")).unwrap(); + fs::create_dir_all(repo.join("src/components")).unwrap(); + fs::write( + repo.join("tsconfig.json"), + "{\n\ + // JSONC comments and trailing commas are common in tsconfig files.\n\ + \"compilerOptions\": {\n\ + \"baseUrl\": \"src\",\n\ + \"paths\": {\n\ + \"@lib/*\": [\"lib/*\"],\n\ + \"@lib/special/*\": [\"special/*\"],\n\ + \"components\": [\"components/index\"],\n\ + },\n\ + },\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { helper } from '@lib/helper';\n\ +import { specialHelper } from '@lib/special/helper';\n\ +import { Button } from 'components';\n\ +import { shared } from 'shared';\n\ +import { Nope } from 'components-extra';\n\ +\n\ +export function run() {\n\ + return helper() + specialHelper() + Button + shared;\n\ 
+}\n", + ) + .unwrap(); + fs::write( + repo.join("src/lib/helper.ts"), + "export function helper() { return 1; }\n", + ) + .unwrap(); + fs::write( + repo.join("src/lib/special/helper.ts"), + "export function wrongSpecial() { return 0; }\n", + ) + .unwrap(); + fs::write( + repo.join("src/special/helper.ts"), + "export function specialHelper() { return 4; }\n", + ) + .unwrap(); + fs::write( + repo.join("src/components/index.ts"), + "export const Button = 2;\n", + ) + .unwrap(); + fs::write(repo.join("src/shared.ts"), "export const shared = 3;\n").unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 6); + assert_eq!(index.summary().imports, 5); + assert_eq!(index.summary().import_resolutions, 4); + assert_eq!(index.summary().references, 4); + assert_eq!(index.summary().dependencies, 4); + + let helper_file_id = index + .files + .iter() + .find(|file| file.path == "src/lib/helper.ts") + .unwrap() + .id; + let special_file_id = index + .files + .iter() + .find(|file| file.path == "src/special/helper.ts") + .unwrap() + .id; + let wrong_special_file_id = index + .files + .iter() + .find(|file| file.path == "src/lib/special/helper.ts") + .unwrap() + .id; + let components_file_id = index + .files + .iter() + .find(|file| file.path == "src/components/index.ts") + .unwrap() + .id; + let shared_file_id = index + .files + .iter() + .find(|file| file.path == "src/shared.ts") + .unwrap() + .id; + let unresolved_import_id = index + .imports + .iter() + .find(|import| import.module.as_deref() == Some("components-extra")) + .unwrap() + .id; + + assert!(index + .import_resolutions + .iter() + .any(|resolution| resolution.target_file_id == helper_file_id)); + assert!(index + .import_resolutions + .iter() + .any(|resolution| resolution.target_file_id == special_file_id)); + assert!(!index + .import_resolutions + .iter() + .any(|resolution| resolution.target_file_id == wrong_special_file_id)); + 
assert!(index + .import_resolutions + .iter() + .any(|resolution| resolution.target_file_id == components_file_id)); + assert!(index + .import_resolutions + .iter() + .any(|resolution| resolution.target_file_id == shared_file_id)); + assert!(!index + .import_resolutions + .iter() + .any(|resolution| resolution.import_id == unresolved_import_id)); + } + + #[test] + fn resolves_typescript_references_and_dependencies() { + let repo = temp_repo_path("resolve-typescript-references"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import Service, { helper as localHelper, format } from './utils';\n\ +import * as utils from './utils';\n\ +\n\ +const sameFile = () => localHelper;\n\ +export function run(value: number) {\n\ + const local = sameFile();\n\ + return format(new Service(), local, utils.helper, value);\n\ +}\n\ +export function shadow(localHelper: number) {\n\ + return localHelper;\n\ +}\n", + ) + .unwrap(); + fs::write( + repo.join("src/utils.ts"), + "export const helper = 1;\nexport function format(...values: unknown[]) { return values; }\nexport default class Service {}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let app_file_id = index + .files + .iter() + .find(|file| file.path == "src/app.ts") + .unwrap() + .id; + let utils_file_id = index + .files + .iter() + .find(|file| file.path == "src/utils.ts") + .unwrap() + .id; + let run_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "run") + .unwrap() + .id; + let shadow_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "shadow") + .unwrap() + .id; + let same_file_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "sameFile") + .unwrap() + .id; + let helper_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == 
utils_file_id && symbol.name == "helper") + .unwrap() + .id; + let format_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == utils_file_id && symbol.name == "format") + .unwrap() + .id; + let service_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == utils_file_id && symbol.name == "Service") + .unwrap() + .id; + + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == format_symbol_id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == service_symbol_id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == helper_symbol_id + && reference.import_id.is_some() + && reference.name == "helper" + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == same_file_symbol_id + && reference.import_id.is_none() + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(shadow_symbol_id) + && reference.target_symbol_id == helper_symbol_id + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run_symbol_id + && dependency.target_symbol_id == format_symbol_id + && dependency.reference_count == 1 + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run_symbol_id + && dependency.target_symbol_id == helper_symbol_id + && dependency.reference_count == 1 + })); + } + + #[test] + fn resolves_typescript_default_imports_from_export_equals_symbols() { + let repo = temp_repo_path("resolve-typescript-export-equals-default-imports"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + 
repo.join("src/legacy.ts"), + "class Legacy {}\nexport = Legacy;\n", + ) + .unwrap(); + fs::write( + repo.join("src/app.ts"), + "import Legacy from './legacy';\n\nexport function run() {\n return Legacy;\n}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let legacy_file_id = index + .files + .iter() + .find(|file| file.path == "src/legacy.ts") + .unwrap() + .id; + let app_file_id = index + .files + .iter() + .find(|file| file.path == "src/app.ts") + .unwrap() + .id; + let legacy_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == legacy_file_id && symbol.name == "Legacy") + .unwrap() + .id; + let run_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "run") + .unwrap() + .id; + let import = index + .imports + .iter() + .find(|import| import.alias.as_deref() == Some("Legacy")) + .unwrap(); + + assert!(index.exports.iter().any(|export| { + export.file_id == legacy_file_id + && export.kind == ExportKind::ExportEquals + && export.name.as_deref() == Some("Legacy") + && export.local_name.as_deref() == Some("Legacy") + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.import_id == import.id + && resolution.target_file_id == legacy_file_id + && resolution.target_symbol_id == Some(legacy_symbol_id) + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run_symbol_id) + && reference.target_symbol_id == legacy_symbol_id + && reference.import_id == Some(import.id) + && reference.name == "Legacy" + })); + assert!(!index.references.iter().any(|reference| { + reference.source_file_id == legacy_file_id + && reference.source_symbol_id.is_none() + && reference.target_symbol_id == legacy_symbol_id + && reference.name == "Legacy" + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run_symbol_id + && dependency.target_symbol_id == 
legacy_symbol_id + && dependency.reference_count == 1 + })); + } + + #[test] + fn preserves_typescript_export_equals_call_records() { + let repo = temp_repo_path("preserve-typescript-export-equals-call-records"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/legacy.ts"), + "function makeLegacy() { return class Legacy {}; }\nexport = makeLegacy();\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let file_id = index + .files + .iter() + .find(|file| file.path == "src/legacy.ts") + .unwrap() + .id; + let make_legacy_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == file_id && symbol.name == "makeLegacy") + .unwrap() + .id; + + assert!(index.function_calls.iter().any(|call| { + call.source_file_id == file_id + && call.source_symbol_id.is_none() + && call.target_symbol_id == Some(make_legacy_symbol_id) + && call.name == "makeLegacy" + })); + } + + #[test] + fn extracts_typescript_function_call_records() { + let repo = temp_repo_path("typescript-function-call-records"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/util.ts"), + "export function helper(value: number): number { return value; }\n", + ) + .unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { helper } from './util';\n\n\ +function local(value: number) {\n return helper(value);\n}\n\n\ +export function run() {\n local(helper(1));\n return run();\n}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let helper = index + .symbols + .iter() + .find(|symbol| symbol.name == "helper") + .unwrap(); + let local = index + .symbols + .iter() + .find(|symbol| symbol.name == "local") + .unwrap(); + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + + assert!(index.function_calls.iter().any(|call| { + call.name == "helper" + && call.source_symbol_id == 
Some(local.id) + && call.target_symbol_id == Some(helper.id) + && call.import_id.is_some() + })); + assert!(index.function_calls.iter().any(|call| { + call.name == "local" + && call.source_symbol_id == Some(run.id) + && call.target_symbol_id == Some(local.id) + && call.import_id.is_none() + })); + assert!(index.function_calls.iter().any(|call| { + call.name == "helper" + && call.source_symbol_id == Some(run.id) + && call.target_symbol_id == Some(helper.id) + && call.import_id.is_some() + })); + assert!(index.function_calls.iter().any(|call| { + call.name == "run" + && call.source_symbol_id == Some(run.id) + && call.target_symbol_id == Some(run.id) + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run.id) + && reference.target_symbol_id == run.id + && reference.name == "run" + })); + } + + #[test] + fn extracts_typescript_promise_chain_records() { + let repo = temp_repo_path("typescript-promise-chain-records"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "export function run() {\n\ + Promise.resolve(1).then(value => value + 1).catch(error => 0).finally(() => cleanup());\n\ + fetchUser().then(user => user.name);\n\ +}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + let chains = index + .promise_chains + .iter() + .filter(|chain| chain.source_symbol_id == Some(run.id)) + .collect::>(); + assert_eq!(chains.len(), 2); + + let full_chain = chains + .iter() + .find(|chain| { + chain + .stage_names + .iter() + .any(|stage| stage.as_ref() == "finally") + }) + .unwrap(); + assert_eq!( + full_chain + .stage_names + .iter() + .map(|stage| stage.as_ref()) + .collect::>(), + vec!["then", "catch", "finally"] + ); + assert!(chains.iter().any(|chain| { + chain + .stage_names + .iter() + .map(|stage| stage.as_ref()) + .collect::>() 
+ == vec!["then"] + })); + } + + #[test] + fn resolves_typescript_external_import_references() { + let repo = temp_repo_path("typescript-external-import-references"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + "import React from 'react';\nexport function run() {\n return React.createElement('div');\n}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 1); + assert_eq!(index.summary().imports, 1); + assert_eq!(index.external_modules.len(), 1); + assert_eq!(index.summary().references, 0); + assert_eq!(index.summary().dependencies, 0); + assert_eq!(index.external_references.len(), 1); + + let import = index + .imports + .iter() + .find(|import| import.alias.as_deref() == Some("React")) + .unwrap(); + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + let reference = &index.external_references[0]; + + assert_eq!(reference.source_symbol_id, Some(run.id)); + assert_eq!(reference.import_id, import.id); + assert_eq!(reference.name, "React"); + assert_eq!(reference.range.start_row, 2); + assert_eq!(reference.range.start_column, 9); + } + + #[test] + fn resolves_typescript_heritage_references_and_dependencies() { + let repo = temp_repo_path("resolve-typescript-heritage-references"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.ts"), + "import { Base, IFace, IExtra } from './base';\n\ +import * as base from './base';\n\ +\n\ +export interface Local extends IFace {}\n\ +export class Child extends Base implements IFace, base.Other {}\n\ +export interface Derived extends Local, IExtra {}\n", + ) + .unwrap(); + fs::write( + repo.join("src/base.ts"), + "export class Base {}\n\ +export interface IFace {}\n\ +export interface IExtra {}\n\ +export interface Other {}\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + 
fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 2); + assert_eq!(index.summary().imports, 4); + assert_eq!(index.summary().import_resolutions, 4); + assert_eq!(index.summary().references, 6); + assert_eq!(index.summary().dependencies, 6); + assert_eq!(index.subclass_edges.len(), 6); + + let app_file_id = index + .files + .iter() + .find(|file| file.path == "src/app.ts") + .unwrap() + .id; + let base_file_id = index + .files + .iter() + .find(|file| file.path == "src/base.ts") + .unwrap() + .id; + let local = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "Local") + .unwrap(); + let child = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "Child") + .unwrap(); + let derived = index + .symbols + .iter() + .find(|symbol| symbol.file_id == app_file_id && symbol.name == "Derived") + .unwrap(); + let base = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "Base") + .unwrap(); + let iface = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "IFace") + .unwrap(); + let iextra = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "IExtra") + .unwrap(); + let other = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "Other") + .unwrap(); + + for (source_symbol_id, target_symbol_id, name) in [ + (local.id, iface.id, "IFace"), + (child.id, base.id, "Base"), + (child.id, iface.id, "IFace"), + (child.id, other.id, "Other"), + (derived.id, local.id, "Local"), + (derived.id, iextra.id, "IExtra"), + ] { + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(source_symbol_id) + && reference.target_symbol_id == target_symbol_id + && reference.name == name + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == source_symbol_id + && 
dependency.target_symbol_id == target_symbol_id + })); + assert!(index.subclass_edges.iter().any(|edge| { + edge.source_symbol_id == source_symbol_id + && edge.target_symbol_id == target_symbol_id + && index.references.iter().any(|reference| { + reference.id == edge.reference_id + && reference.source_symbol_id == Some(source_symbol_id) + && reference.target_symbol_id == target_symbol_id + && reference.name == name + }) + })); + } + } + + #[test] + fn parses_typescript_angle_bracket_assertions_without_tsx_errors() { + let repo = temp_repo_path("typescript-angle-bracket-assertions"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/assertions.ts"), + "// eslint-disable-next-line\nconst myVar = 'test'\n\nexport default myVar\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let file = index + .files + .iter() + .find(|file| file.path == "src/assertions.ts") + .unwrap(); + let symbol = index + .symbols + .iter() + .find(|symbol| symbol.file_id == file.id && symbol.name == "myVar") + .unwrap(); + + assert!(!file.has_error); + assert_eq!(index.summary().symbols, 1); + assert!(index.exports.iter().any(|export| { + export.file_id == file.id + && export.kind == ExportKind::Default + && export.name.as_deref() == Some("default") + && export.local_name.as_deref() == Some("myVar") + })); + assert!(index.references.iter().any(|reference| { + reference.source_file_id == file.id + && reference.target_symbol_id == symbol.id + && reference.name == "myVar" + })); + } + + #[test] + fn resolves_internal_python_imports_to_files_and_symbols() { + let repo = temp_repo_path("resolve-python-imports"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "CONSTANT = 'base'\nclass Base:\n pass\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from __future__ import annotations\nfrom 
.base import Base, CONSTANT\nfrom . import base\nimport pkg.base\nimport os\n\nclass Service(Base):\n pass\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.summary().files, 3); + assert_eq!(index.summary().classes, 2); + assert_eq!(index.summary().global_variables, 1); + assert_eq!(index.summary().imports, 6); + assert_eq!(index.summary().import_resolutions, 4); + assert_eq!(index.summary().references, 1); + assert_eq!(index.summary().dependencies, 1); + + let base_file_id = index + .files + .iter() + .find(|file| file.path == "pkg/base.py") + .unwrap() + .id; + let base_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "Base") + .unwrap() + .id; + let constant_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "CONSTANT") + .unwrap() + .id; + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == base_file_id + && resolution.target_symbol_id == Some(base_symbol_id) + })); + assert!(index.import_resolutions.iter().any(|resolution| { + resolution.target_file_id == base_file_id + && resolution.target_symbol_id == Some(constant_symbol_id) + })); + assert_eq!( + index + .import_resolutions + .iter() + .filter(|resolution| resolution.target_file_id == base_file_id) + .count(), + 4 + ); + assert!(index.references.iter().any(|reference| { + reference.name == "Base" + && reference.source_symbol_id.is_some() + && reference.target_symbol_id == base_symbol_id + && reference.import_id.is_some() + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.target_symbol_id == base_symbol_id + && dependency.reference_count == 1 + && dependency.reference_ids == vec![0] + })); + } + + #[test] + fn extracts_parenthesized_python_from_import_names() { + let repo = temp_repo_path("python-parenthesized-from-imports"); + 
fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "CONSTANT = 1\nclass Base:\n pass\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from pkg.base import ( # noqa: F401\n Base,\n CONSTANT as RENAMED,\n)\nfrom pkg.base import *\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let service_file_id = index + .files + .iter() + .find(|file| file.path == "pkg/service.py") + .unwrap() + .id; + let service_imports = index + .imports + .iter() + .filter(|import| import.file_id == service_file_id) + .collect::>(); + + assert_eq!(service_imports.len(), 3); + assert!(service_imports.iter().all(|import| { + import + .name + .as_deref() + .map_or(true, |name| !name.contains('(') && !name.contains(')')) + })); + assert!(service_imports.iter().any(|import| { + import.kind == ImportKind::FromImport + && import.module.as_deref() == Some("pkg.base") + && import.name.as_deref() == Some("Base") + && import.alias.is_none() + })); + assert!(service_imports.iter().any(|import| { + import.kind == ImportKind::FromImport + && import.module.as_deref() == Some("pkg.base") + && import.name.as_deref() == Some("CONSTANT") + && import.alias.as_deref() == Some("RENAMED") + })); + assert!(service_imports.iter().any(|import| { + import.kind == ImportKind::FromImport + && import.module.as_deref() == Some("pkg.base") + && import.name.as_deref() == Some("*") + && import.alias.is_none() + })); + } + + #[test] + fn resolves_python_package_reexports_to_symbols() { + let repo = temp_repo_path("resolve-python-reexports"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "from .base import Base\n").unwrap(); + fs::write(repo.join("pkg/base.py"), "class Base:\n pass\n").unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from pkg import Base\n\nclass Service(Base):\n pass\n", + ) + 
.unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let base_file_id = index + .files + .iter() + .find(|file| file.path == "pkg/base.py") + .unwrap() + .id; + let base_symbol_id = index + .symbols + .iter() + .find(|symbol| symbol.file_id == base_file_id && symbol.name == "Base") + .unwrap() + .id; + let service = index + .symbols + .iter() + .find(|symbol| symbol.name == "Service") + .unwrap(); + + assert_eq!( + index + .import_resolutions + .iter() + .filter(|resolution| resolution.target_symbol_id == Some(base_symbol_id)) + .count(), + 2 + ); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(service.id) + && reference.name == "Base" + && reference.target_symbol_id == base_symbol_id + && reference.import_id.is_some() + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == service.id + && dependency.target_symbol_id == base_symbol_id + })); + } + + #[test] + fn resolves_python_wildcard_import_chains_to_symbols() { + let repo = temp_repo_path("resolve-python-wildcard-reexports"); + fs::create_dir_all(repo.join("pkg/inner")).unwrap(); + fs::write( + repo.join("pkg/base.py"), + "CONSTANT = 1\nclass Base:\n pass\n\ndef helper():\n return CONSTANT\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/inner/__init__.py"), + "from ..base import *\nINNER = CONSTANT\n", + ) + .unwrap(); + fs::write(repo.join("pkg/__init__.py"), "from .inner import *\n").unwrap(); + fs::write(repo.join("facade.py"), "from pkg import *\n").unwrap(); + fs::write( + repo.join("service.py"), + "from pkg import Base\nfrom facade import *\n\nclass Service(Base):\n def run(self):\n return helper(), CONSTANT\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let base = index + .symbols + .iter() + .find(|symbol| symbol.name == "Base") + .unwrap(); + let constant = index + .symbols + .iter() + .find(|symbol| 
symbol.name == "CONSTANT") + .unwrap(); + let helper = index + .symbols + .iter() + .find(|symbol| symbol.name == "helper") + .unwrap(); + let inner = index + .symbols + .iter() + .find(|symbol| symbol.name == "INNER") + .unwrap(); + let service = index + .symbols + .iter() + .find(|symbol| symbol.name == "Service") + .unwrap(); + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + + assert!(index + .import_resolutions + .iter() + .any(|resolution| { resolution.target_symbol_id == Some(base.id) })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(inner.id) + && reference.name == "CONSTANT" + && reference.target_symbol_id == constant.id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(service.id) + && reference.name == "Base" + && reference.target_symbol_id == base.id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run.id) + && reference.name == "helper" + && reference.target_symbol_id == helper.id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(run.id) + && reference.name == "CONSTANT" + && reference.target_symbol_id == constant.id + && reference.import_id.is_some() + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == service.id && dependency.target_symbol_id == base.id + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run.id && dependency.target_symbol_id == helper.id + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run.id && dependency.target_symbol_id == constant.id + })); + } + + #[test] + fn restricts_python_wildcard_imports_with_static_all_exports() { + let repo = 
temp_repo_path("resolve-python-wildcard-all-exports"); + fs::create_dir_all(&repo).unwrap(); + fs::write( + repo.join("provider.py"), + "__all__ = ['Public']\nclass Public:\n pass\nclass Hidden:\n pass\n", + ) + .unwrap(); + fs::write( + repo.join("wildcard_consumer.py"), + "from provider import *\n\nclass UsesPublic(Public):\n pass\n\ndef unresolved():\n return Hidden\n", + ) + .unwrap(); + fs::write( + repo.join("named_consumer.py"), + "from provider import Hidden\n\nclass UsesHidden(Hidden):\n pass\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let public = index + .symbols + .iter() + .find(|symbol| symbol.name == "Public") + .unwrap(); + let hidden = index + .symbols + .iter() + .find(|symbol| symbol.name == "Hidden") + .unwrap(); + let uses_public = index + .symbols + .iter() + .find(|symbol| symbol.name == "UsesPublic") + .unwrap(); + let uses_hidden = index + .symbols + .iter() + .find(|symbol| symbol.name == "UsesHidden") + .unwrap(); + let unresolved = index + .symbols + .iter() + .find(|symbol| symbol.name == "unresolved") + .unwrap(); + + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(uses_public.id) + && reference.name == "Public" + && reference.target_symbol_id == public.id + && reference.import_id.is_some() + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(uses_hidden.id) + && reference.name == "Hidden" + && reference.target_symbol_id == hidden.id + && reference.import_id.is_some() + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(unresolved.id) + && reference.name == "Hidden" + && reference.target_symbol_id == hidden.id + })); + } + + #[test] + fn attributes_references_to_innermost_python_symbol() { + let repo = temp_repo_path("nested-python-reference-sources"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), 
"").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "class Base:\n pass\n\ndef helper():\n return Base\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from .base import Base, helper\n\nclass Service(Base):\n def run(self):\n return helper()\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let service = index + .symbols + .iter() + .find(|symbol| symbol.name == "Service") + .unwrap(); + let run = index + .symbols + .iter() + .find(|symbol| symbol.name == "run") + .unwrap(); + let helper = index + .symbols + .iter() + .find(|symbol| symbol.name == "helper") + .unwrap(); + let base = index + .symbols + .iter() + .find(|symbol| symbol.name == "Base") + .unwrap(); + + assert!(service.is_top_level); + assert!(!run.is_top_level); + assert_eq!(run.parent_symbol_id, Some(service.id)); + assert!(index.references.iter().any(|reference| { + reference.name == "Base" + && reference.source_symbol_id == Some(service.id) + && reference.target_symbol_id == base.id + })); + assert!(index.references.iter().any(|reference| { + reference.name == "helper" + && reference.source_symbol_id == Some(run.id) + && reference.target_symbol_id == helper.id + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == run.id && dependency.target_symbol_id == helper.id + })); + } + + #[test] + fn resolves_python_module_attribute_references() { + let repo = temp_repo_path("python-module-attribute-references"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "class Base:\n pass\n\ndef helper():\n return Base\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from . 
import base\nimport pkg.base as base_alias\nimport pkg.base\n\n\ +def caller():\n return base.helper(), base_alias.Base, pkg.base.helper()\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let caller = index + .symbols + .iter() + .find(|symbol| symbol.name == "caller") + .unwrap(); + let helper = index + .symbols + .iter() + .find(|symbol| symbol.name == "helper") + .unwrap(); + let base = index + .symbols + .iter() + .find(|symbol| symbol.name == "Base") + .unwrap(); + + assert_eq!( + index + .references + .iter() + .filter(|reference| { + reference.source_symbol_id == Some(caller.id) + && reference.name == "helper" + && reference.target_symbol_id == helper.id + && reference.import_id.is_some() + }) + .count(), + 2 + ); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(caller.id) + && reference.name == "Base" + && reference.target_symbol_id == base.id + && reference.import_id.is_some() + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == caller.id + && dependency.target_symbol_id == helper.id + && dependency.reference_count == 2 + })); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == caller.id + && dependency.target_symbol_id == base.id + && dependency.reference_count == 1 + })); + } + + #[test] + fn resolves_python_nested_module_attribute_references() { + let repo = temp_repo_path("python-nested-module-attribute-references"); + fs::create_dir_all(repo.join("a/b")).unwrap(); + fs::write(repo.join("a/b/c.py"), "def d():\n pass\n").unwrap(); + fs::write( + repo.join("consumer.py"), + "from a import b\nimport a.b\nimport a.b.c as c_alias\n\n\ +def caller():\n return b.c.d(), a.b.c.d(), c_alias.d()\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let caller = index + .symbols + .iter() + .find(|symbol| symbol.name == 
"caller") + .unwrap(); + let d = index + .symbols + .iter() + .find(|symbol| symbol.name == "d") + .unwrap(); + + assert_eq!( + index + .references + .iter() + .filter(|reference| { + reference.source_symbol_id == Some(caller.id) + && reference.name == "d" + && reference.target_symbol_id == d.id + && reference.import_id.is_some() + }) + .count(), + 3 + ); + assert!(index.dependencies.iter().any(|dependency| { + dependency.source_symbol_id == caller.id + && dependency.target_symbol_id == d.id + && dependency.reference_count == 3 + })); + } + + #[test] + fn skips_references_shadowed_by_python_parameters_and_locals() { + let repo = temp_repo_path("python-shadowed-reference-sources"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "class Base:\n pass\n\nclass Point:\n pass\n\ndef helper():\n return Base\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from .base import Base, helper, Point\n\n\ +other = object()\n\n\ +Error = Exception\n\n\ +def shadowed(Base):\n helper = Base\n return helper, Base\n\n\ +def import_shadowed():\n import other.module\n import other.module as helper\n from other import Base\n return helper, Base, other\n\n\ +def control_flow_shadowed(items, manager):\n for Base, helper in items:\n pass\n with manager as other:\n pass\n try:\n pass\n except Error as helper:\n return Base, helper, other\n\n\ +def comprehension_shadowed(items):\n return [Base + helper + other for Base, helper, other in items if Base]\n\n\ +def comprehension_scope_does_not_leak(items):\n values = [Base + helper for Base, helper in items]\n return Base, helper, other\n\n\ +def match_shadowed(subject):\n match subject:\n case Point(x=Base, y=helper) as other if Base:\n return Base, helper, other\n case {\"base\": Base, \"helper\": helper, **other}:\n return Base, helper, other\n\n\ +def lambda_shadowed():\n return (lambda Base, helper, *other: (Base, helper, 
other))\n\n\ +def lambda_default_ref():\n return (lambda local=Base: local)\n\n\ +def nonlocal_declared():\n helper = Base\n def inner():\n nonlocal helper\n helper = Base\n return helper\n return inner\n\n\ +def global_declared():\n global other\n other = Base\n return other\n\n\ +def attribute_names_are_not_bare_references(obj):\n return obj.helper, other.helper, helper.attr\n\n\ +def caller():\n return helper()\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "shadowed") + .unwrap(); + let caller = index + .symbols + .iter() + .find(|symbol| symbol.name == "caller") + .unwrap(); + let import_shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "import_shadowed") + .unwrap(); + let control_flow_shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "control_flow_shadowed") + .unwrap(); + let comprehension_shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "comprehension_shadowed") + .unwrap(); + let comprehension_scope_does_not_leak = index + .symbols + .iter() + .find(|symbol| symbol.name == "comprehension_scope_does_not_leak") + .unwrap(); + let match_shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "match_shadowed") + .unwrap(); + let lambda_shadowed = index + .symbols + .iter() + .find(|symbol| symbol.name == "lambda_shadowed") + .unwrap(); + let lambda_default_ref = index + .symbols + .iter() + .find(|symbol| symbol.name == "lambda_default_ref") + .unwrap(); + let nonlocal_declared_inner = index + .symbols + .iter() + .find(|symbol| symbol.name == "inner") + .unwrap(); + let global_declared = index + .symbols + .iter() + .find(|symbol| symbol.name == "global_declared") + .unwrap(); + let attribute_names = index + .symbols + .iter() + .find(|symbol| symbol.name == "attribute_names_are_not_bare_references") + .unwrap(); + let helper = index + .symbols + 
.iter() + .find(|symbol| symbol.name == "helper") + .unwrap(); + let other = index + .symbols + .iter() + .find(|symbol| symbol.name == "other") + .unwrap(); + let base = index + .symbols + .iter() + .find(|symbol| symbol.name == "Base") + .unwrap(); + let error = index + .symbols + .iter() + .find(|symbol| symbol.name == "Error") + .unwrap(); + let point = index + .symbols + .iter() + .find(|symbol| symbol.name == "Point") + .unwrap(); + + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id) + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(import_shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id + || reference.target_symbol_id == other.id) + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(control_flow_shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id + || reference.target_symbol_id == other.id) + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(control_flow_shadowed.id) + && reference.name == "Error" + && reference.target_symbol_id == error.id + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(comprehension_shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id + || reference.target_symbol_id == other.id) + })); + for (name, target_symbol_id) in [ + ("Base", base.id), + ("helper", helper.id), + ("other", other.id), + ] { + assert_eq!( + index + .references + .iter() + .filter(|reference| { + reference.source_symbol_id == Some(comprehension_scope_does_not_leak.id) + && reference.name == name + && reference.target_symbol_id == target_symbol_id + }) + .count(), + 1 + ); + } + 
assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(match_shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id + || reference.target_symbol_id == other.id) + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(match_shadowed.id) + && reference.name == "Point" + && reference.target_symbol_id == point.id + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(lambda_shadowed.id) + && (reference.target_symbol_id == base.id + || reference.target_symbol_id == helper.id + || reference.target_symbol_id == other.id) + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(lambda_default_ref.id) + && reference.name == "Base" + && reference.target_symbol_id == base.id + })); + assert!(!index.references.iter().any(|reference| { + reference.source_symbol_id == Some(nonlocal_declared_inner.id) + && reference.name == "helper" + && reference.target_symbol_id == helper.id + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(nonlocal_declared_inner.id) + && reference.name == "Base" + && reference.target_symbol_id == base.id + })); + assert_eq!( + index + .references + .iter() + .filter(|reference| { + reference.source_symbol_id == Some(global_declared.id) + && reference.name == "other" + && reference.target_symbol_id == other.id + }) + .count(), + 2 + ); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(global_declared.id) + && reference.name == "Base" + && reference.target_symbol_id == base.id + })); + assert_eq!( + index + .references + .iter() + .filter(|reference| { + reference.source_symbol_id == Some(attribute_names.id) + && reference.name == "helper" + && reference.target_symbol_id == helper.id + }) + .count(), + 1 + ); + assert!(index.references.iter().any(|reference| { + 
reference.source_symbol_id == Some(attribute_names.id) + && reference.name == "other" + && reference.target_symbol_id == other.id + })); + assert!(index.references.iter().any(|reference| { + reference.source_symbol_id == Some(caller.id) + && reference.name == "helper" + && reference.target_symbol_id == helper.id + })); + } + + #[test] + fn compact_python_graph_snapshot_is_stable() { + let repo = temp_repo_path("compact-python-graph-snapshot"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "CONSTANT = 'base'\nclass Base:\n pass\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from .base import Base, CONSTANT\nfrom . import base\nimport pkg.base\nimport os\n\nclass Service(Base):\n pass\n", + ) + .unwrap(); + + let index = index_python_path(&repo).unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let files = index + .files + .iter() + .map(|file| { + serde_json::json!({ + "id": file.id, + "path": file.path, + "module_name": file.module_name, + "language": file.language, + "content_hash": file.content_hash, + }) + }) + .collect::>(); + let symbols = index + .symbols + .iter() + .map(|symbol| { + serde_json::json!({ + "id": symbol.id, + "file_id": symbol.file_id, + "parent_symbol_id": symbol.parent_symbol_id, + "is_top_level": symbol.is_top_level, + "name": symbol.name, + "kind": symbol.kind, + }) + }) + .collect::>(); + let imports = index + .imports + .iter() + .map(|import| { + serde_json::json!({ + "id": import.id, + "file_id": import.file_id, + "kind": import.kind, + "module": import.module, + "name": import.name, + "alias": import.alias, + }) + }) + .collect::>(); + let references = index + .references + .iter() + .map(|reference| { + serde_json::json!({ + "id": reference.id, + "source_file_id": reference.source_file_id, + "source_symbol_id": reference.source_symbol_id, + "target_symbol_id": reference.target_symbol_id, + "import_id": 
reference.import_id, + "name": reference.name, + }) + }) + .collect::>(); + let dependencies = index + .dependencies + .iter() + .map(|dependency| { + serde_json::json!({ + "id": dependency.id, + "source_symbol_id": dependency.source_symbol_id, + "target_symbol_id": dependency.target_symbol_id, + "source_file_id": dependency.source_file_id, + "target_file_id": dependency.target_file_id, + "reference_ids": dependency.reference_ids, + "reference_count": dependency.reference_count, + }) + }) + .collect::>(); + + assert_eq!( + serde_json::json!({ + "files": files, + "symbols": symbols, + "imports": imports, + "import_resolutions": index.import_resolutions, + "references": references, + "dependencies": dependencies, + }), + serde_json::json!({ + "files": [ + {"id": 0, "path": "pkg/__init__.py", "module_name": "pkg", "language": "python", "content_hash": "cbf29ce484222325"}, + {"id": 1, "path": "pkg/base.py", "module_name": "pkg.base", "language": "python", "content_hash": "aba9f9794b1c932b"}, + {"id": 2, "path": "pkg/service.py", "module_name": "pkg.service", "language": "python", "content_hash": "aeab60e038068a85"} + ], + "symbols": [ + {"id": 0, "file_id": 1, "parent_symbol_id": null, "is_top_level": true, "name": "CONSTANT", "kind": "global_variable"}, + {"id": 1, "file_id": 1, "parent_symbol_id": null, "is_top_level": true, "name": "Base", "kind": "class"}, + {"id": 2, "file_id": 2, "parent_symbol_id": null, "is_top_level": true, "name": "Service", "kind": "class"} + ], + "imports": [ + {"id": 0, "file_id": 2, "kind": "from_import", "module": ".base", "name": "Base", "alias": null}, + {"id": 1, "file_id": 2, "kind": "from_import", "module": ".base", "name": "CONSTANT", "alias": null}, + {"id": 2, "file_id": 2, "kind": "from_import", "module": ".", "name": "base", "alias": null}, + {"id": 3, "file_id": 2, "kind": "import", "module": null, "name": "pkg.base", "alias": null}, + {"id": 4, "file_id": 2, "kind": "import", "module": null, "name": "os", "alias": null} + ], 
+ "import_resolutions": [ + {"id": 0, "import_id": 0, "source_file_id": 2, "target_file_id": 1, "target_symbol_id": 1}, + {"id": 1, "import_id": 1, "source_file_id": 2, "target_file_id": 1, "target_symbol_id": 0}, + {"id": 2, "import_id": 2, "source_file_id": 2, "target_file_id": 1, "target_symbol_id": null}, + {"id": 3, "import_id": 3, "source_file_id": 2, "target_file_id": 1, "target_symbol_id": null} + ], + "references": [ + {"id": 0, "source_file_id": 2, "source_symbol_id": 2, "target_symbol_id": 1, "import_id": 0, "name": "Base"} + ], + "dependencies": [ + {"id": 0, "source_symbol_id": 2, "target_symbol_id": 1, "source_file_id": 2, "target_file_id": 1, "reference_ids": [0], "reference_count": 1} + ] + }) + ); + } + + #[test] + fn compact_typescript_syntax_snapshot_is_stable() { + let repo = temp_repo_path("compact-typescript-syntax-snapshot"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + "import React, { useMemo as memo, ReactNode } from 'react';\n\ +import * as path from 'path';\n\ +import './polyfill';\n\ +\n\ +const lazy = require('./lazy');\n\ +const dynamicModule = import('./dynamic');\n\ +\n\ +export interface Props { title: string; child?: ReactNode }\n\ +export type Mode = 'light' | 'dark';\n\ +export enum Status { Ready = 'ready' }\n\ +export namespace Tokens { export const spacing = 8; }\n\ +export const helper = (value: number) => value + 1;\n\ +export function Page(props: Props) { return
{props.title}
; }\n\ +export default class Widget {}\n\ +export { helper as renamedHelper, Props };\n\ +export * from './shared';\n\ +export * as shared from './shared';\n", + ) + .unwrap(); + fs::write( + repo.join("src/shared.ts"), + "export const sharedValue = 1;\nexport function sharedFn() { return sharedValue; }\n", + ) + .unwrap(); + + let index = index_typescript_path(&repo).unwrap(); + let actual = compact_typescript_snapshot_json(&index); + let expected_path = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("../../rust-rewrite/golden/typescript-fixture-rust-compact.json"); + if std::env::var_os("GRAPH_SITTER_UPDATE_TYPESCRIPT_FIXTURE_SNAPSHOT").is_some() { + fs::write( + &expected_path, + serde_json::to_string_pretty(&actual).unwrap() + "\n", + ) + .unwrap(); + } else { + let expected: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&expected_path).unwrap()).unwrap(); + assert_eq!(actual, expected); + } + fs::remove_dir_all(&repo).unwrap(); + } + + fn compact_typescript_snapshot_json(index: &TypeScriptIndex) -> serde_json::Value { + let files = index + .files + .iter() + .map(|file| { + serde_json::json!({ + "id": file.id, + "path": file.path, + "language": file.language, + "content_hash": file.content_hash, + "byte_len": file.byte_len, + "line_count": file.line_count, + "has_error": file.has_error, + "root_range": compact_range_json(file.root_range), + }) + }) + .collect::>(); + let symbols = index + .symbols + .iter() + .map(|symbol| { + serde_json::json!({ + "id": symbol.id, + "file_id": symbol.file_id, + "parent_symbol_id": symbol.parent_symbol_id, + "is_top_level": symbol.is_top_level, + "name": symbol.name, + "kind": symbol.kind, + "range": compact_range_json(symbol.range), + "name_range": compact_range_json(symbol.name_range), + }) + }) + .collect::>(); + let imports = index + .imports + .iter() + .map(|import| { + serde_json::json!({ + "id": import.id, + "file_id": import.file_id, + "kind": import.kind, + "module": import.module, + "name": 
import.name, + "alias": import.alias, + "range": compact_range_json(import.range), + }) + }) + .collect::>(); + let import_resolutions = index + .import_resolutions + .iter() + .map(|resolution| { + serde_json::json!({ + "id": resolution.id, + "import_id": resolution.import_id, + "source_file_id": resolution.source_file_id, + "target_file_id": resolution.target_file_id, + "target_symbol_id": resolution.target_symbol_id, + }) + }) + .collect::>(); + let exports = index + .exports + .iter() + .map(|export| { + serde_json::json!({ + "id": export.id, + "file_id": export.file_id, + "kind": export.kind, + "name": export.name, + "local_name": export.local_name, + "source_module": export.source_module, + "symbol_id": export.symbol_id, + "import_id": export.import_id, + "range": compact_range_json(export.range), + }) + }) + .collect::>(); + let references = index + .references + .iter() + .map(|reference| { + serde_json::json!({ + "id": reference.id, + "source_file_id": reference.source_file_id, + "source_symbol_id": reference.source_symbol_id, + "target_symbol_id": reference.target_symbol_id, + "import_id": reference.import_id, + "name": reference.name, + "range": compact_range_json(reference.range), + }) + }) + .collect::>(); + let dependencies = index + .dependencies + .iter() + .map(|dependency| { + serde_json::json!({ + "id": dependency.id, + "source_symbol_id": dependency.source_symbol_id, + "target_symbol_id": dependency.target_symbol_id, + "source_file_id": dependency.source_file_id, + "target_file_id": dependency.target_file_id, + "reference_ids": dependency.reference_ids, + "reference_count": dependency.reference_count, + }) + }) + .collect::>(); + let subclass_edges = index + .subclass_edges + .iter() + .map(|edge| { + serde_json::json!({ + "id": edge.id, + "source_symbol_id": edge.source_symbol_id, + "target_symbol_id": edge.target_symbol_id, + "source_file_id": edge.source_file_id, + "target_file_id": edge.target_file_id, + "reference_id": edge.reference_id, + 
}) + }) + .collect::>(); + + serde_json::json!({ + "summary": { + "files": index.summary().files, + "symbols": index.summary().symbols, + "classes": index.summary().classes, + "functions": index.summary().functions, + "global_variables": index.summary().global_variables, + "imports": index.summary().imports, + "import_resolutions": index.summary().import_resolutions, + "exports": index.exports.len(), + "references": index.summary().references, + "dependencies": index.summary().dependencies, + "bytes": index.summary().bytes, + "lines": index.summary().lines, + "files_with_errors": index.summary().files_with_errors, + }, + "files": files, + "symbols": symbols, + "imports": imports, + "import_resolutions": import_resolutions, + "exports": exports, + "references": references, + "dependencies": dependencies, + "subclass_edges": subclass_edges, + }) + } + + fn compact_range_json(range: SourceRange) -> serde_json::Value { + serde_json::json!([ + range.start_byte, + range.end_byte, + range.start_row, + range.start_column, + range.end_row, + range.end_column + ]) + } + + fn temp_repo_path(prefix: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + std::env::temp_dir().join(format!("graph-sitter-{prefix}-{nanos}")) + } +} diff --git a/crates/graph-sitter-py/Cargo.toml b/crates/graph-sitter-py/Cargo.toml new file mode 100644 index 000000000..e3239d213 --- /dev/null +++ b/crates/graph-sitter-py/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "graph-sitter-py" +description = "PyO3 binding placeholder for graph-sitter-engine" +version.workspace = true +edition.workspace = true +license.workspace = true +repository.workspace = true + +[lib] +name = "graph_sitter_py" +path = "src/lib.rs" +crate-type = ["cdylib", "rlib"] + +[features] +default = [] +pyo3-bindings = ["dep:pyo3"] +extension-module = ["pyo3-bindings", "pyo3/extension-module"] + +[dependencies] +graph-sitter-engine.workspace = true +pyo3 = { workspace = true, 
optional = true } +serde_json.workspace = true diff --git a/crates/graph-sitter-py/src/lib.rs b/crates/graph-sitter-py/src/lib.rs new file mode 100644 index 000000000..2e1f2a4c4 --- /dev/null +++ b/crates/graph-sitter-py/src/lib.rs @@ -0,0 +1,1944 @@ +#![cfg_attr(not(feature = "pyo3-bindings"), forbid(unsafe_code))] + +pub fn engine_version() -> &'static str { + graph_sitter_engine::engine_version() +} + +pub fn enabled_features() -> &'static [&'static str] { + graph_sitter_engine::debug_info().enabled_features() +} + +#[cfg(feature = "pyo3-bindings")] +mod bindings { + use graph_sitter_engine::{ + self, Engine, EngineInfo, IndexSummary, PythonIndex, SymbolKind, TypeScriptIndex, + }; + use pyo3::exceptions::{PyRuntimeError, PyValueError}; + use pyo3::prelude::*; + use std::path::Path; + + #[pyclass(name = "EngineInfo", module = "graph_sitter_py")] + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct PyEngineInfo { + version: String, + enabled_features: Vec, + } + + impl From for PyEngineInfo { + fn from(info: EngineInfo) -> Self { + Self { + version: info.version().to_owned(), + enabled_features: info + .enabled_features() + .iter() + .map(|feature| (*feature).to_owned()) + .collect(), + } + } + } + + #[pymethods] + impl PyEngineInfo { + #[getter] + fn version(&self) -> &str { + &self.version + } + + #[getter] + fn enabled_features(&self) -> Vec { + self.enabled_features.clone() + } + + fn __repr__(&self) -> String { + format!( + "EngineInfo(version={:?}, enabled_features={:?})", + self.version, self.enabled_features + ) + } + } + + #[pyclass(name = "IndexSummary", module = "graph_sitter_py")] + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct PyIndexSummary { + #[pyo3(get)] + files: usize, + #[pyo3(get)] + symbols: usize, + #[pyo3(get)] + classes: usize, + #[pyo3(get)] + functions: usize, + #[pyo3(get)] + global_variables: usize, + #[pyo3(get)] + imports: usize, + #[pyo3(get)] + import_resolutions: usize, + #[pyo3(get)] + external_modules: usize, + 
#[pyo3(get)] + exports: usize, + #[pyo3(get)] + references: usize, + #[pyo3(get)] + external_references: usize, + #[pyo3(get)] + dependencies: usize, + #[pyo3(get)] + subclass_edges: usize, + #[pyo3(get)] + bytes: usize, + #[pyo3(get)] + lines: usize, + #[pyo3(get)] + files_with_errors: usize, + } + + impl From for PyIndexSummary { + fn from(summary: IndexSummary) -> Self { + Self { + files: summary.files, + symbols: summary.symbols, + classes: summary.classes, + functions: summary.functions, + global_variables: summary.global_variables, + imports: summary.imports, + import_resolutions: summary.import_resolutions, + external_modules: summary.external_modules, + exports: summary.exports, + references: summary.references, + external_references: summary.external_references, + dependencies: summary.dependencies, + subclass_edges: summary.subclass_edges, + bytes: summary.bytes, + lines: summary.lines, + files_with_errors: summary.files_with_errors, + } + } + } + + #[pymethods] + impl PyIndexSummary { + fn as_dict(&self) -> std::collections::BTreeMap<&'static str, usize> { + std::collections::BTreeMap::from([ + ("files", self.files), + ("symbols", self.symbols), + ("classes", self.classes), + ("functions", self.functions), + ("global_variables", self.global_variables), + ("imports", self.imports), + ("import_resolutions", self.import_resolutions), + ("external_modules", self.external_modules), + ("exports", self.exports), + ("references", self.references), + ("external_references", self.external_references), + ("dependencies", self.dependencies), + ("subclass_edges", self.subclass_edges), + ("bytes", self.bytes), + ("lines", self.lines), + ("files_with_errors", self.files_with_errors), + ]) + } + + fn __repr__(&self) -> String { + format!( + "IndexSummary(files={}, symbols={}, classes={}, functions={}, global_variables={}, imports={}, import_resolutions={}, external_modules={}, exports={}, references={}, external_references={}, dependencies={}, subclass_edges={}, 
bytes={}, lines={}, files_with_errors={})", + self.files, + self.symbols, + self.classes, + self.functions, + self.global_variables, + self.imports, + self.import_resolutions, + self.external_modules, + self.exports, + self.references, + self.external_references, + self.dependencies, + self.subclass_edges, + self.bytes, + self.lines, + self.files_with_errors + ) + } + } + + #[pyclass(name = "PythonIndex", module = "graph_sitter_py")] + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct PyPythonIndex { + inner: PythonIndex, + } + + impl From for PyPythonIndex { + fn from(inner: PythonIndex) -> Self { + Self { inner } + } + } + + #[pymethods] + impl PyPythonIndex { + fn summary(&self) -> PyIndexSummary { + self.inner.summary().into() + } + + fn to_json(&self) -> PyResult { + serde_json::to_string(&self.inner) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn debug_graph_json(&self) -> PyResult { + self.inner + .debug_graph_json() + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn files_json(&self) -> PyResult { + serde_json::to_string(&self.inner.files) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_json(&self) -> PyResult { + serde_json::to_string(&self.inner.symbols) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_json(&self) -> PyResult { + serde_json::to_string(&self.inner.imports) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_json(&self) -> PyResult { + serde_json::to_string(&self.inner.import_resolutions) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_modules_json(&self) -> PyResult { + serde_json::to_string(&self.inner.external_modules) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_json(&self) -> PyResult { + serde_json::to_string(&self.inner.references) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + 
fn external_references_json(&self) -> PyResult { + serde_json::to_string(&self.inner.external_references) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_json(&self) -> PyResult { + serde_json::to_string(&self.inner.dependencies) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_id_json(&self, file_id: u32) -> PyResult { + serde_json::to_string(&self.inner.files.iter().find(|file| file.id == file_id)) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_path_json(&self, path: &str) -> PyResult { + serde_json::to_string( + &self + .inner + .files + .iter() + .find(|file| file.path.as_ref() == path), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_path_ignore_case_json(&self, path: &str) -> PyResult { + let normalized = path.to_lowercase(); + serde_json::to_string( + &self + .inner + .files + .iter() + .find(|file| file.path.as_ref().to_lowercase() == normalized), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_by_name_json(&self, file_id: u32, name: &str) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.file_id == file_id && symbol.name.as_ref() == name) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_by_byte_range_json( + &self, + file_id: u32, + start_byte: usize, + end_byte: usize, + ) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| { + symbol.file_id == file_id + && ranges_overlap( + symbol.range.start_byte, + 
symbol.range.end_byte, + start_byte, + end_byte, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_parent_json(&self, parent_symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.parent_symbol_id == Some(parent_symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbol_by_id_json(&self, symbol_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .symbols + .iter() + .find(|symbol| symbol.id == symbol_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn top_level_symbols_by_name_json(&self, name: &str) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level && symbol.name.as_ref() == name) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .imports + .iter() + .filter(|import| import.file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_by_lookup_json(&self, file_id: u32, lookup: &str) -> PyResult { + let records: Vec<_> = self + .inner + .imports + .iter() + .filter(|import| { + import.file_id == file_id + && import_lookup_candidates( + import.module.as_ref().map(|value| value.as_ref()), + import.name.as_ref().map(|value| value.as_ref()), + import.alias.as_ref().map(|value| value.as_ref()), + lookup, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_by_byte_range_json( + &self, + file_id: u32, + start_byte: usize, + end_byte: usize, + ) -> PyResult { + let records: Vec<_> 
= self + .inner + .imports + .iter() + .filter(|import| { + import.file_id == file_id + && ranges_overlap( + import.range.start_byte, + import.range.end_byte, + start_byte, + end_byte, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_by_id_json(&self, import_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .imports + .iter() + .find(|import| import.id == import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolution_for_import_json(&self, import_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .import_resolutions + .iter() + .find(|resolution| resolution.import_id == import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_to_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .import_resolutions + .iter() + .filter(|resolution| resolution.target_file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .import_resolutions + .iter() + .filter(|resolution| resolution.target_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_module_for_import_json(&self, import_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .external_modules + .iter() + .find(|external_module| external_module.import_id == import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .dependencies + .iter() + .filter(|dependency| dependency.source_symbol_id == symbol_id) + .collect(); + 
serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .dependencies + .iter() + .filter(|dependency| dependency.target_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn reference_by_id_json(&self, reference_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .references + .iter() + .find(|reference| reference.id == reference_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.target_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_for_import_json(&self, import_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.import_id == Some(import_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_references_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .external_references + .iter() + .filter(|reference| reference.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_references_for_import_json(&self, 
import_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .external_references + .iter() + .filter(|reference| reference.import_id == import_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_ids(&self) -> Vec { + self.inner.files.iter().map(|file| file.id).collect() + } + + fn symbol_ids(&self) -> Vec { + self.inner.symbols.iter().map(|symbol| symbol.id).collect() + } + + fn top_level_symbol_ids(&self) -> Vec { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| symbol.id) + .collect() + } + + #[getter] + fn top_level_symbol_count(&self) -> usize { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .count() + } + + #[getter] + fn top_level_class_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::Class) + } + + #[getter] + fn top_level_function_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::Function) + } + + #[getter] + fn top_level_global_variable_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::GlobalVariable) + } + + fn class_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Class) + } + + fn function_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Function) + } + + fn global_variable_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::GlobalVariable) + } + + fn import_ids(&self) -> Vec { + self.inner.imports.iter().map(|import| import.id).collect() + } + + #[getter] + fn file_count(&self) -> usize { + self.inner.files.len() + } + + #[getter] + fn symbol_count(&self) -> usize { + self.inner.symbols.len() + } + + #[getter] + fn import_count(&self) -> usize { + self.inner.imports.len() + } + + #[getter] + fn import_resolution_count(&self) -> usize { + self.inner.import_resolutions.len() + } + + #[getter] + fn external_module_count(&self) -> usize { + self.inner.external_modules.len() + } + + #[getter] + fn 
export_count(&self) -> usize { + 0 + } + + #[getter] + fn reference_count(&self) -> usize { + self.inner.references.len() + } + + #[getter] + fn external_reference_count(&self) -> usize { + self.inner.external_references.len() + } + + #[getter] + fn function_call_count(&self) -> usize { + 0 + } + + #[getter] + fn promise_chain_count(&self) -> usize { + 0 + } + + #[getter] + fn dependency_count(&self) -> usize { + self.inner.dependencies.len() + } + + #[getter] + fn subclass_edge_count(&self) -> usize { + 0 + } + + fn __repr__(&self) -> String { + let summary = self.inner.summary(); + format!( + "PythonIndex(files={}, symbols={}, imports={}, import_resolutions={}, references={}, dependencies={})", + summary.files, + summary.symbols, + summary.imports, + summary.import_resolutions, + summary.references, + summary.dependencies + ) + } + } + + impl PyPythonIndex { + fn symbol_ids_by_kind(&self, kind: SymbolKind) -> Vec { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.kind == kind) + .map(|symbol| symbol.id) + .collect() + } + + fn top_level_symbol_count_by_kind(&self, kind: SymbolKind) -> usize { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level && symbol.kind == kind) + .count() + } + } + + #[pyclass(name = "TypeScriptIndex", module = "graph_sitter_py")] + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct PyTypeScriptIndex { + inner: TypeScriptIndex, + } + + impl From for PyTypeScriptIndex { + fn from(inner: TypeScriptIndex) -> Self { + Self { inner } + } + } + + #[pymethods] + impl PyTypeScriptIndex { + fn summary(&self) -> PyIndexSummary { + self.inner.summary().into() + } + + fn to_json(&self) -> PyResult { + serde_json::to_string(&self.inner) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn debug_graph_json(&self) -> PyResult { + self.inner + .debug_graph_json() + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn files_json(&self) -> PyResult { + 
serde_json::to_string(&self.inner.files) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_json(&self) -> PyResult { + serde_json::to_string(&self.inner.symbols) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_json(&self) -> PyResult { + serde_json::to_string(&self.inner.imports) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_json(&self) -> PyResult { + serde_json::to_string(&self.inner.import_resolutions) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_modules_json(&self) -> PyResult { + serde_json::to_string(&self.inner.external_modules) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn exports_json(&self) -> PyResult { + serde_json::to_string(&self.inner.exports) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_json(&self) -> PyResult { + serde_json::to_string(&self.inner.references) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_references_json(&self) -> PyResult { + serde_json::to_string(&self.inner.external_references) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn function_calls_json(&self) -> PyResult { + serde_json::to_string(&self.inner.function_calls) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn promise_chains_json(&self) -> PyResult { + serde_json::to_string(&self.inner.promise_chains) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_json(&self) -> PyResult { + serde_json::to_string(&self.inner.dependencies) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn subclass_edges_json(&self) -> PyResult { + serde_json::to_string(&self.inner.subclass_edges) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_id_json(&self, file_id: u32) -> PyResult { + 
serde_json::to_string(&self.inner.files.iter().find(|file| file.id == file_id)) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_path_json(&self, path: &str) -> PyResult { + serde_json::to_string( + &self + .inner + .files + .iter() + .find(|file| file.path.as_ref() == path), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_by_path_ignore_case_json(&self, path: &str) -> PyResult { + let normalized = path.to_lowercase(); + serde_json::to_string( + &self + .inner + .files + .iter() + .find(|file| file.path.as_ref().to_lowercase() == normalized), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_by_name_json(&self, file_id: u32, name: &str) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.file_id == file_id && symbol.name.as_ref() == name) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_file_by_byte_range_json( + &self, + file_id: u32, + start_byte: usize, + end_byte: usize, + ) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| { + symbol.file_id == file_id + && ranges_overlap( + symbol.range.start_byte, + symbol.range.end_byte, + start_byte, + end_byte, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbols_for_parent_json(&self, parent_symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.parent_symbol_id == Some(parent_symbol_id)) + .collect(); + 
serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn symbol_by_id_json(&self, symbol_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .symbols + .iter() + .find(|symbol| symbol.id == symbol_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn top_level_symbols_by_name_json(&self, name: &str) -> PyResult { + let records: Vec<_> = self + .inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level && symbol.name.as_ref() == name) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .imports + .iter() + .filter(|import| import.file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_by_lookup_json(&self, file_id: u32, lookup: &str) -> PyResult { + let records: Vec<_> = self + .inner + .imports + .iter() + .filter(|import| { + import.file_id == file_id + && import_lookup_candidates( + import.module.as_ref().map(|value| value.as_ref()), + import.name.as_ref().map(|value| value.as_ref()), + import.alias.as_ref().map(|value| value.as_ref()), + lookup, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn imports_for_file_by_byte_range_json( + &self, + file_id: u32, + start_byte: usize, + end_byte: usize, + ) -> PyResult { + let records: Vec<_> = self + .inner + .imports + .iter() + .filter(|import| { + import.file_id == file_id + && ranges_overlap( + import.range.start_byte, + import.range.end_byte, + start_byte, + end_byte, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_by_id_json(&self, import_id: u32) -> PyResult { + 
serde_json::to_string( + &self + .inner + .imports + .iter() + .find(|import| import.id == import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn exports_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .exports + .iter() + .filter(|export| export.file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn exports_for_file_by_name_json(&self, file_id: u32, name: &str) -> PyResult { + let records: Vec<_> = self + .inner + .exports + .iter() + .filter(|export| export.file_id == file_id && export.name.as_deref() == Some(name)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn exports_for_file_by_byte_range_json( + &self, + file_id: u32, + start_byte: usize, + end_byte: usize, + ) -> PyResult { + let records: Vec<_> = self + .inner + .exports + .iter() + .filter(|export| { + export.file_id == file_id + && ranges_overlap( + export.range.start_byte, + export.range.end_byte, + start_byte, + end_byte, + ) + }) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn exports_for_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .exports + .iter() + .filter(|export| export.symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn export_by_id_json(&self, export_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .exports + .iter() + .find(|export| export.id == export_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolution_for_import_json(&self, import_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .import_resolutions + .iter() + .find(|resolution| resolution.import_id == 
import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_to_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .import_resolutions + .iter() + .filter(|resolution| resolution.target_file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_resolutions_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .import_resolutions + .iter() + .filter(|resolution| resolution.target_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_module_for_import_json(&self, import_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .external_modules + .iter() + .find(|external_module| external_module.import_id == import_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .dependencies + .iter() + .filter(|dependency| dependency.source_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn dependencies_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .dependencies + .iter() + .filter(|dependency| dependency.target_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn reference_by_id_json(&self, reference_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .references + .iter() + .find(|reference| reference.id == reference_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: 
Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.target_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn references_for_import_json(&self, import_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .references + .iter() + .filter(|reference| reference.import_id == Some(import_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_references_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .external_references + .iter() + .filter(|reference| reference.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn external_references_for_import_json(&self, import_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .external_references + .iter() + .filter(|reference| reference.import_id == import_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn function_call_by_id_json(&self, call_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .function_calls + .iter() + .find(|call| call.id == call_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn function_calls_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .function_calls + .iter() + .filter(|call| call.source_file_id == file_id) + .collect(); + serde_json::to_string(&records) + 
.map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn function_calls_for_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .function_calls + .iter() + .filter(|call| call.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn promise_chain_by_id_json(&self, chain_id: u32) -> PyResult { + serde_json::to_string( + &self + .inner + .promise_chains + .iter() + .find(|chain| chain.id == chain_id), + ) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn promise_chains_for_file_json(&self, file_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .promise_chains + .iter() + .filter(|chain| chain.source_file_id == file_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn promise_chains_for_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .promise_chains + .iter() + .filter(|chain| chain.source_symbol_id == Some(symbol_id)) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn subclass_edges_from_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .subclass_edges + .iter() + .filter(|edge| edge.source_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn subclass_edges_to_symbol_json(&self, symbol_id: u32) -> PyResult { + let records: Vec<_> = self + .inner + .subclass_edges + .iter() + .filter(|edge| edge.target_symbol_id == symbol_id) + .collect(); + serde_json::to_string(&records) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn file_ids(&self) -> Vec { + self.inner.files.iter().map(|file| file.id).collect() + } + + fn symbol_ids(&self) -> Vec { + 
self.inner.symbols.iter().map(|symbol| symbol.id).collect() + } + + fn top_level_symbol_ids(&self) -> Vec { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .map(|symbol| symbol.id) + .collect() + } + + #[getter] + fn top_level_symbol_count(&self) -> usize { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level) + .count() + } + + #[getter] + fn top_level_class_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::Class) + } + + #[getter] + fn top_level_function_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::Function) + } + + #[getter] + fn top_level_global_variable_count(&self) -> usize { + self.top_level_symbol_count_by_kind(SymbolKind::GlobalVariable) + } + + fn class_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Class) + } + + fn function_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Function) + } + + fn global_variable_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::GlobalVariable) + } + + fn interface_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Interface) + } + + fn type_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::TypeAlias) + } + + fn enum_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Enum) + } + + fn namespace_ids(&self) -> Vec { + self.symbol_ids_by_kind(SymbolKind::Namespace) + } + + #[getter] + fn interface_count(&self) -> usize { + self.symbol_count_by_kind(SymbolKind::Interface) + } + + #[getter] + fn type_count(&self) -> usize { + self.symbol_count_by_kind(SymbolKind::TypeAlias) + } + + #[getter] + fn enum_count(&self) -> usize { + self.symbol_count_by_kind(SymbolKind::Enum) + } + + #[getter] + fn namespace_count(&self) -> usize { + self.symbol_count_by_kind(SymbolKind::Namespace) + } + + fn import_ids(&self) -> Vec { + self.inner.imports.iter().map(|import| import.id).collect() + } + + fn export_ids(&self) -> Vec { + self.inner.exports.iter().map(|export| export.id).collect() + } + + 
#[getter] + fn file_count(&self) -> usize { + self.inner.files.len() + } + + #[getter] + fn symbol_count(&self) -> usize { + self.inner.symbols.len() + } + + #[getter] + fn import_count(&self) -> usize { + self.inner.imports.len() + } + + #[getter] + fn import_resolution_count(&self) -> usize { + self.inner.import_resolutions.len() + } + + #[getter] + fn external_module_count(&self) -> usize { + self.inner.external_modules.len() + } + + #[getter] + fn export_count(&self) -> usize { + self.inner.exports.len() + } + + #[getter] + fn reference_count(&self) -> usize { + self.inner.references.len() + } + + #[getter] + fn external_reference_count(&self) -> usize { + self.inner.external_references.len() + } + + #[getter] + fn function_call_count(&self) -> usize { + self.inner.function_calls.len() + } + + #[getter] + fn promise_chain_count(&self) -> usize { + self.inner.promise_chains.len() + } + + #[getter] + fn dependency_count(&self) -> usize { + self.inner.dependencies.len() + } + + #[getter] + fn subclass_edge_count(&self) -> usize { + self.inner.subclass_edges.len() + } + + fn __repr__(&self) -> String { + let summary = self.inner.summary(); + format!( + "TypeScriptIndex(files={}, symbols={}, imports={}, import_resolutions={}, exports={}, references={}, dependencies={})", + summary.files, + summary.symbols, + summary.imports, + summary.import_resolutions, + self.inner.exports.len(), + summary.references, + summary.dependencies + ) + } + } + + impl PyTypeScriptIndex { + fn symbol_ids_by_kind(&self, kind: SymbolKind) -> Vec { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.kind == kind) + .map(|symbol| symbol.id) + .collect() + } + + fn symbol_count_by_kind(&self, kind: SymbolKind) -> usize { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.kind == kind) + .count() + } + + fn top_level_symbol_count_by_kind(&self, kind: SymbolKind) -> usize { + self.inner + .symbols + .iter() + .filter(|symbol| symbol.is_top_level && symbol.kind == kind) + 
.count() + } + } + + #[pyclass(name = "Engine", module = "graph_sitter_py")] + #[derive(Debug, Default, Clone)] + pub struct PyEngine { + inner: Engine, + } + + #[pymethods] + impl PyEngine { + #[new] + fn new() -> Self { + Self { + inner: Engine::new(), + } + } + + #[getter] + fn version(&self) -> &str { + self.inner.version() + } + + fn enabled_features(&self) -> Vec { + self.inner + .enabled_features() + .iter() + .map(|feature| (*feature).to_owned()) + .collect() + } + + fn debug_info(&self) -> PyEngineInfo { + self.inner.debug_info().into() + } + + fn index_python_path(&self, repo_path: &str) -> PyResult { + index_python_path_impl(repo_path) + } + + fn index_python_paths( + &self, + repo_path: &str, + file_paths: Vec, + ) -> PyResult { + index_python_paths_impl(repo_path, file_paths) + } + + fn index_typescript_path(&self, repo_path: &str) -> PyResult { + index_typescript_path_impl(repo_path) + } + + fn index_typescript_paths( + &self, + repo_path: &str, + file_paths: Vec, + ) -> PyResult { + index_typescript_paths_impl(repo_path, file_paths) + } + } + + #[pyfunction(name = "engine_version")] + fn py_engine_version() -> &'static str { + graph_sitter_engine::engine_version() + } + + #[pyfunction(name = "debug_info")] + fn py_debug_info() -> PyEngineInfo { + graph_sitter_engine::debug_info().into() + } + + #[pyfunction(name = "index_python_path")] + fn py_index_python_path(repo_path: &str) -> PyResult { + index_python_path_impl(repo_path) + } + + #[pyfunction(name = "index_python_paths")] + fn py_index_python_paths(repo_path: &str, file_paths: Vec) -> PyResult { + index_python_paths_impl(repo_path, file_paths) + } + + #[pyfunction(name = "index_typescript_path")] + fn py_index_typescript_path(repo_path: &str) -> PyResult { + index_typescript_path_impl(repo_path) + } + + #[pyfunction(name = "index_typescript_paths")] + fn py_index_typescript_paths( + repo_path: &str, + file_paths: Vec, + ) -> PyResult { + index_typescript_paths_impl(repo_path, file_paths) + } + + 
fn index_python_path_impl(repo_path: &str) -> PyResult { + let path = Path::new(repo_path); + if !path.exists() { + return Err(PyValueError::new_err(format!( + "repo path does not exist: {repo_path}" + ))); + } + graph_sitter_engine::index_python_path(path) + .map(PyPythonIndex::from) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn index_python_paths_impl( + repo_path: &str, + file_paths: Vec, + ) -> PyResult { + let path = Path::new(repo_path); + if !path.exists() { + return Err(PyValueError::new_err(format!( + "repo path does not exist: {repo_path}" + ))); + } + graph_sitter_engine::index_python_paths(path, file_paths) + .map(PyPythonIndex::from) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn index_typescript_path_impl(repo_path: &str) -> PyResult { + let path = Path::new(repo_path); + if !path.exists() { + return Err(PyValueError::new_err(format!( + "repo path does not exist: {repo_path}" + ))); + } + graph_sitter_engine::index_typescript_path(path) + .map(PyTypeScriptIndex::from) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn index_typescript_paths_impl( + repo_path: &str, + file_paths: Vec, + ) -> PyResult { + let path = Path::new(repo_path); + if !path.exists() { + return Err(PyValueError::new_err(format!( + "repo path does not exist: {repo_path}" + ))); + } + graph_sitter_engine::index_typescript_paths(path, file_paths) + .map(PyTypeScriptIndex::from) + .map_err(|error| PyRuntimeError::new_err(error.to_string())) + } + + fn import_lookup_candidates( + module: Option<&str>, + name: Option<&str>, + alias: Option<&str>, + lookup: &str, + ) -> bool { + let lookup = lookup.trim(); + [alias, name, module] + .into_iter() + .flatten() + .filter(|value| !value.is_empty()) + .any(|value| { + let unquoted = value.trim_matches(['\'', '"', '`']); + lookup == value + || lookup.contains(value) + || (!unquoted.is_empty() && (lookup == unquoted || lookup.contains(unquoted))) + }) + } + + fn 
ranges_overlap( + record_start: usize, + record_end: usize, + query_start: usize, + query_end: usize, + ) -> bool { + if query_start == query_end { + record_start <= query_start && query_start < record_end + } else { + record_start < query_end && query_start < record_end + } + } + + #[pymodule] + fn graph_sitter_py(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(py_engine_version, m)?)?; + m.add_function(wrap_pyfunction!(py_debug_info, m)?)?; + m.add_function(wrap_pyfunction!(py_index_python_path, m)?)?; + m.add_function(wrap_pyfunction!(py_index_python_paths, m)?)?; + m.add_function(wrap_pyfunction!(py_index_typescript_path, m)?)?; + m.add_function(wrap_pyfunction!(py_index_typescript_paths, m)?)?; + Ok(()) + } + + #[cfg(test)] + mod tests { + use super::*; + use std::fs; + use std::path::PathBuf; + use std::time::{SystemTime, UNIX_EPOCH}; + + #[test] + fn debug_info_forwards_core_engine_metadata() { + let info = py_debug_info(); + + assert_eq!(info.version, graph_sitter_engine::engine_version()); + assert_eq!( + info.enabled_features, + vec![ + "skeleton".to_owned(), + "python-index".to_owned(), + "typescript-index".to_owned() + ] + ); + } + + #[test] + fn py_engine_indexes_python_path() { + let repo = temp_repo_path("py-binding-index"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write( + repo.join("pkg/mod.py"), + "import os\n\nclass Service:\n pass\n\ndef helper():\n return os.getcwd()\n", + ) + .unwrap(); + + let index = PyEngine::new() + .index_python_path(repo.to_str().unwrap()) + .unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let summary = index.summary(); + assert_eq!(summary.files, 1); + assert_eq!(summary.classes, 1); + assert_eq!(summary.functions, 1); + assert_eq!(summary.imports, 1); + assert!(index.to_json().unwrap().contains("\"Service\"")); + assert!(index + .file_by_path_ignore_case_json("PKG/MOD.PY") + 
.unwrap() + .contains("\"pkg/mod.py\"")); + assert_eq!( + index + .file_by_path_ignore_case_json("PKG/MISSING.PY") + .unwrap(), + "null" + ); + assert!(index + .symbols_for_file_by_byte_range_json(0, 0, 1_000) + .unwrap() + .contains("\"Service\"")); + assert!(index + .imports_for_file_by_byte_range_json(0, 0, 9) + .unwrap() + .contains("\"os\"")); + } + + #[test] + fn py_engine_indexes_selected_python_paths() { + let repo = temp_repo_path("py-binding-index-paths"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/included.py"), "class Included:\n pass\n").unwrap(); + fs::write(repo.join("pkg/skipped.py"), "class Skipped:\n pass\n").unwrap(); + + let index = PyEngine::new() + .index_python_paths(repo.to_str().unwrap(), vec!["pkg/included.py".to_owned()]) + .unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let summary = index.summary(); + assert_eq!(summary.files, 1); + assert_eq!(summary.classes, 1); + assert!(index.to_json().unwrap().contains("\"Included\"")); + assert!(!index.to_json().unwrap().contains("\"Skipped\"")); + } + + #[test] + fn py_engine_exposes_import_resolution_count() { + let repo = temp_repo_path("py-binding-import-resolution"); + fs::create_dir_all(repo.join("pkg")).unwrap(); + fs::write(repo.join("pkg/__init__.py"), "").unwrap(); + fs::write( + repo.join("pkg/base.py"), + "CONSTANT = 'base'\nclass Base:\n pass\n", + ) + .unwrap(); + fs::write( + repo.join("pkg/service.py"), + "from .base import Base, CONSTANT\n\nclass Service(Base):\n pass\n", + ) + .unwrap(); + + let index = PyEngine::new() + .index_python_path(repo.to_str().unwrap()) + .unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let summary = index.summary(); + assert_eq!(summary.global_variables, 1); + assert_eq!(summary.import_resolutions, 2); + assert_eq!(summary.references, 1); + assert_eq!(summary.dependencies, 1); + assert_eq!(index.import_resolution_count(), 2); + assert_eq!(index.external_module_count(), 0); + assert_eq!(index.reference_count(), 
1); + assert_eq!(index.dependency_count(), 1); + assert_eq!(index.file_ids(), vec![0, 1, 2]); + assert_eq!(index.symbol_ids(), vec![0, 1, 2]); + assert_eq!(index.top_level_symbol_ids(), vec![0, 1, 2]); + assert_eq!(index.class_ids(), vec![1, 2]); + assert_eq!(index.function_ids(), Vec::::new()); + assert_eq!(index.global_variable_ids(), vec![0]); + assert_eq!(index.import_ids(), vec![0, 1]); + assert!(index.files_json().unwrap().contains("\"pkg/base.py\"")); + assert!(index + .files_json() + .unwrap() + .contains("\"language\":\"python\"")); + assert!(index.files_json().unwrap().contains("\"content_hash\"")); + assert!(index.symbols_json().unwrap().contains("\"CONSTANT\"")); + assert!(index.symbols_json().unwrap().contains("\"Base\"")); + assert!(index.imports_json().unwrap().contains("\".base\"")); + assert!(index + .import_resolutions_json() + .unwrap() + .contains("target_symbol_id")); + assert_eq!(index.external_modules_json().unwrap(), "[]"); + assert!(index.references_json().unwrap().contains("\"Base\"")); + assert!(index + .dependencies_json() + .unwrap() + .contains("reference_count")); + assert!(index.to_json().unwrap().contains("import_resolutions")); + assert!(index.to_json().unwrap().contains("references")); + assert!(index.to_json().unwrap().contains("dependencies")); + let debug_graph: serde_json::Value = + serde_json::from_str(&index.debug_graph_json().unwrap()).unwrap(); + let nodes = debug_graph["nodes"].as_array().unwrap(); + let edges = debug_graph["edges"].as_array().unwrap(); + assert!(nodes.iter().any(|node| { + node.get("id").and_then(serde_json::Value::as_str) == Some("symbol:2") + && node.get("node_type").and_then(serde_json::Value::as_str) == Some("symbol") + })); + assert!(edges.iter().any(|edge| { + edge.get("edge_type").and_then(serde_json::Value::as_str) + == Some("import_resolution") + && edge.get("source").and_then(serde_json::Value::as_str) == Some("import:0") + && edge.get("target").and_then(serde_json::Value::as_str) == 
Some("symbol:1") + })); + assert!(edges.iter().any(|edge| { + edge.get("edge_type").and_then(serde_json::Value::as_str) == Some("dependency") + && edge.get("reference_ids") == Some(&serde_json::json!([0])) + && edge.get("reference_count") == Some(&serde_json::json!(1)) + })); + } + + #[test] + fn py_engine_indexes_typescript_path() { + let repo = temp_repo_path("py-binding-typescript-index"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write( + repo.join("src/app.tsx"), + "import React from 'react';\nimport { helper } from './util';\nexport function Page() { return helper(
); }\n", + ) + .unwrap(); + fs::write( + repo.join("src/util.ts"), + "export function helper(value: unknown) { return value; }\n", + ) + .unwrap(); + fs::write(repo.join("src/skipped.py"), "class Skipped:\n pass\n").unwrap(); + + let index = PyEngine::new() + .index_typescript_path(repo.to_str().unwrap()) + .unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + let summary = index.summary(); + assert_eq!(summary.files, 2); + assert_eq!(summary.functions, 2); + assert_eq!(summary.imports, 2); + assert_eq!(summary.import_resolutions, 1); + assert_eq!(summary.references, 1); + assert_eq!(summary.dependencies, 1); + assert_eq!(index.import_resolution_count(), 1); + assert_eq!(index.external_module_count(), 1); + assert_eq!(index.export_count(), 2); + assert_eq!(index.reference_count(), 1); + assert_eq!(index.dependency_count(), 1); + assert_eq!(index.subclass_edge_count(), 0); + assert_eq!(index.function_call_count(), 1); + assert_eq!(index.promise_chain_count(), 0); + assert_eq!(index.file_ids(), vec![0, 1]); + assert_eq!(index.symbol_ids(), vec![0, 1]); + assert_eq!(index.top_level_symbol_ids(), vec![0, 1]); + assert_eq!(index.class_ids(), Vec::::new()); + assert_eq!(index.function_ids(), vec![0, 1]); + assert_eq!(index.global_variable_ids(), Vec::::new()); + assert_eq!(index.interface_ids(), Vec::::new()); + assert_eq!(index.type_ids(), Vec::::new()); + assert_eq!(index.enum_ids(), Vec::::new()); + assert_eq!(index.namespace_ids(), Vec::::new()); + assert_eq!(index.import_ids(), vec![0, 1]); + assert_eq!(index.export_ids(), vec![0, 1]); + assert!(index + .file_by_path_ignore_case_json("SRC/APP.TSX") + .unwrap() + .contains("\"src/app.tsx\"")); + assert_eq!( + index + .file_by_path_ignore_case_json("SRC/MISSING.TSX") + .unwrap(), + "null" + ); + assert!(index.files_json().unwrap().contains("\"src/app.tsx\"")); + assert!(index.files_json().unwrap().contains("\"language\":\"tsx\"")); + assert!(index.files_json().unwrap().contains("\"content_hash\"")); + 
assert!(index.symbols_json().unwrap().contains("\"Page\"")); + assert!(index.imports_json().unwrap().contains("\"default_import\"")); + assert!(index + .import_resolutions_json() + .unwrap() + .contains("target_symbol_id")); + assert!(index.external_modules_json().unwrap().contains("\"React\"")); + assert!(index.exports_json().unwrap().contains("\"Page\"")); + assert!(index.references_json().unwrap().contains("\"helper\"")); + assert!(index + .dependencies_json() + .unwrap() + .contains("reference_count")); + assert!(index + .symbols_for_file_by_byte_range_json(0, 0, 1_000) + .unwrap() + .contains("\"Page\"")); + assert!(index + .imports_for_file_by_byte_range_json(0, 0, 30) + .unwrap() + .contains("\"React\"")); + assert!(index + .exports_for_file_by_byte_range_json(0, 0, 1_000) + .unwrap() + .contains("\"Page\"")); + assert_eq!(index.subclass_edges_json().unwrap(), "[]"); + assert!(index.to_json().unwrap().contains("\"import_resolutions\"")); + assert!(index.to_json().unwrap().contains("\"external_modules\"")); + assert!(index.to_json().unwrap().contains("\"exports\"")); + assert!(index.to_json().unwrap().contains("\"references\"")); + assert!(index.to_json().unwrap().contains("\"dependencies\"")); + assert!(index.to_json().unwrap().contains("\"subclass_edges\"")); + let debug_graph: serde_json::Value = + serde_json::from_str(&index.debug_graph_json().unwrap()).unwrap(); + let nodes = debug_graph["nodes"].as_array().unwrap(); + let edges = debug_graph["edges"].as_array().unwrap(); + assert!(nodes.iter().any(|node| { + node.get("id").and_then(serde_json::Value::as_str) == Some("export:0") + && node.get("node_type").and_then(serde_json::Value::as_str) == Some("export") + })); + assert!(edges.iter().any(|edge| { + edge.get("edge_type").and_then(serde_json::Value::as_str) == Some("export_symbol") + && edge.get("source").and_then(serde_json::Value::as_str) == Some("export:0") + && edge.get("target").and_then(serde_json::Value::as_str) == Some("symbol:0") + })); + 
assert!(edges.iter().any(|edge| { + edge.get("edge_type").and_then(serde_json::Value::as_str) == Some("reference") + && edge.get("name").and_then(serde_json::Value::as_str) == Some("helper") + })); + } + + #[test] + fn py_engine_indexes_selected_typescript_paths() { + let repo = temp_repo_path("py-binding-typescript-paths"); + fs::create_dir_all(repo.join("src")).unwrap(); + fs::write(repo.join("src/included.ts"), "export class Included {}\n").unwrap(); + fs::write(repo.join("src/skipped.ts"), "export class Skipped {}\n").unwrap(); + + let index = PyEngine::new() + .index_typescript_paths(repo.to_str().unwrap(), vec!["src/included.ts".to_owned()]) + .unwrap(); + fs::remove_dir_all(&repo).unwrap(); + + assert_eq!(index.file_count(), 1); + assert_eq!(index.symbol_count(), 1); + assert_eq!(index.export_count(), 1); + assert!(index.to_json().unwrap().contains("\"Included\"")); + assert!(!index.to_json().unwrap().contains("\"Skipped\"")); + } + + fn temp_repo_path(prefix: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + std::env::temp_dir().join(format!("graph-sitter-{prefix}-{nanos}")) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn forwards_core_engine_metadata_without_python_linking() { + assert_eq!(engine_version(), graph_sitter_engine::engine_version()); + assert_eq!( + enabled_features(), + ["skeleton", "python-index", "typescript-index"] + ); + } +} diff --git a/docs/README.md b/docs/README.md index 7be2d9c20..812a5a7e7 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,17 +1,43 @@ # Graph-sitter Docs -## Development +The checked-in docs are a Mintlify project. Keep them separate from the Vercel +landing app in `../site`; the repo does not currently contain a Vercel-buildable +static export of these docs. 
-From within the `docs/` subdirectory: +## Local Development + +From this directory: ```bash -npm i -g mintlify -mintlify dev --port 3333 +npx --yes mintlify@latest dev --port 3333 ``` -You should then get hot reloading. Also recommend installing the `MDX` extension for VSCode. +Open the printed localhost URL. The `MDX` editor extension is useful when +editing these pages. + +## Validation + +```bash +npx --yes mintlify@latest validate +npx --yes mintlify@latest broken-links +``` + +Run these before moving navigation entries or changing page slugs. + +The current CLI may print a legacy-config warning and generate `docs.json` from +`mint.json`. Treat `mint.json` as the checked-in source of truth until a docs +config migration is explicitly approved. ## Adding New Pages -- Edit the page as a `.mdx` doc -- Make sure to edit `mint.json` so it appears in the UI +- Edit the page as a `.mdx` doc. +- Add the page path to `mint.json` so it appears in the navigation. +- Keep generated API reference pages under `api-reference/` in sync with the + docs generation workflow. + +## Hosting + +Mintlify should continue to host the docs tree. The recommended launch sequence +is to keep the current docs production domain untouched, review the Vercel +landing preview from `../site`, then move or confirm docs at +`docs.graph-sitter.com` before the apex domain moves to Vercel. diff --git a/docs/benchmarks/large-repos.mdx b/docs/benchmarks/large-repos.mdx new file mode 100644 index 000000000..e8bc4be75 --- /dev/null +++ b/docs/benchmarks/large-repos.mdx @@ -0,0 +1,105 @@ +--- +title: "Large-Repo Benchmarks" +sidebarTitle: "Large Repos" +icon: "gauge" +iconType: "solid" +--- + +Graph-sitter's Rust backend is being validated against pinned large repositories +before it becomes the default backend. The current benchmark evidence is for the +`rust-rewrite` branch and branch-built wheels, not a final PyPI release. 
+ + + Use these numbers as release evidence for the current implementation shape: + Python remains the authoring shell, while Rust owns compact parse/index graph + storage for the supported subset. Hardware, cache state, and package artifact + shape can change absolute times. + + +## Pinned Repositories + +| Repository | Ref | Commit | Language mode | +| --- | --- | --- | --- | +| Apache Airflow | `2.10.5` | `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf` | Python | +| Next.js | `v15.0.0` | `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3` | TypeScript, JavaScript, React | + +## Codebase Construction + +These measurements use real `Codebase(...)` construction with +`CodebaseConfig(graph_backend="rust", rust_fallback="error")`. In strict Rust +mode, the old eager Python graph is blocked after the compact Rust index builds. + +| Repository | Python wall | Python max RSS | Rust wall | Rust max RSS | Wall improvement | RSS improvement | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Apache Airflow `2.10.5` | 18.940s | 3469.5 MB | 4.085s | 266.2 MB | 4.637x | 13.031x | +| Next.js `v15.0.0` | 24.959s | 3100.1 MB | 10.465s | 435.9 MB | 2.385x | 7.112x | + +The Airflow row exercises compact Python files, symbols, imports, import +resolution, references, dependencies, and Python compatibility handles. The +Next.js row exercises TypeScript/JavaScript files, symbols, imports, exports, +relative and tsconfig path resolution, references, dependencies, subclass edges, +read-only function call records, and read-only Promise-chain records. + +## Installed-Wheel uvx Proof + +Branch-built wheels are also tested through `uvx --from dist/<wheel>.whl +graph-sitter ...`, which proves package contents and CLI entry points rather +than relying on an editable checkout.
+ +| Repository | Backend | Parse elapsed | `uvx` outer wall | Sampled RSS | Parse improvement | RSS improvement | +| --- | --- | ---: | ---: | ---: | ---: | ---: | +| Apache Airflow `2.10.5` | Rust strict | 4.913s | 6.064s | 487.0 MB | 9.818x vs Python parse | 11.148x | +| Apache Airflow `2.10.5` | Python | 48.242s | 77.649s | 5429.3 MB | baseline | baseline | +| Next.js `v15.0.0` | Rust strict | 10.352s | 11.508s | 537.5 MB | 5.598x vs Python parse | 8.383x | +| Next.js `v15.0.0` | Python | 57.956s | 78.107s | 4505.6 MB | baseline | baseline | + +The installed-wheel Rust parse paths matched the committed compact golden +summaries for both repositories. + +## Codemod Proof + +The same branch-built wheel gates run real write-mode transforms on temporary +clones: + +| Repository | Transform proof | Validation | +| --- | --- | --- | +| Apache Airflow `2.10.5` | Adds `from typing import Any` and renames `__getattr__` in `airflow/__init__.py` | Only `airflow/__init__.py` changed | +| Next.js `v15.0.0` | Adds an import, renames `AppRouterAnnouncer`, and updates its importing usage | Only `app-router-announcer.tsx` and `app-router.tsx` changed | + +These checks prove that the Rust-backed Python shell can run selected codemods +without materializing the old Python graph. + +## Reproduce + +Run the fast local gate: + +```bash +rust-rewrite/tools/check_fast.sh +``` + +Run the opt-in large-repo gate: + +```bash +rust-rewrite/tools/check_pinned_large_repos.sh +``` + +Run branch-built wheel proofs: + +```bash +rust-rewrite/tools/check_wheel_rust_backend.sh +uv run python rust-rewrite/tools/check_wheel_pinned_python_repo.py --compare-python-backend --run-transform-proof +uv run python rust-rewrite/tools/check_wheel_pinned_typescript_repo.py --compare-python-backend --run-transform-proof +``` + +See [uvx workflows](/cli/uvx) for the user-facing CLI form. + +## Caveats + +- Rust is still opt-in. 
The Python backend remains the default until rollout + gates, parity gates, and release artifact validation pass. +- The installed-wheel results are branch-built wheel results. Published-package + `uvx graph-sitter ...` claims require a separate clean-environment release + transcript. +- Counts are compared against compact Rust golden summaries, selected semantic + parity checks, and codemod file-diff assertions. Full graph-wide semantic + equality is still tracked separately before default-backend promotion. diff --git a/docs/cli/about.mdx b/docs/cli/about.mdx index f93afd221..719e75f46 100644 --- a/docs/cli/about.mdx +++ b/docs/cli/about.mdx @@ -5,10 +5,12 @@ icon: "square-info" iconType: "solid" --- -The graph_sitter.cli helps you: +The Graph-sitter CLI helps you: +- Parse a local repository into graph summary data - Initialize Graph-sitter in your repository - Create and run codemods +- Run one-shot transformations by import path - Work with AI assistance @@ -18,16 +20,31 @@ The graph_sitter.cli helps you: ## Getting Started -1. **Initialize Codegen** in your repository: +1. **Parse a repository** without project initialization: ```bash -gs init +graph-sitter parse . --format json ``` -2. **Create your first codemod**: +Run the same command from a published package with `uvx`: ```bash -gs create my-codemod --description "What you want to accomplish" +uvx --python 3.13 graph-sitter parse . --format json +``` + +See [uvx workflows](/cli/uvx) for branch-built wheel validation and release +gate details. + +2. **Initialize Graph-sitter** in your repository: + +```bash +graph-sitter init +``` + +3. **Create your first codemod**: + +```bash +graph-sitter create my-codemod --description "What you want to accomplish" ``` The `--description` flag enables AI assistance to help generate your codemod. Be as specific as possible about what you want to achieve. @@ -38,14 +55,26 @@ The `--description` flag enables AI assistance to help generate your codemod. 
Be Initialize Graph-sitter in your repository. + + Check package, parser dependency, and Rust backend readiness. + + + Run Graph-sitter from a package or wheel in a clean temporary environment. + + + Parse a repository and print graph summary counts. + Create new codemods with optional AI assistance. - Execute codemods with various options. + Execute registered codemods with check and write modes. + + + Run one-shot codemods by Python import path or file path. - Get help from the Graph-sitter AI expert. + Use Graph-sitter docs and project context with an AI assistant. diff --git a/docs/cli/doctor.mdx b/docs/cli/doctor.mdx new file mode 100644 index 000000000..b3336772a --- /dev/null +++ b/docs/cli/doctor.mdx @@ -0,0 +1,72 @@ +--- +title: "Doctor Command" +sidebarTitle: "doctor" +icon: "stethoscope" +iconType: "solid" +--- + +The `doctor` command checks whether Graph-sitter can run in the current Python +environment. It is useful for setup, CI, and agent workflows before parsing a +large repository or running a codemod. + +```bash +graph-sitter doctor +``` + +## Usage + +```bash +graph-sitter doctor [OPTIONS] +``` + +## Options + +- `--backend python|rust`: Choose the backend readiness check. Defaults to + `python`. +- `--language python|typescript`: Choose the language for the optional Rust + parse smoke. Defaults to `python`. +- `--json`: Print machine-readable diagnostics. + +## Python Readiness + +Python readiness checks package metadata, platform information, parser +dependencies, and whether the optional Rust extension is importable. The Rust +extension may be unavailable while Python-backed commands still work. + +```bash +graph-sitter doctor --json +``` + +## Rust Readiness + +Rust readiness also creates a temporary tiny repository and runs a strict Rust +parse smoke. This fails if the `graph_sitter_py` extension is unavailable or if +strict Rust parsing cannot build an index. 
+ +```bash +graph-sitter doctor --backend rust --language python --json +graph-sitter doctor --backend rust --language typescript --json +``` + +Use strict Rust readiness before relying on `--backend rust --fallback error` in +benchmarks, CI, or release validation. + +## With uvx + +For published package workflows, use the same command through `uvx`: + +```bash +uvx --python 3.13 graph-sitter doctor --json +``` + +For branch-built wheel validation, point `uvx` at the wheel artifact: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter doctor --backend rust --json +``` + + + The Rust backend is still release-gated. Public setup docs should only claim + Rust-backed `uvx graph-sitter` support for artifacts that have passed the + wheel smoke tests. + diff --git a/docs/cli/expert.mdx b/docs/cli/expert.mdx new file mode 100644 index 000000000..4f5fb8a3c --- /dev/null +++ b/docs/cli/expert.mdx @@ -0,0 +1,58 @@ +--- +title: "AI Assistance" +sidebarTitle: "expert" +icon: "robot" +iconType: "solid" +--- + +Use this workflow when you want an AI assistant to help write, review, or refine +Graph-sitter codemods. + + + This branch does not expose a standalone `gs expert` shell command. The + `expert` page describes the AI-assistance workflow around the documented CLI + commands. + + +## Recommended Flow + +1. Initialize Graph-sitter in the target repository. + +```bash +gs init +``` + +2. Scaffold a codemod. + +```bash +gs create rename-function . +``` + +3. Ask your assistant to edit the generated codemod with Graph-sitter APIs such +as `Codebase`, `Function`, `Import`, and `commit()`. + +4. Run the codemod in check mode before applying changes. + +```bash +gs run rename-function . --check +``` + +5. Apply the codemod after reviewing the diff. + +```bash +gs run rename-function . --write +``` + +## What To Give The Assistant + +- The goal of the codemod. +- The target repository path and language. +- Any expected symbol names, import paths, or file patterns. 
+- Whether the run should use the Python backend or the opt-in Rust backend. +- The generated codemod file under `.codegen/codemods/`. + +## Related Commands + +- [`create`](/cli/create): scaffold a codemod file. +- [`run`](/cli/run): execute a local codemod. +- [`parse`](/cli/parse): inspect the codebase graph before editing. diff --git a/docs/cli/parse.mdx b/docs/cli/parse.mdx new file mode 100644 index 000000000..0d27e8e65 --- /dev/null +++ b/docs/cli/parse.mdx @@ -0,0 +1,107 @@ +--- +title: "Parse Command" +sidebarTitle: "parse" +icon: "diagram-project" +iconType: "solid" +--- + +The `parse` command reads a local repository and prints graph summary counts for +files, symbols, imports, exports, references, and dependencies. + +```bash +graph-sitter parse . +``` + +## Usage + +```bash +graph-sitter parse [PATH] [OPTIONS] +``` + +`PATH` defaults to the current directory. The command does not require +`.codegen` initialization or an active session. + +## Options + +- `--backend python|rust|auto`: Choose the graph backend. Defaults to `python`. +- `--fallback python|error`: Choose fallback behavior when the Rust backend is + unavailable. Defaults to `error`. +- `--language auto|python|typescript`: Choose the repository language. Defaults + to `auto`. +- `--format summary|json`: Choose human-readable or machine-readable output. + Defaults to `summary`. +- `--output FILE`: Write JSON output to a file. Requires `--format json`. +- `--subdir PATH`: Limit parsing to a repository-relative subdirectory or file. + Pass this option more than once to include multiple paths. + +## JSON Output + +Use JSON output in scripts, CI, and agent workflows: + +```bash +graph-sitter parse . --language python --backend python --format json +``` + +The JSON payload includes the requested backend, actual backend, language, +elapsed time, selected subdirectories, and graph count fields. It also includes +`schema_version`; the current parse summary schema is version `1`.
+ +Write JSON to a file when the parse output is large or consumed by another +tool: + +```bash +graph-sitter parse . --language python --format json --output graph-sitter-index.json +``` + +`--output` is JSON-only. Human-readable `summary` output is always printed to +stdout. + +## Subdirectory Parsing + +Use `--subdir` to avoid parsing an entire large repository when the task is +localized: + +```bash +graph-sitter parse . --language python --subdir src --format json +graph-sitter parse . --language typescript --subdir packages/app --subdir packages/ui --format json +``` + +Subdirectory paths are resolved relative to `PATH`. Absolute paths are accepted +only when they are inside the repository. Directory filters are passed into the +same file-discovery path used by the Python and Rust backends. + +## Rust Backend + +Use strict Rust mode for performance validation: + +```bash +graph-sitter parse . --language python --backend rust --fallback error --format json +``` + +Use `doctor` first when validating a new installation: + +```bash +graph-sitter doctor --backend rust --language python --json +``` + + + Rust-backed parsing requires a package artifact that includes the + `graph_sitter_py` extension. Until a release has passed the wheel and + published-package checks, prefer branch-built wheel commands for Rust + validation. + + +## With uvx + +Published package form: + +```bash +uvx --python 3.13 graph-sitter parse . --language python --format json +uvx --python 3.13 graph-sitter parse . --language python --format json --output graph-sitter-index.json +``` + +Branch-built wheel validation form: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter parse . 
--language python --backend rust --fallback error --format json +``` diff --git a/docs/cli/run.mdx b/docs/cli/run.mdx index aaa0a7a40..b5ac8bc89 100644 --- a/docs/cli/run.mdx +++ b/docs/cli/run.mdx @@ -5,61 +5,111 @@ icon: "play" iconType: "solid" --- -The `run` command executes a codemod against your local codebase, showing you the changes and applying them to your filesystem. +The `run` command executes a registered codemod from `.codegen/codemods` +against a local repository. ```bash -gs run rename-function +graph-sitter run rename-function . --check ``` ## Usage ```bash -gs run LABEL [OPTIONS] +graph-sitter run LABEL [PATH] [OPTIONS] ``` +`PATH` defaults to the active Graph-sitter session when omitted. Pass `PATH` +explicitly for repeatable `uvx` and CI usage. + ## Arguments - `LABEL`: The name of the codemod to run (e.g., "rename-function") +- `PATH`: Optional repository path to transform ## Options -- `--diff-preview N`: Show a preview of the first N lines of the diff -- `--arguments JSON`: Pass arguments to the codemod as a JSON string (required if the codemod expects arguments) +- `--backend python|rust|auto`: Choose the graph backend. +- `--fallback python|error`: Choose fallback behavior when the Rust backend is + unavailable or unsupported for the requested API. +- `--language auto|python|typescript`: Choose the repository language. +- `--subdir PATH`: Limit parsing to a repository-relative subdirectory or file. + Repeat to include multiple paths. This overrides any subdirectories declared + by the registered codemod for this invocation. +- `--arguments JSON`: Pass arguments to the codemod as a JSON object. Typed + Pydantic argument models are validated when present. +- `--diff-preview N`: Show the first N lines of the produced diff. +- `--check`: Run in a temporary copied repository, print the diff, leave the + target unchanged, and exit non-zero when changes would be produced. +- `--write`: Apply changes to the target repository. 
## Examples -Run a codemod: +Preview a registered codemod: + +```bash +graph-sitter run rename-function . --check +``` + +Apply a registered codemod: + ```bash -gs run rename-function +graph-sitter run rename-function . --write ``` -Run with a diff preview limited to 50 lines: +Run with arguments: + +```bash +graph-sitter run rename-function . --arguments '{"old_name":"getUserData","new_name":"fetchUserProfile"}' --check +``` + +Run against only part of a large repository: + ```bash -gs run rename-function --diff-preview 50 +graph-sitter run rename-function ./airflow --subdir airflow/providers --subdir tests/providers --check ``` -Run with arguments (for codemods that require them): +Run against a TypeScript repository: + ```bash -gs run rename-function --arguments '{"old_name": "getUserData", "new_name": "fetchUserProfile"}' +graph-sitter run rename-component ./next.js --language typescript --backend auto --fallback python --check ``` -## Output +## With uvx -The command will: -1. Parse your codebase -2. Run the codemod -3. Show a diff preview (if requested) -4. Apply changes to your filesystem +Published package form after release: + +```bash +uvx --python 3.13 graph-sitter run rename-function . --arguments '{"old_name":"getUserData","new_name":"fetchUserProfile"}' --check +uvx --python 3.13 graph-sitter run rename-function ./airflow --subdir airflow/providers --check +``` + +Branch-built wheel validation form before release: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter run rename-function . --backend rust --fallback error --check +``` + +## Safety Model + +Use `--check` before `--write` for normal workflows. `--check` copies the target +repository to a temporary Git repo before running the codemod, so codemods that +call `codebase.commit()` internally do not mutate the original checkout. + +For compatibility, `run` may still apply changes when neither `--check` nor +`--write` is supplied. 
New docs, CI, and agent workflows should always pass an +explicit mode. ## Execution Flow -When you run a codemod: -1. Graph-sitter parses your entire codebase into a graph representation -2. The codemod function is executed against this graph -3. Any changes made by the codemod are tracked -4. Changes are automatically applied to your local files -5. A summary of changes is displayed +When you run a codemod, Graph-sitter: + +1. Parses the selected repository into a code graph. +2. Resolves the registered codemod by `LABEL`. +3. Validates and passes `--arguments` when required. +4. Executes the codemod against the graph. +5. Prints a diff preview and either reports or applies the changes. -The codebase parsing step may take a few moments for larger codebases. Learn more in [How it Works](/introduction/how-it-works.mdx) + For one-shot transformations that are not registered under + `.codegen/codemods`, use [`transform`](/cli/transform). diff --git a/docs/cli/transform.mdx b/docs/cli/transform.mdx new file mode 100644 index 000000000..853a3f6a1 --- /dev/null +++ b/docs/cli/transform.mdx @@ -0,0 +1,106 @@ +--- +title: "Transform Command" +sidebarTitle: "transform" +icon: "wand-magic-sparkles" +iconType: "solid" +--- + +The `transform` command runs an ad hoc Python transform by import path or file +path. Use it when you want a one-shot codemod without registering a function in +`.codegen/codemods`. + +```bash +graph-sitter transform ./codemods/rename.py:rename . --check +``` + +## Usage + +```bash +graph-sitter transform MODULE:OBJECT [PATH] (--check | --write) [OPTIONS] +``` + +`PATH` defaults to the current directory. + +## Arguments + +- `MODULE:OBJECT`: Import path or Python file path plus the callable object to + run. Examples: `my_package.rename:run`, `./codemods/rename.py:run`, + `./codemods/rename.py:MyCodemod`. +- `PATH`: Optional repository path to transform. 
+ +`OBJECT` may be a plain function, a `codemods.codemod.Codemod` subclass, a +`Codemod` instance, or an object exposing `execute(codebase)`. + +## Options + +- `--backend python|rust|auto`: Choose the graph backend. +- `--fallback python|error`: Choose fallback behavior when the Rust backend is + unavailable or unsupported for the requested API. +- `--language auto|python|typescript`: Choose the repository language. +- `--arguments JSON`: Pass arguments as a JSON object. If the transform accepts + a typed argument model, Graph-sitter validates it before execution. +- `--subdir PATH`: Limit parsing to a repository-relative subdirectory or file. + Pass this option more than once to include multiple paths. +- `--diff-preview N`: Show the first N lines of the produced diff. +- `--check`: Run in a temporary copied repository, print the diff, leave the + target unchanged, and exit non-zero when changes would be produced. +- `--write`: Apply changes to the target repository. + +`--check` and `--write` are mutually exclusive, and one of them is required. 
+ +## Examples + +Preview a Python transform: + +```bash +graph-sitter transform ./codemods/rename.py:rename ./service --language python --arguments '{"new_name":"renamed"}' --check +``` + +Apply the same transform: + +```bash +graph-sitter transform ./codemods/rename.py:rename ./service --language python --arguments '{"new_name":"renamed"}' --write +``` + +Preview a TypeScript transform: + +```bash +graph-sitter transform ./codemods/rename_component.py:rename ./next.js --language typescript --backend auto --fallback python --arguments '{"new_name":"RenamedCard"}' --check +``` + +Limit a transform to one package in a large repository: + +```bash +graph-sitter transform ./codemods/rename.py:rename ./monorepo --subdir packages/app --arguments '{"new_name":"renamed"}' --check +``` + +Validate strict Rust behavior from a branch-built wheel: + +```bash +graph-sitter transform ./codemods/rename.py:rename ./service --language python --backend rust --fallback error --check +``` + +## With uvx + +Published package form after release: + +```bash +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --arguments '{"new_name":"renamed"}' --check +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --subdir src --arguments '{"new_name":"renamed"}' --check +``` + +Branch-built wheel validation form before release: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter transform ./codemods/rename.py:rename . --backend rust --fallback error --check +``` + +## Safety Model + +Use `--check` before `--write` unless you intentionally want to mutate the +target checkout immediately. `--check` copies the target repository to a +temporary Git repo before running the transform, so transforms that call +`codebase.commit()` internally do not mutate the original checkout. + +For repository-owned codemods that already live under `.codegen/codemods`, use +[`run`](/cli/run). 
diff --git a/docs/cli/uvx.mdx b/docs/cli/uvx.mdx new file mode 100644 index 000000000..a9e1a0e01 --- /dev/null +++ b/docs/cli/uvx.mdx @@ -0,0 +1,143 @@ +--- +title: "uvx Workflows" +sidebarTitle: "uvx" +icon: "terminal" +iconType: "solid" +--- + +Use `uvx` when you want to run Graph-sitter from a package or wheel in a clean +temporary environment. + +```bash +uvx --python 3.13 graph-sitter parse . +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --check +uvx --python 3.13 graph-sitter run rename-function . --check +``` + + + Published-package commands use the package resolved by `uvx`. Before a + published Graph-sitter release is validated, use branch-built wheel commands + with `--from dist/.whl` for Rust backend proof. + + +## Workflows + +Local source checkout: + +```bash +uv run graph-sitter doctor --json +uv run graph-sitter parse . --language python --backend python --format summary +``` + +Published package workflow after release validation: + +```bash +uvx --python 3.13 graph-sitter doctor --json +uvx --python 3.13 graph-sitter parse . --language auto --backend auto --fallback python --format json +``` + +Branch-built wheel validation before release: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter doctor --backend rust --language python --json +uvx --python 3.13 --from dist/.whl graph-sitter parse . --language python --backend rust --fallback error --format json +uvx --python 3.13 --from dist/.whl graph-sitter parse . --language typescript --backend rust --fallback error --format json +``` + +## Backend Modes + +Use strict Rust mode for release validation, benchmarks, and CI gates where +fallback would hide an unsupported Rust-backed API: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter parse . 
--backend rust --fallback error --format json +``` + +Use automatic backend selection with Python fallback when you want a working +command even if the Rust extension is unavailable or a requested API is not yet +Rust-backed: + +```bash +uvx --python 3.13 graph-sitter parse . --backend auto --fallback python --format json +``` + +The JSON output reports both the requested backend and the actual backend used. +When fallback happens, inspect that disclosure before treating a result as a +Rust performance or compatibility proof. + +## Parse + +`parse` is read-only and does not require `.codegen` initialization. + +```bash +uvx --python 3.13 graph-sitter parse . --language python --format json +uvx --python 3.13 graph-sitter parse . --language typescript --format json +``` + +For large repositories, scope the parse to the files that matter: + +```bash +uvx --python 3.13 graph-sitter parse ./monorepo --language typescript --subdir packages/app --subdir packages/ui --format json +``` + +Write JSON to a file when another tool or agent consumes the graph summary: + +```bash +uvx --python 3.13 graph-sitter parse . --format json --output graph-sitter-index.json +``` + +## Transform + +`transform` runs a one-shot Python transform by file path or import path. It +does not require the transform to be registered under `.codegen/codemods`. + +```bash +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --arguments '{"new_name":"renamed"}' --check +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --arguments '{"new_name":"renamed"}' --write +``` + +For strict branch-wheel validation: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter transform ./codemods/rename.py:rename . --language python --backend rust --fallback error --check +``` + +## Run + +`run` executes a registered codemod from the target repository's +`.codegen/codemods` directory. + +```bash +uvx --python 3.13 graph-sitter run rename-function . 
--arguments '{"old_name":"old","new_name":"new"}' --check +uvx --python 3.13 graph-sitter run rename-function . --arguments '{"old_name":"old","new_name":"new"}' --write +``` + +For large repositories, pass `--subdir` to avoid parsing unrelated packages: + +```bash +uvx --python 3.13 graph-sitter run rename-function ./airflow --subdir airflow/providers --subdir tests/providers --check +``` + +## Safety + +Use `--check` first, inspect the diff, then rerun with `--write`. `--check` +copies the target repository to a temporary Git repo before executing the +codemod, so the original checkout is left unchanged. + +`parse` never mutates files. `transform` and `run` require either `--check` or +`--write` in documented workflows; prefer the explicit mode even when +compatibility behavior accepts an omitted mode. + +## Release Gate + +Branch-built wheel validation proves the current branch can package and run the +Rust extension. It is not the same as a PyPI-backed public release. + +Before making `uvx graph-sitter ... --backend rust` the primary public setup +path, validate the uploaded package artifact with `doctor`, `parse`, +`transform`, and `run` from a clean environment and record the transcript in the +release notes or setup docs. + +See [large-repo benchmarks](/benchmarks/large-repos) for the current Airflow and +Next.js proof, and [correctness and parity](/correctness/parity) for the tested +semantic scope and known gaps. diff --git a/docs/correctness/parity.mdx b/docs/correctness/parity.mdx new file mode 100644 index 000000000..a4006682f --- /dev/null +++ b/docs/correctness/parity.mdx @@ -0,0 +1,111 @@ +--- +title: "Correctness and Parity" +sidebarTitle: "Parity" +icon: "scale-balanced" +iconType: "solid" +--- + +Graph-sitter's Rust backend is tested against the existing Python backend, but +parity is not the same thing as universal semantic correctness. 
The current +release posture is conservative: use strict Rust mode for proof, use Python +fallback when you need compatibility, and keep the Python backend as the default +until broader parity gates pass. + +## Current Status + +| Area | Status | Evidence | +| --- | --- | --- | +| Codebase construction and graph-free public queries | Parity covered for the supported subset | Unit tests, fixture parity, pinned Airflow and Next.js checks | +| File and source-file read APIs | Parity covered for the supported subset | Targeted lookup tests and pinned repo probes | +| Symbol, import, export, usage, and dependency wrappers | Parity covered for the supported subset | Fixture-wide graph rows plus selected large-repo semantic checks | +| Codemod transaction compatibility | Parity covered for selected Python and TypeScript edit flows | Python-vs-Rust fixture output-byte parity and pinned codemod proofs | +| Fallback and unsupported APIs | Covered | Strict mode raises explicit unsupported errors; fallback mode can promote to Python | +| Directory traversal and recursive symbol APIs | Parity covered for the current scope | Source and all-file directory tests | +| Full TypeScript expression, namespace, and type-system surface | Open gap | Broader mutable expression objects and full type-system parity remain open | +| Full graph-wide large-repo semantic equality | Open gap | Pinned repos have selected semantic parity and golden snapshots, not full graph-wide equality | + +## What Is Compared + +The fast parity fixture exact-compares Python and Rust behavior for: + +- files, symbols, imports, exports, and import targets +- symbol dependencies and symbol usages +- import usages and imported-export chains +- Python and TypeScript mutation output bytes +- graph-free execution, where the Rust path keeps the old Python graph blocked + +The pinned large-repo semantic checks compare selected high-value workflows on +Airflow `2.10.5` and Next.js `v15.0.0`. 
They cover known files, global and +file-local lookups, import resolution, name resolution, dependency lookups, and +selected TypeScript usage/dependency flows. + +## Known Deltas + +The selected Next.js semantic proof currently has zero known deltas. + +The selected Airflow proof has one checked delta: Rust resolves +`airflow.models.DagModel` through `airflow/models/__init__.py`, while the +current Python backend returns `None` for that probe. The checker fails unless +that delta is exactly the expected value, so it is tracked as a specific Rust +enhancement rather than a loose tolerated mismatch. + +## Safety Modes + +Use strict Rust mode when a test, benchmark, or release gate should fail instead +of silently falling back: + +```bash +graph-sitter parse . --backend rust --fallback error --format json +``` + +Use automatic mode with Python fallback when compatibility matters more than +proving the Rust path: + +```bash +graph-sitter parse . --backend auto --fallback python --format json +``` + +The JSON CLI output reports the requested backend and actual backend. Inspect +those fields before treating a result as Rust-backed evidence. + +## Release Gates + +Fast gate: + +```bash +rust-rewrite/tools/check_fast.sh +``` + +Pinned large-repo gate: + +```bash +rust-rewrite/tools/check_pinned_large_repos.sh +``` + +P0 parity manifest: + +```bash +uv run python rust-rewrite/tools/check_p0_parity_coverage.py +``` + +The stricter pre-default gate is: + +```bash +uv run python rust-rewrite/tools/check_p0_parity_coverage.py --require-complete +``` + +That stricter gate is intentionally expected to stay closed while the TypeScript +expression/type-system surface and full graph-wide large-repo equality remain +open.
+ +## Before Defaulting To Rust + +Rust should remain opt-in until: + +- full Python unit coverage stays green with the Python backend +- supported Rust-backed APIs pass the fast and pinned large-repo gates +- P0 parity coverage has no open gaps, or the remaining gaps are explicitly + removed from the default-backend contract +- published-package `uvx graph-sitter ...` artifacts pass clean-environment + parse, run, and transform smokes +- docs and skill instructions match the final command surface diff --git a/docs/introduction/installation.mdx b/docs/introduction/installation.mdx index 635df2163..04284eca8 100644 --- a/docs/introduction/installation.mdx +++ b/docs/introduction/installation.mdx @@ -33,6 +33,35 @@ uv tool install graph-sitter --python 3.13 This makes the `graph-sitter` command available globally in your terminal, while keeping its dependencies isolated. +Verify the installation before parsing a large repository: + +```bash +graph-sitter doctor --json +graph-sitter parse . --language python --backend python --format summary +``` + +Use `--language typescript` for TypeScript, JavaScript, and React repositories. + +## One-Shot uvx Usage + +Use `uvx` when you want to run Graph-sitter without installing a global tool: + +```bash +uvx --python 3.13 graph-sitter doctor --json +uvx --python 3.13 graph-sitter parse . --language auto --backend auto --fallback python --format json +``` + +For branch-built Rust wheel validation before a public release, point `uvx` at +the wheel artifact: + +```bash +uvx --python 3.13 --from dist/.whl graph-sitter doctor --backend rust --language python --json +uvx --python 3.13 --from dist/.whl graph-sitter parse . --language python --backend rust --fallback error --format json +``` + +See [uvx workflows](/cli/uvx) for parse, run, transform, `--subdir`, and +release-gate examples. 
+ ## Quick Start Let's walk through a minimal example of using Graph-sitter in a project: @@ -42,9 +71,19 @@ Let's walk through a minimal example of using Graph-sitter in a project: cd path/to/your/project ``` -2. Initialize Graph-sitter in your project with [gs init](/cli/init): +2. Parse the repository without initialization: ```bash - gs init + graph-sitter parse . --language python --backend python --format summary + ``` + + For TypeScript, JavaScript, and React repositories: + ```bash + graph-sitter parse . --language typescript --backend auto --fallback python --format summary + ``` + +3. Initialize Graph-sitter in your project with [graph-sitter init](/cli/init): + ```bash + graph-sitter init ``` This creates a `.codegen/` directory with: @@ -57,27 +96,73 @@ Let's walk through a minimal example of using Graph-sitter in a project: └── codegen-system-prompt.txt # AI system prompt ``` -3. Create your first codemod with [gs create](/cli/create): +4. Create your first codemod with [graph-sitter create](/cli/create): ```bash - gs create organize-imports \ + graph-sitter create organize-imports \ -d "Sort and organize imports according to PEP8" ``` - The `-d` flag in `gs create` generates an AI-powered implementation. This requires a Github account registered on [codegen.sh](https://codegen.sh) + The `-d` flag in `graph-sitter create` generates an AI-powered implementation. This requires a GitHub account registered on [codegen.sh](https://codegen.sh) -4. Run your codemod with [gs run](/cli/run): +5. Preview your codemod with [graph-sitter run](/cli/run): + ```bash + graph-sitter run organize-imports . --check + ``` + +6. Apply the codemod after reviewing the diff: ```bash - gs run organize-imports + graph-sitter run organize-imports . --write ``` -5. Reset any filesystem changes (excluding `.codegen/*`) with [gs reset](/cli/reset): +7.
Reset any filesystem changes (excluding `.codegen/*`) with [graph-sitter reset](/cli/reset): ```bash - gs reset + graph-sitter reset ``` +## Rust Backend + +Python remains the authoring shell. The Rust backend is an opt-in compact +parse/index backend for supported graph and codemod surfaces. + +Use strict Rust mode when unsupported behavior should fail loudly: + +```bash +graph-sitter parse . --language python --backend rust --fallback error --format json +``` + +Use automatic mode with Python fallback when a working result is more important +than proving the Rust path: + +```bash +graph-sitter parse . --language auto --backend auto --fallback python --format json +``` + +Python API users can select the same backend behavior through +`CodebaseConfig`: + +```python +from graph_sitter.configs.models.codebase import ( + CodebaseConfig, + GraphBackend, + RustFallbackMode, +) +from graph_sitter.core.codebase import Codebase + +codebase = Codebase( + "./", + config=CodebaseConfig( + graph_backend=GraphBackend.RUST, + rust_fallback=RustFallbackMode.ERROR, + ), +) +``` + +Run [`doctor`](/cli/doctor) before relying on strict Rust mode in CI, +benchmarks, or release validation. + ## Troubleshooting Having issues? Here are some common problems and their solutions: diff --git a/docs/introduction/overview.mdx b/docs/introduction/overview.mdx index 02174af04..90a5dbd32 100644 --- a/docs/introduction/overview.mdx +++ b/docs/introduction/overview.mdx @@ -5,7 +5,7 @@ icon: "robot" iconType: "solid" --- -[Graph-sitter](https://github.com/codegen-sh/graph-sitter) is a python library for manipulating codebases. +[Graph-sitter](https://github.com/codegen-sh/graph-sitter) is a Python library for manipulating codebases. It provides a scriptable interface to a powerful, multi-lingual language server built on top of [Tree-sitter](https://tree-sitter.github.io/tree-sitter/). 
@@ -20,7 +20,7 @@ codebase = Codebase("./") for function in codebase.functions: # Comprehensive static analysis for references, dependencies, etc. if not function.usages: - # Auto-handles references and imports to maintain correctness + # Updates references and imports through graph-aware edit APIs function.remove() # Fast, in-memory code index @@ -29,12 +29,14 @@ codebase.commit() -Graph-sitter handles complex refactors while maintaining correctness, enabling a broad set of advanced code manipulation programs. +Graph-sitter is designed for graph-aware refactors and codebase analysis. See +[correctness and parity](/correctness/parity) for the current tested scope and +known limits. Graph-sitter works with both Python and Typescript/JSX codebases. Learn more about language support [here](/building-with-graph-sitter/language-support). -## Quick Started +## Quick Start Graph-sitter requires Python 3.12 - 3.13 (recommended: Python 3.13+). @@ -139,7 +141,7 @@ Graph-sitter was engineered backwards from real-world refactors we performed for - **Natural Mental Model**: Express transformations through high-level operations that match how you reason about code changes, not low-level text or AST manipulation. - **Clean Business Logic**: Let the engine handle the complexities of imports, references, and cross-file dependencies. -- **Scale with Confidence**: Make sweeping changes across large codebases consistently across Python, TypeScript, JavaScript, and React. +- **Scale with Evidence**: Make sweeping changes across large codebases using tested Python, TypeScript, JavaScript, and React workflows. See the [large-repo benchmarks](/benchmarks/large-repos) for the current Airflow and Next.js results. As AI becomes increasingly sophisticated, we're seeing a fascinating shift: AI agents aren't bottlenecked by their ability to understand code or generate solutions. Instead, they're limited by their ability to efficiently manipulate codebases.
The challenge isn't the "brain" - it's the "hands." diff --git a/docs/mint.json b/docs/mint.json index c98102340..049bc0f14 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -1,6 +1,6 @@ { "$schema": "https://mintlify.com/schema.json", - "name": "Codegen", + "name": "Graph-sitter", "logo": { "dark": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45a3e32761c42b324b_Codegen_Logomark_Dark.svg", "light": "https://cdn.prod.website-files.com/67070304751b9b01bf6a161c/679bcf45bf55446746125835_Codegen_Logomark_Light.svg" @@ -9,9 +9,9 @@ "default": "dark" }, "metadata": { - "og:site_name": "Codegen", - "og:title": "Codegen - Manipulate Code at Scale", - "og:description": "A scriptable interface to a powerful, multi-lingual language server built on top of Tree-sitter.", + "og:site_name": "Graph-sitter", + "og:title": "Graph-sitter - Codebase graphs for codemods", + "og:description": "A Python library for graphing files, symbols, imports, calls, and usages so codemods can make targeted edits.", "og:url": "https://graph-sitter.com", "og:locale": "en_US", "og:logo": "https://i.imgur.com/f4OVOqI.png", @@ -20,12 +20,12 @@ }, "favicon": "/favicon.svg", "colors": { - "primary": "#a277ff", - "light": "#a277ff", - "dark": "#a277ff", + "primary": "#0f8b7b", + "light": "#0f8b7b", + "dark": "#60d7c8", "anchors": { - "from": "#61ffca", - "to": "#61ffca" + "from": "#60d7c8", + "to": "#60d7c8" } }, "theme": "prism", @@ -109,7 +109,7 @@ ] }, { - "group": "Building with Codegen", + "group": "Building with Graph-sitter", "pages": [ "building-with-graph-sitter/at-a-glance", "building-with-graph-sitter/parsing-codebases", @@ -149,14 +149,30 @@ "group": "CLI", "pages": [ "cli/about", + "cli/doctor", + "cli/uvx", + "cli/parse", "cli/init", "cli/notebook", "cli/create", "cli/run", + "cli/transform", "cli/reset", "cli/expert" ] }, + { + "group": "Benchmarks", + "pages": [ + "benchmarks/large-repos" + ] + }, + { + "group": "Correctness", + "pages": [ + "correctness/parity" + ] + 
}, { "group": "Changelog", "pages": [ @@ -377,4 +393,4 @@ "x": "https://x.com/codegen", "linkedin": "https://linkedin.com/company/codegen-dot-com" } -} \ No newline at end of file +} diff --git a/docs/samples/sample.tsx b/docs/samples/sample.tsx index 0d72ed725..4686ab851 100644 --- a/docs/samples/sample.tsx +++ b/docs/samples/sample.tsx @@ -1,5 +1,19 @@ -import React, { useState, useEffect } from "react"; -import styled from "styled-components"; +declare namespace React { + type FC = () => JSX.Element; +} + +declare namespace JSX { + interface Element {} + interface IntrinsicElements { + [elementName: string]: unknown; + } +} + +declare function useState<T>(initialValue: T): [T, (value: T) => void]; +declare const styled: { + div: (strings: TemplateStringsArray, ...values: unknown[]) => React.FC; + h1: (strings: TemplateStringsArray, ...values: unknown[]) => React.FC; +}; interface Kevin { id: number; diff --git a/hatch.toml b/hatch.toml index ec5043547..5579f936b 100644 --- a/hatch.toml +++ b/hatch.toml @@ -85,7 +85,7 @@ exclude = [ macos-max-compat = false [build] -packages = ["src/graph_sitter"] +packages = ["src/graph_sitter", "src/codemods"] [metadata.hooks.vcs.urls] Homepage = "https://www.codegen.com/" @@ -98,8 +98,10 @@ Documentation = "https://graph-sitter.com" Playground = "https://www.codegen.sh/" [build.targets.wheel.hooks.custom] -enable-by-default = false +enable-by-default = true path = "src/gsbuild/build.py" require-runtime-dependencies = true +rust-extension = true +rust-profile = "release" [envs.default] installer = "uv" diff --git a/pyproject.toml b/pyproject.toml index f2c0e652f..8532723d2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,10 +14,10 @@ dependencies = [ "dicttoxml<2.0.0,>=1.7.16", "xmltodict<1.0.0,>=0.13.0", "astor<1.0.0,>=0.8.1", - "tree-sitter>=0.23.1", - "tree-sitter-python>=0.23.4", - "tree-sitter-typescript>=0.23.2", - "tree-sitter-javascript>=0.23.1", + "tree-sitter>=0.23.1,<0.25", + "tree-sitter-python>=0.23.4,<0.24", +
"tree-sitter-typescript>=0.23.2,<0.24", + "tree-sitter-javascript>=0.23.1,<0.24", "python-levenshtein<1.0.0,>=0.25.1", "networkx>=3.4.1", "wrapt<2.0.0,>=1.16.0", @@ -31,7 +31,7 @@ dependencies = [ "humanize<5.0.0,>=4.10.0", "pytest-snapshot>=0.9.0", "pyjson5==1.6.9", - "mini-racer>=0.12.4", + "mini-racer==0.12.4", "rustworkx>=0.15.1", "typing-extensions>=4.12.2", "termcolor>=2.4.0", @@ -110,6 +110,7 @@ keywords = [ ] [project.scripts] gs = "graph_sitter.cli.cli:main" +graph-sitter = "graph_sitter.cli.cli:main" [project.optional-dependencies] pink = ["codegen-sdk-pink>=0.1.0"] diff --git a/rust-rewrite/api-inventory.md b/rust-rewrite/api-inventory.md new file mode 100644 index 000000000..f6da0ecb3 --- /dev/null +++ b/rust-rewrite/api-inventory.md @@ -0,0 +1,274 @@ +# Rust Rewrite API Inventory + +Inventory date: 2026-06-18 + +Scope: Python-facing public APIs that the Rust backend must preserve for `Codebase`, `File`/`SourceFile`, `Symbol`, `Import`, `Export`, and `Directory`. This inventory prioritizes APIs referenced by API docs, unit tests, and codemod examples/workflows. Source references point to the current Python implementation. + +Priority meanings: + +- P0: First Rust backend slice must preserve behavior and return shapes. It may still return Python compatibility handles, but query results, ordering, exceptions, and basic resolution semantics must match. +- P1: Public and used enough to preserve, but can initially fall back to the Python backend or existing transaction manager. Most edit/search/AST-manipulation APIs are here. +- P2: Preserve as explicit fallback, compatibility shim, or documented unsupported behavior for the Rust backend. These are Git/GitHub, AI, visualization, diagnostics, or low-level/internal APIs. 
+ +## P0 Compatibility Surface + +### Codebase + +Source references: `src/graph_sitter/core/codebase.py:259`, `src/graph_sitter/core/codebase.py:286`, `src/graph_sitter/core/codebase.py:338`, `src/graph_sitter/core/codebase.py:351`, `src/graph_sitter/core/codebase.py:366`, `src/graph_sitter/core/codebase.py:399`, `src/graph_sitter/core/codebase.py:409`, `src/graph_sitter/core/codebase.py:421`, `src/graph_sitter/core/codebase.py:432`, `src/graph_sitter/core/codebase.py:443`, `src/graph_sitter/core/codebase.py:455`, `src/graph_sitter/core/codebase.py:529`, `src/graph_sitter/core/codebase.py:551`, `src/graph_sitter/core/codebase.py:596`, `src/graph_sitter/core/codebase.py:609`, `src/graph_sitter/core/codebase.py:631`, `src/graph_sitter/core/codebase.py:644`, `src/graph_sitter/core/codebase.py:671`, `src/graph_sitter/core/codebase.py:687`, `src/graph_sitter/core/codebase.py:711`, `src/graph_sitter/core/codebase.py:803`, `src/graph_sitter/core/codebase.py:846`, `src/graph_sitter/core/codebase.py:1331`, `src/graph_sitter/core/codebase.py:1405`, `src/graph_sitter/core/codebase.py:1452`. + +Docs/tests/codemods evidence: `docs/api-reference/core/Codebase.mdx`, `tests/unit/sdk/core/test_codebase.py`, `tests/unit/sdk/python/codebase/test_codebase.py`, `src/codemods/**`, `docs/tutorials/**`. + +- Construction and metadata: + - `Codebase(...)` constructor surface and config behavior. + - `Codebase.from_files(...)` and `Codebase.from_string(...)` for fixture/test construction. + - `Codebase.from_repo(...)` should keep its Python checkout/setup behavior; the Rust engine can start after the repo path and config are resolved. + - `codebase.name` and `codebase.language`. +- File and directory queries: + - `codebase.files(...)`, including `extensions=None`, `extensions="*"`, `extensions=[...]`, source-file-only default behavior, and alphabetical sorting. + - `codebase.has_file(filepath, ignore_case=False)` and `codebase.get_file(filepath, optional=False, ignore_case=False)`. 
+ - `codebase.directories`, `codebase.has_directory(dir_path, ignore_case=False)`, and `codebase.get_directory(dir_path, optional=False, ignore_case=False)`. +- Graph-level node queries: + - `codebase.imports`. + - `codebase.exports` for TypeScript, including `NotImplementedError` on Python codebases. + - `codebase.symbols`, `codebase.classes`, `codebase.functions`, `codebase.global_vars`, `codebase.interfaces`, `codebase.types`. + - `codebase.has_symbol(name)`, `codebase.get_symbol(name, optional=False)`, `codebase.get_symbols(name)`, `codebase.get_class(name, optional=False)`, `codebase.get_function(name, optional=False)`. + - Ambiguity and missing-result errors for `get_symbol`, `get_class`, and `get_function`. +- Transaction compatibility: + - `codebase.commit(...)` and `codebase.reset(...)` must remain callable for codemod workflows. The first Rust slice should delegate to the existing Python transaction manager rather than porting edit application. + +### File and SourceFile + +Source references: `src/graph_sitter/core/file.py:50`, `src/graph_sitter/core/file.py:121`, `src/graph_sitter/core/file.py:131`, `src/graph_sitter/core/file.py:168`, `src/graph_sitter/core/file.py:180`, `src/graph_sitter/core/file.py:191`, `src/graph_sitter/core/file.py:253`, `src/graph_sitter/core/file.py:411`, `src/graph_sitter/core/file.py:613`, `src/graph_sitter/core/file.py:633`, `src/graph_sitter/core/file.py:647`, `src/graph_sitter/core/file.py:669`, `src/graph_sitter/core/file.py:681`, `src/graph_sitter/core/file.py:696`, `src/graph_sitter/core/file.py:708`, `src/graph_sitter/core/file.py:734`, `src/graph_sitter/core/file.py:752`, `src/graph_sitter/core/file.py:773`, `src/graph_sitter/core/file.py:785`, `src/graph_sitter/core/file.py:797`, `src/graph_sitter/core/file.py:810`, `src/graph_sitter/core/file.py:826`, `src/graph_sitter/core/file.py:839`, `src/graph_sitter/core/file.py:921`, `src/graph_sitter/core/file.py:1174`, `src/graph_sitter/python/file.py:38`, 
`src/graph_sitter/python/file.py:85`, `src/graph_sitter/typescript/file.py:47`, `src/graph_sitter/typescript/file.py:61`, `src/graph_sitter/typescript/file.py:79`, `src/graph_sitter/typescript/file.py:91`, `src/graph_sitter/typescript/file.py:107`, `src/graph_sitter/typescript/file.py:121`, `src/graph_sitter/typescript/file.py:136`, `src/graph_sitter/typescript/file.py:148`, `src/graph_sitter/typescript/file.py:160`, `src/graph_sitter/typescript/file.py:174`, `src/graph_sitter/typescript/file.py:426`. + +Docs/tests/codemods evidence: `docs/api-reference/core/File.mdx`, `docs/api-reference/core/SourceFile.mdx`, `docs/api-reference/python/PyFile.mdx`, `docs/api-reference/typescript/TSFile.mdx`, `tests/unit/sdk/python/file/test_file_properties.py`, `tests/unit/sdk/typescript/file/test_file_import_statemets.py`, `tests/unit/sdk/typescript/export/test_export_resolve_export.py`. + +- File identity and content: + - `file.name`, `file.file_path`, `file.filepath`, `file.path`. + - `file.content`, `file.content_bytes`, `file.source`. + - `file.directory`, `file.extension`, `file.is_binary`. + - `File.get_extensions()`, `PyFile.get_extensions()`, `TSFile.get_extensions()`. + - Class constructors used in tests: `File.from_content(...)`, language-specific `from_content(...)`, and `create_from_filepath(...)`. +- Source-file graph queries: + - `file.imports`, `file.import_statements`, `file.inbound_imports`, `file.importers`. + - `file.has_import(name_or_source)` and `file.get_import(name_or_source, optional=False)`. + - `file.symbols(...)`, including nested filtering behavior. + - `file.symbols_sorted_topologically`. + - `file.get_symbol(name, optional=False)`. + - `file.global_vars`, `file.get_global_var(name, optional=False)`. + - `file.classes`, `file.get_class(name, optional=False)`. + - `file.functions`, `file.get_function(name, optional=False)`. + - `file.find_by_byte_range(...)`. 
+- TypeScript-specific source-file queries: + - `file.exports`, `file.export_statements`, `file.default_exports`, `file.named_exports`, `file.get_export(name, optional=False)`. + - `file.interfaces`, `file.get_interface(name, optional=False)`. + - `file.types`, `file.get_type(name, optional=False)`. + - `file.get_namespace(name, optional=False)`. + - `file.promise_chains` should return compact read-only Promise-chain handles first; full async conversion/mutable expression behavior can route to Python or remain explicitly unsupported until the expression index lands. +- Import string helpers: + - `file.import_module_name(...)`. + - `PyFile.get_import_string(...)`. + - `TSFile.get_import_string(...)`. + +### Symbol and Inherited Editable/Usable APIs + +Source references: `src/graph_sitter/core/symbol.py:41`, `src/graph_sitter/core/symbol.py:96`, `src/graph_sitter/core/symbol.py:141`, `src/graph_sitter/core/symbol.py:435`, `src/graph_sitter/core/interfaces/has_name.py:17`, `src/graph_sitter/core/interfaces/has_name.py:29`, `src/graph_sitter/core/interfaces/usable.py:25`, `src/graph_sitter/core/interfaces/usable.py:44`, `src/graph_sitter/core/interfaces/importable.py:44`, `src/graph_sitter/core/interfaces/editable.py:236`, `src/graph_sitter/core/interfaces/editable.py:372`, `src/graph_sitter/core/interfaces/editable.py:383`, `src/graph_sitter/core/interfaces/editable.py:1048`, `src/graph_sitter/python/symbol.py:33`, `src/graph_sitter/python/symbol.py:45`, `src/graph_sitter/typescript/symbol.py:35`, `src/graph_sitter/typescript/symbol.py:130`, `src/graph_sitter/typescript/symbol.py:407`. + +Docs/tests/codemods evidence: `docs/api-reference/core/Symbol.mdx`, `docs/api-reference/core/Editable.mdx`, `docs/api-reference/core/Usable.mdx`, `docs/api-reference/core/HasName.mdx`, `docs/api-reference/python/PySymbol.mdx`, `docs/api-reference/typescript/TSSymbol.mdx`, codemods under `src/codemods/`. 
+ +- Symbol identity and source: + - `symbol.name`, `symbol.full_name`, `symbol.symbol_type`. + - `symbol.file`, `symbol.filepath`, `symbol.source`, `symbol.extended_source`, `symbol.extended_nodes`. + - Python `symbol.is_exported`. + - TypeScript export-facing metadata such as `symbol.export`, `symbol.exported_name`, `symbol.has_semicolon`, and `symbol.semicolon_node` where used by TS export/edit helpers. +- Graph relationships: + - `symbol.dependencies`. + - `symbol.usages` and `symbol.symbol_usages`. + - `symbol.descendant_symbols`. + - `symbol.function_calls`. +- Name/source helpers that must still work on compatibility handles: + - `symbol.get_name()`. + - `symbol.get_import_string(...)` for Python and TypeScript language subclasses. + +### Import + +Source references: `src/graph_sitter/core/import_resolution.py:60`, `src/graph_sitter/core/import_resolution.py:165`, `src/graph_sitter/core/import_resolution.py:184`, `src/graph_sitter/core/import_resolution.py:202`, `src/graph_sitter/core/import_resolution.py:213`, `src/graph_sitter/core/import_resolution.py:224`, `src/graph_sitter/core/import_resolution.py:237`, `src/graph_sitter/core/import_resolution.py:252`, `src/graph_sitter/core/import_resolution.py:278`, `src/graph_sitter/core/import_resolution.py:291`, `src/graph_sitter/core/import_resolution.py:356`, `src/graph_sitter/core/import_resolution.py:379`, `src/graph_sitter/core/import_resolution.py:392`, `src/graph_sitter/core/import_resolution.py:526`, `src/graph_sitter/core/import_resolution.py:545`, `src/graph_sitter/python/import_resolution.py:33`, `src/graph_sitter/python/import_resolution.py:44`, `src/graph_sitter/python/import_resolution.py:63`, `src/graph_sitter/python/import_resolution.py:87`, `src/graph_sitter/python/import_resolution.py:331`, `src/graph_sitter/typescript/import_resolution.py:35`, `src/graph_sitter/typescript/import_resolution.py:58`, `src/graph_sitter/typescript/import_resolution.py:78`, 
`src/graph_sitter/typescript/import_resolution.py:93`, `src/graph_sitter/typescript/import_resolution.py:110`, `src/graph_sitter/typescript/import_resolution.py:137`, `src/graph_sitter/typescript/import_resolution.py:200`, `src/graph_sitter/typescript/import_resolution.py:548`, `src/graph_sitter/typescript/import_resolution.py:582`, `src/graph_sitter/typescript/import_resolution.py:603`. + +Docs/tests/codemods evidence: `docs/api-reference/core/Import.mdx`, `docs/api-reference/python/PyImport.mdx`, `docs/api-reference/typescript/TSImport.mdx`, `tests/unit/sdk/typescript/file/test_file_import_statemets.py`, TS export/import resolution tests, codemods under `src/codemods/`. + +- Import identity: + - `import.name`, `import.source`, `import.module`, `import.symbol_name`, `import.alias`, `import.import_type`. + - `import.import_specifier`. +- Import predicates: + - `import.is_aliased_import`, `import.is_module_import`, `import.is_symbol_import`, `import.is_wildcard_import`, `import.is_dynamic`, `import.is_reexport`. + - TypeScript `import.is_type_import`, `import.is_default_import`, `import.namespace_imports`, `import.is_namespace_import`. +- Resolution: + - `import.from_file`, `import.to_file`. + - `import.imported_symbol`, `import.resolved_symbol`, `import.imported_exports`, `import.namespace`. + - Python `resolve_import(...)` and TypeScript `resolve_import(...)` semantics should be reflected through the public properties even if the function itself is not exposed as the first Rust boundary. +- Import string helpers: + - `import.get_import_string(...)`. 
+ +### Export + +Source references: `src/graph_sitter/core/export.py:22`, `src/graph_sitter/core/export.py:41`, `src/graph_sitter/core/export.py:50`, `src/graph_sitter/core/export.py:61`, `src/graph_sitter/core/export.py:69`, `src/graph_sitter/core/export.py:80`, `src/graph_sitter/typescript/export.py:45`, `src/graph_sitter/typescript/export.py:236`, `src/graph_sitter/typescript/export.py:248`, `src/graph_sitter/typescript/export.py:274`, `src/graph_sitter/typescript/export.py:299`, `src/graph_sitter/typescript/export.py:312`, `src/graph_sitter/typescript/export.py:328`, `src/graph_sitter/typescript/export.py:339`, `src/graph_sitter/typescript/export.py:350`, `src/graph_sitter/typescript/export.py:365`, `src/graph_sitter/typescript/export.py:381`, `src/graph_sitter/typescript/export.py:523`, `src/graph_sitter/typescript/export.py:549`, `src/graph_sitter/typescript/export.py:561`, `src/graph_sitter/typescript/export.py:578`, `src/graph_sitter/typescript/export.py:617`. + +Docs/tests/codemods evidence: `docs/api-reference/core/Export.mdx`, `docs/api-reference/typescript/TSExport.mdx`, `tests/unit/sdk/typescript/export/test_export_resolve_export.py`, TS export codemod examples. + +- Export identity and source: + - `export.name`, `export.source`, `export.exported_name` where exposed by TS-specific classes. + - `export.descendant_symbols`. +- Export predicates: + - `export.is_named_export`, `export.is_default_export`, `export.is_default_symbol_export`, `export.is_type_export`, `export.is_reexport`, `export.is_wildcard_export`, `export.is_module_export`, `export.is_aliased`, `export.is_external_export`. +- Resolution: + - `export.declared_symbol`, `export.exported_symbol`, `export.resolved_symbol`. + - Reexport and wildcard resolution must preserve current symbol/import/file targets. +- Import string helpers: + - `export.to_import_string(...)` and `export.get_import_string(...)`. 
+ +### Directory + +Source references: `src/graph_sitter/core/directory.py:31`, `src/graph_sitter/core/directory.py:60`, `src/graph_sitter/core/directory.py:71`, `src/graph_sitter/core/directory.py:95`, `src/graph_sitter/core/directory.py:99`, `src/graph_sitter/core/directory.py:105`, `src/graph_sitter/core/directory.py:116`, `src/graph_sitter/core/directory.py:158`, `src/graph_sitter/core/directory.py:177`, `src/graph_sitter/core/directory.py:188`, `src/graph_sitter/core/directory.py:199`, `src/graph_sitter/core/directory.py:204`, `src/graph_sitter/core/directory.py:213`, `src/graph_sitter/core/directory.py:224`, `src/graph_sitter/core/directory.py:240`, `src/graph_sitter/core/interfaces/has_symbols.py:51`. + +Docs/tests/codemods evidence: `docs/api-reference/core/Directory.mdx`, `tests/unit/sdk/core/test_directory.py`, directory traversal examples in docs/codemods. + +- Directory identity and traversal: + - `directory.name`, `directory.path`, `directory.dirpath`, `directory.parent`. + - `directory.files(...)`, `directory.subdirectories(...)`, `directory.items`, `directory.item_names`, `directory.file_names`, `directory.tree`. + - `directory.get_file(name)`, `directory.get_subdirectory(name)`. + - `__iter__`, `__contains__`, `__len__`, and `__getitem__`. +- Inherited recursive symbol queries from `HasSymbols`: + - `directory.symbols`, `directory.import_statements`, `directory.global_vars`, `directory.classes`, `directory.functions`, `directory.exports`, `directory.imports`. + - `directory.get_symbol(...)`, `directory.get_import_statement(...)`, `directory.get_global_var(...)`, `directory.get_class(...)`, `directory.get_function(...)`, `directory.get_export(...)`, `directory.get_import(...)`. + +## P1 Compatibility Surface + +P1 APIs should be preserved, but the first Rust backend can use the current Python implementation as a fallback. These APIs create or mutate files, edits, imports, exports, names, comments, or AST source ranges. 
+ +### Codebase P1 + +Source references: `src/graph_sitter/core/codebase.py:325`, `src/graph_sitter/core/codebase.py:388`, `src/graph_sitter/core/codebase.py:476`, `src/graph_sitter/core/codebase.py:511`, `src/graph_sitter/core/codebase.py:748`, `src/graph_sitter/core/codebase.py:1012`, `src/graph_sitter/core/codebase.py:1185`, `src/graph_sitter/core/codebase.py:1196`, `src/graph_sitter/core/codebase.py:1293`, `src/graph_sitter/core/codebase.py:1310`. + +- `codebase.create_file(...)`. +- `codebase.create_directory(...)`. +- `codebase.codeowners`. +- `codebase.external_modules`. +- `codebase.get_relative_path(from_file, to_file)`. +- `codebase.find_by_span(span)`. +- `codebase.set_session_options(...)`. +- `codebase.ai(...)`, `codebase.ai_client`, and AI/session helpers, if enabled in the environment. +- `codebase.visualize(...)`, if graph handles can be mapped back to a display graph. + +### File and SourceFile P1 + +Source references: `src/graph_sitter/core/file.py:238`, `src/graph_sitter/core/file.py:262`, `src/graph_sitter/core/file.py:294`, `src/graph_sitter/core/file.py:329`, `src/graph_sitter/core/file.py:359`, `src/graph_sitter/core/file.py:396`, `src/graph_sitter/core/file.py:976`, `src/graph_sitter/core/file.py:1027`, `src/graph_sitter/core/file.py:1047`, `src/graph_sitter/typescript/file.py:214`, `src/graph_sitter/typescript/file.py:230`, `src/graph_sitter/typescript/file.py:298`, `src/graph_sitter/typescript/file.py:322`, `src/graph_sitter/typescript/file.py:397`. + +- `file.write(...)`, `file.write_bytes(...)`. +- `file.edit(...)`, `file.replace(...)`, `file.remove(...)`. +- `file.rename(...)`, `file.update_filepath(...)`. +- `file.add_import(...)`. +- `file.add_symbol_from_source(...)`, `file.add_symbol(...)`. +- TypeScript `file.add_export_to_symbol(...)`. +- TypeScript `file.remove_unused_exports(...)`. +- TypeScript `file.has_export_statement_for_path(...)` and `file.get_export_statement_for_path(...)`. 
+- TypeScript `file.update_filepath(...)` behavior that also updates import paths. + +### Editable and Symbol P1 + +Source references: `src/graph_sitter/core/symbol.py:123`, `src/graph_sitter/core/symbol.py:169`, `src/graph_sitter/core/symbol.py:179`, `src/graph_sitter/core/symbol.py:189`, `src/graph_sitter/core/symbol.py:204`, `src/graph_sitter/core/symbol.py:219`, `src/graph_sitter/core/symbol.py:242`, `src/graph_sitter/core/symbol.py:269`, `src/graph_sitter/core/symbol.py:408`, `src/graph_sitter/core/interfaces/has_name.py:51`, `src/graph_sitter/core/interfaces/has_name.py:64`, `src/graph_sitter/core/interfaces/has_name.py:79`, `src/graph_sitter/core/interfaces/usable.py:78`, `src/graph_sitter/core/interfaces/editable.py:394`, `src/graph_sitter/core/interfaces/editable.py:428`, `src/graph_sitter/core/interfaces/editable.py:483`, `src/graph_sitter/core/interfaces/editable.py:516`, `src/graph_sitter/core/interfaces/editable.py:571`, `src/graph_sitter/core/interfaces/editable.py:604`, `src/graph_sitter/core/interfaces/editable.py:633`, `src/graph_sitter/core/interfaces/editable.py:683`, `src/graph_sitter/core/interfaces/editable.py:859`, `src/graph_sitter/core/interfaces/editable.py:905`, `src/graph_sitter/core/interfaces/editable.py:936`, `src/graph_sitter/core/interfaces/editable.py:1040`, `src/graph_sitter/core/interfaces/editable.py:1084`, `src/graph_sitter/core/interfaces/editable.py:1090`, `src/graph_sitter/core/interfaces/editable.py:1098`, `src/graph_sitter/core/interfaces/editable.py:1106`, `src/graph_sitter/core/interfaces/editable.py:1115`, `src/graph_sitter/core/interfaces/editable.py:1132`, `src/graph_sitter/core/interfaces/editable.py:1140`, `src/graph_sitter/core/interfaces/editable.py:1148`. + +- `symbol.set_name(...)`, `symbol.rename(...)`, `symbol.edit(...)`, source setter behavior. +- `symbol.comment`, `symbol.inline_comment`, `symbol.set_comment(...)`, `symbol.add_comment(...)`, `symbol.set_inline_comment(...)`. 
+- `symbol.insert_before(...)`, `symbol.insert_after(...)`, `symbol.remove(...)`, `symbol.move_to_file(...)`, `symbol.add_keyword(...)`. +- `Editable.find_string_literals(...)`, `find(...)`, `search(...)`. +- `Editable.replace(...)`, `insert_before(...)`, `insert_after(...)`, `edit(...)`, `remove(...)`. +- `Editable.variable_usages`, `get_variable_usages(...)`. +- `Editable.flag(...)`, `reduce_condition(...)`. +- `Editable.is_wrapped_in(...)`, `parent_of_type(...)`, `parent_of_types(...)`, `is_child_of(...)`, `ancestors`, `parent_statement`, `parent_function`, `parent_class`. + +### Import, Export, and Directory P1 + +Source references: `src/graph_sitter/core/import_resolution.py:437`, `src/graph_sitter/core/import_resolution.py:458`, `src/graph_sitter/core/import_resolution.py:479`, `src/graph_sitter/core/import_resolution.py:503`, `src/graph_sitter/typescript/import_resolution.py:624`, `src/graph_sitter/typescript/export.py:413`, `src/graph_sitter/typescript/export.py:651`, `src/graph_sitter/core/directory.py:244`, `src/graph_sitter/core/directory.py:252`, `src/graph_sitter/core/directory.py:257`. + +- `import.set_import_module(...)`, `import.set_import_symbol_alias(...)`, `import.rename(...)`, `import.remove(...)`. +- TypeScript `import.set_import_module(...)` path-update behavior. +- `export.make_non_default(...)`, `export.reexport_symbol(...)`, and inherited `export.remove(...)`. +- `directory.update_filepath(...)`, `directory.remove(...)`, `directory.rename(...)`. + +## P2 Compatibility Surface + +P2 APIs are public or semi-public, but should not drive the first Rust data model. Preserve them through Python-side delegation, clear errors, or later parity work. 
+ +Source references: `src/graph_sitter/core/codebase.py:235`, `src/graph_sitter/core/codebase.py:241`, `src/graph_sitter/core/codebase.py:822`, `src/graph_sitter/core/codebase.py:833`, `src/graph_sitter/core/codebase.py:865`, `src/graph_sitter/core/codebase.py:931`, `src/graph_sitter/core/codebase.py:938`, `src/graph_sitter/core/codebase.py:974`, `src/graph_sitter/core/codebase.py:1116`, `src/graph_sitter/core/codebase.py:1542`, `src/graph_sitter/core/codebase.py:1546`. + +- Git and GitHub: + - `codebase.github`, `codebase.op`. + - `codebase.git_commit`, `codebase.default_branch`, `codebase.current_commit`, `codebase.checkout(...)`. + - `codebase.get_diffs(...)`, `codebase.get_diff(...)`. + - `codebase.create_pr(...)`, `codebase.create_pr_comment(...)`, `codebase.create_pr_review_comment(...)`. + - PR-diff helpers such as modified-symbol lookup should remain Python-side until Rust graph parity is proven. +- Diagnostics, logs, and visualization: + - `codebase.reset_logs()`. + - Rich repr and diagnostic properties relying on Python graph object counts. + - Visualization internals and `viz`/graph display helpers. +- Low-level/internal object access: + - `ctx`, `_op`, raw `ts_node`, `node_id`, `parent`, `get_nodes()`, `parse/sync/recompute` helpers, and language-specific noapidoc helpers such as `valid_symbol_names`/`valid_import_names`. + - These should not become the Rust public contract; if compatibility requires them, expose minimal Python shim objects or fail explicitly under the Rust backend. + +## APIs That Currently Materialize Full Lists + +These are the main memory-sensitive APIs. They should keep returning Python `list` objects for compatibility, but the Rust backend should generate compact ID lists first and wrap handles lazily. + +### Codebase-wide materializers + +- `codebase.files(...)` currently returns sorted Python file objects and may walk the repo operator for non-source files: `src/graph_sitter/core/codebase.py:286`. 
+- `codebase.directories` returns `list(self.ctx.directories.values())`: `src/graph_sitter/core/codebase.py:338`. +- `codebase.imports` returns `ctx.get_nodes(NodeType.IMPORT)`: `src/graph_sitter/core/codebase.py:351`. +- `codebase.exports` returns `ctx.get_nodes(NodeType.EXPORT)`: `src/graph_sitter/core/codebase.py:366`. +- `codebase.external_modules` returns `ctx.get_nodes(NodeType.EXTERNAL)`: `src/graph_sitter/core/codebase.py:388`. +- `codebase.symbols`, `classes`, `functions`, `global_vars`, `interfaces`, and `types` call `_symbols`, which scans `ctx.get_nodes(NodeType.SYMBOL)` and filters top-level symbols: `src/graph_sitter/core/codebase.py:273`, `src/graph_sitter/core/codebase.py:399`. +- `codebase.get_symbol(...)`, `get_symbols(...)`, `get_class(...)`, and `get_function(...)` scan those full lists: `src/graph_sitter/core/codebase.py:644`, `src/graph_sitter/core/codebase.py:671`, `src/graph_sitter/core/codebase.py:687`, `src/graph_sitter/core/codebase.py:711`. + +### SourceFile materializers + +- `SourceFile` inherits `Importable`, whose constructor appends each parsed node into `self.file._nodes`: `src/graph_sitter/core/interfaces/importable.py:37`. +- `file.get_nodes()` returns the per-file `_nodes` list: `src/graph_sitter/core/file.py:725`. +- `file.imports`, `file.import_statements`, `file.symbols`, `file.global_vars`, `file.classes`, and `file.functions` all filter or transform that per-file list: `src/graph_sitter/core/file.py:633`, `src/graph_sitter/core/file.py:669`, `src/graph_sitter/core/file.py:708`, `src/graph_sitter/core/file.py:773`, `src/graph_sitter/core/file.py:797`, `src/graph_sitter/core/file.py:826`. +- `file.symbols_sorted_topologically` constructs a subgraph of in-file symbol nodes: `src/graph_sitter/core/file.py:752`. +- `file.inbound_imports` combines `self.symbols`, `self.imports`, and `self.symbol_usages`: `src/graph_sitter/core/file.py:613`. 
+- TypeScript `file.exports`, `export_statements`, `default_exports`, `named_exports`, `interfaces`, and `types` materialize filtered lists: `src/graph_sitter/typescript/file.py:47`, `src/graph_sitter/typescript/file.py:61`, `src/graph_sitter/typescript/file.py:79`, `src/graph_sitter/typescript/file.py:91`, `src/graph_sitter/typescript/file.py:121`, `src/graph_sitter/typescript/file.py:148`. + +### Directory recursive materializers + +- `directory.files(...)` recursively collects files into a list: `src/graph_sitter/core/directory.py:116`. +- `directory.subdirectories(...)`, `items`, `item_names`, `file_names`, and `tree` all materialize directory children: `src/graph_sitter/core/directory.py:158`, `src/graph_sitter/core/directory.py:177`, `src/graph_sitter/core/directory.py:188`, `src/graph_sitter/core/directory.py:199`, `src/graph_sitter/core/directory.py:204`. +- `HasSymbols` recursively chains per-file properties for `symbols`, `imports`, `exports`, `classes`, `functions`, and globals: `src/graph_sitter/core/interfaces/has_symbols.py:51`. + +### Relationship materializers + +- `symbol.dependencies` traverses descendant symbols and dependency graph out-edges: `src/graph_sitter/core/interfaces/importable.py:44`. +- `symbol.usages` and `symbol.symbol_usages` traverse graph edges and collect usage objects: `src/graph_sitter/core/interfaces/usable.py:25`, `src/graph_sitter/core/interfaces/usable.py:44`. +- `import.imported_symbol`, `import.resolved_symbol`, `import.imported_exports`, `import.from_file`, and `import.to_file` resolve through graph edges and source-file/import lists: `src/graph_sitter/core/import_resolution.py:252`, `src/graph_sitter/core/import_resolution.py:278`, `src/graph_sitter/core/import_resolution.py:291`, `src/graph_sitter/core/import_resolution.py:356`, `src/graph_sitter/core/import_resolution.py:379`. 
+- `export.declared_symbol`, `export.exported_symbol`, and `export.resolved_symbol` resolve across TS export/import/file graph edges: `src/graph_sitter/typescript/export.py:350`, `src/graph_sitter/typescript/export.py:365`, `src/graph_sitter/typescript/export.py:381`. + +## Recommended First-Slice Compatibility Boundary + +The first Rust backend slice should be read-heavy and graph-oriented: + +- Parse Python and TypeScript/TSX source files into compact records for files, top-level symbols, classes, functions, globals, TypeScript interfaces/types, imports, exports, and ranges. +- Preserve public list-returning APIs by returning Python lists of lazy compatibility handles, but do not eagerly instantiate every Python node object during codebase construction. +- Preserve current public ordering: alphabetical sorting for `codebase.files`, sorted symbol/class/function lists where the Python API sorts today, and existing file-local ordering for imports/exports/symbols. +- Preserve path normalization, `optional=True` behavior, ambiguity errors, Python-vs-TypeScript export behavior, and `ignore_case` lookup behavior. +- Implement import/export resolution, dependency edges, and usage records in Rust before claiming parity for `import.resolved_symbol`, `import.imported_symbol`, `export.resolved_symbol`, `symbol.dependencies`, or `symbol.usages`. +- Keep edit APIs, transaction application, formatting, comments, AST parent navigation, AI, Git/GitHub, and visualization on the Python backend/fallback path for the first slice. +- Make unsupported P1/P2 APIs under the Rust backend explicit: either delegate to Python compatibility objects or raise a clear `NotImplementedError`. P0 APIs should not silently fall back to incomplete or behavior-changing approximations. +- Avoid exposing persistent Rust-owned tree-sitter node wrappers as the long-term contract. Use stable IDs plus byte ranges/source text and construct Python handles only on demand. 
+ +## Initial Rust Data Required For P0 + +- `FileRecord`: stable file ID, interned path/name/extension, language, content hash, source/binary flag, directory ID, root range. +- `DirectoryRecord`: stable directory ID, interned path/name, parent ID, sorted child file/directory ID indexes. +- `SymbolRecord`: stable symbol ID, file ID, kind, name/full-name IDs, top-level/nested flag, parent symbol ID, range, extended range, export metadata. +- `ImportRecord`: stable import ID, file ID, module/name/alias IDs, kind flags, statement range, target file/symbol/export IDs where resolved. +- `ExportRecord`: stable export ID, file ID, exported name, kind flags, declared/exported/resolved target IDs, range. +- `UsageRecord`: stable usage ID, source file/node ID, target symbol/import/export ID, usage kind, range. +- `GraphEdge`: compact dependency and resolution edges by ID, not Python object payloads. diff --git a/rust-rewrite/benchmarks.md b/rust-rewrite/benchmarks.md new file mode 100644 index 000000000..95dbf289d --- /dev/null +++ b/rust-rewrite/benchmarks.md @@ -0,0 +1,421 @@ +# Phase 0 Benchmarking And Profiling + +This document captures the first practical baseline plan for the Python backend before replacing the eager Python object graph with a Rust engine. + +## Goals + +- Measure cold `Codebase(...)` construction wall time and RSS for the current Python backend. +- Split the build into coarse phases that match today's implementation. +- Record graph size and Python object counts so memory regressions can be compared against graph scale. +- Keep the smoke benchmark runnable without a large external repository. + +## Current Build Phase Map + +The eager path is: + +1. `Codebase.__init__` validates inputs, builds `ProjectConfig`, and constructs `CodebaseContext`. +2. `CodebaseContext.__init__` creates `rustworkx.PyDiGraph`, indexes, parser, config parser, dependency manager, and language engine. +3. 
`CodebaseContext.build_graph` enumerates files with `RepoOperator.iter_files`. +4. `_process_diff_files` adds files: + - dependency manager / language engine startup if configured + - file existence checks for incremental runs + - new file parsing through `SourceFile.from_content` + - tree-sitter parse through `parse_file` + - eager Python object materialization through `SourceFile.parse` +5. `_process_diff_files` builds the directory tree with `build_directory_tree`. +6. TypeScript only: `config_parser.parse_configs` assigns nearest `tsconfig.json` data. +7. Unless `CodebaseConfig(disable_graph=True)` is set, graph resolution runs: + - import resolution through `Import.add_symbol_resolution_edge` + - TypeScript export dependency resolution through `TSExport.compute_export_dependencies` + - superclass/interface dependency resolution through `compute_superclass_dependencies` + - fixed-point dependency recompute through `_compute_dependencies` and `Importable.recompute` + +The known memory-heavy points are `SourceFile._nodes`, every `Editable` retaining `ts_node`, `ctx`, `parent`, and IDs, and `CodebaseContext._graph` storing Python payload objects plus `Edge` objects. + +## Harness + +`rust-rewrite/tools/measure_python_backend.py` is a standalone measurement harness. It runtime-wraps stable Python backend choke points and writes a JSON report. 
+ +Smoke test with a generated tiny Python git repo: + +```bash +uv run python rust-rewrite/tools/measure_python_backend.py --language python --json +``` + +Measure a real repo: + +```bash +uv run python rust-rewrite/tools/measure_python_backend.py /path/to/repo --language python --output /tmp/python-backend-baseline.json +``` + +Run multiple cold samples as separate processes: + +```bash +for i in 1 2 3 4 5; do + uv run python rust-rewrite/tools/measure_python_backend.py /path/to/repo --language python \ + --output "/tmp/python-backend-baseline-$i.json" +done +``` + +Isolate parse/object materialization from graph resolution: + +```bash +uv run python rust-rewrite/tools/measure_python_backend.py /path/to/repo --language python \ + --disable-graph --output /tmp/python-backend-parse-only.json +``` + +`rust-rewrite/tools/compare_rust_python_index.py` compares that current Python backend path with the Rust compact Python indexer. It builds the Rust release example once, generates or accepts a repo, and samples the Rust indexer process RSS. + +Generated fixture comparison: + +```bash +uv run python rust-rewrite/tools/compare_rust_python_index.py \ + --fixture-files 150 --fixture-functions 20 \ + --output /tmp/graph-sitter-rust-compare.json +``` + +Current repo comparison: + +```bash +uv run python rust-rewrite/tools/compare_rust_python_index.py . \ + --output /tmp/graph-sitter-rust-compare-repo.json +``` + +Compare against the current full Python graph instead of parse/object materialization only: + +```bash +uv run python rust-rewrite/tools/compare_rust_python_index.py . \ + --python-full-graph \ + --output /tmp/graph-sitter-rust-compare-repo-full.json +``` + +`rust-rewrite/tools/measure_rust_facade.py` measures the Python-facing Rust compact-index facade. It expects the PyO3 extension module to be importable as `graph_sitter_py`. 
By default it discovers files through the same Python `RepoOperator.iter_files(...)` filters used by `CodebaseContext`, then passes that selected list into Rust: + +```bash +PYTHONPATH=/path/to/dir/containing/graph_sitter_py_extension \ + uv run python rust-rewrite/tools/measure_rust_facade.py . --json +``` + +Use `--raw-rust-walk` to measure Rust's standalone recursive walk instead of Python-selected file discovery. + +`rust-rewrite/tools/measure_codebase_rust_backend.py` measures actual `Codebase(...)` construction with `CodebaseConfig(graph_backend="rust", rust_fallback="error")`. It verifies that the lazy Python graph is blocked and reports compact Rust record counts: + +```bash +PYTHONPATH=/path/to/dir/containing/graph_sitter_py_extension \ + uv run python rust-rewrite/tools/measure_codebase_rust_backend.py . --json +``` + +Pass `--language typescript` to measure the compact Rust TypeScript/JavaScript Codebase shell instead of the Python shell: + +```bash +PYTHONPATH=/path/to/dir/containing/graph_sitter_py_extension \ + uv run python rust-rewrite/tools/measure_codebase_rust_backend.py /path/to/ts-repo --language typescript --json +``` + +`rust-rewrite/tools/benchmark_pinned_python_repo.py` prepares a pinned external Python repository, builds the PyO3 extension, runs the Python parse/object-materialization harness, runs the Rust compact `Codebase` harness, and fails if the configured wall/RSS ratio gates are not met. The default pinned repo is Apache Airflow `2.10.5`, resolved to commit `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`: + +```bash +uv run python rust-rewrite/tools/benchmark_pinned_python_repo.py \ + --output /tmp/graph-sitter-airflow-2.10.5-benchmark.json \ + --json +``` + +`rust-rewrite/tools/measure_typescript_rust_index.py` measures the standalone compact Rust TypeScript/JavaScript syntax index through the PyO3 extension. 
By default it discovers `.js`, `.jsx`, `.ts`, and `.tsx` files through the same Python `RepoOperator.iter_files(...)` filters used by `CodebaseContext`, then passes that selected list into Rust: + +```bash +PYTHONPATH=/path/to/dir/containing/graph_sitter_py_extension \ + uv run python rust-rewrite/tools/measure_typescript_rust_index.py /path/to/ts-repo --json +``` + +Use `--raw-rust-walk` to measure Rust's standalone recursive TS/JS walk instead of Python-selected file discovery. + +`rust-rewrite/tools/benchmark_pinned_typescript_repo.py` prepares a pinned external TypeScript/JavaScript repository, builds the PyO3 extension, runs the Python TS parse/object-materialization harness, runs the Rust TS syntax-index harness, and fails if the configured wall/RSS ratio gates are not met. The default pinned repo is Next.js `v15.0.0`, resolved to commit `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`: + +```bash +uv run python rust-rewrite/tools/benchmark_pinned_typescript_repo.py \ + --output /tmp/graph-sitter-nextjs-v15.0.0-benchmark.json \ + --json +``` + +`rust-rewrite/tools/snapshot_pinned_python_repo.py` verifies a deterministic compact Rust graph snapshot for the same pinned Airflow checkout. 
The committed golden stores counts, stable SHA-256 digests, and sorted sample rows for files, symbols, imports, import resolutions, references, and dependencies: + +```bash +uv run python rust-rewrite/tools/snapshot_pinned_python_repo.py +``` + +Refresh the committed snapshot after intentional compact-IR changes: + +```bash +uv run python rust-rewrite/tools/snapshot_pinned_python_repo.py --update +``` + +The same check is available as an opt-in pytest integration test: + +```bash +GRAPH_SITTER_RUN_PINNED_AIRFLOW_SNAPSHOT=1 \ + uv run pytest tests/integration/rust_rewrite/test_pinned_airflow_snapshot.py -q +``` + +## Metrics + +The JSON report includes: + +- total constructor wall time +- process RSS before and after construction +- sampled process RSS peak for the full run +- `ru_maxrss` for process max RSS +- inclusive wall time and sampled RSS peak for each wrapped phase +- phase call counts and phase-specific counters, such as parsed bytes +- graph node and edge counts +- graph node counts by `NodeType` +- sum of per-file `_nodes` lengths +- optional `gc` object counts for `graph_sitter.*` classes + +Phase timings are inclusive and do not sum to total time because some wrappers are nested. RSS phase attribution uses a background sampler and should be treated as trend data, not allocator-accurate attribution. + +## Recommended Baseline Matrix + +Use pinned commits and record hardware, Python version, OS, and command line from the JSON metadata. + +| Tier | Repo | Purpose | Minimum samples | +| --- | --- | --- | --- | +| Smoke | generated fixture | CI/local sanity check | 1 | +| Small | this repo or a compact fixture repo | stable regression signal | 5 | +| Medium | representative Python service or TS package | phase distribution | 5 | +| Huge | known memory-stressing monorepo | Rust rewrite target | 3 | + +For each real repo, capture both default graph mode and `--disable-graph` parse-only mode. The delta approximates resolution/dependency graph cost. 
+ +## Initial Rust Index Evidence + +These measurements are for the first Rust vertical slice only: repo walk, tree-sitter Python parsing, top-level class/function extraction, and import extraction into compact Rust records. This is not yet full `Codebase` API parity and does not yet include dependency graph resolution. + +Commands were run on this branch on 2026-06-18. + +| Input | Python mode | Python wall | Python max RSS | Rust index wall | Rust process wall | Rust sampled RSS | Wall ratio | RSS ratio | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Generated fixture, 150 modules x 20 helpers | `--disable-graph` | 0.460s | 166.3 MB | 0.047s | 0.281s | 3.3 MB | 9.875x | 50.918x | +| Generated fixture, 150 modules x 20 helpers | full graph | 1.147s | 208.5 MB | 0.038s | 0.051s | 3.1 MB | 30.502x | 66.380x | +| `graph-sitter` repo checkout | `--disable-graph` | 2.874s | 531.9 MB | 0.317s | 0.333s | 7.6 MB | 9.069x | 70.045x | +| `graph-sitter` repo checkout | full graph | 7.448s | 788.8 MB | 0.331s | 0.342s | 7.6 MB | 22.480x | 103.877x | + +The most conservative current-repo comparison is parse/object materialization only: Rust is about 9x faster and about 70x lower RSS for the implemented compact-index slice. Against today's full graph construction on this repo, Rust is about 22x faster and about 104x lower RSS for the same implemented slice. + +## Python-Facing Rust Facade Evidence + +These measurements use the new Python shell integration path: Python discovers files with `RepoOperator.iter_files(...)`, the selected file list is passed to the PyO3 extension, and Rust builds the compact index. This includes Python interpreter/import overhead and is therefore a higher RSS number than the standalone Rust process, but it is the relevant measurement for an opt-in Python shell path. + +Commands were run on this branch on 2026-06-18 after adding selected-file PyO3 indexing. 
+ +| Input | Python mode | Python wall | Python max RSS | Rust facade wall | Rust facade max RSS | Python files | Rust selected files | Rust globals | Rust import resolutions | Wall ratio | RSS ratio | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| `graph-sitter` repo checkout | `--disable-graph` | 2.987s | 535.0 MB | 0.692s | 115.3 MB | 1129 | 1129 | 799 | 432 | 4.317x | 4.638x | + +This shell-facing number is intentionally more conservative than the standalone Rust process benchmark because it includes Python startup, imports, and repo file discovery. The important result is that the selected-file integration preserves Python file-discovery parity for the current repo while still cutting parse/index/import-resolution wall time and process max RSS substantially for the implemented compact graph slice. + +## Rust `Codebase` Construction Evidence + +These measurements use real `Codebase(...)` construction with `CodebaseConfig(graph_backend="rust", rust_fallback="error")`. In this mode, once the compact Rust index builds successfully, `CodebaseContext` does not build the eager Python graph. The Rust path now exercises public Python `Codebase.files`, `symbols`, `classes`, `functions`, `global_vars`, and `imports` compatibility handles, and TypeScript file/symbol/import/export compatibility handles, while `CodebaseContext.nodes` remains blocked so the old graph cannot be materialized accidentally. 
+ +| Input | Python mode | Python wall | Python max RSS | Rust `Codebase` wall | Rust `Codebase` max RSS | Python files | Rust files | Rust symbols | Rust imports | Rust import resolutions | Rust references | Rust dependencies | Python graph blocked | Wall ratio | RSS ratio | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | ---: | ---: | +| `graph-sitter` repo checkout | `--disable-graph` | 2.731s | 543.0 MB | 0.681s | 124.0 MB | 1133 | 1133 | 6505 | 6496 | 432 | 4110 | 2953 | yes | 4.009x | 4.378x | +| Apache Airflow `2.10.5` (`b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`) | `--disable-graph` | 18.940s | 3469.5 MB | 4.085s | 266.2 MB | 4789 | 4789 | 52339 | 40580 | 19011 | 109817 | 71932 | yes | 4.637x | 13.031x | + +## Standalone TypeScript/JavaScript Rust Index Evidence + +These measurements capture the first syntax-only Rust TypeScript/JavaScript index exposed through PyO3. The Rust path uses Python-selected file discovery for a fair file-list comparison. The later `Codebase` measurement below includes the current relative-import resolution, reference/dependency rows, and lazy Python shell handles. 
+ +The Next.js measurement was run on this branch on 2026-06-18 against `vercel/next.js` `v15.0.0` at commit `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`: + +```bash +uv run python rust-rewrite/tools/benchmark_pinned_typescript_repo.py \ + --extension-dir /tmp/graph_sitter_py_ts_smoke \ + --skip-build-extension \ + --skip-fetch \ + --output /tmp/graph-sitter-nextjs-v15.0.0-benchmark.json +``` + +| Input | Python mode | Python wall | Python max RSS | Python files | Python nodes | Rust TS index wall | Rust TS index max RSS | Rust selected files | Rust files | Rust symbols | Rust imports | Rust exports | Rust files with errors | Wall ratio | RSS ratio | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Next.js `v15.0.0` (`51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`) | `--disable-graph` | 24.959s | 3100.1 MB | 13679 | 213969 | 3.347s | 200.3 MB | 13688 | 13688 | 23957 | 28210 | 16026 | 114 | 7.457x | 15.475x | + +The Rust selected-file count matches the Python `RepoOperator` selected file list exactly. Python materialized 9 fewer source-file objects because the repo includes intentionally broken/non-UTF-8 fixture files; Rust now records selected files and marks parser-error files instead of aborting or dropping the file. + +## TypeScript Rust `Codebase` Construction Evidence + +This measurement uses real `Codebase(...)` construction against the pinned Next.js checkout with `CodebaseConfig(graph_backend="rust", rust_fallback="error")`. It exercises compact TypeScript files, symbols, classes, functions, globals, interfaces, types, imports, exports, relative and tsconfig path/baseUrl import resolutions, references, dependencies, read-only function calls, and read-only Promise chains through the Python shell while keeping `CodebaseContext.nodes` blocked. 
+ +Command run on 2026-06-19: + +```bash +uv run python rust-rewrite/tools/check_pinned_typescript_codebase.py \ + --skip-build-extension \ + --skip-fetch \ + --json +``` + +| Input | Rust `Codebase` wall | Rust `Codebase` max RSS | Files | Symbols | Imports | Exports | Import resolutions | References | Dependencies | Function calls | Promise chains | Python graph blocked | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- | +| Next.js `v15.0.0` (`51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`) | 11.264s | 438.8 MB | 13688 | 44871 | 28210 | 16027 | 13462 | 114464 | 49287 | 197581 | 878 | yes | + +Compared with the Python TypeScript parse/object-materialization baseline above, the current Rust `Codebase` TypeScript shell is about 2.216x faster and about 7.065x lower max RSS while exposing compact export, call, and Promise-chain handles and keeping the eager Python graph unbuilt. The pinned proof also validates a real `packages/next/src/cli/next-lint.ts` file/symbol lookup for 27 file-local call records, 16 `nextLint` symbol call records, and one `.then/.catch` Promise chain without materializing the full call or chain caches. A parser fallback now tries the TS grammar for `.ts`/`.js` files and keeps the lower-error parse, reducing pinned Next.js parser-error files from 114 to 113 by recovering `test/integration/typescript/components/angle-bracket-type-assertions.ts`. + +The same proof is now available as an opt-in test gate: + +```bash +uv run python rust-rewrite/tools/check_pinned_typescript_codebase.py \ + --skip-build-extension \ + --skip-fetch +``` + +On 2026-06-19, that checker validated exact pinned Next.js `Codebase` handle counts plus compact function-call and Promise-chain counts, confirmed the Python graph stayed blocked, and measured 11.264s wall / 438.8 MB max RSS. 
Against the recorded Python TypeScript parse/object-materialization baseline above, that is 2.216x faster wall time and 7.065x lower max RSS with conservative CI-style ceilings. + +## Installed-Wheel `uvx` Airflow Evidence + +The branch-built wheel path now has an artifact-level large Python proof that +runs through `uvx --from dist/<wheel>.whl graph-sitter`, not through an +editable checkout or manually copied extension. + +Command run on 2026-06-19: + +```bash +uv run python rust-rewrite/tools/check_wheel_pinned_python_repo.py \ + --wheel dist/graph_sitter-0.56.15.dev166+g2f790c9f7.d20260619-cp313-cp313-macosx_26_0_arm64.whl \ + --skip-fetch \ + --compare-python-backend \ + --min-parse-elapsed-ratio 1.5 \ + --min-sampled-rss-ratio 3.0 \ + --output /tmp/graph-sitter-airflow-wheel-rust-vs-python.json +``` + +| Input | Installed backend | Parse elapsed | `uvx` outer wall | Sampled process-tree RSS | Files | Symbols | Imports | References | External references | Dependencies | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Apache Airflow `2.10.5` (`b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`) | Rust strict | 4.913s | 6.064s | 487.0 MB | 4789 | 52339 | 45404 | 117799 | 78784 | 77570 | +| Apache Airflow `2.10.5` (`b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`) | Python | 48.242s | 77.649s | 5429.3 MB | 4789 | 27728 | 44100 | n/a | n/a | 1099202 | + +The installed-wheel strict Rust path matched the committed compact Python golden +summary at the time that wheel was built, including 4,789 files, 52,339 symbols, +45,404 imports, 117,799 references, 78,784 external references, 77,570 +dependencies, and zero files with parse errors. A later source fix for +parenthesized Python `from ... import (...)` extraction updates the committed +golden counts. Compared with the installed-wheel Python backend on the same +checkout and wheel, the Rust path was 9.818x faster by CLI parse elapsed and +11.148x lower by sampled process-tree RSS.
+ +The same branch-built wheel gate also proves a real pinned Airflow transform +through the distributed CLI: + +```bash +uv run python rust-rewrite/tools/check_wheel_pinned_python_repo.py \ + --wheel dist/graph_sitter-0.56.15.dev166+g2f790c9f7.d20260619-cp313-cp313-macosx_26_0_arm64.whl \ + --skip-fetch \ + --run-transform-proof \ + --output /tmp/graph-sitter-airflow-wheel-transform.json +``` + +That run parsed pinned Airflow in strict Rust mode, cloned a temporary mutable +checkout, ran `graph-sitter transform ... --language python --backend rust +--fallback error --write` through `uvx --from`, added `from typing import Any` +to `airflow/__init__.py`, renamed `__getattr__` to +`__getattr_wheel_proof__`, and asserted only `airflow/__init__.py` changed. + +| Operation | Wall | Sampled process-tree RSS | Validation | +| --- | ---: | ---: | --- | +| Installed-wheel strict Rust parse | 5.052s | 503.5 MB | matched compact golden summary | +| Installed-wheel strict Rust transform | 5.920s | 500.1 MB | only `airflow/__init__.py` changed | + +## Installed-Wheel `uvx` Next.js Evidence + +The branch-built wheel path now has an artifact-level large TypeScript proof +that runs through `uvx --from dist/<wheel>.whl graph-sitter`, not through an +editable checkout or manually copied extension.
+ +Command run on 2026-06-19: + +```bash +uv run python rust-rewrite/tools/check_wheel_pinned_typescript_repo.py \ + --wheel dist/graph_sitter-0.56.15.dev166+g2f790c9f7.d20260619-cp313-cp313-macosx_26_0_arm64.whl \ + --skip-fetch \ + --compare-python-backend \ + --min-parse-elapsed-ratio 1.5 \ + --min-sampled-rss-ratio 3.0 \ + --output /tmp/graph-sitter-nextjs-wheel-rust-vs-python.json +``` + +| Input | Installed backend | Parse elapsed | `uvx` outer wall | Sampled process-tree RSS | Files | Symbols | Imports | Exports | References | Dependencies | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Next.js `v15.0.0` (`51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`) | Rust strict | 10.352s | 11.508s | 537.5 MB | 13688 | 44870 | 28210 | 16026 | 114463 | 49287 | +| Next.js `v15.0.0` (`51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`) | Python | 57.956s | 78.107s | 4505.6 MB | 13679 | 25364 | 28723 | 17878 | n/a | 811914 | + +The installed-wheel strict Rust path matched the committed compact TypeScript +golden summary at the time that wheel was built, including 13,688 files, 44,870 +symbols, 28,210 imports, 16,026 exports, 114,463 references, 49,287 +dependencies, 25,318 external references, 160 subclass edges, and 114 files with +parse errors. A later source fix for TS angle-bracket assertion parsing updates +the committed golden counts. Compared with the installed-wheel Python backend +on the same checkout and wheel, the Rust path was 5.598x faster by CLI parse +elapsed and 8.383x lower by sampled process-tree RSS. The Python backend +materialized 9 fewer file objects than Rust selected files, matching the known +selected-file versus materialized-file delta for this repo's broken fixture +files. 
+ +The same branch-built wheel gate also proves a real pinned Next.js transform +through the distributed CLI: + +```bash +uv run python rust-rewrite/tools/check_wheel_pinned_typescript_repo.py \ + --wheel dist/graph_sitter-0.56.15.dev166+g2f790c9f7.d20260619-cp313-cp313-macosx_26_0_arm64.whl \ + --skip-fetch \ + --run-transform-proof \ + --output /tmp/graph-sitter-nextjs-wheel-transform.json +``` + +That run parsed pinned Next.js in strict Rust mode, cloned a temporary mutable +checkout, ran `graph-sitter transform ... --language typescript --backend rust +--fallback error --write` through `uvx --from`, added +`import { act } from 'react-dom/test-utils';` to +`packages/next/src/client/components/app-router-announcer.tsx`, renamed +`AppRouterAnnouncer` to `AppRouterAnnouncerWheelProof`, and rewrote the +importing usage in `packages/next/src/client/components/app-router.tsx`. + +| Operation | Wall | Sampled process-tree RSS | Validation | +| --- | ---: | ---: | --- | +| Installed-wheel strict Rust parse | 10.386s | 549.7 MB | matched compact golden summary | +| Installed-wheel strict Rust transform | 11.834s | 525.8 MB | only `app-router-announcer.tsx` and `app-router.tsx` changed | + +## Pinned Compact Snapshot Evidence + +The first committed large-repo compact snapshot is `rust-rewrite/golden/apache-airflow-2.10.5-rust-compact.json`. It was generated from Apache Airflow `2.10.5` at commit `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`. 
+ +| Graph family | Count | SHA-256 | +| --- | ---: | --- | +| Files | 4789 | `226e8cb32dc0a23ec956e97b036e7c505037df979cce7182514f39a43b07cb80` | +| Symbols | 52339 | `d4b75c9c6d82b1d30424845c86b88c9fb18ca7748fc088c16b4cfca00de30699` | +| Imports | 40580 | `fe4a595d850f2f57f1eb1a5ca347ecfcc09259e31cd7b44306902c04de7275d0` | +| Import resolutions | 19011 | `84df9ba7bf069278f61ac2a4891d8b4cb38b25f4f63ce20dd77eada1ba654278` | +| References | 109817 | `d7ab546586eb968f35dd1bf8f109db6a54b889af464a2c349e7af2e38ea60a8a` | +| Dependencies | 71932 | `cbf361a2b46e5ea2e5cad352c5abe8ab493869eb422cbdb77912484ea9fab1d1` | + +The snapshot tool also validates internal compact graph integrity: import-resolution links, reference links, dependency links, dependency reference counts, and dependency reference source/target consistency must all be zero-mismatch before the snapshot can pass. + +Important caveats: + +- The Rust indexer currently extracts a compact subset: files, top-level Python classes/functions/globals, nested Python class/function records for source attribution, imports, internal import-resolution records, first-slice Python symbol reference records, and de-duplicated dependency records for indexed Python modules. +- Direct package re-exports and wildcard import/re-export chains are resolved for indexed internal modules when the package file exposes a matching imported binding. Static literal `__all__` assignments restrict wildcard expansion; dynamic `__all__` construction, order-sensitive wildcard binding semantics, and ambiguous external re-export chains remain future work. +- Public Python handles still expose top-level `Codebase.symbols`, `classes`, and `functions`; nested compact symbols are currently internal records for dependency-source precision and `file.symbols(nested=True)`. +- Function parameters, lambda parameters, local assignment targets, local imports, `for` targets, `with ... as ...` targets, `except ... 
as ...` targets, comprehension targets, match-pattern captures, nested definitions, and `nonlocal` declarations now shadow imported/top-level names in the compact reference pass, reducing false-positive dependency edges before full lexical scope tables exist. Comprehension targets are scoped to the comprehension expression instead of leaking to the whole enclosing function. `global` declarations now remove matching names from the local-shadow set so module-level writes and uses remain visible in the compact reference/dependency graph. +- Imported module member references such as `module.some_func`, `alias.SomeClass`, `pkg.module.some_func`, and namespace-style nested module chains like `from a import b; b.c.d()` now resolve when the qualifier maps to an indexed internal Python module. Other attribute field names are skipped as bare-name references until full attribute/type resolution exists. The object side of an attribute expression is still scanned, so `helper.attr` preserves the `helper` reference while `obj.helper` no longer pretends `helper` is a standalone symbol use. +- The Python-facing Rust facade uses Python's selected file list, but the compact Rust records are not yet full Python graph parity. Symbol and import totals should not be compared directly with current Python graph node totals until the resolver and lazy handle layers are implemented. +- The Python backend numbers include the current eager Python object materialization and, in full graph mode, dependency edge computation. +- The Rust RSS number is sampled from a short-lived release process; it is suitable for directional comparison, not allocator-level attribution. +- The TypeScript/JavaScript Rust path now emits compact files, symbols, imports, exports, parser-error status, relative and tsconfig path/baseUrl import-resolution rows, first-slice reference rows, dependency rows, and lazy `Codebase` compatibility handles. 
External module modeling, lexical/type/interface parity, and codemod parity remain future work. +- The generated fixture, this repo, and the pinned Airflow baseline are useful proof points, but Python-vs-Rust semantic parity snapshots and additional canonical repos are still open. + +## Open Questions + +- Which additional small, medium, and huge repositories should become canonical Phase 0 baselines? +- Should TypeScript baselines run with dependency manager and language engine flags off, on, or both? +- Do we need allocator-level attribution with `memray`, `tracemalloc`, or `py-spy` in addition to RSS sampling? +- What commit, dependency lockfile, and Python minor version should define the official baseline? +- Which memory target should be set for the first Rust vertical slice: total RSS, graph-only delta, or parse-only delta? diff --git a/rust-rewrite/data-model.md b/rust-rewrite/data-model.md new file mode 100644 index 000000000..d5928089e --- /dev/null +++ b/rust-rewrite/data-model.md @@ -0,0 +1,567 @@ +# Rust Data Model Proposal + +## Scope + +This document proposes the compact Rust-side storage model for replacing the current Python object graph while preserving the Python API as a lazy compatibility layer. It is based on inspection of: + +- `CodebaseContext`: `PyDiGraph[Importable, Edge]`, `filepath_idx`, external module index, graph build/reparse flow. +- `SourceFile`: eager file node plus per-file `_nodes`, `_range_index`, import/symbol/export query helpers. +- `Editable` and `Importable`: persistent `ts_node`, `ctx`, `parent`, `file_node_id`, `node_id`, `range`, and edit helpers. +- `Usage`, `RangeIndex`, `ResolutionStack`, `Import`, `Export`, and edge construction paths. + +The important constraint is to avoid translating every Python semantic node into a PyO3-owned object. Rust should own compact records and return IDs; Python objects should be handles created only when user code asks for them. 
+ +## Current Shape To Preserve + +The current graph endpoints are only semantic `Importable` objects: + +- files +- symbols +- imports +- exports +- external modules + +General expressions/statements are usually not graph endpoints, but many are still materialized as Python `Editable` objects for parent traversal, source/range access, edits, and dependency extraction. `RangeIndex` can additionally keep all parsed editables when `full_range_index` is enabled. + +Current graph edge kinds are: + +- `IMPORT_SYMBOL_RESOLUTION`: import to resolved symbol/import/export/file/external module. +- `EXPORT`: export to declared/exported symbol, import, file, or other export target. +- `SUBCLASS`: class/interface symbol to resolved superclass/interface. +- `SYMBOL_USAGE`: symbol/import/export/file owner to used symbol/import/export/file/external module, with `Usage` metadata. + +The Rust model should preserve those graph semantics, not the Python object ownership model. + +## Core Storage + +```rust +pub struct EngineStore { + pub schema_version: u16, + pub engine_epoch: u32, + + pub strings: StringInterner, + pub paths: PathInterner, + pub modules: StringInterner, + pub ts_kinds: StringInterner, + + pub files: Arena, + pub syntax: Arena, + pub symbols: Arena, + pub imports: Arena, + pub exports: Arena, + pub externals: Arena, + pub scopes: Arena, + pub usages: Arena, + + pub nodes: NodeTable, + pub graph: GraphStore, + pub indexes: IndexStore, +} +``` + +`EngineStore` is the sole owner of canonical codebase state. Records store IDs and interned keys, never Python object references, Rust references into other arenas, or persistent `tree_sitter::Node` wrappers. + +`Arena` should be a dense `Vec>` with tombstones and a per-slot generation, or an equivalent slotmap. Dense vectors keep scans and dumps cache-friendly; generations let lazy Python handles fail clearly after invalidation instead of reading a reused slot. 
+ +## IDs + +Use typed IDs internally: + +```rust +pub struct FileId(u32); +pub struct SyntaxId(u32); +pub struct SymbolId(u32); +pub struct ImportId(u32); +pub struct ExportId(u32); +pub struct ExternalId(u32); +pub struct ScopeId(u32); +pub struct UsageId(u32); +pub struct EdgeId(u32); +pub struct StringId(u32); +pub struct PathId(u32); +pub struct TsKindId(u16); +pub struct LineIndexId(u32); + +pub enum NodeRef { + File(FileId), + Symbol(SymbolId), + Import(ImportId), + Export(ExportId), + External(ExternalId), +} + +pub struct HandleKey { + pub node: NodeRef, + pub generation: u32, +} +``` + +The Python-facing `node_id` compatibility value should be an encoded `u64` or a `NodeId(u32)` lookup in `NodeTable`. The preferred shape is: + +```rust +pub struct NodeId(u32); + +pub struct NodeSlot { + pub node: NodeRef, + pub generation: u32, + pub alive: bool, +} +``` + +Compatibility handles store both `NodeId` and generation. `NodeId` values are not reused during a live engine epoch. On full rebuild, `engine_epoch` changes; on file reparse/delete, affected node generations change. A handle is valid only if `(engine_epoch, node_id, generation)` still matches. + +Future incremental stable IDs can be layered on top with `StableKey` fingerprints: + +```rust +pub enum StableKey { + File { normalized_path: PathId }, + Symbol { file: FileId, full_name: StringId, kind: SymbolKind, declaration_range_hash: u64 }, + Import { file: FileId, statement_range_hash: u64, local_index: u32 }, + Export { file: FileId, exported_name: Option, statement_range_hash: u64, local_index: u32 }, +} +``` + +Do not make stable keys the primary storage key in the first slice. Keep arena IDs compact, and use stable keys only to remap handles across reparses later. 
+ +## Interning + +Intern these values: + +- normalized relative paths +- absolute paths only when needed for IO/debug +- module specifiers and import sources +- symbol names and full names +- aliases, exported names, namespaces +- tree-sitter kind strings +- language-specific small strings that appear many times + +Content is not string-interned. Each parsed file owns an `Arc<[u8]>` or equivalent immutable byte buffer for the current revision. Source slices are `(FileId, ByteRange)` views into that buffer. + +Path normalization invariants: + +- `FileRecord.path` is the repo-relative path used by public APIs. +- A separate absolute path cache can exist for IO, but graph identity uses the relative path. +- Case-insensitive lookups are an auxiliary index and must not change canonical path IDs. + +## Ranges + +All canonical ranges are byte ranges in UTF-8 file content: + +```rust +pub struct ByteRange { + pub start: u32, + pub end: u32, +} + +pub struct Point { + pub row: u32, + pub column: u32, +} + +pub struct SourceRange { + pub bytes: ByteRange, + pub start_point: Point, + pub end_point: Point, +} +``` + +`Point.column` must match tree-sitter semantics for the grammar bindings, which are byte columns, not Unicode scalar columns. Keep a per-file line index so byte to point and point to byte conversions are cheap and deterministic. + +Range invariants: + +- `start <= end <= file.content.len()`. +- A record's `file_id` owns every range it stores. +- Ranges are half-open byte ranges. +- Public line ranges keep current behavior: `start_point.row..=end_point.row`. +- Edit transactions operate on byte ranges, matching today's `Editable.edit`, `insert_at`, and `remove_byte_range`. + +## Syntax Anchors + +Rust should not store one Python `Editable` per syntax node. 
Store compact syntax anchors instead: + +```rust +pub struct SyntaxRecord { + pub file: FileId, + pub parent: Option, + pub kind: TsKindId, + pub range: SourceRange, + pub flags: SyntaxFlags, + pub first_child: Option, + pub next_sibling: Option, +} + +bitflags! { + pub struct SyntaxFlags: u16 { + const NAMED = 1 << 0; + const ERROR = 1 << 1; + const MISSING = 1 << 2; + const CANONICAL = 1 << 3; + const SEMANTIC_ANCHOR = 1 << 4; + } +} +``` + +Default mode should store only anchors required by semantic records and usage matches: + +- file root +- symbol declaration/name/body/extended ranges +- import statement/specifier/module/name/alias ranges +- export statement/name/value ranges +- usage match ranges +- edit anchors needed by P0 methods + +When `full_range_index` or LSP mode is enabled, store all named syntax nodes and the parent/child links required for `ast()`, cursor lookup, and range lookup. This preserves compatibility without paying that cost for every normal codebase load. + +## File Records + +```rust +pub struct FileRecord { + pub path: PathId, + pub language: LanguageKind, + pub content_hash: u64, + pub content_len: u32, + pub content: Arc<[u8]>, + pub line_index: LineIndexId, + pub root: SyntaxId, + pub root_range: SourceRange, + pub parse_status: ParseStatus, + pub file_epoch: u32, + + pub symbols: IdSpan, + pub imports: IdSpan, + pub exports: IdSpan, + pub syntax_nodes: IdSpan, +} +``` + +Per-file ID spans point into sorted side arrays in `IndexStore`, not embedded `Vec`s in every file. This keeps `FileRecord` small and allows bulk rebuild of file indexes after parse. + +File invariants: + +- `path` is unique among live files. +- Per-file symbols/imports/exports are sorted by `(start_byte, end_byte, local_order)`. +- Deleting a file tombstones all semantic records owned by the file and removes graph edges touching those records. +- Reparsing a file increments `file_epoch`; lazy handles with old epoch become stale. 
+ +## Symbol Records + +```rust +pub struct SymbolRecord { + pub node_id: NodeId, + pub file: FileId, + pub kind: SymbolKind, + pub language_kind: LanguageSymbolKind, + pub name: StringId, + pub full_name: StringId, + pub parent_symbol: Option, + pub parent_scope: ScopeId, + pub declaration: SyntaxId, + pub name_syntax: Option, + pub body: Option, + pub extended_range: SourceRange, + pub declaration_range: SourceRange, + pub name_range: Option, + pub flags: SymbolFlags, + pub local_order: u32, +} +``` + +Symbol invariants: + +- `node_id` maps back to `NodeRef::Symbol(self_id)`. +- `parent_symbol` is in the same file and must not form a cycle. +- `full_name` is the language-specific qualified name used by current public APIs. +- `is_top_level` is a flag derived during extraction, not recomputed by climbing Python parents. +- `descendant_symbols` is answered by a symbol tree index. + +## Import Records + +```rust +pub struct ImportRecord { + pub node_id: NodeId, + pub file: FileId, + pub import_type: ImportType, + pub statement: SyntaxId, + pub specifier: SyntaxId, + pub module: Option, + pub symbol_name: Option, + pub alias: Option, + pub namespace: Option, + pub is_type_only: bool, + pub is_dynamic: bool, + pub unique_range: SourceRange, + pub statement_range: SourceRange, + pub specifier_range: SourceRange, + pub module_range: Option, + pub symbol_range: Option, + pub alias_range: Option, + pub resolved: Option, + pub local_order: u32, +} +``` + +Import invariants: + +- `node_id` maps back to `NodeRef::Import(self_id)`. +- `unique_range` preserves current equality/hash behavior for multi-import statements. +- `resolved` is mirrored by one `IMPORT_SYMBOL_RESOLUTION` edge when resolution succeeds or an external module record is created. +- External module records are keyed by `(import.source, unique_import_name)`, matching the current `module::import_name` index. 
+- Wildcard imports expose `names` through a wildcard expansion index, not by materializing `WildcardImport` Python objects up front. + +## Export Records + +```rust +pub struct ExportRecord { + pub node_id: NodeId, + pub file: FileId, + pub export_kind: ExportKind, + pub name: Option, + pub exported_name: Option, + pub declared_symbol: Option, + pub statement: SyntaxId, + pub name_syntax: Option, + pub value_syntax: Option, + pub statement_range: SourceRange, + pub name_range: Option, + pub target: Option, + pub flags: ExportFlags, + pub local_order: u32, +} +``` + +Export invariants: + +- `node_id` maps back to `NodeRef::Export(self_id)`. +- `target` is mirrored by an `EXPORT` edge when known. +- Wildcard exports target the source file node when current behavior does. +- `resolved_symbol` follows export/import edges with a visited set to preserve circular-chain behavior. + +## External Records + +```rust +pub struct ExternalRecord { + pub node_id: NodeId, + pub module: StringId, + pub import_name: StringId, + pub display_name: StringId, + pub first_import: ImportId, +} +``` + +External modules do not own file ranges. Any source/range shown for compatibility should come from `first_import` or the usage/import that reached the external. + +## Usage Records + +```rust +pub struct UsageRecord { + pub source: NodeRef, + pub target: NodeRef, + pub usage_symbol: NodeRef, + pub match_syntax: SyntaxId, + pub imported_by: Option, + pub usage_type: UsageType, + pub usage_kind: UsageKind, + pub match_range: SourceRange, +} +``` + +`source` is the graph edge source, matching the current `dest.node_id` emitted by `ResolutionStack.get_edges`. `usage_symbol` mirrors the current `Usage.usage_symbol` payload, which is usually `dest.parent_symbol` and may differ from `source` for nested symbols. `target` is the used node. `match_syntax` is the `Name`, `ChainedAttribute`, or `FunctionCall` anchor used for renames and source display. 
+ +Usage invariants: + +- Every `SYMBOL_USAGE` edge has exactly one `UsageId`. +- `UsageRecord.source == edge.source`. +- `UsageRecord.target == edge.target`. +- `UsageRecord.usage_symbol` is a live graph node. +- `match_syntax.file == source.file` when the source has a file. +- `usage_type` preserves `DIRECT`, `CHAINED`, `INDIRECT`, and `ALIASED` resolution stack semantics. +- `usage_kind` preserves body/type/decorator/import/export/subclass context. + +## Graph Storage + +```rust +pub struct EdgeRecord { + pub source: NodeRef, + pub target: NodeRef, + pub kind: EdgeKind, + pub usage: Option, +} + +pub struct GraphStore { + pub edges: Vec, + pub out_offsets: Vec, + pub out_edges: Vec, + pub in_offsets: Vec, + pub in_edges: Vec, +} +``` + +During parsing/resolution, use mutable per-node edge vectors plus a dedupe set. After a phase completes, freeze into CSR-style adjacency arrays. Incremental reparses can rebuild adjacency for affected nodes first; whole-graph CSR rebuild is acceptable for the first vertical slice if it is simpler. + +Edge invariants: + +- Edge endpoints are live `NodeRef`s. +- Multi-edges are allowed only when their full edge key differs. +- Full edge key is `(source, target, kind, usage_key)`. +- `IMPORT_SYMBOL_RESOLUTION` source is always `Import`. +- `EXPORT` source is always `Export`. +- `SUBCLASS` source is always `Symbol`. +- `SYMBOL_USAGE` has `usage.is_some()`. +- Non-`SYMBOL_USAGE` edges have `usage.is_none()`. 
+ +## Indexes + +```rust +pub struct IndexStore { + pub path_to_file: HashMap, + pub casefold_path_to_file: HashMap, + pub external_by_key: HashMap<(StringId, StringId), ExternalId>, + + pub file_symbols: Vec, + pub file_imports: Vec, + pub file_exports: Vec, + pub file_syntax: Vec, + + pub symbol_children: Vec, + pub scope_bindings: ScopeBindingIndex, + pub import_names_by_file: NameBindingIndex, + pub exported_names_by_file: NameBindingIndex, + + pub range_index_by_file: HashMap, +} +``` + +`RangeIndex` should be compact and optional: + +```rust +pub struct RangeIndex { + pub by_start: Vec, + pub exact: HashMap<(ByteRange, TsKindId), SyntaxId>, + pub all_for_range: HashMap>, +} +``` + +Query patterns: + +- `Codebase.files`: scan live `FileRecord`s sorted by path, return lazy file handles. +- `Codebase.symbols/classes/functions`: scan `symbols`, filter flags/kind/top-level, return handles sorted by file and range. +- `Codebase.imports/exports`: scan arenas or per-file spans. +- `SourceFile.imports/symbols/exports`: use file spans in `IndexStore`; no graph scan required. +- `Import.imported_symbol`: follow the one import resolution edge, then optionally follow export edges. +- `Export.exported_symbol`: follow the one export edge. +- `Symbol.usages`: inspect incoming edges for the target node, filter `SYMBOL_USAGE`, load usage records, sort by match start byte descending. +- `Importable.dependencies`: inspect outgoing `SYMBOL_USAGE` edges from descendant symbol IDs, filter usage type, dedupe, and sort by file/range. +- `find_by_byte_range`: use `RangeIndex.exact` or `all_for_range`. +- Cursor lookup: binary search `RangeIndex.by_start`, then choose the smallest containing range. 
+ +## Lazy Python Compatibility + +Python classes remain compatibility handles: + +```text +PySourceFile -> EngineHandle +PySymbol -> EngineHandle +PyImport -> EngineHandle +PyExport -> EngineHandle +PyExternal -> EngineHandle +PyEditable -> EngineSyntaxHandle +PyUsage -> EngineHandle +``` + +Each handle stores: + +- `Arc` or equivalent engine owner +- typed ID or `NodeId` +- slot generation +- file epoch if the handle depends on file ranges/content + +Handle methods delegate to Rust for source, ranges, relationships, and graph queries. Python lists are built from returned IDs, not from prebuilt objects. + +Compatibility notes: + +- A weak handle cache can preserve object identity for repeated access without materializing the full graph. The cache is optional and must not be part of canonical state. +- `source`, `start_byte`, `end_byte`, `range`, `span`, and `github_url` are computed from records and file content. +- `file`, `parent_symbol`, `parent`, and `descendant_symbols` are ID lookups. +- Unsupported deep AST methods can reparse one file and build transient Python editables for that call. Those transient objects must not be inserted into canonical graph storage. +- Existing writer methods can initially emit byte-range edit intents using stored ranges, then let the Python transaction manager apply them. +- Stale handles should raise a clear invalidation error or fall back to resolving by `StableKey` once stable remap exists. + +## Debug Dumps + +Add Rust debug APIs early: + +```text +debug_dump_ir(format="jsonl", include_strings=true, include_snippets=false) +debug_dump_graph(format="jsonl") +debug_dump_ranges(file_id) +debug_check_invariants() +``` + +Dump format requirements: + +- Include `schema_version`, engine version, repo root hash/path, and language. +- Sort files by path, records by `(file, start_byte, local_order)`, and edges by `(source, kind, target, usage)`. +- Resolve interned strings in human-readable dumps. 
+- Include raw content hashes and byte ranges by default, not full file content. +- Include optional snippets only when requested. +- Emit enough usage data to compare with the Python backend: edge kind, source node, target node, usage symbol, usage type, usage kind, match range, imported_by. + +Invariant checker should validate: + +- live IDs and node table round trips +- path uniqueness +- range bounds +- edge endpoint kinds +- usage/edge consistency +- per-file sorted spans +- scope parent cycles +- duplicate edge keys +- external module key uniqueness + +## Memory Rationale + +The current model pays for: + +- Python object headers and dicts for semantic nodes and many expressions. +- Persistent tree-sitter node wrappers on `Editable`. +- Backrefs from every object to context, parent, and file. +- The same Python objects stored as rustworkx graph payloads. +- Per-file `_nodes` and optional range indexes containing Python object references. +- `Usage` objects that hold Python object references to match nodes, owner symbols, and imports. + +The proposed model replaces that with: + +- `u32` IDs and small enums instead of object pointers. +- interned strings instead of repeated Python strings. +- contiguous arenas for cache-friendly scans. +- edge payloads as `EdgeRecord` plus optional `UsageId`. +- syntax anchors as byte ranges rather than Python wrappers. +- optional full syntax/range tables only for debug/LSP modes. + +Expected record sizes should be in the tens of bytes for edges/usages and under roughly 100 bytes for most symbols/imports, before interned strings and content. The exact target should be validated by the benchmark agent, but the design removes the multiplicative Python object and graph payload overhead. + +## Migration Risks + +- Python identity and hashing: current equality relies on filepath, range, kind ID, and import unique ranges. Handles must reproduce that behavior even though canonical state is ID based. 
+- Sorting parity: public APIs rely on file/range/node ID order. Rust queries need explicit stable sort keys. +- Tree-sitter node access: any API exposing or depending on `ts_node` needs either a Rust-backed compatibility surface or a transient per-file reparse fallback. +- Range columns: tree-sitter points use byte columns. Accidentally switching to Unicode columns will break LSP and edit behavior for non-ASCII files. +- Wildcard imports and exports: current code lazily expands and invalidates wildcard-derived names. Rust needs explicit invalidation for files importing from wildcard providers. +- Conditional scope resolution: current `Name.resolve_name` has special conditional-block behavior. Scope tables need tests before Rust becomes authoritative. +- External modules: current identity is tied to import source plus unique node source. The Rust key must match enough behavior to avoid duplicate external nodes. +- Edits and stale handles: any committed edit invalidates ranges for at least one file. Handles must check file epoch before applying edits. +- Full range index memory: enabling all syntax anchors can be expensive. It must remain opt-in and visible in debug stats. +- Fallback materialization: unsupported APIs may temporarily materialize Python objects. This must be per-call/per-file and never recreate the full Python object graph behind PyO3. + +## First Slice Recommendation + +Implement the Rust data model in this order: + +1. Interners, typed IDs, arenas, node table, and file records. +2. Symbol/import/export/external records for Python and TypeScript top-level extraction. +3. Graph edge table with import/export/subclass/symbol usage edge kinds and debug dumps. +4. Per-file query indexes for files, symbols, imports, and exports. +5. Lazy Python handles returning source/ranges and ID-backed relationships. +6. Optional full range index for debug/LSP parity. 
+ +This gives the resolver and PyO3 agents a stable contract while keeping the first engine slice focused on compact canonical state rather than Python object emulation. diff --git a/rust-rewrite/docs-site-strategy.md b/rust-rewrite/docs-site-strategy.md new file mode 100644 index 000000000..3f14cb8a6 --- /dev/null +++ b/rust-rewrite/docs-site-strategy.md @@ -0,0 +1,316 @@ +# Graph-sitter Docs, Landing, Vercel, And Skill Strategy + +## Recommendation + +Build the product docs and landing page as one Vercel-hosted Next.js site: + +- `site/`: Vercel-hosted Next.js app for the landing page and statically rendered docs. +- `site/content/docs/`: target long-term docs content source, migrated from the current `docs/` tree. +- `site/lib/docs/` and `site/components/docs/`: target docs loader, navigation, search index generation, and MDX component shims. +- `rust-rewrite/skill-prototype/graph-sitter/`: draft Codex skill artifact until the package and docs are release-ready. + +The landing page should explain Graph-sitter in one screen. The docs should live in the same app under static routes and carry setup, CLI, API, Rust backend, JS/TS, codemod, correctness, and benchmark details. This gives us full control over layout, search, examples, interactive demos, custom benchmarking views, and future skill distribution pages. + +Mintlify should be treated as the legacy docs source/renderer, not the target platform. Keep the current `docs/` content as migration input until the Next docs renderer has parity, then retire Mintlify-specific config and CI. + +## Current Repo Signals + +- [x] `docs/mint.json` configures the Mintlify docs tree. +- [x] `docs/README.md` already documents local Mintlify validation. +- [x] `site/` is a Next.js app with `npm run dev`, `npm run build`, and checked-in `package-lock.json`. +- [x] `site/app/page.tsx` already positions Graph-sitter as a Python shell with a compact Rust backend and future `uvx graph-sitter ...` command surface. 
+- [x] No repo-level `vercel.json`, `site/vercel.json`, `.vercel/`, or `site/.vercel/` is checked in. +- [x] Vercel CLI is available locally: `Vercel CLI 54.7.1`. +- [x] Vercel CLI auth is present for user `jayhack`. +- [x] `pyproject.toml` already exposes both `gs` and `graph-sitter` console scripts. +- [x] `rust-rewrite/uvx-command-roadmap.md`, `rust-rewrite/uvx-cli-plan.md`, and `rust-rewrite/uvx-skill-distribution-plan.md` already document `uvx graph-sitter ...` as the target one-shot interface. + +## Information Architecture + +Recommended public URLs: + +```text +https://graph-sitter.com -> Vercel Next app landing page +https://graph-sitter.com/docs -> Vercel statically rendered docs +https://www.graph-sitter.com -> Vercel redirect or alias to the apex +https://docs.graph-sitter.com -> Vercel alias or redirect to /docs +``` + +Landing page responsibilities in `site/`: + +- one-sentence definition: codebase graphs for analysis and codemods +- Python API example +- parse, graph, transform capabilities +- Python shell plus Rust backend architecture +- cautious `uvx graph-sitter ...` preview +- links to docs and GitHub + +Docs responsibilities in the Vercel app: + +- install and setup +- parse command and JSON output +- registered codemods through `run` +- ad hoc import-path transforms through `transform` +- Rust backend status, fallback modes, and release gates +- Python, JavaScript, TypeScript, and React support +- correctness and parity methodology +- large-repo benchmark methodology and results +- generated API reference +- Codex skill installation and usage once published + +Recommended route shape: + +```text +site/app/page.tsx -> landing page +site/app/docs/[[...slug]]/page.tsx -> static docs renderer +site/app/api-reference/[[...slug]] -> optional alias or docs section +site/content/docs/**/*.mdx -> migrated docs content +site/content/nav.ts -> navigation source replacing mint.json +site/components/docs/*.tsx -> MDX shims and docs UI 
+site/lib/docs/*.ts -> content loading, frontmatter, static params, search manifest +``` + +Use `generateStaticParams` and `dynamicParams = false` for docs routes so Vercel pre-renders every docs page. A full static export can be evaluated later, but regular Vercel static generation is enough for launch and keeps room for future dynamic features such as hosted examples or generated benchmark views. + +## Migration Plan From Mintlify + +The current `docs/` tree is a useful content seed, but it contains Mintlify-specific navigation and MDX components. Migrate deliberately: + +1. Inventory MDX component usage in `docs/**/*.mdx`. +2. Implement local component shims in `site/components/docs/` for high-use components such as `Note`, `Card`, `CardGroup`, `Param`, `ResponseField`, and code blocks. +3. Convert `docs/mint.json` navigation into a typed `site/content/nav.ts` or `site/content/nav.json`. +4. Move or copy content into `site/content/docs/` so the Vercel project is self-contained. +5. Add a build-time docs loader that parses frontmatter, resolves slugs, renders MDX, and emits a search manifest. +6. Port generated API reference pages or replace them with a generator that writes Vercel-compatible MDX. +7. Add redirects from old slugs to new slugs before domain cutover. +8. Remove Mintlify CI only after Next docs build proves parity on all docs pages. + +Do not depend on files outside `site/` at Vercel build time unless the project root is deliberately changed to the repository root. The lower-risk target is a self-contained `site/` app with docs content underneath `site/content/docs/`. + +## Accuracy Contract For Docs + +Setup docs must be explicit about three workflows: + +```bash +# local source checkout +uv run graph-sitter doctor +uv run graph-sitter parse . --backend python --format json + +# released package, after PyPI and wheel gates pass +uvx graph-sitter doctor +uvx graph-sitter parse . 
--language auto --backend auto --fallback python --format json + +# branch-built wheel validation before public release +uvx --from dist/graph_sitter-*.whl graph-sitter parse . --backend rust --fallback error --format json +``` + +Parsing docs must specify: + +- `PATH` defaults and whether `.codegen` is required. +- `--language auto|python|typescript` behavior. +- `--backend python|rust|auto` behavior. +- `--fallback error|python` semantics. +- `--subdir` behavior for large repos. +- summary output versus JSON output. +- JSON schema version and stable fields. + +Transform and codemod docs must specify: + +- `transform MODULE:OBJECT PATH --check|--write` for one-shot transforms. +- `run LABEL PATH --check|--write` for registered `.codegen/codemods` workflows. +- `--check` runs in a copied temporary repo and leaves the target unchanged. +- `--write` mutates the target. +- examples should show `--check` before `--write`. +- post-run validation should include `git diff` and focused target tests. + +Rust backend docs must specify: + +- Python remains the authoring shell and compatibility path. +- Rust is the compact parse/index backend for supported surfaces. +- strict mode is `--backend rust --fallback error`. +- fallback mode must disclose the actual backend and reason. +- unsupported Rust-backed APIs should fail explicitly in strict mode. +- public claims should say "supported subset" or "selected pinned large-repo parity" until correctness work proves more. + +JS/TS docs must specify: + +- supported language selector is currently `typescript` for TS/JS/React flows unless the CLI adds a separate `javascript` selector. +- large-repo proof target is pinned Next.js. +- TS docs need one parse example, one read-only graph query, one checked transform, and one write transform. +- React/JSX support should be described by tested AST/API behavior, not broad ecosystem claims. + +Benchmark docs must specify: + +- exact repo and commit/tag. 
+- backend, language, command, Python version, platform, and wheel/source mode. +- wall time and peak RSS. +- whether broad Python-side caches were materialized. +- what counts were compared: files, symbols, imports, exports, references, dependencies, parse errors, and codemod touched files. + +## Vercel Path + +Recommended Vercel project: + +```text +Framework Preset: Next.js +Root Directory: site +Build Command: default +Output Directory: default +Install Command: default +Node.js: 22.x +Runtime Env Vars: none required today +Production Branch: integrator-approved trunk branch +``` + +No checked-in `vercel.json` is required for the current app if Vercel project settings define `site` as the root directory. Once docs move to Vercel, add `site/vercel.json` or Next redirects if needed for: + +- `docs.graph-sitter.com` domain routing. +- old Mintlify slug redirects. +- canonical `/docs` paths. +- long-cache headers for generated static assets and search indexes. + +Read-only/project setup checks: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +vercel whoami +vercel --version +vercel link --cwd site +vercel pull --cwd site --environment=preview --yes +``` + +Preview deploy flow after approval: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +npm --prefix site ci +npm --prefix site run build +vercel deploy --cwd site --yes +``` + +Prebuilt preview alternative: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +vercel pull --cwd site --environment=preview --yes +vercel build --cwd site +vercel deploy --cwd site --prebuilt --yes +``` + +Production cutover requires explicit approval: + +- attach `graph-sitter.com` to the Vercel landing project +- attach or redirect `www.graph-sitter.com` +- attach or redirect `docs.graph-sitter.com` to the Vercel docs routes +- update `hatch.toml` documentation URL if final docs URL differs from current metadata + +Do not run `vercel deploy --prod`, promote a deployment, or 
attach domains from a subagent task. + +## Codex Skill Packaging + +Keep the skill as a small operating guide, not a copy of the docs. + +Recommended source artifact: + +```text +rust-rewrite/skill-prototype/graph-sitter/ +├── SKILL.md +├── agents/ +│ └── openai.yaml +└── references/ + ├── cli.md + ├── codemods.md + └── rust-backend.md +``` + +The skill should document: + +- when to use Graph-sitter versus ordinary file inspection +- local checkout commands through `uv run` +- released package commands through `uvx graph-sitter ...` +- branch wheel commands through `uvx --from dist/<wheel>.whl` +- strict Rust mode and fallback semantics +- large-repo scoping through `--subdir` +- transform safety: `--check`, inspect diff, then `--write` +- correctness caveat: parity is not the same as absolute semantic correctness + +Release gates before distributing the skill: + +- published docs contain the same setup commands the skill recommends +- `uvx graph-sitter doctor`, `parse`, `run`, and `transform` work from a clean installed package +- the skill validator passes on the final skill folder +- one fresh-agent read-only parse task succeeds +- one fresh-agent checked codemod task succeeds before write mode is attempted +- benchmark and correctness claims link to current docs or committed reports + +## Multi-Agent Work Convention + +Use this file as the work ledger for docs, landing, Vercel, and skill tasks. + +Rules: + +- Agents claim one unchecked item by editing the line to include `owner: <agent-name>`. +- Agents mark `[x]` only after the artifact is changed and validated. +- Each completed item should include a terse `Result:` note with the changed file or validation command. +- Agents must not edit implementation code from this workstream unless the integrator explicitly expands scope. +- Deployment tasks require explicit integrator approval before any production action. + +## Task Checklist + +### Docs Architecture + +- [x] Create docs/site strategy. owner: docs-vercel-subagent. 
Result: `rust-rewrite/docs-site-strategy.md`. +- [x] Decide whether docs stay on Mintlify for launch or migrate to Vercel MDX. owner: user. Result: target is Vercel/Next with statically rendered docs; Mintlify is legacy migration input. +- [x] Design the Vercel docs content tree under `site/content/docs`. owner: codex. Result: added typed seed docs content in `site/content/docs/pages.ts` plus route/nav helpers for static docs pages. +- [ ] Inventory Mintlify-specific MDX components used by `docs/**/*.mdx`. owner: unclaimed. +- [ ] Build local MDX component shims for the migrated docs renderer. owner: unclaimed. +- [ ] Convert `docs/mint.json` navigation into a Vercel docs nav source. owner: unclaimed. +- [x] Create a static docs route in the Next app with `generateStaticParams`. owner: codex. Result: `site/app/docs/[[...slug]]/page.tsx` prerenders `/docs` plus setup, uvx, Rust status, parity, benchmark, and TypeScript support pages with `dynamicParams = false`. +- [x] Generate a static client-side search manifest for docs pages. owner: codex. Result: added `docsSearchRecords()`, a static `/docs/search.json` route, and sidebar client search over the Vercel docs seed pages. +- [ ] Port or regenerate API reference pages into Vercel-compatible MDX. owner: unclaimed. +- [ ] Add redirects for old Mintlify docs slugs before domain cutover. owner: unclaimed. +- [ ] Add a docs release gate checklist to `rust-rewrite/strategy.md` or keep this file as the docs ledger. owner: unclaimed. +- [x] Audit `docs/introduction/installation.mdx` against current `uv run`, `uv tool install`, and `uvx` behavior. owner: codex. Result: installation docs now distinguish installed tool, local source, published-package `uvx`, and branch-built wheel validation. +- [x] Add or update a dedicated `docs/cli/uvx.mdx` page with release-gated package guidance. owner: codex. Result: added `docs/cli/uvx.mdx` for parse, run, transform, backend, safety, `--subdir`, and release-gate workflows. 
+- [ ] Add Rust backend architecture/status docs sourced from `rust-rewrite/supported-subset.json` and current wheel checks. owner: unclaimed. +- [x] Add correctness/parity docs that distinguish old-backend parity from semantic correctness. owner: codex. Result: added `docs/correctness/parity.mdx` with supported-scope evidence, known deltas, safety modes, and pre-default gates. +- [x] Add large-repo benchmark docs for pinned Airflow and pinned Next.js after fresh measurements. owner: codex. Result: added `docs/benchmarks/large-repos.mdx` with Codebase, installed-wheel, and codemod proof summaries. + +### Landing Page + +- [ ] Review `site/app/page.tsx` copy for release-gated claims before the first public preview. owner: unclaimed. +- [ ] Add a landing-page CTA to the exact docs quickstart once the docs URL is final. owner: unclaimed. +- [x] Verify `site` builds from a clean install with Node 22. owner: codex. Result: `PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" npm ci && npm run build` passed and generated the static docs route. +- [ ] Add landing-page screenshots or visual QA notes before production domain cutover. owner: unclaimed. + +### Vercel + +- [ ] Link or create the Vercel project with root directory `site`. owner: unclaimed. +- [ ] Pull preview env with `vercel pull --cwd site --environment=preview --yes`. owner: unclaimed. +- [ ] Run a preview deploy for review only after the static docs route builds. owner: unclaimed. +- [ ] Record the preview URL in this file or the integrator thread. owner: unclaimed. +- [ ] Confirm `docs.graph-sitter.com` routing to Vercel docs before apex cutover. owner: unclaimed. +- [ ] Attach `graph-sitter.com`, `www.graph-sitter.com`, and `docs.graph-sitter.com` only after explicit approval. owner: blocked-pending-approval. + +### Skill + +- [ ] Finalize `rust-rewrite/skill-prototype/graph-sitter/SKILL.md` after CLI/docs commands stabilize. owner: unclaimed. +- [ ] Validate the skill with the Codex skill validator. 
owner: unclaimed. +- [ ] Forward-test the skill with a fresh-agent read-only parse task. owner: unclaimed. +- [ ] Forward-test the skill with a fresh-agent codemod task using `--check` before `--write`. owner: unclaimed. +- [ ] Document skill installation/distribution in public docs once release gates pass. owner: unclaimed. + +### JS/TS Documentation + +- [ ] Add a tested TypeScript parse quickstart using pinned Next.js or a small TS fixture. owner: unclaimed. +- [ ] Add a TypeScript transform/codemod example with `--check` and `--write`. owner: unclaimed. +- [ ] Document current JS/React support boundaries using tested behavior. owner: unclaimed. +- [ ] Keep TS examples aligned with `rust-rewrite/tools/check_wheel_pinned_typescript_repo.py`. owner: unclaimed. + +### Release Readiness + +- [ ] Ensure PyPI package metadata points to the final docs and landing URLs. owner: unclaimed. +- [ ] Ensure the public setup path does not claim `uvx graph-sitter ...` until clean package validation passes. owner: unclaimed. +- [ ] Replace Mintlify docs validation CI with a Next docs build/link validation gate after migration. owner: unclaimed. +- [x] Add a legacy docs validation CI or release gate for `mintlify validate`. owner: codex. Result: `.github/workflows/docs-validate.yml` runs Mintlify validate and broken-link checks for docs changes until Vercel docs replace it. +- [x] Add a site build CI or release gate for `npm --prefix site ci && npm --prefix site run build`. owner: codex. Result: `.github/workflows/site-build.yml` installs from `site/package-lock.json` and runs the Next.js production build for landing-site changes. diff --git a/rust-rewrite/docs-site-vercel-plan.md b/rust-rewrite/docs-site-vercel-plan.md new file mode 100644 index 000000000..93be74e95 --- /dev/null +++ b/rust-rewrite/docs-site-vercel-plan.md @@ -0,0 +1,379 @@ +# Docs Site and Vercel Plan + +## Decision + +Keep the public landing page and product documentation as two separate +surfaces. 
+ +- `docs/` remains the Mintlify documentation source of truth. +- `site/` remains a small Next.js landing page for Vercel. +- `graph-sitter.com` should eventually point at the Vercel landing project. +- `www.graph-sitter.com` should redirect or alias to the Vercel landing project. +- `docs.graph-sitter.com` should point at the Mintlify docs project. + +This keeps the generated API reference, Mintlify MDX components, CLI docs, and +codemod tutorials out of the marketing app. The landing page should send people +to the docs instead of attempting to render the docs tree itself. + +## Recommended Stack + +Use the stack that already exists in this repo: + +- Mintlify in `docs/` for product documentation, tutorials, generated API + reference pages, and release-gated setup instructions. +- Next.js in `site/` for a small Vercel-hosted landing page. +- Vercel only for the landing project, with project root set to `site`. + +This split is the lowest-risk path because the repo already has Mintlify source +content, generated docs workflow wiring, a separate Next.js landing app, and +app-local ignore rules for Vercel/build artifacts. It also keeps docs releases +from being coupled to landing-page deploys. + +## Current Findings + +- [x] This pass was verified on 2026-06-19 from branch `rust-rewrite`. +- [x] `docs/` is a Mintlify project configured by `docs/mint.json`. +- [x] `docs/**/*.mdx` contains the human-authored docs, examples, tutorials, + CLI pages, and API reference pages. +- [x] `.github/workflows/generate-docs.yml` regenerates API reference docs and + the system prompt on pushes to `develop`. +- [x] `site/` is a conventional Next.js app with `npm run dev`, `npm run build`, + and a checked-in `site/package-lock.json`. +- [x] `site/app/page.tsx` already states the product direction: Python as the + authoring shell, Rust for the large graph/index backend, and + `uvx graph-sitter ...` as the future command surface. 
+- [x] `site/.gitignore` ignores `.next`, `node_modules`, `out`, `.vercel`, + TypeScript build info, and local env files. +- [x] No repo-level `vercel.json`, `site/vercel.json`, `.vercel/`, or + `.openai/hosting.json` is checked in. +- [x] A working Vercel CLI is available at + `$HOME/.nvm/versions/node/v22.19.0/bin/vercel`; `vercel --version` returns + `Vercel CLI 54.7.1`. +- [x] Authenticated Vercel user was verified with `vercel whoami`: + `jayhack`. +- [x] `site/` builds successfully with Node 22: + `PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" npm run build`. +- [x] Mintlify docs validation passes: + `PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" npx --yes mintlify@latest validate`. +- [x] The default Homebrew Node on this machine is currently unusable because + it references a missing `libllhttp.9.3.dylib`; use the Node 22 path above + for docs, site, and Vercel commands until that local install is fixed. +- [ ] Vercel project link state, project ownership, and team scope still need + to be verified before any preview deploy. +- [ ] Mintlify project ownership and custom-domain state still need to be + verified outside this repo. 
+ +## Mintlify Docs vs Vercel Landing Site + +Mintlify owns durable product documentation: + +- setup and installation +- current `uv tool install graph-sitter --python 3.13` setup until the + published `uvx graph-sitter ...` package path is proven +- `uvx graph-sitter ...` once the command is released +- existing `gs` compatibility commands while the CLI transition is in flight +- Python API usage +- codemod authoring and execution +- backend/fallback semantics +- supported languages and known limits +- generated API reference +- tutorials, migration guides, and troubleshooting + +Vercel owns the simple landing page: + +- one-sentence explanation of Graph-sitter +- why a graph-aware codemod library matters +- Python shell plus Rust backend positioning +- compact examples for parsing and transforming +- conservative `uvx graph-sitter ...` preview copy +- links to docs and GitHub + +The landing page should not include exhaustive setup steps, API reference +tables, generated docs, or detailed Rust parity claims. Those belong in +Mintlify and should only ship after the release gates below pass. + +## Proposed Information Architecture + +Recommended URLs after launch: + +```text +https://graph-sitter.com -> Vercel project rooted at site/ +https://www.graph-sitter.com -> Vercel alias or redirect +https://docs.graph-sitter.com -> Mintlify project rooted at docs/ +https://docs.graph-sitter.com/api-reference -> Mintlify generated API pages +``` + +Recommended landing-page shape in `site/`: + +- `/`: simple product page explaining that Graph-sitter builds codebase graphs + for repository queries and codemods. +- Hero: `Graph-sitter` plus the literal offer: codebase graphs for codemods. + Avoid benchmark claims until benchmark reports are current. +- Quick example: `Codebase("./")` Python shell usage. +- Capabilities: parse, graph, transform. +- Architecture: Python shell, compact Rust backend, explicit fallback status. 
+- CLI preview: `uvx graph-sitter parse ...` and `uvx graph-sitter transform ...` + only with release-gated wording. +- CTAs: docs and GitHub. + +Recommended Mintlify docs shape: + +- `introduction/overview`: short definition, supported languages, and status. +- `introduction/installation`: current stable install path and Python versions. +- `introduction/getting-started`: quickstart with one parse/query/edit + walkthrough. +- `cli/uvx`: target `uvx graph-sitter ...` commands for parse, run, and + transform after published-package validation passes. +- `cli/*`: existing `gs` workspace commands and compatibility notes. +- `codemods/overview`: transformation model, check/write modes, and rollback + expectations. +- `codemods/examples`: focused Python and TypeScript codemod examples that + match tested behavior. +- `architecture/python-shell-rust-core`: how the Rust engine, Python handles, + strict mode, and fallback mode fit together. +- `architecture/supported-subset`: current supported Rust-backed APIs and open + gaps sourced from `rust-rewrite/supported-subset.json`. +- `benchmarks/large-repos`: pinned Airflow and Next.js benchmark methodology, + latency, and RSS numbers only after artifact-level reports are current. +- `correctness/parity`: how golden graph snapshots, semantic parity checks, + and codemod proofs are validated. +- `skill/graph-sitter`: Codex skill install/use page once the skill path is + finalized and tested. +- `building-with-graph-sitter/*`: durable API and concept guides that remain + accurate for the Python shell. +- `api-reference/*`: generated pages only, refreshed by the docs workflow. + +## Setup Docs Contract + +The setup docs need to be honest about what works today and clear about where +the CLI is headed. + +Current repo-validated path: + +```bash +uv run graph-sitter doctor +uv run graph-sitter parse . --language python --backend rust --format summary +uv run graph-sitter transform ./codemods/rename.py:rename . 
--check +uv run gs init +uv run gs run . --check +``` + +Target public path after package-release gates pass: + +```bash +uvx graph-sitter doctor +uvx graph-sitter parse . --language auto --backend rust --format summary +uvx graph-sitter transform ./codemods/rename.py:rename . --check +uvx graph-sitter transform ./codemods/rename.py:rename . --write +``` + +Do not make `uvx graph-sitter ...` the primary install path in permanent docs +until the exact published artifact is tested against a clean environment. Until +then, use release-gated wording like "target command surface" or "planned +public entrypoint." + +## Skill Distribution Plan + +The Codex skill should be distributed only after the library, CLI, and docs all +agree on setup and command names. + +The skill should contain: + +- a short purpose statement: use Graph-sitter to inspect codebase graphs and + run guarded codemods +- prerequisites: supported Python versions, `uv`, and platform notes +- setup check: `uvx graph-sitter doctor` once released, or the branch-wheel + equivalent before release +- parse workflow: `graph-sitter parse` / `uvx graph-sitter parse` +- transform workflow: check mode before write mode +- language support and backend/fallback caveats +- links to the docs, benchmarks, correctness/parity page, and examples + +Before publishing the skill, run it as a clean consumer would: install from the +documented source, parse a small Python repo, parse a small TS repo, run a +checked codemod, then run a write-mode codemod in a disposable git repo. 
+ +## Vercel Project and Deploy Flow + +Use a dedicated Vercel project with these settings: + +```text +Framework Preset: Next.js +Root Directory: site +Build Command: default +Output Directory: default +Install Command: default +Node.js: 22.x +Production Branch: integrator-approved trunk branch +Runtime Environment Variables: none required today +``` + +Preview-only flow from the repository root: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +npx vercel whoami +npx vercel link --cwd site +npx vercel pull --cwd site --environment=preview --yes +npx vercel deploy --cwd site --yes +``` + +If the project is already linked: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +npx vercel pull --cwd site --environment=preview --yes +npx vercel deploy --cwd site --yes +``` + +Optional prebuilt preview flow: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +npx vercel pull --cwd site --environment=preview --yes +npx vercel build --cwd site +npx vercel deploy --cwd site --prebuilt --yes +``` + +Do not run `npx vercel deploy --cwd site --prod`, promote a deployment, or +attach `graph-sitter.com` / `www.graph-sitter.com` until the integrator +explicitly approves production cutover. + +## Vercel Files, Env, And Secret Hygiene + +- Keep `.vercel/` untracked. `site/.gitignore` already covers `site/.vercel`; + if a root-level Vercel command creates `./.vercel`, remove it or add a + root-level ignore before committing anything. +- Do not commit `.env.local`, `.env.*.local`, Vercel tokens, team IDs from + shell history, or downloaded runtime env values. +- Prefer `vercel link --cwd site` and `vercel pull --cwd site ...` so generated + project metadata stays under the ignored `site/.vercel/` directory. +- If runtime env vars are needed later, manage them in the Vercel dashboard or + through `vercel env` and document only the variable names and purposes in the + repo. 
+- The current landing page should need no runtime env vars. +- A committed `site/vercel.json` is unnecessary today. Add one only if routing, + headers, redirects, or enforced framework/build settings cannot be expressed + through Vercel project settings. + +## Local Verification Commands + +If the active Node install fails locally, prefer a Node 22 runtime from `nvm` +or another version manager before running landing-page or Vercel commands: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +``` + +Landing page: + +```bash +cd site +npm ci +npm run build +``` + +Vercel build check without deploying: + +```bash +export PATH="$HOME/.nvm/versions/node/v22.19.0/bin:$PATH" +npx vercel pull --cwd site --environment=preview --yes +npx vercel build --cwd site +``` + +Docs: + +```bash +cd docs +npx --yes mintlify@latest validate +npx --yes mintlify@latest broken-links +``` + +Mintlify may warn that `mint.json` is legacy config and may generate +`docs.json`. Treat `docs/mint.json` as the checked-in source of truth until a +docs owner explicitly approves a config migration. + +## Release Gate Checklist + +- [ ] Confirm canonical domain split: apex/www on Vercel, `docs.` on Mintlify. +- [ ] Verify Vercel account/team with `npx vercel whoami`. +- [ ] Confirm or create a Vercel project whose root directory is exactly + `site`. +- [ ] Produce a Vercel preview deployment for review without `--prod`. +- [ ] Review landing page at desktop and mobile widths. +- [ ] Validate Mintlify docs navigation and broken links. +- [ ] Update stale docs copy that still says Codegen where it should say + Graph-sitter, without erasing legitimate company attribution. +- [x] Update install/setup docs for the current release-gated command surface. + Result: installation docs now use `graph-sitter` as the primary command, + include `uvx graph-sitter ...`, and keep branch-built wheel validation + separate from published-package release proof. 
+- [x] Document both `gs` compatibility commands and the new + `uvx graph-sitter ...` commands. +- [ ] Confirm artifact-level wheel smoke tests pass for parse and transform + before advertising `uvx graph-sitter` as the public path. +- [ ] Confirm Rust-backed parse/transform support and fallback semantics before + making Rust the default backend in docs. +- [ ] Confirm the Codex skill installation path and link from docs only after + the skill package is ready. +- [ ] Move or confirm docs at `docs.graph-sitter.com`. +- [ ] Attach `graph-sitter.com` and `www.graph-sitter.com` to Vercel only after + explicit production approval. + +## Concrete Next Tasks + +- [ ] Link the existing authenticated Vercel account to a dedicated landing + project with `vercel link --cwd site`; confirm root directory is `site`. +- [ ] Run `npx vercel pull --cwd site --environment=preview --yes` and confirm + only ignored `site/.vercel/` metadata is created. +- [ ] Run `npm ci && npm run build` inside `site/` and fix any landing build + errors before preview deployment. +- [ ] Create a Vercel preview deployment with + `npx vercel deploy --cwd site --yes`; record the preview URL in the PR or + integration thread, not in permanent docs. +- [ ] Review the landing page at desktop and mobile widths; keep copy + conservative around Rust performance, correctness, and `uvx` until release + gates pass. +- [ ] Add or update Mintlify docs pages for quickstart, `uvx` CLI, + transformations/codemods, Rust architecture, benchmark methodology, + correctness/parity, and skill distribution. Notes: quickstart, `uvx` CLI, + parse/run/transform, Rust backend setup, benchmark evidence, and + correctness/parity docs are updated; Rust architecture and skill public docs + remain open. +- [ ] Make docs examples use commands that are backed by current tests: + `graph-sitter parse`, `graph-sitter transform --check|--write`, strict + backend flags, and fallback behavior. 
+- [ ] Generate or refresh API docs through the existing docs workflow instead + of hand-editing generated `api-reference` pages. +- [ ] Validate docs with `npx --yes mintlify@latest validate` and + `npx --yes mintlify@latest broken-links`; triage legacy Mintlify config + warnings separately from content errors. +- [ ] Decide whether to keep `docs/mint.json` or migrate to Mintlify's newer + `docs.json` format; do not migrate opportunistically during landing work. +- [ ] Wire final `uvx graph-sitter ...` setup instructions to the published + wheel release once `uvx-command-roadmap.md` gates pass. +- [ ] Publish the Codex skill only after the reviewed skill folder, CLI + commands, and setup docs all agree. + +## Known Risks and Open Decisions + +- The docs still contain legacy `Codegen` wording, `gs`-first CLI examples, and + some source links to `develop`. These need a targeted docs pass, not a broad + blind replacement. +- Current docs say Graph-sitter guarantees transformation correctness. Product + direction is more careful: we should document tested behavior, parity scope, + and known limits instead of claiming universal correctness. +- The final default backend is still a release decision. Until published wheels + and large-repo parity gates pass, public docs should prefer explicit backend + examples and clear fallback semantics. +- The landing page can mention the Rust rewrite and `uvx graph-sitter`, but it + should avoid promising performance numbers until benchmark methodology and + artifact-level tests are published. +- The Vercel project is not linked in the repo yet. Any `.vercel/` directory + created during manual linking should stay untracked unless the team decides + to commit project metadata. +- Mintlify custom-domain settings are not represented in this repo, so the docs + domain cutover requires account-level verification. 
+- The repository already has `site/.next` and `site/node_modules` locally, but + they are ignored by `site/.gitignore`; do not treat local build artifacts as + source. diff --git a/rust-rewrite/engine-skeleton.md b/rust-rewrite/engine-skeleton.md new file mode 100644 index 000000000..b83c41ba6 --- /dev/null +++ b/rust-rewrite/engine-skeleton.md @@ -0,0 +1,34 @@ +# Rust Engine Skeleton Notes + +## Layout + +- `Cargo.toml` defines a standalone Cargo workspace. It is not referenced by `pyproject.toml`, `hatch.toml`, or the current Python package build. +- `crates/graph-sitter-engine` is the dependency-free core crate. It exposes a minimal `Engine` plus `debug_info()` metadata API. +- `crates/graph-sitter-py` is a PyO3 placeholder crate. Its default build is a Rust-testable stub that forwards the same metadata API without linking Python. Enabling `pyo3-bindings` exposes a future Python extension module named `graph_sitter_py`. + +## Build Commands + +```sh +cargo fmt --all +cargo test --workspace +``` + +The PyO3 crate intentionally does not enable PyO3 by default so normal `cargo test --workspace` does not depend on a local Python development library. Build tooling can enable the crate feature later when producing a Python extension: + +```sh +cargo build -p graph-sitter-py --features extension-module +``` + +On macOS, local extension smoke tests currently need PyO3 pointed at the active Python interpreter and dynamic lookup linker flags: + +```sh +PYO3_PYTHON="$(uv run python -c 'import sys; print(sys.executable)')" \ +RUSTFLAGS="-C link-arg=-undefined -C link-arg=dynamic_lookup" \ +cargo build --release -p graph-sitter-py --features extension-module +``` + +The current module exports `Engine`, `EngineInfo`, `PythonIndex`, `IndexSummary`, `engine_version`, `debug_info`, `index_python_path`, and `index_python_paths`. A successful smoke import on this repo returned 1127 files, 3117 symbols, and 6414 imports for the compact Python index at that commit. 
The Python shell integration now uses `index_python_paths` so Rust indexes the exact file list returned by `RepoOperator.iter_files(...)`. The compact index now includes top-level Python classes, functions, simple globals, internal Python `import_resolutions` records for the first import graph slice, plus record-family JSON methods for files, symbols, imports, and import resolutions. + +## Integration Choice + +This skeleton does not alter the Hatch/Cython Python packaging path. The current `hatch.toml` custom hook is disabled by default, so wiring Rust into wheels should be a separate packaging/CI task after the backend facade and import smoke test are defined. diff --git a/rust-rewrite/golden/apache-airflow-2.10.5-rust-compact.json b/rust-rewrite/golden/apache-airflow-2.10.5-rust-compact.json new file mode 100644 index 000000000..9a51701a1 --- /dev/null +++ b/rust-rewrite/golden/apache-airflow-2.10.5-rust-compact.json @@ -0,0 +1,1704 @@ +{ + "graphs": { + "dependencies": { + "count": 79737, + "samples": [ + { + "reference_count": 1, + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048", + "target_file": "airflow/__init__.py", + "target_symbol": "airflow/__init__.py:global_variable:__lazy_imports@3362" + }, + { + "reference_count": 2, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_file": "airflow/api_internal/internal_api_call.py", + "target_symbol": "airflow/api_internal/internal_api_call.py:class:InternalApiConfig@1737" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:class:AirflowConfigParser@6263" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": 
"airflow/__main__.py:function:configure_internal_api@2850", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowException@1246" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_file": "airflow/__main__.py", + "target_symbol": "airflow/__main__.py:function:configure_internal_api@2850" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_file": "airflow/cli/cli_parser.py", + "target_symbol": "airflow/cli/cli_parser.py:function:get_parser@4978" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:function:write_default_airflow_configuration_if_needed@88783" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:function:write_webserver_configuration_if_needed@93544" + }, + { + "reference_count": 1, + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:global_variable:conf@102463" + }, + { + "reference_count": 2, + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_file": "airflow/api/__init__.py", + "target_symbol": "airflow/api/__init__.py:global_variable:log@1018" + }, + { + "reference_count": 1, + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_file": "airflow/configuration.py", + "target_symbol": 
"airflow/configuration.py:global_variable:conf@102463" + }, + { + "reference_count": 1, + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowConfigException@1913" + }, + { + "reference_count": 1, + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowException@1246" + }, + { + "reference_count": 1, + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": "airflow/api/auth/backend/basic_auth.py:function:auth_current_user@1530", + "target_file": "airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py", + "target_symbol": "airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py:function:auth_current_user@1480" + }, + { + "reference_count": 1, + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": "airflow/api/auth/backend/basic_auth.py:function:auth_current_user@1530", + "target_file": "airflow/providers/fab/auth_manager/models/__init__.py", + "target_symbol": "airflow/providers/fab/auth_manager/models/__init__.py:class:User@4203" + }, + { + "reference_count": 1, + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": "airflow/api/auth/backend/basic_auth.py:function:init_app@1480", + "target_file": "airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py", + "target_symbol": "airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py:function:init_app@1416" + }, + { + "reference_count": 1, + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": "airflow/api/auth/backend/basic_auth.py:function:requires_authentication@1618", + "target_file": "airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py", + "target_symbol": 
"airflow/providers/fab/auth_manager/api/auth/backend/basic_auth.py:function:requires_authentication@2134" + }, + { + "reference_count": 2, + "source_file": "airflow/api/auth/backend/default.py", + "source_symbol": "airflow/api/auth/backend/default.py:function:requires_authentication@1117", + "target_file": "airflow/api/auth/backend/default.py", + "target_symbol": "airflow/api/auth/backend/default.py:global_variable:T@1078" + }, + { + "reference_count": 2, + "source_file": "airflow/api/auth/backend/deny_all.py", + "source_symbol": "airflow/api/auth/backend/deny_all.py:function:requires_authentication@1130", + "target_file": "airflow/api/auth/backend/deny_all.py", + "target_symbol": "airflow/api/auth/backend/deny_all.py:global_variable:T@1091" + }, + { + "reference_count": 6, + "source_file": "airflow/api/auth/backend/kerberos_auth.py", + "source_symbol": "airflow/api/auth/backend/kerberos_auth.py:function:_gssapi_authenticate@3989", + "target_file": "airflow/api/auth/backend/kerberos_auth.py", + "target_symbol": "airflow/api/auth/backend/kerberos_auth.py:class:_KerberosAuth@2887" + } + ], + "sha256": "146b5245ee502672f1f67053e3f22b05497bb46e6974fd84d069b69b6681ef91" + }, + "external_modules": { + "count": 19545, + "samples": [ + { + "alias": null, + "file": "airflow/__init__.py", + "import": "airflow/__init__.py:import::os:@847", + "key": "airflow/__init__.py:import::os:@847:os", + "module": null, + "name": "os", + "range": [ + 847, + 856 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "import": "airflow/__init__.py:import::sys:@857", + "key": "airflow/__init__.py:import::sys:@857:sys", + "module": null, + "name": "sys", + "range": [ + 857, + 867 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "import": "airflow/__init__.py:import::warnings:@868", + "key": "airflow/__init__.py:import::warnings:@868:warnings", + "module": null, + "name": "warnings", + "range": [ + 868, + 883 + ] + }, + { + "alias": null, + "file": 
"airflow/__init__.py", + "import": "airflow/__init__.py:from_import:typing:TYPE_CHECKING:@884", + "key": "airflow/__init__.py:from_import:typing:TYPE_CHECKING:@884:TYPE_CHECKING", + "module": "typing", + "name": "TYPE_CHECKING", + "range": [ + 884, + 916 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "import": "airflow/__init__.py:from_import:gevent.monkey:patch_all:@1283", + "key": "airflow/__init__.py:from_import:gevent.monkey:patch_all:@1283:patch_all", + "module": "gevent.monkey", + "name": "patch_all", + "range": [ + 1283, + 1318 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "import": "airflow/__init__.py:import::importlib:@5066", + "key": "airflow/__init__.py:import::importlib:@5066:importlib", + "module": null, + "name": "importlib", + "range": [ + 5066, + 5082 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "import": "airflow/__main__.py:import::os:@900", + "key": "airflow/__main__.py:import::os:@900:os", + "module": null, + "name": "os", + "range": [ + 900, + 909 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "import": "airflow/__main__.py:from_import:argparse:Namespace:@910", + "key": "airflow/__main__.py:from_import:argparse:Namespace:@910:Namespace", + "module": "argparse", + "name": "Namespace", + "range": [ + 910, + 940 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "import": "airflow/__main__.py:import::argcomplete:@942", + "key": "airflow/__main__.py:import::argcomplete:@942:argcomplete", + "module": null, + "name": "argcomplete", + "range": [ + 942, + 960 + ] + }, + { + "alias": null, + "file": "airflow/api/__init__.py", + "import": "airflow/api/__init__.py:import::logging:@854", + "key": "airflow/api/__init__.py:import::logging:@854:logging", + "module": null, + "name": "logging", + "range": [ + 854, + 868 + ] + }, + { + "alias": null, + "file": "airflow/api/__init__.py", + "import": "airflow/api/__init__.py:from_import:importlib:import_module:@869", + "key": 
"airflow/api/__init__.py:from_import:importlib:import_module:@869:import_module", + "module": "importlib", + "name": "import_module", + "range": [ + 869, + 904 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/basic_auth.py", + "import": "airflow/api/auth/backend/basic_auth.py:import::warnings:@948", + "key": "airflow/api/auth/backend/basic_auth.py:import::warnings:@948:warnings", + "module": null, + "name": "warnings", + "range": [ + 948, + 963 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/basic_auth.py", + "import": "airflow/api/auth/backend/basic_auth.py:from_import:typing:Any:@964", + "key": "airflow/api/auth/backend/basic_auth.py:from_import:typing:Any:@964:Any", + "module": "typing", + "name": "Any", + "range": [ + 964, + 1011 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/basic_auth.py", + "import": "airflow/api/auth/backend/basic_auth.py:from_import:typing:Callable:@964", + "key": "airflow/api/auth/backend/basic_auth.py:from_import:typing:Callable:@964:Callable", + "module": "typing", + "name": "Callable", + "range": [ + 964, + 1011 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/basic_auth.py", + "import": "airflow/api/auth/backend/basic_auth.py:from_import:typing:TYPE_CHECKING:@964", + "key": "airflow/api/auth/backend/basic_auth.py:from_import:typing:TYPE_CHECKING:@964:TYPE_CHECKING", + "module": "typing", + "name": "TYPE_CHECKING", + "range": [ + 964, + 1011 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/default.py", + "import": "airflow/api/auth/backend/default.py:from_import:functools:wraps:@886", + "key": "airflow/api/auth/backend/default.py:from_import:functools:wraps:@886:wraps", + "module": "functools", + "name": "wraps", + "range": [ + 886, + 913 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/default.py", + "import": "airflow/api/auth/backend/default.py:from_import:typing:Any:@914", + "key": 
"airflow/api/auth/backend/default.py:from_import:typing:Any:@914:Any", + "module": "typing", + "name": "Any", + "range": [ + 914, + 961 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/default.py", + "import": "airflow/api/auth/backend/default.py:from_import:typing:Callable:@914", + "key": "airflow/api/auth/backend/default.py:from_import:typing:Callable:@914:Callable", + "module": "typing", + "name": "Callable", + "range": [ + 914, + 961 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/default.py", + "import": "airflow/api/auth/backend/default.py:from_import:typing:TypeVar:@914", + "key": "airflow/api/auth/backend/default.py:from_import:typing:TypeVar:@914:TypeVar", + "module": "typing", + "name": "TypeVar", + "range": [ + 914, + 961 + ] + }, + { + "alias": null, + "file": "airflow/api/auth/backend/default.py", + "import": "airflow/api/auth/backend/default.py:from_import:typing:cast:@914", + "key": "airflow/api/auth/backend/default.py:from_import:typing:cast:@914:cast", + "module": "typing", + "name": "cast", + "range": [ + 914, + 961 + ] + } + ], + "sha256": "c08ebfc9435c207fa8bbcb89717cac4f22de84f4a116e7c86e8293bb257db3ce" + }, + "external_references": { + "count": 79300, + "samples": [ + { + "import": "airflow/__init__.py:import::os:@847", + "name": "os", + "range": [ + 921, + 923 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": "airflow/__init__.py:from_import:gevent.monkey:patch_all:@1283", + "name": "patch_all", + "range": [ + 1324, + 1333 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": "airflow/__init__.py:import::sys:@857", + "name": "sys", + "range": [ + 1340, + 1343 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": "airflow/__init__.py:import::warnings:@868", + "name": "warnings", + "range": [ + 1369, + 1377 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": 
"airflow/__init__.py:import::os:@847", + "name": "os", + "range": [ + 3196, + 3198 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": "airflow/__init__.py:from_import:typing:TYPE_CHECKING:@884", + "name": "TYPE_CHECKING", + "range": [ + 3702, + 3715 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null + }, + { + "import": "airflow/__init__.py:import::warnings:@868", + "name": "warnings", + "range": [ + 4351, + 4359 + ], + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048" + }, + { + "import": "airflow/__init__.py:import::sys:@857", + "name": "sys", + "range": [ + 4642, + 4645 + ], + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048" + }, + { + "import": "airflow/__init__.py:import::warnings:@868", + "name": "warnings", + "range": [ + 4790, + 4798 + ], + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048" + }, + { + "import": "airflow/__init__.py:import::importlib:@5066", + "name": "importlib", + "range": [ + 5094, + 5103 + ], + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048" + }, + { + "import": "airflow/__main__.py:import::os:@900", + "name": "os", + "range": [ + 1911, + 1913 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814" + }, + { + "import": "airflow/__main__.py:import::os:@900", + "name": "os", + "range": [ + 1977, + 1979 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814" + }, + { + "import": "airflow/__main__.py:import::argcomplete:@942", + "name": "argcomplete", + "range": [ + 2077, + 2088 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814" + }, + { + "import": "airflow/__main__.py:from_import:argparse:Namespace:@910", + "name": 
"Namespace", + "range": [ + 2879, + 2888 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850" + }, + { + "import": "airflow/__main__.py:import::os:@900", + "name": "os", + "range": [ + 3165, + 3167 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850" + }, + { + "import": "airflow/__main__.py:import::os:@900", + "name": "os", + "range": [ + 3295, + 3297 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850" + }, + { + "import": "airflow/api/__init__.py:import::logging:@854", + "name": "logging", + "range": [ + 1024, + 1031 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:global_variable:log@1018" + }, + { + "import": "airflow/api/__init__.py:from_import:importlib:import_module:@869", + "name": "import_module", + "range": [ + 1376, + 1389 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058" + }, + { + "import": "airflow/api/auth/backend/basic_auth.py:from_import:typing:TYPE_CHECKING:@964", + "name": "TYPE_CHECKING", + "range": [ + 1161, + 1174 + ], + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": null + }, + { + "import": "airflow/api/auth/backend/basic_auth.py:from_import:typing:Any:@964", + "name": "Any", + "range": [ + 1271, + 1274 + ], + "source_file": "airflow/api/auth/backend/basic_auth.py", + "source_symbol": "airflow/api/auth/backend/basic_auth.py:global_variable:CLIENT_AUTH@1240" + } + ], + "sha256": "687351e542f9927621ca3125adea6c226539e0d157285726d61b517606e558cb" + }, + "files": { + "count": 4789, + "samples": [ + { + "byte_len": 5673, + "has_error": false, + "line_count": 139, + "module_name": "airflow", + "path": "airflow/__init__.py" + }, + { + "byte_len": 4273, + "has_error": false, + "line_count": 90, + 
"module_name": "airflow.__main__", + "path": "airflow/__main__.py" + }, + { + "byte_len": 0, + "has_error": false, + "line_count": 0, + "module_name": "airflow._vendor", + "path": "airflow/_vendor/__init__.py" + }, + { + "byte_len": 1678, + "has_error": false, + "line_count": 48, + "module_name": "airflow.api", + "path": "airflow/api/__init__.py" + }, + { + "byte_len": 787, + "has_error": false, + "line_count": 17, + "module_name": "airflow.api.auth", + "path": "airflow/api/auth/__init__.py" + }, + { + "byte_len": 787, + "has_error": false, + "line_count": 17, + "module_name": "airflow.api.auth.backend", + "path": "airflow/api/auth/backend/__init__.py" + }, + { + "byte_len": 1723, + "has_error": false, + "line_count": 52, + "module_name": "airflow.api.auth.backend.basic_auth", + "path": "airflow/api/auth/backend/basic_auth.py" + }, + { + "byte_len": 1343, + "has_error": false, + "line_count": 42, + "module_name": "airflow.api.auth.backend.default", + "path": "airflow/api/auth/backend/default.py" + }, + { + "byte_len": 1357, + "has_error": false, + "line_count": 44, + "module_name": "airflow.api.auth.backend.deny_all", + "path": "airflow/api/auth/backend/deny_all.py" + }, + { + "byte_len": 6542, + "has_error": false, + "line_count": 182, + "module_name": "airflow.api.auth.backend.kerberos_auth", + "path": "airflow/api/auth/backend/kerberos_auth.py" + }, + { + "byte_len": 1773, + "has_error": false, + "line_count": 55, + "module_name": "airflow.api.auth.backend.session", + "path": "airflow/api/auth/backend/session.py" + }, + { + "byte_len": 1730, + "has_error": false, + "line_count": 46, + "module_name": "airflow.api.client", + "path": "airflow/api/client/__init__.py" + }, + { + "byte_len": 2731, + "has_error": false, + "line_count": 94, + "module_name": "airflow.api.client.api_client", + "path": "airflow/api/client/api_client.py" + }, + { + "byte_len": 6616, + "has_error": false, + "line_count": 164, + "module_name": "airflow.api.client.json_client", + "path": 
"airflow/api/client/json_client.py" + }, + { + "byte_len": 3788, + "has_error": false, + "line_count": 93, + "module_name": "airflow.api.client.local_client", + "path": "airflow/api/client/local_client.py" + }, + { + "byte_len": 787, + "has_error": false, + "line_count": 17, + "module_name": "airflow.api.common", + "path": "airflow/api/common/__init__.py" + }, + { + "byte_len": 3268, + "has_error": false, + "line_count": 89, + "module_name": "airflow.api.common.airflow_health", + "path": "airflow/api/common/airflow_health.py" + }, + { + "byte_len": 4086, + "has_error": false, + "line_count": 108, + "module_name": "airflow.api.common.delete_dag", + "path": "airflow/api/common/delete_dag.py" + }, + { + "byte_len": 2138, + "has_error": false, + "line_count": 56, + "module_name": "airflow.api.common.experimental", + "path": "airflow/api/common/experimental/__init__.py" + }, + { + "byte_len": 1135, + "has_error": false, + "line_count": 30, + "module_name": "airflow.api.common.experimental.delete_dag", + "path": "airflow/api/common/experimental/delete_dag.py" + } + ], + "sha256": "226e8cb32dc0a23ec956e97b036e7c505037df979cce7182514f39a43b07cb80" + }, + "import_resolutions": { + "count": 20887, + "samples": [ + { + "import": "airflow/__init__.py:from_import:airflow.models.dag:DAG:@3912", + "source_file": "airflow/__init__.py", + "target_file": "airflow/models/dag.py", + "target_symbol": "airflow/models/dag.py:class:DAG@13203" + }, + { + "import": "airflow/__init__.py:from_import:airflow.models.dataset:Dataset:@3951", + "source_file": "airflow/__init__.py", + "target_file": "airflow/models/dataset.py", + "target_symbol": "airflow/datasets/__init__.py:class:Dataset@7742" + }, + { + "import": "airflow/__init__.py:from_import:airflow.models.xcom_arg:XComArg:@3998", + "source_file": "airflow/__init__.py", + "target_file": "airflow/models/xcom_arg.py", + "target_symbol": "airflow/models/xcom_arg.py:class:XComArg@2341" + }, + { + "import": 
"airflow/__init__.py:from_import:airflow.providers_manager:ProvidersManager:@5336", + "source_file": "airflow/__init__.py", + "target_file": "airflow/providers_manager.py", + "target_symbol": "airflow/providers_manager.py:class:ProvidersManager@13734" + }, + { + "import": "airflow/__init__.py:from_import:airflow:configuration:@2460", + "source_file": "airflow/__init__.py", + "target_file": "airflow/configuration.py", + "target_symbol": null + }, + { + "import": "airflow/__init__.py:from_import:airflow:plugins_manager:@5592", + "source_file": "airflow/__init__.py", + "target_file": "airflow/plugins_manager.py", + "target_symbol": null + }, + { + "import": "airflow/__init__.py:from_import:airflow:settings:@2460", + "source_file": "airflow/__init__.py", + "target_file": "airflow/settings.py", + "target_symbol": null + }, + { + "import": "airflow/__main__.py:from_import:airflow.api_internal.internal_api_call:InternalApiConfig:@3437", + "source_file": "airflow/__main__.py", + "target_file": "airflow/api_internal/internal_api_call.py", + "target_symbol": "airflow/api_internal/internal_api_call.py:class:InternalApiConfig@1737" + }, + { + "import": "airflow/__main__.py:from_import:airflow.api_internal.internal_api_call:InternalApiConfig:@4092", + "source_file": "airflow/__main__.py", + "target_file": "airflow/api_internal/internal_api_call.py", + "target_symbol": "airflow/api_internal/internal_api_call.py:class:InternalApiConfig@1737" + }, + { + "import": "airflow/__main__.py:from_import:airflow.cli:cli_parser:@1630", + "source_file": "airflow/__main__.py", + "target_file": "airflow/cli/cli_parser.py", + "target_symbol": null + }, + { + "import": "airflow/__main__.py:from_import:airflow.configuration:AirflowConfigParser:@1665", + "source_file": "airflow/__main__.py", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:class:AirflowConfigParser@6263" + }, + { + "import": 
"airflow/__main__.py:from_import:airflow.configuration:write_default_airflow_configuration_if_needed:@2511", + "source_file": "airflow/__main__.py", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:function:write_default_airflow_configuration_if_needed@88783" + }, + { + "import": "airflow/__main__.py:from_import:airflow.configuration:write_webserver_configuration_if_needed:@1665", + "source_file": "airflow/__main__.py", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:function:write_webserver_configuration_if_needed@93544" + }, + { + "import": "airflow/__main__.py:from_import:airflow.exceptions:AirflowException:@1760", + "source_file": "airflow/__main__.py", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowException@1246" + }, + { + "import": "airflow/__main__.py:from_import:airflow:configuration:@1596", + "source_file": "airflow/__main__.py", + "target_file": "airflow/configuration.py", + "target_symbol": null + }, + { + "import": "airflow/api/__init__.py:from_import:airflow.configuration:conf:@906", + "source_file": "airflow/api/__init__.py", + "target_file": "airflow/configuration.py", + "target_symbol": "airflow/configuration.py:global_variable:conf@102463" + }, + { + "import": "airflow/api/__init__.py:from_import:airflow.exceptions:AirflowConfigException:@945", + "source_file": "airflow/api/__init__.py", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowConfigException@1913" + }, + { + "import": "airflow/api/__init__.py:from_import:airflow.exceptions:AirflowException:@945", + "source_file": "airflow/api/__init__.py", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:AirflowException@1246" + }, + { + "import": "airflow/api/auth/backend/basic_auth.py:from_import:airflow.exceptions:RemovedInAirflow3Warning:@1101", + "source_file": 
"airflow/api/auth/backend/basic_auth.py", + "target_file": "airflow/exceptions.py", + "target_symbol": "airflow/exceptions.py:class:RemovedInAirflow3Warning@16092" + }, + { + "import": "airflow/api/auth/backend/basic_auth.py:from_import:airflow.providers.fab.auth_manager.models:User:@1180", + "source_file": "airflow/api/auth/backend/basic_auth.py", + "target_file": "airflow/providers/fab/auth_manager/models/__init__.py", + "target_symbol": "airflow/providers/fab/auth_manager/models/__init__.py:class:User@4203" + } + ], + "sha256": "89be1cc5471796bf624202e72af9b4d92326676ea64b2aeedd71f5382fe2ff1b" + }, + "imports": { + "count": 44121, + "samples": [ + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:future_import:__future__:annotations:@787", + "kind": "future_import", + "module": "__future__", + "name": "annotations", + "range": [ + 787, + 821 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:import::os:@847", + "kind": "import", + "module": null, + "name": "os", + "range": [ + 847, + 856 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:import::sys:@857", + "kind": "import", + "module": null, + "name": "sys", + "range": [ + 857, + 867 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:import::warnings:@868", + "kind": "import", + "module": null, + "name": "warnings", + "range": [ + 868, + 883 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:typing:TYPE_CHECKING:@884", + "kind": "from_import", + "module": "typing", + "name": "TYPE_CHECKING", + "range": [ + 884, + 916 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:gevent.monkey:patch_all:@1283", + "kind": "from_import", + "module": "gevent.monkey", + "name": "patch_all", + "range": [ + 1283, + 1318 + ] + }, + { + "alias": null, + "file": 
"airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow:configuration:@2460", + "kind": "from_import", + "module": "airflow", + "name": "configuration", + "range": [ + 2460, + 2503 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow:settings:@2460", + "kind": "from_import", + "module": "airflow", + "name": "settings", + "range": [ + 2460, + 2503 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow.models.dag:DAG:@3912", + "kind": "from_import", + "module": "airflow.models.dag", + "name": "DAG", + "range": [ + 3912, + 3946 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow.models.dataset:Dataset:@3951", + "kind": "from_import", + "module": "airflow.models.dataset", + "name": "Dataset", + "range": [ + 3951, + 3993 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow.models.xcom_arg:XComArg:@3998", + "kind": "from_import", + "module": "airflow.models.xcom_arg", + "name": "XComArg", + "range": [ + 3998, + 4041 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:import::importlib:@5066", + "kind": "import", + "module": null, + "name": "importlib", + "range": [ + 5066, + 5082 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow.providers_manager:ProvidersManager:@5336", + "kind": "from_import", + "module": "airflow.providers_manager", + "name": "ProvidersManager", + "range": [ + 5336, + 5390 + ] + }, + { + "alias": null, + "file": "airflow/__init__.py", + "key": "airflow/__init__.py:from_import:airflow:plugins_manager:@5592", + "kind": "from_import", + "module": "airflow", + "name": "plugins_manager", + "range": [ + 5592, + 5627 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": 
"airflow/__main__.py:future_import:__future__:annotations:@864", + "kind": "future_import", + "module": "__future__", + "name": "annotations", + "range": [ + 864, + 898 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": "airflow/__main__.py:import::os:@900", + "kind": "import", + "module": null, + "name": "os", + "range": [ + 900, + 909 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": "airflow/__main__.py:from_import:argparse:Namespace:@910", + "kind": "from_import", + "module": "argparse", + "name": "Namespace", + "range": [ + 910, + 940 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": "airflow/__main__.py:import::argcomplete:@942", + "kind": "import", + "module": null, + "name": "argcomplete", + "range": [ + 942, + 960 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": "airflow/__main__.py:from_import:airflow:configuration:@1596", + "kind": "from_import", + "module": "airflow", + "name": "configuration", + "range": [ + 1596, + 1629 + ] + }, + { + "alias": null, + "file": "airflow/__main__.py", + "key": "airflow/__main__.py:from_import:airflow.cli:cli_parser:@1630", + "kind": "from_import", + "module": "airflow.cli", + "name": "cli_parser", + "range": [ + 1630, + 1664 + ] + } + ], + "sha256": "42b2f4b1ea694fe76c91e63623d6ea8a5e59fbfa194d18e8a8c32832cf767f1b" + }, + "references": { + "count": 120770, + "samples": [ + { + "import": "airflow/__init__.py:from_import:airflow:settings:@2460", + "name": "initialize", + "range": [ + 3255, + 3265 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null, + "target_symbol": "airflow/settings.py:function:initialize@29763" + }, + { + "import": null, + "name": "__lazy_imports", + "range": [ + 4169, + 4183 + ], + "source_file": "airflow/__init__.py", + "source_symbol": "airflow/__init__.py:function:__getattr__@4048", + "target_symbol": "airflow/__init__.py:global_variable:__lazy_imports@3362" + }, + { + "import": 
"airflow/__init__.py:from_import:airflow:settings:@2460", + "name": "LAZY_LOAD_PROVIDERS", + "range": [ + 5311, + 5330 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null, + "target_symbol": "airflow/settings.py:global_variable:LAZY_LOAD_PROVIDERS@32556" + }, + { + "import": "airflow/__init__.py:from_import:airflow.providers_manager:ProvidersManager:@5336", + "name": "ProvidersManager", + "range": [ + 5406, + 5422 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null, + "target_symbol": "airflow/providers_manager.py:class:ProvidersManager@13734" + }, + { + "import": "airflow/__init__.py:from_import:airflow:settings:@2460", + "name": "LAZY_LOAD_PLUGINS", + "range": [ + 5569, + 5586 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null, + "target_symbol": "airflow/settings.py:global_variable:LAZY_LOAD_PLUGINS@32249" + }, + { + "import": "airflow/__init__.py:from_import:airflow:plugins_manager:@5592", + "name": "ensure_plugins_loaded", + "range": [ + 5649, + 5670 + ], + "source_file": "airflow/__init__.py", + "source_symbol": null, + "target_symbol": "airflow/plugins_manager.py:function:ensure_plugins_loaded@11525" + }, + { + "import": "airflow/__main__.py:from_import:airflow:configuration:@1596", + "name": "conf", + "range": [ + 1847, + 1851 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_symbol": "airflow/configuration.py:global_variable:conf@102463" + }, + { + "import": "airflow/__main__.py:from_import:airflow.cli:cli_parser:@1630", + "name": "get_parser", + "range": [ + 2060, + 2070 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_symbol": "airflow/cli/cli_parser.py:function:get_parser@4978" + }, + { + "import": "airflow/__main__.py:from_import:airflow.configuration:write_default_airflow_configuration_if_needed:@2511", + "name": "write_default_airflow_configuration_if_needed", + 
"range": [ + 2607, + 2652 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_symbol": "airflow/configuration.py:function:write_default_airflow_configuration_if_needed@88783" + }, + { + "import": "airflow/__main__.py:from_import:airflow.configuration:write_webserver_configuration_if_needed:@1665", + "name": "write_webserver_configuration_if_needed", + "range": [ + 2738, + 2777 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_symbol": "airflow/configuration.py:function:write_webserver_configuration_if_needed@93544" + }, + { + "import": null, + "name": "configure_internal_api", + "range": [ + 2788, + 2810 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:main@1814", + "target_symbol": "airflow/__main__.py:function:configure_internal_api@2850" + }, + { + "import": "airflow/__main__.py:from_import:airflow.configuration:AirflowConfigParser:@1665", + "name": "AirflowConfigParser", + "range": [ + 2896, + 2915 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_symbol": "airflow/configuration.py:class:AirflowConfigParser@6263" + }, + { + "import": "airflow/__main__.py:from_import:airflow.api_internal.internal_api_call:InternalApiConfig:@4092", + "name": "InternalApiConfig", + "range": [ + 3519, + 3536 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_symbol": "airflow/api_internal/internal_api_call.py:class:InternalApiConfig@1737" + }, + { + "import": "airflow/__main__.py:from_import:airflow.exceptions:AirflowException:@1760", + "name": "AirflowException", + "range": [ + 3863, + 3879 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_symbol": 
"airflow/exceptions.py:class:AirflowException@1246" + }, + { + "import": "airflow/__main__.py:from_import:airflow.api_internal.internal_api_call:InternalApiConfig:@4092", + "name": "InternalApiConfig", + "range": [ + 4174, + 4191 + ], + "source_file": "airflow/__main__.py", + "source_symbol": "airflow/__main__.py:function:configure_internal_api@2850", + "target_symbol": "airflow/api_internal/internal_api_call.py:class:InternalApiConfig@1737" + }, + { + "import": null, + "name": "main", + "range": [ + 4266, + 4270 + ], + "source_file": "airflow/__main__.py", + "source_symbol": null, + "target_symbol": "airflow/__main__.py:function:main@1814" + }, + { + "import": "airflow/api/__init__.py:from_import:airflow.configuration:conf:@906", + "name": "conf", + "range": [ + 1199, + 1203 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_symbol": "airflow/configuration.py:global_variable:conf@102463" + }, + { + "import": "airflow/api/__init__.py:from_import:airflow.exceptions:AirflowConfigException:@945", + "name": "AirflowConfigException", + "range": [ + 1243, + 1265 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_symbol": "airflow/exceptions.py:class:AirflowConfigException@1913" + }, + { + "import": null, + "name": "log", + "range": [ + 1419, + 1422 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_symbol": "airflow/api/__init__.py:global_variable:log@1018" + }, + { + "import": null, + "name": "log", + "range": [ + 1541, + 1544 + ], + "source_file": "airflow/api/__init__.py", + "source_symbol": "airflow/api/__init__.py:function:load_auth@1058", + "target_symbol": "airflow/api/__init__.py:global_variable:log@1018" + } + ], + "sha256": "418d213614f67c09d7bee969779bdd1fd66ffa26d46c481b34d5207b119fb485" + }, + "symbols": { + "count": 52339, + 
"samples": [ + { + "file": "airflow/__init__.py", + "is_top_level": true, + "key": "airflow/__init__.py:function:__getattr__@4048", + "kind": "function", + "name": "__getattr__", + "name_range": [ + 4048, + 4059 + ], + "parent_symbol": null, + "range": [ + 4044, + 5292 + ] + }, + { + "file": "airflow/__init__.py", + "is_top_level": true, + "key": "airflow/__init__.py:global_variable:__all__@2505", + "kind": "global_variable", + "name": "__all__", + "name_range": [ + 2505, + 2512 + ], + "parent_symbol": null, + "range": [ + 2505, + 2578 + ] + }, + { + "file": "airflow/__init__.py", + "is_top_level": true, + "key": "airflow/__init__.py:global_variable:__lazy_imports@3362", + "kind": "global_variable", + "name": "__lazy_imports", + "name_range": [ + 3362, + 3376 + ], + "parent_symbol": null, + "range": [ + 3362, + 3698 + ] + }, + { + "file": "airflow/__init__.py", + "is_top_level": true, + "key": "airflow/__init__.py:global_variable:__path__@2728", + "kind": "global_variable", + "name": "__path__", + "name_range": [ + 2728, + 2736 + ], + "parent_symbol": null, + "range": [ + 2728, + 2792 + ] + }, + { + "file": "airflow/__init__.py", + "is_top_level": true, + "key": "airflow/__init__.py:global_variable:__version__@823", + "kind": "global_variable", + "name": "__version__", + "name_range": [ + 823, + 834 + ], + "parent_symbol": null, + "range": [ + 823, + 845 + ] + }, + { + "file": "airflow/__main__.py", + "is_top_level": true, + "key": "airflow/__main__.py:function:configure_internal_api@2850", + "kind": "function", + "name": "configure_internal_api", + "name_range": [ + 2850, + 2872 + ], + "parent_symbol": null, + "range": [ + 2846, + 4232 + ] + }, + { + "file": "airflow/__main__.py", + "is_top_level": true, + "key": "airflow/__main__.py:function:main@1814", + "kind": "function", + "name": "main", + "name_range": [ + 1814, + 1818 + ], + "parent_symbol": null, + "range": [ + 1810, + 2843 + ] + }, + { + "file": "airflow/api/__init__.py", + "is_top_level": true, + "key": 
"airflow/api/__init__.py:function:load_auth@1058", + "kind": "function", + "name": "load_auth", + "name_range": [ + 1058, + 1067 + ], + "parent_symbol": null, + "range": [ + 1054, + 1677 + ] + }, + { + "file": "airflow/api/__init__.py", + "is_top_level": true, + "key": "airflow/api/__init__.py:global_variable:log@1018", + "kind": "global_variable", + "name": "log", + "name_range": [ + 1018, + 1021 + ], + "parent_symbol": null, + "range": [ + 1018, + 1051 + ] + }, + { + "file": "airflow/api/auth/backend/basic_auth.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/basic_auth.py:function:auth_current_user@1530", + "kind": "function", + "name": "auth_current_user", + "name_range": [ + 1530, + 1547 + ], + "parent_symbol": null, + "range": [ + 1526, + 1611 + ] + }, + { + "file": "airflow/api/auth/backend/basic_auth.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/basic_auth.py:function:init_app@1480", + "kind": "function", + "name": "init_app", + "name_range": [ + 1480, + 1488 + ], + "parent_symbol": null, + "range": [ + 1476, + 1523 + ] + }, + { + "file": "airflow/api/auth/backend/basic_auth.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/basic_auth.py:function:requires_authentication@1618", + "kind": "function", + "name": "requires_authentication", + "name_range": [ + 1618, + 1641 + ], + "parent_symbol": null, + "range": [ + 1614, + 1722 + ] + }, + { + "file": "airflow/api/auth/backend/basic_auth.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/basic_auth.py:global_variable:CLIENT_AUTH@1240", + "kind": "global_variable", + "name": "CLIENT_AUTH", + "name_range": [ + 1240, + 1251 + ], + "parent_symbol": null, + "range": [ + 1240, + 1288 + ] + }, + { + "file": "airflow/api/auth/backend/default.py", + "is_top_level": false, + "key": "airflow/api/auth/backend/default.py:function:decorated@1243", + "kind": "function", + "name": "decorated", + "name_range": [ + 1243, + 1252 + ], + "parent_symbol": 
"airflow/api/auth/backend/default.py:function:requires_authentication@1117", + "range": [ + 1218, + 1311 + ] + }, + { + "file": "airflow/api/auth/backend/default.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/default.py:function:init_app@1018", + "kind": "function", + "name": "init_app", + "name_range": [ + 1018, + 1026 + ], + "parent_symbol": null, + "range": [ + 1014, + 1075 + ] + }, + { + "file": "airflow/api/auth/backend/default.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/default.py:function:requires_authentication@1117", + "kind": "function", + "name": "requires_authentication", + "name_range": [ + 1117, + 1140 + ], + "parent_symbol": null, + "range": [ + 1113, + 1342 + ] + }, + { + "file": "airflow/api/auth/backend/default.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/default.py:global_variable:CLIENT_AUTH@963", + "kind": "global_variable", + "name": "CLIENT_AUTH", + "name_range": [ + 963, + 974 + ], + "parent_symbol": null, + "range": [ + 963, + 1011 + ] + }, + { + "file": "airflow/api/auth/backend/default.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/default.py:global_variable:T@1078", + "kind": "global_variable", + "name": "T", + "name_range": [ + 1078, + 1079 + ], + "parent_symbol": null, + "range": [ + 1078, + 1110 + ] + }, + { + "file": "airflow/api/auth/backend/deny_all.py", + "is_top_level": false, + "key": "airflow/api/auth/backend/deny_all.py:function:decorated@1256", + "kind": "function", + "name": "decorated", + "name_range": [ + 1256, + 1265 + ], + "parent_symbol": "airflow/api/auth/backend/deny_all.py:function:requires_authentication@1130", + "range": [ + 1231, + 1325 + ] + }, + { + "file": "airflow/api/auth/backend/deny_all.py", + "is_top_level": true, + "key": "airflow/api/auth/backend/deny_all.py:function:init_app@1039", + "kind": "function", + "name": "init_app", + "name_range": [ + 1039, + 1047 + ], + "parent_symbol": null, + "range": [ + 1035, + 1088 + ] + } + ], + 
"sha256": "d4b75c9c6d82b1d30424845c86b88c9fb18ca7748fc088c16b4cfca00de30699" + } + }, + "integrity": { + "bad_dependency_reference_counts": 0, + "bad_dependency_reference_targets": 0, + "missing_dependency_links": 0, + "missing_external_module_links": 0, + "missing_external_reference_links": 0, + "missing_import_resolution_links": 0, + "missing_reference_links": 0 + }, + "metadata": { + "commit": "b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf", + "name": "apache-airflow-2.10.5", + "ref": "refs/tags/2.10.5", + "repo_url": "https://github.com/apache/airflow.git" + }, + "schema_version": 3, + "summary": { + "bytes": 36617627, + "classes": 5665, + "dependencies": 79737, + "external_modules": 19545, + "external_references": 79300, + "files": 4789, + "files_with_errors": 0, + "functions": 34535, + "global_variables": 12139, + "import_resolutions": 20887, + "imports": 44121, + "lines": 924514, + "references": 120770, + "symbols": 52339 + } +} diff --git a/rust-rewrite/golden/next.js-v15.0.0-rust-compact-typescript.json b/rust-rewrite/golden/next.js-v15.0.0-rust-compact-typescript.json new file mode 100644 index 000000000..eb7c1aa35 --- /dev/null +++ b/rust-rewrite/golden/next.js-v15.0.0-rust-compact-typescript.json @@ -0,0 +1,3009 @@ +{ + "graphs": { + "dependencies": { + "count": 49287, + "samples": [ + { + "reference_count": 2, + "references": [ + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:function:assertNotNullable@423::assertNotNullable@706", + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:function:assertNotNullable@423::assertNotNullable@735" + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_file": ".github/actions/needs-triage/src/index.ts", + "target_symbol": 
".github/actions/needs-triage/src/index.ts:function:assertNotNullable@423" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@1218" + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_file": ".github/actions/needs-triage/src/index.ts", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301::labelsRequireUserInput@1027" + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_file": ".github/actions/needs-triage/src/index.ts", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301" + }, + { + "reference_count": 3, + "references": [ + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@330", + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@354", + ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@381" + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": 
".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301", + "target_file": ".github/actions/needs-triage/src/index.ts", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:createCommentPostAsync@22107->.github/actions/next-integration-stat/src/index.ts:global_variable:result@22409::result@22574" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:createCommentPostAsync@22107", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:result@22409" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:createCommentPostAsync@22107->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@22144" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:createCommentPostAsync@22107", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:createFormattedComment@21764->.github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513::BOT_COMMENT_MARKER@21904" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": 
".github/actions/next-integration-stat/src/index.ts:function:createFormattedComment@21764", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:createFormattedComment@21764->.github/actions/next-integration-stat/src/index.ts:global_variable:commentTitlePre@630::commentTitlePre@21885" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:createFormattedComment@21764", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:commentTitlePre@630" + }, + { + "reference_count": 2, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@2522", + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@2558" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330" + }, + { + "reference_count": 1, + "references": [ + 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@2489" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@785" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@752" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "reference_count": 1, + "references": [ + 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513::BOT_COMMENT_MARKER@5798" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@4042" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "reference_count": 2, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452::fetchJobLogsFromWorkflow@7582", + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452::fetchJobLogsFromWorkflow@8747" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": 
".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709::findNextJsVersionFromBuildLogs@7296" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:global_variable:fs@160::fs@10385" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:fs@160" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:global_variable:index@9171::index@9683" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + 
"target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:index@9171" + }, + { + "reference_count": 2, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:global_variable:splittedLogs@9336::splittedLogs@9455", + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:global_variable:splittedLogs@9336::splittedLogs@9504" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:splittedLogs@9336" + }, + { + "reference_count": 1, + "references": [ + ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@6683" + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_file": ".github/actions/next-integration-stat/src/index.ts", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + } + ], + "sha256": "2b4504dada98ee8d1c67f44555c126db3afb6995cbe9d3a3fd8ef98a90486da3" + }, + "exports": { + "count": 16027, + "samples": [ + { + "file": "bench/app-router-server/app/layout.js", + "import": null, + "key": "bench/app-router-server/app/layout.js:default:default:Root:@32", + "kind": "default", + "local_name": "Root", + "name": "default", + "range": [ + 32, + 166, + 2, + 0, + 9, + 1 + 
], + "source_module": null, + "symbol": "bench/app-router-server/app/layout.js:function:Root@56" + }, + { + "file": "bench/app-router-server/app/rsc/page.js", + "import": null, + "key": "bench/app-router-server/app/rsc/page.js:default:default:page:@32", + "kind": "default", + "local_name": "page", + "name": "default", + "range": [ + 32, + 92, + 2, + 0, + 4, + 1 + ], + "source_module": null, + "symbol": "bench/app-router-server/app/rsc/page.js:function:page@56" + }, + { + "file": "bench/app-router-server/pages/index.js", + "import": null, + "key": "bench/app-router-server/pages/index.js:default:default:page:@32", + "kind": "default", + "local_name": "page", + "name": "default", + "range": [ + 32, + 100, + 2, + 0, + 4, + 1 + ], + "source_module": null, + "symbol": "bench/app-router-server/pages/index.js:function:page@56" + }, + { + "file": "bench/app-router-server/pages/index.js", + "import": null, + "key": "bench/app-router-server/pages/index.js:named:getServerSideProps:getServerSideProps:@102", + "kind": "named", + "local_name": "getServerSideProps", + "name": "getServerSideProps", + "range": [ + 102, + 160, + 6, + 0, + 8, + 1 + ], + "source_module": null, + "symbol": "bench/app-router-server/pages/index.js:function:getServerSideProps@124" + }, + { + "file": "bench/basic-app/app/api/app/route.js", + "import": null, + "key": "bench/basic-app/app/api/app/route.js:named:GET:GET:@0", + "kind": "named", + "local_name": "GET", + "name": "GET", + "range": [ + 0, + 70, + 0, + 0, + 2, + 1 + ], + "source_module": null, + "symbol": "bench/basic-app/app/api/app/route.js:function:GET@16" + }, + { + "file": "bench/basic-app/app/api/app/route.js", + "import": null, + "key": "bench/basic-app/app/api/app/route.js:named:dynamic:dynamic:@72", + "kind": "named", + "local_name": "dynamic", + "name": "dynamic", + "range": [ + 72, + 110, + 4, + 0, + 4, + 38 + ], + "source_module": null, + "symbol": "bench/basic-app/app/api/app/route.js:global_variable:dynamic@85" + }, + { + "file": 
"bench/basic-app/app/layout.js", + "import": null, + "key": "bench/basic-app/app/layout.js:default:default:Layout:@27", + "kind": "default", + "local_name": "Layout", + "name": "default", + "range": [ + 27, + 200, + 2, + 0, + 11, + 1 + ], + "source_module": null, + "symbol": "bench/basic-app/app/layout.js:function:Layout@51" + }, + { + "file": "bench/basic-app/app/page.js", + "import": null, + "key": "bench/basic-app/app/page.js:default:default:Page:@27", + "kind": "default", + "local_name": "Page", + "name": "default", + "range": [ + 27, + 87, + 2, + 0, + 4, + 1 + ], + "source_module": null, + "symbol": "bench/basic-app/app/page.js:function:Page@51" + }, + { + "file": "bench/basic-app/app/page.js", + "import": null, + "key": "bench/basic-app/app/page.js:named:dynamic:dynamic:@89", + "kind": "named", + "local_name": "dynamic", + "name": "dynamic", + "range": [ + 89, + 127, + 6, + 0, + 6, + 38 + ], + "source_module": null, + "symbol": "bench/basic-app/app/page.js:global_variable:dynamic@102" + }, + { + "file": "bench/basic-app/pages/api/page-api.js", + "import": null, + "key": "bench/basic-app/pages/api/page-api.js:default:default:handler:@0", + "kind": "default", + "local_name": "handler", + "name": "default", + "range": [ + 0, + 90, + 0, + 0, + 2, + 1 + ], + "source_module": null, + "symbol": "bench/basic-app/pages/api/page-api.js:function:handler@24" + }, + { + "file": "bench/basic-app/pages/pages/index.js", + "import": null, + "key": "bench/basic-app/pages/pages/index.js:default:default:() => 'Hello World':@0", + "kind": "default", + "local_name": "() => 'Hello World'", + "name": "default", + "range": [ + 0, + 34, + 0, + 0, + 0, + 34 + ], + "source_module": null, + "symbol": null + }, + { + "file": "bench/basic-app/pages/pages/index.js", + "import": null, + "key": "bench/basic-app/pages/pages/index.js:named:getServerSideProps:getServerSideProps:@36", + "kind": "named", + "local_name": "getServerSideProps", + "name": "getServerSideProps", + "range": [ + 36, + 
106, + 2, + 0, + 6, + 1 + ], + "source_module": null, + "symbol": "bench/basic-app/pages/pages/index.js:function:getServerSideProps@52" + }, + { + "file": "bench/heavy-npm-deps/app/layout.js", + "import": null, + "key": "bench/heavy-npm-deps/app/layout.js:named:metadata:metadata:@305", + "kind": "named", + "local_name": "metadata", + "name": "metadata", + "range": [ + 305, + 407, + 14, + 0, + 17, + 1 + ], + "source_module": null, + "symbol": "bench/heavy-npm-deps/app/layout.js:global_variable:metadata@318" + }, + { + "file": "bench/heavy-npm-deps/app/layout.js", + "import": null, + "key": "bench/heavy-npm-deps/app/layout.js:default:default:RootLayout:@409", + "kind": "default", + "local_name": "RootLayout", + "name": "default", + "range": [ + 409, + 613, + 19, + 0, + 27, + 1 + ], + "source_module": null, + "symbol": "bench/heavy-npm-deps/app/layout.js:function:RootLayout@433" + }, + { + "file": "bench/heavy-npm-deps/app/page.js", + "import": null, + "key": "bench/heavy-npm-deps/app/page.js:default:default:Page:@178", + "kind": "default", + "local_name": "Page", + "name": "default", + "range": [ + 178, + 338, + 4, + 0, + 12, + 1 + ], + "source_module": null, + "symbol": "bench/heavy-npm-deps/app/page.js:function:Page@202" + }, + { + "file": "bench/heavy-npm-deps/components/lodash.js", + "import": null, + "key": "bench/heavy-npm-deps/components/lodash.js:named:LodashComponent:LodashComponent:@71", + "kind": "named", + "local_name": "LodashComponent", + "name": "LodashComponent", + "range": [ + 71, + 170, + 5, + 0, + 11, + 1 + ], + "source_module": null, + "symbol": "bench/heavy-npm-deps/components/lodash.js:function:LodashComponent@87" + }, + { + "file": "bench/heavy-npm-deps/components/mantine.js", + "import": null, + "key": "bench/heavy-npm-deps/components/mantine.js:named:MantineComponent:MantineComponent:@98", + "kind": "named", + "local_name": "MantineComponent", + "name": "MantineComponent", + "range": [ + 98, + 198, + 5, + 0, + 11, + 1 + ], + "source_module": 
null, + "symbol": "bench/heavy-npm-deps/components/mantine.js:function:MantineComponent@114" + }, + { + "file": "bench/heavy-npm-deps/components/mermaid.js", + "import": null, + "key": "bench/heavy-npm-deps/components/mermaid.js:named:MermaidComponent:MermaidComponent:@117", + "kind": "named", + "local_name": "MermaidComponent", + "name": "MermaidComponent", + "range": [ + 117, + 217, + 6, + 0, + 12, + 1 + ], + "source_module": null, + "symbol": "bench/heavy-npm-deps/components/mermaid.js:function:MermaidComponent@133" + }, + { + "file": "bench/nested-deps-app-router/app/client-components-only/page.js", + "import": null, + "key": "bench/nested-deps-app-router/app/client-components-only/page.js:default:default:Home:@87", + "kind": "default", + "local_name": "Home", + "name": "default", + "range": [ + 87, + 188, + 5, + 0, + 12, + 1 + ], + "source_module": null, + "symbol": "bench/nested-deps-app-router/app/client-components-only/page.js:function:Home@111" + }, + { + "file": "bench/nested-deps-app-router/app/layout.js", + "import": null, + "key": "bench/nested-deps-app-router/app/layout.js:named:metadata:metadata:@27", + "kind": "named", + "local_name": "metadata", + "name": "metadata", + "range": [ + 27, + 113, + 2, + 0, + 5, + 1 + ], + "source_module": null, + "symbol": "bench/nested-deps-app-router/app/layout.js:global_variable:metadata@40" + } + ], + "sha256": "d3952dc6951104f04bc1e184f61e85eb1218089a07c294e3be675285d36dba4c" + }, + "external_modules": { + "count": 13525, + "samples": [ + { + "alias": "github", + "file": ".github/actions/needs-triage/src/index.ts", + "import": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7", + "key": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7:*", + "module": "@actions/github", + "name": "*", + "range": [ + 7, + 18, + 0, + 7, + 0, + 18 + ] + }, + { + "alias": "core", + "file": ".github/actions/needs-triage/src/index.ts", + "import": 
".github/actions/needs-triage/src/index.ts:namespace_import:@actions/core:*:core@49", + "key": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/core:*:core@49:*", + "module": "@actions/core", + "name": "*", + "range": [ + 49, + 58, + 1, + 7, + 1, + 16 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context", + "module": "@actions/github", + "name": "context", + "range": [ + 9, + 16, + 0, + 9, + 0, + 16 + ] + }, + { + "alias": "getOctokit", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18:getOctokit", + "module": "@actions/github", + "name": "getOctokit", + "range": [ + 18, + 28, + 0, + 18, + 0, + 28 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:info:info@63", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:info:info@63:info", + "module": "@actions/core", + "name": "info", + "range": [ + 63, + 67, + 1, + 9, + 1, + 13 + ] + }, + { + "alias": "getInput", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69:getInput", + "module": "@actions/core", + "name": "getInput", + "range": [ + 69, + 77, + 1, + 15, + 1, + 23 + ] + }, + { + 
"alias": "stripAnsi", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:strip-ansi:stripAnsi:stripAnsi@107", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:strip-ansi:stripAnsi:stripAnsi@107:stripAnsi", + "module": "strip-ansi", + "name": "stripAnsi", + "range": [ + 107, + 153, + 2, + 6, + 2, + 52 + ] + }, + { + "alias": "fs", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:fs:fs:fs@160", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:fs:fs:fs@160:fs", + "module": "fs", + "name": "fs", + "range": [ + 160, + 178, + 3, + 6, + 3, + 24 + ] + }, + { + "alias": "path", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:path:path:path@185", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:path:path:path@185:path", + "module": "path", + "name": "path", + "range": [ + 185, + 207, + 4, + 6, + 4, + 28 + ] + }, + { + "alias": "semver", + "file": ".github/actions/next-integration-stat/src/index.ts", + "import": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:semver:semver:semver@214", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:semver:semver:semver@214:semver", + "module": "semver", + "name": "semver", + "range": [ + 214, + 240, + 5, + 6, + 5, + 32 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:info:info@9", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:info:info@9:info", + "module": "@actions/core", + "name": "info", + "range": [ + 9, + 13, + 0, + 9, + 0, + 13 + ] + }, + { 
+ "alias": "setFailed", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:setFailed:setFailed@15", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:setFailed:setFailed@15:setFailed", + "module": "@actions/core", + "name": "setFailed", + "range": [ + 15, + 24, + 0, + 15, + 0, + 24 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:context:context@57", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:context:context@57:context", + "module": "@actions/github", + "name": "context", + "range": [ + 57, + 64, + 1, + 9, + 1, + 16 + ] + }, + { + "alias": "getOctokit", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:getOctokit:getOctokit@66", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:getOctokit:getOctokit@66:getOctokit", + "module": "@actions/github", + "name": "getOctokit", + "range": [ + 66, + 76, + 1, + 18, + 1, + 28 + ] + }, + { + "alias": "WebClient", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@slack/web-api:WebClient:WebClient@111", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@slack/web-api:WebClient:WebClient@111:WebClient", + "module": "@slack/web-api", + "name": "WebClient", + "range": [ + 111, + 120, + 2, + 9, + 2, + 18 + ] + }, + { + "alias": "BlockCollection", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": 
".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:BlockCollection:BlockCollection@154", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:BlockCollection:BlockCollection@154:BlockCollection", + "module": "slack-block-builder", + "name": "BlockCollection", + "range": [ + 154, + 169, + 3, + 9, + 3, + 24 + ] + }, + { + "alias": "Section", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "import": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:Section:Section@171", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:Section:Section@171:Section", + "module": "slack-block-builder", + "name": "Section", + "range": [ + 171, + 178, + 3, + 26, + 3, + 33 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "import": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:info:info@9", + "key": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:info:info@9:info", + "module": "@actions/core", + "name": "info", + "range": [ + 9, + 13, + 0, + 9, + 0, + 13 + ] + }, + { + "alias": "setFailed", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "import": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:setFailed:setFailed@15", + "key": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:setFailed:setFailed@15:setFailed", + "module": "@actions/core", + "name": "setFailed", + "range": [ + 15, + 24, + 0, + 15, + 0, + 24 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "import": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/github:context:context@57", + "key": 
".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/github:context:context@57:context", + "module": "@actions/github", + "name": "context", + "range": [ + 57, + 64, + 1, + 9, + 1, + 16 + ] + } + ], + "sha256": "75176bcfeb2ad12c09a7dfddc5de0f357b3f760e0b16b6cd05c9cf8260113b33" + }, + "external_references": { + "count": 25317, + "samples": [ + { + "import": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7", + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7:github@647", + "name": "github", + "range": [ + 647, + 653, + 23, + 30, + 23, + 36 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601" + }, + { + "import": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7", + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7:github@825", + "name": "github", + "range": [ + 825, + 831, + 31, + 19, + 31, + 25 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601" + }, + { + "import": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/core:*:core@49", + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:namespace_import:@actions/core:*:core@49:core@1294", + "name": "core", + "range": [ + 1294, + 1298, + 45, + 4, + 45, + 8 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601" + }, + { + "import": 
".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18:getOctokit@312", + "name": "getOctokit", + "range": [ + 312, + 322, + 9, + 33, + 9, + 43 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@1132", + "name": "context", + "range": [ + 1132, + 1139, + 34, + 9, + 34, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@2891", + "name": "context", + "range": [ + 2891, + 2898, + 91, + 9, + 91, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452" + }, + { + "import": 
".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69:getInput@4182", + "name": "getInput", + "range": [ + 4182, + 4190, + 139, + 16, + 139, + 24 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69:getInput@4241", + "name": "getInput", + "range": [ + 4241, + 4249, + 141, + 4, + 141, + 12 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69:getInput@4306", + "name": "getInput", + "range": [ + 4306, + 4314, + 142, + 19, + 142, + 27 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18", + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18:getOctokit@4824", + "name": "getOctokit", + "range": [ + 4824, + 4834, + 155, + 18, + 155, + 28 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@4862", + "name": "context", + "range": [ + 4862, + 4869, + 157, + 19, + 157, + 26 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@4915", + "name": "context", + "range": [ + 4915, + 4922, + 158, + 14, + 158, + 21 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@5244", + "name": "context", + "range": [ + 5244, + 5251, + 171, + 9, + 171, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@6032", + "name": "context", + "range": [ + 6032, + 6039, + 191, + 13, + 191, + 20 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:info:info@63", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:info:info@63:info@6120", + "name": "info", + "range": [ + 6120, + 6124, + 197, + 4, + 197, + 8 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69:getInput@6356", + "name": "getInput", + "range": [ + 6356, + 6364, + 205, + 15, + 205, + 23 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@6924", + "name": "context", + "range": [ + 6924, + 6931, + 229, + 9, + 229, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@6952", + "name": "context", + "range": [ + 6952, + 6959, + 230, + 14, + 230, + 21 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@10947", + "name": "context", + "range": [ + 10947, + 10954, + 358, + 9, + 358, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615" + }, + { + "import": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615->.github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9:context@11320", + "name": "context", + "range": [ + 11320, + 11327, + 371, + 9, + 371, + 16 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615" + } + ], + "sha256": "30bdfc999d06a7b426261e850bdbec296db2b5dac2ad482c73fd87f06181579f" + }, + "files": { + "count": 13688, + "samples": [ + { + "byte_len": 458956, + "has_error": false, + "line_count": 7, + "path": ".github/actions/needs-triage/dist/index.js", + "root_range": [ + 0, + 458956, + 0, + 0, + 6, + 332106 + ] + }, + { + "byte_len": 1337, + "has_error": false, + "line_count": 50, + "path": ".github/actions/needs-triage/src/index.ts", + "root_range": [ + 0, + 1337, + 0, + 0, + 50, + 0 + ] + }, + { + "byte_len": 10280, + "has_error": false, + "line_count": 452, + "path": ".github/actions/next-integration-stat/dist/37.index.js", + "root_range": [ + 0, + 10280, + 0, + 0, + 451, + 1 + ] + }, + { + "byte_len": 711832, + "has_error": false, + "line_count": 13770, + "path": 
".github/actions/next-integration-stat/dist/index.js", + "root_range": [ + 0, + 711832, + 0, + 0, + 13769, + 1 + ] + }, + { + "byte_len": 28330, + "has_error": false, + "line_count": 934, + "path": ".github/actions/next-integration-stat/src/index.ts", + "root_range": [ + 0, + 28330, + 0, + 0, + 934, + 0 + ] + }, + { + "byte_len": 1145, + "has_error": false, + "line_count": 49, + "path": ".github/actions/next-integration-stat/src/manifest.d.ts", + "root_range": [ + 0, + 1145, + 0, + 0, + 49, + 0 + ] + }, + { + "byte_len": 953900, + "has_error": false, + "line_count": 16, + "path": ".github/actions/next-repo-actions/dist/bankrupt/index.js", + "root_range": [ + 0, + 953900, + 0, + 0, + 15, + 258858 + ] + }, + { + "byte_len": 961976, + "has_error": false, + "line_count": 16, + "path": ".github/actions/next-repo-actions/dist/issues-by-version/index.js", + "root_range": [ + 0, + 961976, + 0, + 0, + 15, + 257482 + ] + }, + { + "byte_len": 985836, + "has_error": false, + "line_count": 16, + "path": ".github/actions/next-repo-actions/dist/prs/index.js", + "root_range": [ + 0, + 985836, + 0, + 0, + 15, + 258088 + ] + }, + { + "byte_len": 1699173, + "has_error": false, + "line_count": 18, + "path": ".github/actions/next-repo-actions/dist/triage-issues-with-ai/index.js", + "root_range": [ + 0, + 1699173, + 0, + 0, + 17, + 523563 + ] + }, + { + "byte_len": 3661, + "has_error": false, + "line_count": 100, + "path": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "root_range": [ + 0, + 3661, + 0, + 0, + 100, + 0 + ] + }, + { + "byte_len": 1960, + "has_error": false, + "line_count": 60, + "path": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "root_range": [ + 0, + 1960, + 0, + 0, + 60, + 0 + ] + }, + { + "byte_len": 2129, + "has_error": false, + "line_count": 66, + "path": ".github/actions/next-repo-actions/src/popular-prs.ts", + "root_range": [ + 0, + 2129, + 0, + 0, + 66, + 0 + ] + }, + { + "byte_len": 3244, + "has_error": false, + "line_count": 
93, + "path": ".github/actions/next-repo-actions/src/triage-issues-with-ai.ts", + "root_range": [ + 0, + 3244, + 0, + 0, + 93, + 0 + ] + }, + { + "byte_len": 8928, + "has_error": false, + "line_count": 285, + "path": ".github/actions/next-stats-action/src/add-comment.js", + "root_range": [ + 0, + 8928, + 0, + 0, + 285, + 0 + ] + }, + { + "byte_len": 624, + "has_error": false, + "line_count": 26, + "path": ".github/actions/next-stats-action/src/constants.js", + "root_range": [ + 0, + 624, + 0, + 0, + 26, + 0 + ] + }, + { + "byte_len": 5227, + "has_error": false, + "line_count": 158, + "path": ".github/actions/next-stats-action/src/index.js", + "root_range": [ + 0, + 5227, + 0, + 0, + 158, + 0 + ] + }, + { + "byte_len": 2678, + "has_error": false, + "line_count": 103, + "path": ".github/actions/next-stats-action/src/prepare/action-info.js", + "root_range": [ + 0, + 2678, + 0, + 0, + 103, + 0 + ] + }, + { + "byte_len": 994, + "has_error": false, + "line_count": 42, + "path": ".github/actions/next-stats-action/src/prepare/load-stats-config.js", + "root_range": [ + 0, + 994, + 0, + 0, + 42, + 0 + ] + }, + { + "byte_len": 10396, + "has_error": false, + "line_count": 289, + "path": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "root_range": [ + 0, + 10396, + 0, + 0, + 289, + 0 + ] + } + ], + "sha256": "10185cca51023d11ccfd72c255e2fd0eac8146ab3768a974f2b6e93c428d0f93" + }, + "import_resolutions": { + "count": 13462, + "samples": [ + { + "import": ".github/actions/next-stats-action/src/add-comment.js:dynamic_import:./constants:benchTitle:benchTitle@227", + "key": ".github/actions/next-stats-action/src/add-comment.js:dynamic_import:./constants:benchTitle:benchTitle@227->.github/actions/next-stats-action/src/constants.js:", + "source_file": ".github/actions/next-stats-action/src/add-comment.js", + "target_file": ".github/actions/next-stats-action/src/constants.js", + "target_symbol": null + }, + { + "import": 
".github/actions/next-stats-action/src/add-comment.js:dynamic_import:./util/logger:logger:logger@143", + "key": ".github/actions/next-stats-action/src/add-comment.js:dynamic_import:./util/logger:logger:logger@143->.github/actions/next-stats-action/src/util/logger.js:", + "source_file": ".github/actions/next-stats-action/src/add-comment.js", + "target_file": ".github/actions/next-stats-action/src/util/logger.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./add-comment:addComment:addComment@218", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./add-comment:addComment:addComment@218->.github/actions/next-stats-action/src/add-comment.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/add-comment.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./constants:diffRepoDir:diffRepoDir@316", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./constants:diffRepoDir:diffRepoDir@316->.github/actions/next-stats-action/src/constants.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/constants.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./constants:mainRepoDir:mainRepoDir@316", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./constants:mainRepoDir:mainRepoDir@316->.github/actions/next-stats-action/src/constants.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/constants.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/action-info:actionInfo:actionInfo@262", + "key": 
".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/action-info:actionInfo:actionInfo@262->.github/actions/next-stats-action/src/prepare/action-info.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/action-info.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/load-stats-config:loadStatsConfig:loadStatsConfig@376", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/load-stats-config:loadStatsConfig:loadStatsConfig@376->.github/actions/next-stats-action/src/prepare/load-stats-config.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/load-stats-config.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:cloneRepo:cloneRepo@439", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:cloneRepo:cloneRepo@439->.github/actions/next-stats-action/src/prepare/repo-setup.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:getCommitId:getCommitId@439", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:getCommitId:getCommitId@439->.github/actions/next-stats-action/src/prepare/repo-setup.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:getLastStable:getLastStable@439", + 
"key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:getLastStable:getLastStable@439->.github/actions/next-stats-action/src/prepare/repo-setup.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:linkPackages:linkPackages@439", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:linkPackages:linkPackages@439->.github/actions/next-stats-action/src/prepare/repo-setup.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:mergeBranch:mergeBranch@439", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./prepare/repo-setup:mergeBranch:mergeBranch@439->.github/actions/next-stats-action/src/prepare/repo-setup.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./run:runConfigs:runConfigs@182", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./run:runConfigs:runConfigs@182->.github/actions/next-stats-action/src/run/index.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/run/index.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./util/exec:exec:exec@106", + "key": 
".github/actions/next-stats-action/src/index.js:dynamic_import:./util/exec:exec:exec@106->.github/actions/next-stats-action/src/util/exec.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/util/exec.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/index.js:dynamic_import:./util/logger:logger:logger@142", + "key": ".github/actions/next-stats-action/src/index.js:dynamic_import:./util/logger:logger:logger@142->.github/actions/next-stats-action/src/util/logger.js:", + "source_file": ".github/actions/next-stats-action/src/index.js", + "target_file": ".github/actions/next-stats-action/src/util/logger.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/prepare/action-info.js:dynamic_import:../util/logger:logger:logger@35", + "key": ".github/actions/next-stats-action/src/prepare/action-info.js:dynamic_import:../util/logger:logger:logger@35->.github/actions/next-stats-action/src/util/logger.js:", + "source_file": ".github/actions/next-stats-action/src/prepare/action-info.js", + "target_file": ".github/actions/next-stats-action/src/util/logger.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../constants:allowedConfigLocations:allowedConfigLocations@76", + "key": ".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../constants:allowedConfigLocations:allowedConfigLocations@76->.github/actions/next-stats-action/src/constants.js:", + "source_file": ".github/actions/next-stats-action/src/prepare/load-stats-config.js", + "target_file": ".github/actions/next-stats-action/src/constants.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../constants:diffRepoDir:diffRepoDir@76", + "key": 
".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../constants:diffRepoDir:diffRepoDir@76->.github/actions/next-stats-action/src/constants.js:", + "source_file": ".github/actions/next-stats-action/src/prepare/load-stats-config.js", + "target_file": ".github/actions/next-stats-action/src/constants.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../util/logger:logger:logger@35", + "key": ".github/actions/next-stats-action/src/prepare/load-stats-config.js:dynamic_import:../util/logger:logger:logger@35->.github/actions/next-stats-action/src/util/logger.js:", + "source_file": ".github/actions/next-stats-action/src/prepare/load-stats-config.js", + "target_file": ".github/actions/next-stats-action/src/util/logger.js", + "target_symbol": null + }, + { + "import": ".github/actions/next-stats-action/src/prepare/repo-setup.js:dynamic_import:../util/exec:exec:exec@97", + "key": ".github/actions/next-stats-action/src/prepare/repo-setup.js:dynamic_import:../util/exec:exec:exec@97->.github/actions/next-stats-action/src/util/exec.js:", + "source_file": ".github/actions/next-stats-action/src/prepare/repo-setup.js", + "target_file": ".github/actions/next-stats-action/src/util/exec.js", + "target_symbol": null + } + ], + "sha256": "59a2edf479c3dd84f34df1c789b36c629c74c98c941d3de685f8acd2cbe86f82" + }, + "imports": { + "count": 28210, + "samples": [ + { + "alias": "github", + "file": ".github/actions/needs-triage/src/index.ts", + "key": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/github:*:github@7", + "kind": "namespace_import", + "module": "@actions/github", + "name": "*", + "range": [ + 7, + 18, + 0, + 7, + 0, + 18 + ] + }, + { + "alias": "core", + "file": ".github/actions/needs-triage/src/index.ts", + "key": ".github/actions/needs-triage/src/index.ts:namespace_import:@actions/core:*:core@49", + "kind": "namespace_import", + "module": 
"@actions/core", + "name": "*", + "range": [ + 49, + 58, + 1, + 7, + 1, + 16 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:context:context@9", + "kind": "named_import", + "module": "@actions/github", + "name": "context", + "range": [ + 9, + 16, + 0, + 9, + 0, + 16 + ] + }, + { + "alias": "getOctokit", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/github:getOctokit:getOctokit@18", + "kind": "named_import", + "module": "@actions/github", + "name": "getOctokit", + "range": [ + 18, + 28, + 0, + 18, + 0, + 28 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:info:info@63", + "kind": "named_import", + "module": "@actions/core", + "name": "info", + "range": [ + 63, + 67, + 1, + 9, + 1, + 13 + ] + }, + { + "alias": "getInput", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:named_import:@actions/core:getInput:getInput@69", + "kind": "named_import", + "module": "@actions/core", + "name": "getInput", + "range": [ + 69, + 77, + 1, + 15, + 1, + 23 + ] + }, + { + "alias": "stripAnsi", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:strip-ansi:stripAnsi:stripAnsi@107", + "kind": "dynamic_import", + "module": "strip-ansi", + "name": "stripAnsi", + "range": [ + 107, + 153, + 2, + 6, + 2, + 52 + ] + }, + { + "alias": "fs", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:fs:fs:fs@160", + "kind": "dynamic_import", + "module": "fs", + "name": "fs", + 
"range": [ + 160, + 178, + 3, + 6, + 3, + 24 + ] + }, + { + "alias": "path", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:path:path:path@185", + "kind": "dynamic_import", + "module": "path", + "name": "path", + "range": [ + 185, + 207, + 4, + 6, + 4, + 28 + ] + }, + { + "alias": "semver", + "file": ".github/actions/next-integration-stat/src/index.ts", + "key": ".github/actions/next-integration-stat/src/index.ts:dynamic_import:semver:semver:semver@214", + "kind": "dynamic_import", + "module": "semver", + "name": "semver", + "range": [ + 214, + 240, + 5, + 6, + 5, + 32 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:info:info@9", + "kind": "named_import", + "module": "@actions/core", + "name": "info", + "range": [ + 9, + 13, + 0, + 9, + 0, + 13 + ] + }, + { + "alias": "setFailed", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/core:setFailed:setFailed@15", + "kind": "named_import", + "module": "@actions/core", + "name": "setFailed", + "range": [ + 15, + 24, + 0, + 15, + 0, + 24 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:context:context@57", + "kind": "named_import", + "module": "@actions/github", + "name": "context", + "range": [ + 57, + 64, + 1, + 9, + 1, + 16 + ] + }, + { + "alias": "getOctokit", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@actions/github:getOctokit:getOctokit@66", + "kind": "named_import", + "module": "@actions/github", + "name": 
"getOctokit", + "range": [ + 66, + 76, + 1, + 18, + 1, + 28 + ] + }, + { + "alias": "WebClient", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:@slack/web-api:WebClient:WebClient@111", + "kind": "named_import", + "module": "@slack/web-api", + "name": "WebClient", + "range": [ + 111, + 120, + 2, + 9, + 2, + 18 + ] + }, + { + "alias": "BlockCollection", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:BlockCollection:BlockCollection@154", + "kind": "named_import", + "module": "slack-block-builder", + "name": "BlockCollection", + "range": [ + 154, + 169, + 3, + 9, + 3, + 24 + ] + }, + { + "alias": "Section", + "file": ".github/actions/next-repo-actions/src/bankrupt-issues.ts", + "key": ".github/actions/next-repo-actions/src/bankrupt-issues.ts:named_import:slack-block-builder:Section:Section@171", + "kind": "named_import", + "module": "slack-block-builder", + "name": "Section", + "range": [ + 171, + 178, + 3, + 26, + 3, + 33 + ] + }, + { + "alias": "info", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "key": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:info:info@9", + "kind": "named_import", + "module": "@actions/core", + "name": "info", + "range": [ + 9, + 13, + 0, + 9, + 0, + 13 + ] + }, + { + "alias": "setFailed", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "key": ".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/core:setFailed:setFailed@15", + "kind": "named_import", + "module": "@actions/core", + "name": "setFailed", + "range": [ + 15, + 24, + 0, + 15, + 0, + 24 + ] + }, + { + "alias": "context", + "file": ".github/actions/next-repo-actions/src/issues-by-version.ts", + "key": 
".github/actions/next-repo-actions/src/issues-by-version.ts:named_import:@actions/github:context:context@57", + "kind": "named_import", + "module": "@actions/github", + "name": "context", + "range": [ + 57, + 64, + 1, + 9, + 1, + 16 + ] + } + ], + "sha256": "701c5aeb3c9738bd1051ddb1c7a97afe67422e303b25dc50e4c2d028f739856f" + }, + "references": { + "count": 114464, + "samples": [ + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@330", + "name": "LABELS", + "range": [ + 330, + 336, + 11, + 2, + 11, + 8 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@354", + "name": "LABELS", + "range": [ + 354, + 360, + 12, + 2, + 12, + 8 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@381", + "name": "LABELS", + "range": [ + 381, + 387, + 13, + 2, + 13, + 8 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": 
".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:function:assertNotNullable@423::assertNotNullable@706", + "name": "assertNotNullable", + "range": [ + 706, + 723, + 26, + 4, + 26, + 21 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_symbol": ".github/actions/needs-triage/src/index.ts:function:assertNotNullable@423" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:function:assertNotNullable@423::assertNotNullable@735", + "name": "assertNotNullable", + "range": [ + 735, + 752, + 27, + 4, + 27, + 21 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_symbol": ".github/actions/needs-triage/src/index.ts:function:assertNotNullable@423" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301::labelsRequireUserInput@1027", + "name": "labelsRequireUserInput", + "range": [ + 1027, + 1049, + 36, + 8, + 36, + 30 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301" + }, + { + "import": null, + "key": 
".github/actions/needs-triage/src/index.ts:.github/actions/needs-triage/src/index.ts:function:run@601->.github/actions/needs-triage/src/index.ts:global_variable:LABELS@87::LABELS@1218", + "name": "LABELS", + "range": [ + 1218, + 1224, + 40, + 19, + 40, + 25 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601", + "target_symbol": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87" + }, + { + "import": null, + "key": ".github/actions/needs-triage/src/index.ts:->.github/actions/needs-triage/src/index.ts:function:run@601::run@1331", + "name": "run", + "range": [ + 1331, + 1334, + 49, + 0, + 49, + 3 + ], + "source_file": ".github/actions/needs-triage/src/index.ts", + "source_symbol": null, + "target_symbol": ".github/actions/needs-triage/src/index.ts:function:run@601" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@358", + "name": "Octokit", + "range": [ + 358, + 365, + 12, + 13, + 12, + 20 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@752", + "name": "Octokit", + "range": [ + 752, + 759, + 21, + 11, + 21, + 18 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": 
".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@785", + "name": "Job", + "range": [ + 785, + 788, + 23, + 7, + 23, + 10 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@2489", + "name": "Octokit", + "range": [ + 2489, + 2496, + 79, + 11, + 79, + 18 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@2522", + "name": "Job", + "range": [ + 2522, + 2525, + 81, + 7, + 81, + 10 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "target_symbol": 
".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452->.github/actions/next-integration-stat/src/index.ts:type_alias:Job@330::Job@2558", + "name": "Job", + "range": [ + 2558, + 2561, + 82, + 32, + 82, + 35 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Job@330" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@4042", + "name": "Octokit", + "range": [ + 4042, + 4049, + 133, + 11, + 133, + 18 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getInputs@3962->.github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513::BOT_COMMENT_MARKER@5798", + "name": "BOT_COMMENT_MARKER", + "range": [ + 5798, + 5816, + 184, + 32, + 184, + 50 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513" + }, + { + "import": null, + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284::Octokit@6683", + "name": "Octokit", + "range": [ + 6683, + 6690, + 221, + 11, + 221, + 18 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:type_alias:Octokit@284" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709::findNextJsVersionFromBuildLogs@7296", + "name": "findNextJsVersionFromBuildLogs", + "range": [ + 7296, + 7326, + 241, + 30, + 241, + 60 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709" + }, + { + "import": null, + "key": ".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452::fetchJobLogsFromWorkflow@7582", + "name": "fetchJobLogsFromWorkflow", + "range": [ + 7582, + 7606, + 252, + 10, + 252, + 34 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452" + }, + { + "import": null, + "key": 
".github/actions/next-integration-stat/src/index.ts:.github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657->.github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452::fetchJobLogsFromWorkflow@8747", + "name": "fetchJobLogsFromWorkflow", + "range": [ + 8747, + 8771, + 284, + 6, + 284, + 30 + ], + "source_file": ".github/actions/next-integration-stat/src/index.ts", + "source_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "target_symbol": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452" + } + ], + "sha256": "63de55ea49061819ec173bade3316163bd113dd6e04e408bab70436738804769" + }, + "subclass_edges": { + "count": 160, + "samples": [ + { + "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261187:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261187::a@261235", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261187::a@261235", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@261187" + }, + { + "key": 
".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261741:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261741::a@261839", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@261741::a@261839", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@261741" + }, + { + "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262615:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262615::a@262703", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262615::a@262703", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@262615" + }, + { + 
"key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262980:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262980::a@263065", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@262980::a@263065", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@262980" + }, + { + "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263224:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263224::a@263282", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263224::a@263282", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@263224" + }, + { 
+ "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263668:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263668::a@263800", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@263668::a@263800", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@263668" + }, + { + "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@267323:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@267323::a@267406", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@267323::a@267406", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@267323" + }, + 
{ + "key": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@269883:.github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@269883::a@269941", + "reference": ".github/actions/validate-docs-links/dist/index.js:.github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74->.github/actions/validate-docs-links/dist/index.js:global_variable:a@269883::a@269941", + "source_file": ".github/actions/validate-docs-links/dist/index.js", + "source_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:__webpack_modules__@74", + "target_file": ".github/actions/validate-docs-links/dist/index.js", + "target_symbol": ".github/actions/validate-docs-links/dist/index.js:global_variable:a@269883" + }, + { + "key": "examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:class:ComputedPlugin@220->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:class:ComputedPlugin@220->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@246", + "reference": "examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:class:ComputedPlugin@220->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@246", + "source_file": 
"examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/scripts/config/plugins/computed.ts:class:ComputedPlugin@220", + "target_file": "examples/cms-sitecore-xmcloud/scripts/config/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414" + }, + { + "key": "examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:class:FallbackPlugin@228->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:class:FallbackPlugin@228->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@254", + "reference": "examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:class:FallbackPlugin@228->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@254", + "source_file": "examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/scripts/config/plugins/fallback.ts:class:FallbackPlugin@228", + "target_file": "examples/cms-sitecore-xmcloud/scripts/config/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414" + }, + { + "key": 
"examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:class:PackageJsonPlugin@163->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:class:PackageJsonPlugin@163->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@192", + "reference": "examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:class:PackageJsonPlugin@163->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@192", + "source_file": "examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/scripts/config/plugins/package-json.ts:class:PackageJsonPlugin@163", + "target_file": "examples/cms-sitecore-xmcloud/scripts/config/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414" + }, + { + "key": "examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:class:ScJssConfigPlugin@272->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:class:ScJssConfigPlugin@272->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@301", + "reference": 
"examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:class:ScJssConfigPlugin@272->examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414:examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:named_import:..:ConfigPlugin:ConfigPlugin@9:ConfigPlugin@301", + "source_file": "examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/scripts/config/plugins/scjssconfig.ts:class:ScJssConfigPlugin@272", + "target_file": "examples/cms-sitecore-xmcloud/scripts/config/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/scripts/config/index.ts:interface:ConfigPlugin@414" + }, + { + "key": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:class:ComponentPropsPlugin@319->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:class:ComponentPropsPlugin@319->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:named_import:..:Plugin:Plugin@266:Plugin@351", + "reference": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:class:ComponentPropsPlugin@319->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:named_import:..:Plugin:Plugin@266:Plugin@351", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts", + "source_symbol": 
"examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/component-props.ts:class:ComponentPropsPlugin@319", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603" + }, + { + "key": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:class:NormalModePlugin@470->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:class:NormalModePlugin@470->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:named_import:..:Plugin:Plugin@417:Plugin@498", + "reference": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:class:NormalModePlugin@470->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:named_import:..:Plugin:Plugin@417:Plugin@498", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/normal-mode.ts:class:NormalModePlugin@470", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603" + }, + { + "key": 
"examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:class:PreviewModePlugin@303->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:class:PreviewModePlugin@303->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:named_import:..:Plugin:Plugin@276:Plugin@332", + "reference": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:class:PreviewModePlugin@303->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:named_import:..:Plugin:Plugin@276:Plugin@332", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/preview-mode.ts:class:PreviewModePlugin@303", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603" + }, + { + "key": 
"examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:class:SitePlugin@245->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:class:SitePlugin@245->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:named_import:..:Plugin:Plugin@134:Plugin@267", + "reference": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:class:SitePlugin@245->examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603:examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:named_import:..:Plugin:Plugin@134:Plugin@267", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/plugins/site.ts:class:SitePlugin@245", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/page-props-factory/index.ts:interface:Plugin@603" + }, + { + "key": "examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:class:DefaultPlugin@155->examples/cms-sitecore-xmcloud/src/lib/site-resolver/index.ts:interface:SiteResolverPlugin@489:examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:class:DefaultPlugin@155->examples/cms-sitecore-xmcloud/src/lib/site-resolver/index.ts:interface:SiteResolverPlugin@489:examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:named_import:..:SiteResolverPlugin:SiteResolverPlugin@116:SiteResolverPlugin@180", + "reference": 
"examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:class:DefaultPlugin@155->examples/cms-sitecore-xmcloud/src/lib/site-resolver/index.ts:interface:SiteResolverPlugin@489:examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:named_import:..:SiteResolverPlugin:SiteResolverPlugin@116:SiteResolverPlugin@180", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/src/lib/site-resolver/plugins/default.ts:class:DefaultPlugin@155", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/site-resolver/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/site-resolver/index.ts:interface:SiteResolverPlugin@489" + }, + { + "key": "examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:class:GraphqlSitemapServicePlugin@235->examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/index.ts:interface:SitemapFetcherPlugin@185:examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:class:GraphqlSitemapServicePlugin@235->examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/index.ts:interface:SitemapFetcherPlugin@185:examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:named_import:..:SitemapFetcherPlugin:SitemapFetcherPlugin@148:SitemapFetcherPlugin@274", + "reference": 
"examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:class:GraphqlSitemapServicePlugin@235->examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/index.ts:interface:SitemapFetcherPlugin@185:examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:named_import:..:SitemapFetcherPlugin:SitemapFetcherPlugin@148:SitemapFetcherPlugin@274", + "source_file": "examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts", + "source_symbol": "examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/plugins/graphql-sitemap-service.ts:class:GraphqlSitemapServicePlugin@235", + "target_file": "examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/index.ts", + "target_symbol": "examples/cms-sitecore-xmcloud/src/lib/sitemap-fetcher/index.ts:interface:SitemapFetcherPlugin@185" + }, + { + "key": "examples/with-xata/utils/xata.codegen.ts:class:XataClient@673->examples/with-xata/utils/xata.codegen.ts:global_variable:DatabaseClient@599:examples/with-xata/utils/xata.codegen.ts:examples/with-xata/utils/xata.codegen.ts:class:XataClient@673->examples/with-xata/utils/xata.codegen.ts:global_variable:DatabaseClient@599::DatabaseClient@692", + "reference": "examples/with-xata/utils/xata.codegen.ts:examples/with-xata/utils/xata.codegen.ts:class:XataClient@673->examples/with-xata/utils/xata.codegen.ts:global_variable:DatabaseClient@599::DatabaseClient@692", + "source_file": "examples/with-xata/utils/xata.codegen.ts", + "source_symbol": "examples/with-xata/utils/xata.codegen.ts:class:XataClient@673", + "target_file": "examples/with-xata/utils/xata.codegen.ts", + "target_symbol": "examples/with-xata/utils/xata.codegen.ts:global_variable:DatabaseClient@599" + }, + { + "key": 
"packages/next/src/build/swc/index.ts:function:bindingToApi@13558->packages/next/src/build/swc/types.ts:interface:Endpoint@5583:packages/next/src/build/swc/index.ts:packages/next/src/build/swc/index.ts:function:bindingToApi@13558->packages/next/src/build/swc/types.ts:interface:Endpoint@5583:packages/next/src/build/swc/index.ts:named_import:./types:Endpoint:Endpoint@1178:Endpoint@24134", + "reference": "packages/next/src/build/swc/index.ts:packages/next/src/build/swc/index.ts:function:bindingToApi@13558->packages/next/src/build/swc/types.ts:interface:Endpoint@5583:packages/next/src/build/swc/index.ts:named_import:./types:Endpoint:Endpoint@1178:Endpoint@24134", + "source_file": "packages/next/src/build/swc/index.ts", + "source_symbol": "packages/next/src/build/swc/index.ts:function:bindingToApi@13558", + "target_file": "packages/next/src/build/swc/types.ts", + "target_symbol": "packages/next/src/build/swc/types.ts:interface:Endpoint@5583" + } + ], + "sha256": "b55a1570991c33ad18f55ba9f3e56595d759091893a685e8ada69598b45ac8a7" + }, + "symbols": { + "count": 44871, + "samples": [ + { + "file": ".github/actions/needs-triage/src/index.ts", + "is_top_level": true, + "key": ".github/actions/needs-triage/src/index.ts:function:assertNotNullable@423", + "kind": "function", + "name": "assertNotNullable", + "name_range": [ + 423, + 440, + 16, + 9, + 16, + 26 + ], + "parent_symbol": null, + "range": [ + 414, + 584, + 16, + 0, + 19, + 1 + ] + }, + { + "file": ".github/actions/needs-triage/src/index.ts", + "is_top_level": true, + "key": ".github/actions/needs-triage/src/index.ts:function:run@601", + "kind": "function", + "name": "run", + "name_range": [ + 601, + 604, + 21, + 15, + 21, + 18 + ], + "parent_symbol": null, + "range": [ + 586, + 1329, + 21, + 0, + 47, + 1 + ] + }, + { + "file": ".github/actions/needs-triage/src/index.ts", + "is_top_level": true, + "key": ".github/actions/needs-triage/src/index.ts:global_variable:LABELS@87", + "kind": "global_variable", + "name": 
"LABELS", + "name_range": [ + 87, + 93, + 3, + 6, + 3, + 12 + ], + "parent_symbol": null, + "range": [ + 81, + 293, + 3, + 0, + 8, + 1 + ] + }, + { + "file": ".github/actions/needs-triage/src/index.ts", + "is_top_level": true, + "key": ".github/actions/needs-triage/src/index.ts:global_variable:labelsRequireUserInput@301", + "kind": "global_variable", + "name": "labelsRequireUserInput", + "name_range": [ + 301, + 323, + 10, + 6, + 10, + 28 + ], + "parent_symbol": null, + "range": [ + 295, + 412, + 10, + 0, + 14, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:createCommentPostAsync@22107", + "kind": "function", + "name": "createCommentPostAsync", + "name_range": [ + 22107, + 22129, + 719, + 6, + 719, + 28 + ], + "parent_symbol": null, + "range": [ + 22101, + 22599, + 719, + 0, + 735, + 3 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:createFormattedComment@21764", + "kind": "function", + "name": "createFormattedComment", + "name_range": [ + 21764, + 21786, + 704, + 6, + 704, + 28 + ], + "parent_symbol": null, + "range": [ + 21758, + 22028, + 704, + 0, + 716, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:fetchJobLogsFromWorkflow@2452", + "kind": "function", + "name": "fetchJobLogsFromWorkflow", + "name_range": [ + 2452, + 2476, + 78, + 15, + 78, + 39 + ], + "parent_symbol": null, + "range": [ + 2437, + 3901, + 78, + 0, + 127, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:findNextJsVersionFromBuildLogs@709", + "kind": "function", + "name": 
"findNextJsVersionFromBuildLogs", + "name_range": [ + 709, + 739, + 20, + 15, + 20, + 45 + ], + "parent_symbol": null, + "range": [ + 694, + 2338, + 20, + 0, + 75, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:getInputs@3962", + "kind": "function", + "name": "getInputs", + "name_range": [ + 3962, + 3971, + 130, + 15, + 130, + 24 + ], + "parent_symbol": null, + "range": [ + 3947, + 6526, + 130, + 0, + 217, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:getJobResults@6657", + "kind": "function", + "name": "getJobResults", + "name_range": [ + 6657, + 6670, + 220, + 15, + 220, + 28 + ], + "parent_symbol": null, + "range": [ + 6642, + 10519, + 220, + 0, + 346, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615", + "kind": "function", + "name": "getTestResultDiffBase", + "name_range": [ + 10615, + 10636, + 349, + 15, + 349, + 36 + ], + "parent_symbol": null, + "range": [ + 10600, + 15116, + 349, + 0, + 502, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:getTestSummary@15558", + "kind": "function", + "name": "getTestSummary", + "name_range": [ + 15558, + 15572, + 519, + 9, + 519, + 23 + ], + "parent_symbol": null, + "range": [ + 15549, + 21625, + 519, + 0, + 700, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:run@22681", + "kind": "function", + "name": "run", + "name_range": [ + 22681, + 22684, + 738, + 15, + 738, + 18 
+ ], + "parent_symbol": null, + "range": [ + 22666, + 28322, + 738, + 0, + 931, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:function:withoutRetries@15127", + "kind": "function", + "name": "withoutRetries", + "name_range": [ + 15127, + 15141, + 504, + 9, + 504, + 23 + ], + "parent_symbol": null, + "range": [ + 15118, + 15547, + 504, + 0, + 517, + 1 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:global_variable:BOT_COMMENT_MARKER@513", + "kind": "global_variable", + "name": "BOT_COMMENT_MARKER", + "name_range": [ + 513, + 531, + 16, + 6, + 16, + 24 + ], + "parent_symbol": null, + "range": [ + 507, + 592, + 16, + 0, + 16, + 85 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": false, + "key": ".github/actions/next-integration-stat/src/index.ts:global_variable:accVersion@12941", + "kind": "global_variable", + "name": "accVersion", + "name_range": [ + 12941, + 12951, + 423, + 14, + 423, + 24 + ], + "parent_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615", + "range": [ + 12941, + 12983, + 423, + 14, + 423, + 56 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": false, + "key": ".github/actions/next-integration-stat/src/index.ts:global_variable:ancestorKey@24045", + "kind": "global_variable", + "name": "ancestorKey", + "name_range": [ + 24045, + 24056, + 781, + 14, + 781, + 25 + ], + "parent_symbol": ".github/actions/next-integration-stat/src/index.ts:function:run@22681", + "range": [ + 24045, + 24093, + 781, + 14, + 781, + 62 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": false, + "key": 
".github/actions/next-integration-stat/src/index.ts:global_variable:base@12504", + "kind": "global_variable", + "name": "base", + "name_range": [ + 12504, + 12508, + 406, + 14, + 406, + 18 + ], + "parent_symbol": ".github/actions/next-integration-stat/src/index.ts:function:getTestResultDiffBase@10615", + "range": [ + 12504, + 12541, + 406, + 14, + 406, + 51 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": false, + "key": ".github/actions/next-integration-stat/src/index.ts:global_variable:commentIdxToUpdate@26113", + "kind": "global_variable", + "name": "commentIdxToUpdate", + "name_range": [ + 26113, + 26131, + 840, + 10, + 840, + 28 + ], + "parent_symbol": ".github/actions/next-integration-stat/src/index.ts:function:run@22681", + "range": [ + 26113, + 26148, + 840, + 10, + 840, + 45 + ] + }, + { + "file": ".github/actions/next-integration-stat/src/index.ts", + "is_top_level": true, + "key": ".github/actions/next-integration-stat/src/index.ts:global_variable:commentTitlePre@630", + "kind": "global_variable", + "name": "commentTitlePre", + "name_range": [ + 630, + 645, + 18, + 6, + 18, + 21 + ], + "parent_symbol": null, + "range": [ + 624, + 692, + 18, + 0, + 18, + 68 + ] + } + ], + "sha256": "01a2355f2b5ee684cfa2c100ec231ebc756e7b655565b9d0829f9381e0a471e1" + } + }, + "integrity": { + "mismatched_subclass_edge_references": 0, + "missing_dependency_reference_links": 0, + "missing_dependency_source_file_links": 0, + "missing_dependency_source_symbol_links": 0, + "missing_dependency_target_file_links": 0, + "missing_dependency_target_symbol_links": 0, + "missing_export_file_links": 0, + "missing_export_import_links": 0, + "missing_export_symbol_links": 0, + "missing_external_module_file_links": 0, + "missing_external_module_import_links": 0, + "missing_external_reference_import_links": 0, + "missing_external_reference_source_file_links": 0, + "missing_external_reference_source_symbol_links": 0, + 
"missing_import_file_links": 0, + "missing_reference_import_links": 0, + "missing_reference_source_file_links": 0, + "missing_reference_source_symbol_links": 0, + "missing_reference_target_symbol_links": 0, + "missing_resolution_import_links": 0, + "missing_resolution_source_file_links": 0, + "missing_resolution_target_file_links": 0, + "missing_resolution_target_symbol_links": 0, + "missing_subclass_edge_reference_links": 0, + "missing_subclass_edge_source_file_links": 0, + "missing_subclass_edge_source_symbol_links": 0, + "missing_subclass_edge_target_file_links": 0, + "missing_subclass_edge_target_symbol_links": 0, + "missing_symbol_file_links": 0, + "selected_file_count_delta": 0 + }, + "metadata": { + "commit": "51bfe3c1863b191f4b039bc230e8ed5c57b0baf3", + "name": "next.js-v15.0.0", + "raw_rust_walk": false, + "ref": "refs/tags/v15.0.0", + "repo_url": "https://github.com/vercel/next.js.git", + "selected_file_count": 13688 + }, + "schema_version": 6, + "summary": { + "bytes": 25421217, + "classes": 502, + "dependencies": 49287, + "exports": 16027, + "external_modules": 13525, + "external_references": 25317, + "files": 13688, + "files_with_errors": 113, + "functions": 13497, + "global_variables": 28742, + "import_resolutions": 13462, + "imports": 28210, + "lines": 634891, + "references": 114464, + "subclass_edges": 160, + "symbols": 44871 + } +} diff --git a/rust-rewrite/golden/typescript-fixture-rust-compact.json b/rust-rewrite/golden/typescript-fixture-rust-compact.json new file mode 100644 index 000000000..6391524af --- /dev/null +++ b/rust-rewrite/golden/typescript-fixture-rust-compact.json @@ -0,0 +1,800 @@ +{ + "dependencies": [ + { + "id": 0, + "reference_count": 1, + "reference_ids": [ + 0 + ], + "source_file_id": 0, + "source_symbol_id": 8, + "target_file_id": 0, + "target_symbol_id": 2 + }, + { + "id": 1, + "reference_count": 1, + "reference_ids": [ + 1 + ], + "source_file_id": 1, + "source_symbol_id": 11, + "target_file_id": 1, + "target_symbol_id": 
10 + } + ], + "exports": [ + { + "file_id": 0, + "id": 0, + "import_id": null, + "kind": "named", + "local_name": "Props", + "name": "Props", + "range": [ + 187, + 246, + 7, + 0, + 7, + 59 + ], + "source_module": null, + "symbol_id": 2 + }, + { + "file_id": 0, + "id": 1, + "import_id": null, + "kind": "named", + "local_name": "Mode", + "name": "Mode", + "range": [ + 247, + 283, + 8, + 0, + 8, + 36 + ], + "source_module": null, + "symbol_id": 3 + }, + { + "file_id": 0, + "id": 2, + "import_id": null, + "kind": "named", + "local_name": "Status", + "name": "Status", + "range": [ + 284, + 322, + 9, + 0, + 9, + 38 + ], + "source_module": null, + "symbol_id": 4 + }, + { + "file_id": 0, + "id": 3, + "import_id": null, + "kind": "named", + "local_name": "Tokens", + "name": "Tokens", + "range": [ + 323, + 376, + 10, + 0, + 10, + 53 + ], + "source_module": null, + "symbol_id": 5 + }, + { + "file_id": 0, + "id": 4, + "import_id": null, + "kind": "named", + "local_name": "helper", + "name": "helper", + "range": [ + 377, + 428, + 11, + 0, + 11, + 51 + ], + "source_module": null, + "symbol_id": 7 + }, + { + "file_id": 0, + "id": 5, + "import_id": null, + "kind": "named", + "local_name": "Page", + "name": "Page", + "range": [ + 429, + 502, + 12, + 0, + 12, + 73 + ], + "source_module": null, + "symbol_id": 8 + }, + { + "file_id": 0, + "id": 6, + "import_id": null, + "kind": "default", + "local_name": "Widget", + "name": "default", + "range": [ + 503, + 533, + 13, + 0, + 13, + 30 + ], + "source_module": null, + "symbol_id": 9 + }, + { + "file_id": 0, + "id": 7, + "import_id": null, + "kind": "named", + "local_name": "helper", + "name": "renamedHelper", + "range": [ + 534, + 576, + 14, + 0, + 14, + 42 + ], + "source_module": null, + "symbol_id": null + }, + { + "file_id": 0, + "id": 8, + "import_id": null, + "kind": "named", + "local_name": "Props", + "name": "Props", + "range": [ + 534, + 576, + 14, + 0, + 14, + 42 + ], + "source_module": null, + "symbol_id": null + }, + { + 
"file_id": 0, + "id": 9, + "import_id": 7, + "kind": "wildcard", + "local_name": null, + "name": null, + "range": [ + 577, + 602, + 15, + 0, + 15, + 25 + ], + "source_module": "./shared", + "symbol_id": null + }, + { + "file_id": 0, + "id": 10, + "import_id": 8, + "kind": "namespace", + "local_name": "shared", + "name": "shared", + "range": [ + 603, + 638, + 16, + 0, + 16, + 35 + ], + "source_module": "./shared", + "symbol_id": null + }, + { + "file_id": 1, + "id": 11, + "import_id": null, + "kind": "named", + "local_name": "sharedValue", + "name": "sharedValue", + "range": [ + 0, + 29, + 0, + 0, + 0, + 29 + ], + "source_module": null, + "symbol_id": 10 + }, + { + "file_id": 1, + "id": 12, + "import_id": null, + "kind": "named", + "local_name": "sharedFn", + "name": "sharedFn", + "range": [ + 30, + 80, + 1, + 0, + 1, + 50 + ], + "source_module": null, + "symbol_id": 11 + } + ], + "files": [ + { + "byte_len": 639, + "content_hash": "825a13117e50e433", + "has_error": false, + "id": 0, + "language": "tsx", + "line_count": 17, + "path": "src/app.tsx", + "root_range": [ + 0, + 639, + 0, + 0, + 17, + 0 + ] + }, + { + "byte_len": 81, + "content_hash": "7ed060fdc0c18e96", + "has_error": false, + "id": 1, + "language": "typescript", + "line_count": 2, + "path": "src/shared.ts", + "root_range": [ + 0, + 81, + 0, + 0, + 2, + 0 + ] + } + ], + "import_resolutions": [ + { + "id": 0, + "import_id": 7, + "source_file_id": 0, + "target_file_id": 1, + "target_symbol_id": null + }, + { + "id": 1, + "import_id": 8, + "source_file_id": 0, + "target_file_id": 1, + "target_symbol_id": null + } + ], + "imports": [ + { + "alias": "React", + "file_id": 0, + "id": 0, + "kind": "default_import", + "module": "react", + "name": "React", + "range": [ + 7, + 12, + 0, + 7, + 0, + 12 + ] + }, + { + "alias": "memo", + "file_id": 0, + "id": 1, + "kind": "named_import", + "module": "react", + "name": "useMemo", + "range": [ + 16, + 31, + 0, + 16, + 0, + 31 + ] + }, + { + "alias": "ReactNode", + 
"file_id": 0, + "id": 2, + "kind": "named_import", + "module": "react", + "name": "ReactNode", + "range": [ + 33, + 42, + 0, + 33, + 0, + 42 + ] + }, + { + "alias": "path", + "file_id": 0, + "id": 3, + "kind": "namespace_import", + "module": "path", + "name": "*", + "range": [ + 66, + 75, + 1, + 7, + 1, + 16 + ] + }, + { + "alias": null, + "file_id": 0, + "id": 4, + "kind": "side_effect", + "module": "./polyfill", + "name": null, + "range": [ + 89, + 109, + 2, + 0, + 2, + 20 + ] + }, + { + "alias": "lazy", + "file_id": 0, + "id": 5, + "kind": "dynamic_import", + "module": "./lazy", + "name": "lazy", + "range": [ + 117, + 141, + 4, + 6, + 4, + 30 + ] + }, + { + "alias": "dynamicModule", + "file_id": 0, + "id": 6, + "kind": "dynamic_import", + "module": "./dynamic", + "name": "dynamicModule", + "range": [ + 149, + 184, + 5, + 6, + 5, + 41 + ] + }, + { + "alias": null, + "file_id": 0, + "id": 7, + "kind": "namespace_import", + "module": "./shared", + "name": "*", + "range": [ + 577, + 602, + 15, + 0, + 15, + 25 + ] + }, + { + "alias": "shared", + "file_id": 0, + "id": 8, + "kind": "namespace_import", + "module": "./shared", + "name": "*", + "range": [ + 610, + 621, + 16, + 7, + 16, + 18 + ] + } + ], + "references": [ + { + "id": 0, + "import_id": null, + "name": "Props", + "range": [ + 457, + 462, + 12, + 28, + 12, + 33 + ], + "source_file_id": 0, + "source_symbol_id": 8, + "target_symbol_id": 2 + }, + { + "id": 1, + "import_id": null, + "name": "sharedValue", + "range": [ + 66, + 77, + 1, + 36, + 1, + 47 + ], + "source_file_id": 1, + "source_symbol_id": 11, + "target_symbol_id": 10 + } + ], + "subclass_edges": [], + "summary": { + "bytes": 720, + "classes": 1, + "dependencies": 2, + "exports": 13, + "files": 2, + "files_with_errors": 0, + "functions": 3, + "global_variables": 4, + "import_resolutions": 2, + "imports": 9, + "lines": 19, + "references": 2, + "symbols": 12 + }, + "symbols": [ + { + "file_id": 0, + "id": 0, + "is_top_level": true, + "kind": 
"global_variable", + "name": "lazy", + "name_range": [ + 117, + 121, + 4, + 6, + 4, + 10 + ], + "parent_symbol_id": null, + "range": [ + 111, + 142, + 4, + 0, + 4, + 31 + ] + }, + { + "file_id": 0, + "id": 1, + "is_top_level": true, + "kind": "global_variable", + "name": "dynamicModule", + "name_range": [ + 149, + 162, + 5, + 6, + 5, + 19 + ], + "parent_symbol_id": null, + "range": [ + 143, + 185, + 5, + 0, + 5, + 42 + ] + }, + { + "file_id": 0, + "id": 2, + "is_top_level": true, + "kind": "interface", + "name": "Props", + "name_range": [ + 204, + 209, + 7, + 17, + 7, + 22 + ], + "parent_symbol_id": null, + "range": [ + 194, + 246, + 7, + 7, + 7, + 59 + ] + }, + { + "file_id": 0, + "id": 3, + "is_top_level": true, + "kind": "type_alias", + "name": "Mode", + "name_range": [ + 259, + 263, + 8, + 12, + 8, + 16 + ], + "parent_symbol_id": null, + "range": [ + 254, + 283, + 8, + 7, + 8, + 36 + ] + }, + { + "file_id": 0, + "id": 4, + "is_top_level": true, + "kind": "enum", + "name": "Status", + "name_range": [ + 296, + 302, + 9, + 12, + 9, + 18 + ], + "parent_symbol_id": null, + "range": [ + 291, + 322, + 9, + 7, + 9, + 38 + ] + }, + { + "file_id": 0, + "id": 5, + "is_top_level": true, + "kind": "namespace", + "name": "Tokens", + "name_range": [ + 340, + 346, + 10, + 17, + 10, + 23 + ], + "parent_symbol_id": null, + "range": [ + 330, + 376, + 10, + 7, + 10, + 53 + ] + }, + { + "file_id": 0, + "id": 6, + "is_top_level": false, + "kind": "global_variable", + "name": "spacing", + "name_range": [ + 362, + 369, + 10, + 39, + 10, + 46 + ], + "parent_symbol_id": 5, + "range": [ + 356, + 374, + 10, + 33, + 10, + 51 + ] + }, + { + "file_id": 0, + "id": 7, + "is_top_level": true, + "kind": "function", + "name": "helper", + "name_range": [ + 390, + 396, + 11, + 13, + 11, + 19 + ], + "parent_symbol_id": null, + "range": [ + 384, + 428, + 11, + 7, + 11, + 51 + ] + }, + { + "file_id": 0, + "id": 8, + "is_top_level": true, + "kind": "function", + "name": "Page", + "name_range": [ + 445, 
+ 449, + 12, + 16, + 12, + 20 + ], + "parent_symbol_id": null, + "range": [ + 436, + 502, + 12, + 7, + 12, + 73 + ] + }, + { + "file_id": 0, + "id": 9, + "is_top_level": true, + "kind": "class", + "name": "Widget", + "name_range": [ + 524, + 530, + 13, + 21, + 13, + 27 + ], + "parent_symbol_id": null, + "range": [ + 518, + 533, + 13, + 15, + 13, + 30 + ] + }, + { + "file_id": 1, + "id": 10, + "is_top_level": true, + "kind": "global_variable", + "name": "sharedValue", + "name_range": [ + 13, + 24, + 0, + 13, + 0, + 24 + ], + "parent_symbol_id": null, + "range": [ + 7, + 29, + 0, + 7, + 0, + 29 + ] + }, + { + "file_id": 1, + "id": 11, + "is_top_level": true, + "kind": "function", + "name": "sharedFn", + "name_range": [ + 46, + 54, + 1, + 16, + 1, + 24 + ], + "parent_symbol_id": null, + "range": [ + 37, + 80, + 1, + 7, + 1, + 50 + ] + } + ] +} diff --git a/rust-rewrite/p0-parity-coverage.json b/rust-rewrite/p0-parity-coverage.json new file mode 100644 index 000000000..f8083f8a9 --- /dev/null +++ b/rust-rewrite/p0-parity-coverage.json @@ -0,0 +1,182 @@ +{ + "schema_version": 1, + "pytest_roots": [ + "tests/unit/sdk/codebase/test_rust_backend.py", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py" + ], + "groups": [ + { + "name": "Codebase construction and graph-free public queries", + "status": "parity_covered", + "api_inventory": [ + "Codebase(...) constructor surface", + "codebase.files(...) 
ordering and filtering", + "codebase.has_file/get_file with ignore_case", + "codebase.symbols/classes/functions/global_vars/imports/exports", + "codebase.has_symbol/get_symbol/get_symbols/get_class/get_function" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_codebase_context_builds_opt_in_rust_index", + "tests/unit/sdk/codebase/test_rust_backend.py::test_codebase_context_builds_opt_in_typescript_rust_index", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_public_queries_preserve_python_sorting", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_exact_symbol_lookups_do_not_materialize_all_symbols", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_ignore_case_file_lookup_does_not_materialize_file_lists" + ], + "tools": [ + "rust-rewrite/tools/check_python_rust_parity_fixture.py", + "rust-rewrite/tools/check_pinned_python_codebase.py", + "rust-rewrite/tools/check_pinned_typescript_codebase.py", + "rust-rewrite/tools/check_pinned_semantic_parity.py" + ] + } + }, + { + "name": "File and SourceFile read APIs", + "status": "parity_covered", + "api_inventory": [ + "file identity and content accessors", + "file.imports/import_statements/inbound_imports/importers", + "file.has_import/get_import", + "file.symbols/classes/functions/global_vars", + "file.get_symbol/get_class/get_function/get_global_var", + "file.find_by_byte_range(...)", + "file.resolve_name/resolve_attribute/get_node_by_name", + "TypeScript file.exports/get_export", + "TypeScript file.get_namespace and namespace member lookup helpers" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_byte_range_lookups_do_not_materialize_file_nodes", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_name_resolution_does_not_materialize_file_maps", + 
"tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_module_import_attribute_resolution_does_not_materialize_file_maps", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_exact_export_lookups_do_not_materialize_all_exports", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_namespace_lookups_do_not_materialize_python_graph" + ], + "tools": [ + "rust-rewrite/tools/check_python_rust_parity_fixture.py", + "rust-rewrite/tools/check_pinned_python_codebase.py", + "rust-rewrite/tools/check_pinned_typescript_codebase.py" + ] + } + }, + { + "name": "Symbol, import, export, usage, and dependency wrappers", + "status": "parity_covered", + "api_inventory": [ + "symbol identity/source/name handles", + "symbol.dependencies/usages/symbol_usages/descendant_symbols", + "import identity/predicates/resolution/imported_exports", + "export identity/predicates/resolution/import string helpers", + "external import dependency handles", + "TypeScript import type/default/namespace predicates", + "TypeScript interface/class heritage dependencies and traversal", + "TypeScript named namespace re-export member dependencies", + "TypeScript file.get_namespace and namespace member lookups", + "TypeScript subclass and implementation traversal" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_external_modules", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_symbol_usages_include_import_export_wrappers_without_materializing_indexes", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_import_predicates_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_external_import_dependencies", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_subclass_traversal" + ], + "tools": [ + "rust-rewrite/tools/check_python_rust_parity_fixture.py", + 
"rust-rewrite/tools/check_pinned_semantic_parity.py" + ] + } + }, + { + "name": "Codemod transaction compatibility", + "status": "parity_covered", + "api_inventory": [ + "codebase.commit(...) and reset(...) stay callable", + "file edit/create/remove flows", + "symbol rename/remove/move-to-file flows", + "import add/remove/retarget flows", + "Codemod.execute(...) mutation flows" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_file_mutations_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_symbol_rename_and_add_import_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_symbol_import_edits_match_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_execute_move_updates_imports_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_import_edits_match_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_move_updates_imports_matches_python_backend" + ], + "tools": [ + "rust-rewrite/tools/check_python_rust_parity_fixture.py", + "rust-rewrite/tools/check_pinned_codemods.py" + ] + } + }, + { + "name": "Fallback and strict unsupported API behavior", + "status": "fallback_covered", + "api_inventory": [ + "missing extension behavior", + "rust_fallback=python promotion path", + "rust_fallback=error explicit unsupported API errors", + "unsupported compact file methods" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_missing_rust_extension_falls_back_to_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_unsupported_api_fails_explicitly_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_unsupported_file_method_falls_back_to_python_graph", + 
"tests/unit/sdk/codebase/test_rust_backend.py::test_missing_rust_extension_can_fail_strictly" + ], + "tools": [ + "rust-rewrite/tools/check_supported_subset.py" + ] + } + }, + { + "name": "Directory traversal and recursive symbol APIs", + "status": "parity_covered", + "api_inventory": [ + "codebase.directories/get_directory/has_directory", + "codebase.files(extensions='*') for non-source file handles", + "directory.files/subdirectories/items/tree", + "directory.files(extensions='*') for non-source files", + "create_directory(...) visibility in compact Rust mode", + "directory recursive symbols/imports/exports/classes/functions/global_vars", + "directory get_symbol/get_import/get_export helpers" + ], + "evidence": { + "pytest": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_directory_queries_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_directory_all_file_queries_include_non_source_files" + ], + "tools": [] + } + }, + { + "name": "Full TypeScript expression, namespace, and type-system surface", + "status": "open_gap", + "api_inventory": [ + "full namespace export/private/member semantics beyond direct member lookup", + "promise-chain async conversion and mutable expression-object semantics beyond compact read-only file/symbol.promise_chains", + "full mutable FunctionCall expression object semantics beyond compact read-only file/symbol.function_calls", + "full TypeScript lexical/type/interface/reference coverage", + "complete namespace and expression dependency parity" + ], + "gap": "The current compact TypeScript path covers selected files/imports/exports/usages/dependencies/subclass APIs, named namespace re-export member dependencies, direct file.get_namespace/member lookup helpers, read-only compact file/symbol.function_calls, and read-only compact file/symbol.promise_chains, but broader namespace export/private semantics, mutable expression nodes, async promise-chain conversion, and 
type-system P0 surfaces still need parity tests and targeted Rust data." + }, + { + "name": "Full graph-wide large-repo semantic parity", + "status": "open_gap", + "api_inventory": [ + "repo-wide reference graph equality", + "repo-wide import graph equality", + "repo-wide dependency graph equality", + "deterministic ordering across full graph materializers" + ], + "gap": "Pinned Airflow and Next.js have selected semantic parity plus stable compact snapshots, but a full Python-vs-Rust graph-wide equality test is still open before default-backend promotion." + } + ] +} diff --git a/rust-rewrite/parser-index.md b/rust-rewrite/parser-index.md new file mode 100644 index 000000000..b1cfe1fd2 --- /dev/null +++ b/rust-rewrite/parser-index.md @@ -0,0 +1,345 @@ +# Rust Parser And Compact Index Plan + +## Purpose + +Phase 2 should replace the current eager Python AST/object construction with a Rust parser/indexer that emits a compact, snapshot-friendly IR. The IR should preserve enough structure for `files`, `symbols`, `classes`, `functions`, `imports`, and TypeScript `exports` queries, while leaving dependency resolution, expression modeling, edits, and Python object compatibility to later phases. + +Current behavior to match where relevant: + +- Parser setup maps `.py` to tree-sitter Python and `.js`, `.jsx`, `.ts`, `.tsx` to the TSX grammar (`src/graph_sitter/tree_sitter_parser.py`). +- Language semantic maps live in `PyNodeClasses` and `TSNodeClasses` (`src/graph_sitter/codebase/node_classes/*_node_classes.py`). +- Statement classification is currently custom code in `Parser.parse_py_statements` and `Parser.parse_ts_statements` (`src/graph_sitter/core/parser.py`). +- `SourceFile.parse` eagerly creates a `CodeBlock`, recursively parses statements, populates `file._nodes`, and stores Python payload objects in `rustworkx.PyDiGraph` (`src/graph_sitter/core/file.py`). 
+- Import/export resolution edges are separate graph phases after parse (`src/graph_sitter/codebase/codebase_context.py`), so the Rust parser slice should only extract unresolved import/export facts. + +## Compact IR + +Use append-only arenas plus interners. IDs are opaque integers scoped to the engine. + +### Shared Primitives + +- `FileId`, `SymbolId`, `ImportId`, `ExportId`, `ScopeId`, `StatementId`, `StringId`, `PathId`. +- `Range`: `start_byte`, `end_byte`, `start_point { row, column }`, `end_point { row, column }`. +- `NodeRef`: `file_id`, tree-sitter kind enum/string ID, `range`. Store this only for retained declarations/statements, not every tree-sitter node. +- `Language`: `Python`, `TypeScript`, `TSX`, `JavaScript`, `JSX`. Parser grammar may still be TSX for all JS/TS files to match the Python backend, while the file language should keep the extension-derived source kind. + +### Records + +`FileRecord` + +- `file_id` +- `path_id`, `name_id`, extension-derived `language` +- `content_hash` +- `root_range` +- `parse_status`: `ok`, `tree_sitter_error`, `skipped_binary`, `skipped_minified` +- ordered lists: `top_level_symbols`, `imports`, `exports`, `scopes` + +`SymbolRecord` + +- `symbol_id` +- `file_id` +- `name_id` +- `full_name_id` +- `kind`: `Function`, `Class`, `GlobalVar`, `Interface`, `TypeAlias`, `Enum`, `Namespace` +- `parent_symbol_id: Option<SymbolId>`; in the first parser/index slice this should normally be `None` because only top-level symbols are emitted +- `scope_id` +- `range`: extended source range when the current Python API would include decorators/export keywords +- `declaration_range`: actual declaration node range +- `name_range` +- `body_range: Option<Range>` +- flags: `decorated`, `async`, `default_exported`, `named_exported`, `type_only` + +`ImportRecord` + +- `import_id` +- `file_id` +- `scope_id` +- `statement_id` +- `kind`: reuse current `ImportType` shape: `DefaultExport`, `NamedExport`, `Wildcard`, `Module`, `SideEffect`, `Unknown` +- `module_id:
Option<StringId>`: raw module/source text, including Python leading dots or TS quotes; the stripped form is kept in a separate normalized field +- `imported_name_id: Option<StringId>` +- `local_name_id: Option<StringId>` +- `namespace_id: Option<StringId>` +- `is_type_only` +- `is_future_import` +- `is_dynamic` +- `from_export` +- ranges: `statement_range`, `import_range`, `module_range`, `name_range`, `alias_range` + +`ExportRecord` (TypeScript/JS only in the first parser/index slice) + +- `export_id` +- `file_id` +- `scope_id` +- `statement_id` +- `kind`: `Named`, `Default`, `Wildcard`, `Namespace`, `ExportEquals`, `Unknown` +- `exported_name_id: Option<StringId>` +- `local_name_id: Option<StringId>` +- `source_module_id: Option<StringId>` for re-exports +- `declared_symbol_id: Option<SymbolId>` when the export declares a top-level symbol in the same statement +- `import_id: Option<ImportId>` when the export is a direct re-export modeled through an import fact +- `is_type_only` +- ranges: `statement_range`, `export_range`, `name_range`, `source_range` + +`ScopeRecord` + +- `scope_id` +- `file_id` +- `parent_scope_id: Option<ScopeId>` +- `owner`: `File(FileId)` or `Symbol(SymbolId)` +- `kind`: `File`, `ClassBody`, `FunctionBody`, `ModuleBlock` +- `range` +- ordered child IDs for top-level symbols/imports/exports owned by this scope + +First-slice scopes are lookup boundaries and ownership containers, not full lexical environments. + +## Python Extraction Rules + +Current implemented Rust status: + +- Files, top-level classes/functions, top-level simple global assignments, Python imports, and compact internal import-resolution records are implemented for Python. +- Global extraction currently covers simple identifier targets in top-level `assignment` and `annotated_assignment` nodes, including identifiers nested in tuple/list/pattern lists. Attribute and subscript assignment targets remain intentionally skipped. + +### Files + +- Parse only `.py`. +- Emit one `FileRecord` per readable, non-skipped file. +- The file scope owns top-level declarations and top-level imports.
Nested import statements can be emitted with the nearest top-level symbol scope when found cheaply by range containment. + +### Top-Level Symbols + +Walk direct named children of the root `module`. + +- `decorated_definition` + - Read child field `definition`. + - If definition is `function_definition`, emit `Function`. + - If definition is `class_definition`, emit `Class`. + - `range` is the `decorated_definition`; `declaration_range` is the nested definition. + - `name_range` is the nested definition's `name` field. + - Set `decorated = true`. +- `function_definition` + - Emit `Function`. + - `name_range` is field `name`. + - `body_range` is field `body`. +- `class_definition` + - Emit `Class`. + - `name_range` is field `name`. + - `body_range` is field `body`. +- `expression_statement` containing top-level `assignment` or `augmented_assignment` + - Emit `GlobalVar` records for simple identifier names on the left side. + - For `pattern_list`, emit one `GlobalVar` per identifier in source order. + - For attribute/subscript left sides, store no phase-1 symbol; those are not importable globals in the same way and require expression modeling. + - Preserve the assignment statement range and the specific name range. + +Do not emit nested functions/classes/methods as `SymbolRecord` in the first vertical slice. Current Python can materialize them through recursive `CodeBlock` parsing, but the phase-1 query target is top-level symbols. + +### Imports + +Emit one `ImportRecord` per imported binding. Store raw syntax facts only; do not resolve to files or symbols. + +- `import_statement` + - For each `dotted_name`, emit `Module` with `module = name = alias = dotted_name`. + - For each `aliased_import`, emit `Module` with `module/name` from field `name` and `local_name` from field `alias`. +- `import_from_statement` + - `module` is field `module_name`; keep leading dots as raw text and also store `relative_level` if practical. 
+ - For each `dotted_name`, emit `NamedExport` with `imported_name = local_name = dotted_name`. + - For each `aliased_import`, emit `NamedExport` with `imported_name` from field `name` and `local_name` from field `alias`. + - For `wildcard_import`, emit `Wildcard`; keep the current Python-backend-compatible local name empty or `*` in a dedicated wildcard field, not as a normal binding. +- `future_import_statement` + - Emit imports with `kind = SideEffect` and `is_future_import = true`, matching current backend behavior. + +### Python Exports + +Do not emit `ExportRecord` for Python in the first parser/index slice. Python importability is represented by top-level symbols, module imports, wildcard chains, and `__init__.py` rules in the resolver phase. + +## TypeScript, TSX, JavaScript Extraction Rules + +### Files + +- Include `.ts`, `.tsx`, `.js`, `.jsx`. +- For parity with the existing backend, parse all four extensions with the TSX grammar initially. Keep `FileRecord.language` extension-specific so a later parser split does not change public file identity. + +### Top-Level Symbols + +Walk direct named children of `program`, plus declarations wrapped by top-level `export_statement`. + +Emit direct top-level declarations: + +- `function_declaration`, `generator_function_declaration` -> `Function` +- `class_declaration`, `abstract_class_declaration` -> `Class` +- `interface_declaration` -> `Interface` +- `type_alias_declaration` -> `TypeAlias` +- `enum_declaration` -> `Enum` +- `internal_module` -> `Namespace` +- `lexical_declaration` or `variable_declaration` + - If a `variable_declarator` value contains a top-level `arrow_function`, `function_expression`, or `generator_function` at depth <= 2, emit a `Function` named from the declarator's `name` field. + - Otherwise emit `GlobalVar` records for simple identifier declarator names. + - For object/array patterns, emit one `GlobalVar` per simple bound identifier in source order. 
Defer type-aware destructuring semantics. + +For `export_statement` with field `declaration`, emit the same symbol kinds from the declaration and attach `named_exported` or `default_exported` flags through the paired `ExportRecord`. + +Do not emit class methods, private fields, JSX elements, object-literal properties, call expressions, promise chains, or nested declarations in the first parser/index slice. + +### Static Imports + +For `import_statement`, emit one `ImportRecord` per current backend import object: + +- No `import_clause`: `import "./setup";` + - Emit `SideEffect`, `module = source`, no local binding. +- Identifier child of `import_clause`: `import Foo from "./m";` + - Emit `DefaultExport`, `imported_name = local_name = Foo`. +- `named_imports`: `import { a, b as c } from "./m";` + - Emit one `NamedExport` per `import_specifier`. + - `imported_name` is field `name`; `local_name` is field `alias` or `name`. + - Skip `comment` children. +- `namespace_import`: `import * as ns from "./m";` + - Emit `Wildcard`, `namespace/local_name = ns`. +- Type imports: `import type { T } from "./m";`, `import { type T } from "./m";` + - Set `is_type_only` on the statement-wide or specifier-specific import. If specifier-level detection is initially awkward in tree-sitter, snapshot it as a known gap rather than resolving incorrectly. + +### Dynamic Imports And Require + +The first vertical slice should include a small, syntax-only subset because existing file import tests expect `require` and dynamic `import()` to surface as imports: + +- Side-effect calls: `require("./m")`, `import("./m")`, `await import("./m")` in expression statements -> `SideEffect`, `is_dynamic = true`. +- Named module binding: `const pkg = require("./m")` or `const pkg = await import("./m")` -> `Module`, `local_name = pkg`, `is_dynamic = true`. +- Destructured binding: `const { a, b: c } = require("./m")` -> one `NamedExport` per simple property binding. 
+- Member access type/value import: `import("./m").SomeType` or `(await import("./m")).default` -> `NamedExport` or `DefaultExport` when the property is a simple identifier. + +Defer dynamic imports with computed module paths, conditional module expressions, nested object patterns, and non-literal source arguments. + +### Exports + +Emit unresolved `ExportRecord` facts and any directly declared symbols. + +- Declaration exports: + - `export function f() {}`, `export class C {}`, `export interface I {}`, `export type T = ...`, `export enum E {}`, `export namespace N {}`, `export const x = ...` + - Emit the declared `SymbolRecord`. + - Emit `ExportRecord(kind = Named, exported_name = symbol name, declared_symbol_id = symbol_id)`. +- Default declaration/value exports: + - `export default function f() {}`, `export default class C {}`, `export default foo`, `export = foo` + - Emit `ExportRecord(kind = Default)` or `ExportEquals`. + - If the statement declares a named top-level function/class/assignment, link `declared_symbol_id`. + - If anonymous/default value has no durable name, do not invent a `SymbolRecord`; keep only the export fact and value range. +- Named export clauses: + - `export { a, b as c };` + - Emit one `ExportRecord(kind = Named)` per `export_specifier`. + - `local_name = name`, `exported_name = alias or name`. +- Re-exports: + - `export { a, b as c } from "./m";` + - Emit one `ImportRecord(from_export = true)` per imported binding and one `ExportRecord` linked to that import. + - `source_module = "./m"`. + - `export { default as Foo } from "./m"` should set the import kind to `DefaultExport`. +- Wildcard re-exports: + - `export * from "./m";` -> `ExportRecord(kind = Wildcard, source_module = "./m")` plus a `Wildcard` import fact from the source. + - `export * as ns from "./m";` -> `ExportRecord(kind = Namespace, exported_name = ns, source_module = "./m")` plus a `Wildcard` import fact with namespace/local name `ns`. 
+- Type exports: + - `export type { T } from "./types";`, `export type T = ...` + - Set `is_type_only = true`. + +Do not resolve `ExportRecord` targets across files in the first parser/index slice. That belongs to Phase 3. + +## Ranges And Scopes + +Every retained record should be reconstructible from byte ranges against file content: + +- Store byte ranges for file root, declaration, full/extended source, names, module strings, aliases, and statement boundaries. +- Store point ranges for user-facing diagnostics and snapshots. +- Keep both `statement_range` and focused binding/export ranges because current `Import` and `ExportStatement` APIs distinguish a single binding from the whole statement. +- Ranges must be byte offsets from UTF-8 source bytes. Do not derive offsets from Python string indices. + +Minimal phase-1 scope rules: + +- Create one `File` scope per file. +- Create one owned body scope for each top-level class/function/namespace. +- Assign each import/export to the narrowest retained scope by range containment: file scope or nearest top-level symbol body scope. +- Do not create scopes for every `if`, `for`, `while`, `try`, match/switch case, lambda/arrow expression, or nested block in the first parser/index slice. +- Do not compute name lookup tables, hoisting, `global`/`nonlocal`, closure captures, or TypeScript block scoping in the first parser/index slice. + +## What The First Parser/Index Slice Must Not Eagerly Materialize + +- Python wrapper objects for every node. +- Persistent tree-sitter node handles after extraction. +- `CodeBlock`, `Statement`, `Expression`, `FunctionCall`, JSX, type-expression, decorator, comment, and docstring objects. +- `rustworkx` graph payloads or Python object graph edges. +- Dependency edges, symbol usage records, superclass/interface edges, import resolution edges, or export resolution edges. +- Full local-variable indexes inside functions/classes. +- External module records beyond unresolved import module strings. 
+- Directory tree, tsconfig path expansion, sys.path/import override resolution, and package `__init__.py` wildcard semantics. +- Edit/formatting metadata beyond source ranges needed by later lazy handles. + +## Golden Snapshots + +Add Rust IR snapshot tests that compare stable JSON, sorted by `(file_path, range_start, kind, name)` and using interned string values in the debug dump for readability. + +### Python Fixtures + +- `py_symbols_basic.py` + - module imports, `from` imports, aliases, wildcard import + - top-level decorated function, async function, class, simple globals, tuple assignment + - nested function/class/assignment present but absent from phase-1 symbols +- `py_relative_imports.py` + - `from . import x`, `from ..pkg.mod import A as B`, `from __future__ import annotations` + - verify raw module text, relative level, future flag +- `py_scopes.py` + - top-level import, import inside a function, import inside a class method + - verify import scope assignment without full nested statement materialization + +### TypeScript/TSX Fixtures + +- `ts_symbols_basic.ts` + - function, generator, class, abstract class, interface, type alias, enum, namespace, const global, arrow-function const +- `ts_imports.ts` + - default, named, aliased named, namespace, side-effect, type-only import +- `ts_dynamic_imports.js` + - `require`, `await import`, destructured require, side-effect require +- `ts_exports.ts` + - declaration exports, default exports, named export clause, re-export clause, wildcard re-export, namespace re-export, type export, export equals +- `tsx_component.tsx` + - JSX in a function component and exported component; verify parser accepts JSX but does not materialize JSX records +- `ts_scopes.ts` + - imports inside top-level function/class body plus top-level exports; verify minimal scope owners + +### Existing Tests To Mine For Source Cases + +- Python import cases: `tests/unit/sdk/python/import_resolution/` +- Python globals: 
`tests/unit/sdk/python/global_var/` +- TypeScript import cases: `tests/unit/sdk/typescript/file/test_file_import_statemets.py`, `tests/unit/sdk/typescript/import_resolution/` +- TypeScript export cases: `tests/unit/sdk/typescript/file/test_file_export_statements.py`, `tests/unit/sdk/typescript/export/` +- TypeScript globals and arrow functions: `tests/unit/sdk/typescript/global_var/`, `tests/unit/sdk/typescript/function/test_function_arrow.py` + +## Proposed First Vertical Slice + +1. Add Rust parser crate module boundaries and tree-sitter setup. + - `parser::language` maps paths to parser grammar and `Language`. + - `parser::parse_file(path, bytes)` returns parse status and root range. +2. Add arena records and interners. + - `Index` owns files, symbols, imports, exports, scopes, strings, paths. + - JSON debug dump exposes stable, string-expanded snapshots. +3. Implement file discovery input from Python. + - Python passes `(relative_path, absolute_path, language, content bytes/hash)` or a repo-operator file list. + - Rust does not walk the filesystem independently in the first slice. +4. Implement Python extraction. + - File records, top-level class/function/global symbols, imports, ranges, file/top-level symbol scopes. + - Snapshot `py_symbols_basic.py`, `py_relative_imports.py`, and `py_scopes.py`. +5. Implement TypeScript/TSX extraction. + - File records, top-level declaration/global/function symbols, static imports, direct export facts, ranges, scopes. + - Snapshot `ts_symbols_basic.ts`, `ts_imports.ts`, `ts_exports.ts`, and `tsx_component.tsx`. +6. Add dynamic import/require subset. + - Snapshot `ts_dynamic_imports.js`. +7. Expose PyO3 debug/query APIs. + - `files() -> Vec<FileId>` + - `symbols(file_id?) -> Vec<SymbolId>` + - `classes()`, `functions()`, `imports()`, `exports()` + - record lookup APIs returning compact structs or JSON for tests +8. Add parity smoke tests against the Python backend counts/names for the fixture set.
+ - Compare file paths, symbol names/kinds, import local names/kinds/modules, export names/kinds/modules. + - Do not compare dependency edges or wrapper behavior in this phase. + +## Acceptance For The First Parser/Index Slice + +- Building the Rust index for fixture repos does not instantiate Python `SourceFile`, `Symbol`, `Import`, `Export`, `CodeBlock`, `Statement`, or expression objects. +- Snapshot debug output is deterministic across runs. +- Python and Rust backends agree on top-level file/symbol/import/export counts and names for the selected fixtures. +- Unsupported syntax is represented as an omitted record plus parse warning/debug gap, not as a placeholder Python object. +- All records have byte ranges and point ranges sufficient to reconstruct source substrings from file bytes. diff --git a/rust-rewrite/python-compat.md b/rust-rewrite/python-compat.md new file mode 100644 index 000000000..cceed750f --- /dev/null +++ b/rust-rewrite/python-compat.md @@ -0,0 +1,296 @@ +# Python/PyO3 Compatibility Plan + +## Objective + +Preserve the current Python shell and codemod API while allowing a Rust engine to own canonical graph storage. The compatibility layer must not recreate today's full Python object graph when the Rust backend is selected. Python objects should be lightweight handles over Rust IDs and should only be created for files, symbols, imports, exports, or usages that user code actually accesses. + +## Current Python Shape + +Key findings from the current code: + +- `Codebase` is the user facade. Public list properties such as `files`, `symbols`, `classes`, `functions`, `imports`, and `exports` mostly call `CodebaseContext.get_nodes(...)`, then sort/filter Python objects. +- `CodebaseContext` owns a `rustworkx.PyDiGraph` whose node payloads are Python objects. It also owns `filepath_idx`, directory state, parser/config/dependency managers, transaction state, and import/export/dependency graph mutation helpers. 
+- `SourceFile.__init__` immediately adds itself to the graph, parses the tree-sitter root, fills `file._nodes`, and registers the file path. +- `Importable.__init__` adds most child nodes to the graph and appends each child to `file._nodes`. +- `Editable` assumes a persistent `tree_sitter.Node`, `ctx`, `parent`, and `file_node_id`. Many inherited methods rely on `ts_node`, `parent`, and a populated Python graph. +- The compiled setup is Cython-based today under `graph_sitter.compiled`; wheel builds use a Hatch Cython hook. `cibuildwheel` already installs Rust toolchains, but no Rust extension build hook is active. + +These constructors make "subclass the current objects and call `super().__init__`" the wrong default for Rust-backed objects. The Rust path needs separate lazy handle initialization that bypasses eager graph insertion and only materializes the Python tree on explicit fallback. + +## Backend Flag Shape + +Add a first-class graph backend setting to `CodebaseConfig` without changing the default behavior: + +```python +class GraphBackend(StrEnum): + PYTHON = "python" + RUST = "rust" + AUTO = "auto" + + +class RustFallbackMode(StrEnum): + PYTHON = "python" + ERROR = "error" + + +class CodebaseConfig(BaseConfig): + graph_backend: GraphBackend = GraphBackend.PYTHON + rust_fallback: RustFallbackMode = RustFallbackMode.PYTHON +``` + +Environment variables follow the existing `BaseConfig` prefix behavior: + +- `CODEBASE_GRAPH_BACKEND=python|rust|auto` +- `CODEBASE_RUST_FALLBACK=python|error` + +Selection policy: + +- `python`: always use the current `PyDiGraph` backend. +- `rust`: require the PyO3 extension and supported language/config. If unavailable or unsupported, obey `rust_fallback`. +- `auto`: try Rust only when the language and config are known supported; otherwise use Python without warning unless debug logging is enabled. + +`use_pink` should remain separate from `graph_backend`. 
Pink currently acts as an alternate file listing/file IO path for some modes, not as the graph engine. Initial Rust graph work should reject or fall back when `use_pink == PinkMode.ALL_FILES`, because `Codebase.files` is already delegated to `codegen_sdk_pink` in that mode. + +## Backend Facade + +Introduce a narrow internal facade owned by `CodebaseContext`: + +```python +class GraphBackendFacade(Protocol): + kind: Literal["python", "rust"] + generation: int + + def build(self, repo_operator: RepoOperator) -> None: ... + def apply_diffs(self, diff_list: list[DiffLite]) -> None: ... + + def get_file(self, file_path: os.PathLike, *, ignore_case: bool = False) -> SourceFile | None: ... + def get_node(self, node_id: int) -> Importable: ... + def get_nodes(self, node_type: NodeType | None = None, exclude_type: NodeType | None = None) -> list[Importable]: ... + + def successors(self, node_id: int, *, edge_type: EdgeType | None = None, sort: bool = True) -> Sequence[Importable]: ... + def predecessors(self, node_id: int, edge_type: EdgeType | None = None) -> Sequence[Importable]: ... + def in_edges(self, node_id: int) -> list[EdgeRecord]: ... + def out_edges(self, node_id: int) -> list[EdgeRecord]: ... +``` + +Implementation split: + +- `PythonGraphBackend` wraps the existing `CodebaseContext` graph fields and behavior. This is a mechanical extraction target and keeps default behavior identical. +- `RustGraphBackend` wraps a PyO3 `Engine` object and exposes the same query surface by converting Rust IDs into lazy Python handles. + +Migration order: + +1. Add the config flag and facade with `PythonGraphBackend` only. +2. Add PyO3 import smoke test and `RustGraphBackend.engine_version()`. +3. Route only read/list APIs through the facade. +4. Add Rust-backed query methods one family at a time. +5. Keep graph mutation and transaction-heavy APIs on Python or explicit fallback until Rust patch intents exist. 
+ +## PyO3 Surface + +Expose a private extension module, for example `graph_sitter._rust`, with one main PyO3 class: + +```python +class Engine: + @staticmethod + def version() -> str: ... + + def build(input: BuildInput) -> BuildReport: ... + def apply_diffs(diffs: list[DiffRecord]) -> InvalidationReport: ... + + def files() -> list[int]: ... + def symbols(kind: SymbolKind | None = None, top_level_only: bool = True) -> list[int]: ... + def imports() -> list[int]: ... + def exports() -> list[int]: ... + + def file_record(id: int) -> FileRecord: ... + def symbol_record(id: int) -> SymbolRecord: ... + def import_record(id: int) -> ImportRecord: ... + def export_record(id: int) -> ExportRecord: ... + + def successors(object_ref: ObjectRef, edge_type: EdgeType | None) -> list[ObjectRef]: ... + def predecessors(object_ref: ObjectRef, edge_type: EdgeType | None) -> list[ObjectRef]: ... + def source_slice(file_id: int, start_byte: int, end_byte: int) -> str: ... +``` + +Current implemented bridge status: + +- `crates/graph-sitter-py` builds a PyO3 module named `graph_sitter_py` behind the `extension-module` feature. +- `Engine.index_python_path(repo_path)` and module-level `index_python_path(repo_path)` return a compact `PythonIndex` for Python files. +- `Engine.index_python_paths(repo_path, file_paths)` and module-level `index_python_paths(repo_path, file_paths)` index an explicit Python file list. The Python shell integration uses this path so Rust sees the same `RepoOperator.iter_files(...)` selection as the current Python backend. +- `PythonIndex.summary()` returns `IndexSummary` with file, symbol, class, function, global-variable, import, import-resolution, reference, dependency, byte, line, and error counts. +- `PythonIndex.to_json()` serializes the compact Rust records for debug and benchmark use. 
+- `PythonIndex.files_json()`, `symbols_json()`, `imports_json()`, and `import_resolutions_json()` expose each record family without forcing callers to deserialize the full index payload. File records include repo-relative path, module name, extension-derived language, stable raw-byte content hash, byte/line counts, parse-error status, and root range. +- `PythonIndex.file_ids()`, `symbol_ids()`, `top_level_symbol_ids()`, `class_ids()`, `function_ids()`, `global_variable_ids()`, and `import_ids()` expose direct compact ID queries without record JSON deserialization. +- `TypeScriptIndex` exposes the same ID queries plus `interface_ids()`, `type_ids()`, `enum_ids()`, `namespace_ids()`, and `export_ids()`. +- `PythonIndex.references_json()` exposes compact symbol reference records. +- `PythonIndex.dependencies_json()` exposes compact dependency edge records. +- `TypeScriptIndex.function_calls_json()`, `function_calls_for_file_json(file_id)`, `function_calls_for_symbol_json(symbol_id)`, and `function_call_by_id_json(call_id)` expose compact read-only call-expression records without requiring full index JSON deserialization. +- `TypeScriptIndex.promise_chains_json()`, `promise_chains_for_file_json(file_id)`, `promise_chains_for_symbol_json(symbol_id)`, and `promise_chain_by_id_json(chain_id)` expose compact read-only Promise-chain records without requiring full index JSON deserialization. +- `PythonIndex.debug_graph_json()` and `TypeScriptIndex.debug_graph_json()` expose normalized graph-debug payloads with compact node IDs and relation metadata for parity diagnostics without materializing the Python graph. `RustIndexBackend.debug_graph_json()` and `Codebase.rust_debug_graph_json` forward the same payload through the Python shell. +- `RustIndexBackend.files`, `.symbols`, `.imports`, `.import_resolutions`, `.exports`, `.references`, and `.dependencies` parse those record-family payloads into typed Python dataclasses for shell/debug/golden-test use. 
+- Rust currently emits compact `ImportResolutionRecord` rows for indexed internal Python modules: direct `import pkg.mod`, absolute `from pkg.mod import Symbol`, and relative `from .mod import Symbol` forms. Target symbols now include top-level classes, functions, simple top-level globals, direct package re-exports such as `from pkg import Symbol`, and named imports through wildcard-backed package files when the wildcard chain stays inside indexed internal modules. +- Rust currently emits compact `ReferenceRecord` rows for same-file and imported top-level symbol references inside Python symbols. Nested class/function records are used as source symbols when an identifier appears inside a method or nested function. Parameters, lambda parameters, local assignment targets, local imports, `for` targets, `with ... as ...` targets, `except ... as ...` targets, comprehension targets, match-pattern captures, nested definitions, and `nonlocal` declarations shadow imported/top-level names in this pass. Comprehension targets are scoped to their comprehension expression so they do not hide later uses in the enclosing function. `global` declarations are honored so declared names continue to resolve to module-level symbols/imports. Imported module member references such as `module.some_func`, `alias.SomeClass`, `pkg.module.some_func`, and namespace-style nested module chains such as `from a import b; b.c.d()` resolve when the qualifier points to an indexed internal Python module. Bare names imported through wildcard import chains resolve when the chain exposes an indexed internal symbol; static literal `__all__` assignments restrict wildcard visibility. Other attribute field names are not treated as bare references; the object side of an attribute expression is still scanned. Full lexical scoping and full attribute/type resolution remain future work. 
+- Rust currently emits compact `DependencyRecord` rows by de-duplicating reference records into source-symbol to target-symbol edges with contributing reference IDs. Python coverage is broader than TypeScript; full lexical/reference coverage, external modules, and TypeScript type/interface edge parity remain future work. +- `CodebaseConfig(graph_backend="rust" | "auto")` builds a `CodebaseContext.rust_index` compact index when the extension is available and the codebase is Python or TypeScript. +- `CodebaseConfig(graph_backend="rust")` now keeps the eager Python graph unbuilt when the compact index succeeds. Raw Python graph APIs such as `CodebaseContext.nodes` remain blocked in that mode. +- `Codebase.rust_index_summary`, `.rust_files`, `.rust_symbols`, `.rust_classes`, `.rust_functions`, `.rust_global_vars`, `.rust_imports`, `.rust_import_resolutions`, `.rust_exports`, `.rust_references`, and `.rust_dependencies` expose the attached compact records for shell smoke checks and golden tests. +- `Codebase.files`, `.symbols`, `.classes`, `.functions`, `.global_vars`, `.interfaces`, `.types`, `.imports`, `.exports` for TypeScript, `get_file(...)`, `get_symbol(...)`, `get_class(...)`, and `get_function(...)` now return lightweight compact handles in strict Rust mode for Python and TypeScript codebases where the underlying compact records exist. 
+- Compact file handles expose basic identity/content, `import_module_name` / `get_import_string`, file-local top-level `symbols`, `classes`, `functions`, `global_vars`, dependency-backed `symbols_sorted_topologically`, `get_nodes`, `descendant_symbols`, `find_by_byte_range`, local/imported name maps through `valid_symbol_names` / `valid_import_names`, `resolve_name`, `resolve_attribute`, `get_node_by_name`, `imports`, `import_statements`, import lookup helpers, TypeScript `exports`, `export_statements`, `get_export`, named/default export filters, read-only `function_calls`, read-only TypeScript `promise_chains`, and import-resolution-backed `inbound_imports` / `importers`; `file.symbols(nested=True)` exposes nested compact records. Compact symbol handles expose basic identity/source, read-only `get_name` handles, nested `full_name`, Python `is_exported`, `get_import_string`, parent/child hierarchy queries through `parent_symbol`, `child_symbols`, and `descendant_symbols`, plus direct `dependencies`, `usages`, `symbol_usages`, read-only `function_calls`, and read-only TypeScript `promise_chains` backed by compact Rust dependency/reference/call/chain records. Compact import handles expose basic identity/source, read-only `get_name` handles, `descendant_symbols`, implemented import-resolution targets, module-import `imported_exports`, file-target `resolve_attribute`, `get_import_string`, direct `usages` / `symbol_usages` backed by compact references grouped by `import_id`, and TypeScript `is_type_import`, `is_default_import`, `is_namespace_import`, `namespace`, and `namespace_imports` predicates. Compact TypeScript export handles expose basic identity/source, read-only `get_name` handles, declared/exported/resolved symbol links, named/default/wildcard/module/re-export flags, alias checks, import-string helpers, and descendant traversal. Edit-heavy graph mutation methods are still unsupported until the full lazy engine facade exists. 
+- This surface is a bridge for the compact-index vertical slice. It is not yet the final lazy `CodebaseContext` backend facade and it does not yet provide full P0 `SourceFile`, `Symbol`, or `Import` parity. + +Rust can keep typed IDs internally. Python needs a compatibility `node_id: int`, so `RustGraphBackend` should maintain a per-context mapping between Python node IDs and typed Rust refs: + +- `python_node_id -> ObjectRef(kind, rust_id)` +- `ObjectRef(kind, rust_id) -> python_node_id` + +This preserves current APIs that pass `node_id` back to `ctx.get_node(...)` while avoiding assumptions that Rust IDs are globally interchangeable with today's `PyDiGraph` IDs. + +## Lazy Handle Classes + +Use a handle mixin plus concrete public-class subclasses to preserve `isinstance` behavior where practical: + +```python +class RustHandleMixin: + _ctx: CodebaseContext + _backend: RustGraphBackend + _ref: ObjectRef + _node_id: int + _generation: int + _record_cache: object | None + _materialized: Importable | None + + @property + def node_id(self) -> int: ... + def _record(self): ... + def _ensure_current(self) -> None: ... + def _materialize(self, reason: str) -> Importable: ... +``` + +Concrete handle classes: + +- `RustSourceFile(RustHandleMixin, SourceFile)` +- `RustPyFile(RustSourceFile, PyFile)` +- `RustTSFile(RustSourceFile, TSFile)` +- `RustSymbol(RustHandleMixin, Symbol)` +- `RustPySymbol`, `RustTSSymbol`, plus class/function/interface/type/global-var variants as needed for user-visible type checks +- `RustImport(RustHandleMixin, Import)` +- `RustPyImport`, `RustTSImport` +- `RustExport(RustHandleMixin, Export)`, TypeScript only at first + +These classes must not call the eager base constructors. 
Construction happens through a factory: + +```python +handle = backend.handle_for(ObjectRef(kind="symbol", id=42)) +``` + +The factory should use a `WeakValueDictionary` keyed by `(generation, kind, rust_id)` so repeated access returns the same handle object for as long as that handle is still alive, without pinning every graph node in memory. + +Field-backed P0 properties should read from Rust records and avoid materialization: + +- common: `node_id`, `node_type`, `filepath`, `file_path`, `path`, `name`, `source`, `start_byte`, `end_byte`, `start_point`, `end_point`, `range` +- files: `content`, `content_bytes`, `extension`, `imports`, `symbols`, TypeScript `exports` +- symbols: `symbol_type`, `full_name`, `is_top_level`, `file`, `parent_symbol` when Rust has parent IDs +- imports: `module`, `symbol_name`, `alias`, `import_type`, `from_file`, `to_file`, `imported_symbol`, `resolved_symbol` +- exports: `name`, `exported_name`, `exported_symbol`, `resolved_symbol`, `is_named_export`, `is_module_export` + +Properties that need `ts_node`, `code_block`, arbitrary parent traversal, formatting-specific edit behavior, or Python-only resolver details should call `_materialize(...)` or raise in strict mode. + +## Lazy Object Lifecycle + +1. `Codebase` construction creates `CodebaseContext`. +2. `CodebaseContext` resolves the backend from config. +3. Python backend follows the existing eager graph path. +4. Rust backend builds Rust indexes and records, but no Python `SourceFile`, `Symbol`, `Import`, or `Export` objects are created during build. +5. Public list queries ask the engine for sorted IDs and wrap only those returned IDs in handles. +6. Handle metadata is loaded on first property access and cached per handle. +7. Nested queries are also ID-based. For example, `file.symbols` asks Rust for symbol IDs in that file and wraps only those IDs. +8. A handle records the backend generation.
After `apply_diffs`, handles either rebind through stable IDs or become outdated and follow the existing stale-node semantics. +9. If user code requests unsupported Python behavior, the handle uses the fallback policy below. + +Avoiding full materialization: + +- Do not keep `file._nodes` for Rust-backed files. Expose `get_nodes(...)` by querying Rust for IDs. +- Do not create persistent Python `tree_sitter.Node` wrappers for every record. Use ranges and source slices. +- Do not back Rust handles with `PyDiGraph` node payloads. If a compatibility `node_id` is needed, it is a facade ID, not a graph index. +- Do not call `sort_editables` on a hidden eager graph. Either the engine returns stable sorted IDs, or handles expose the small set of sort fields needed by existing callers. + +## Fallback Policy + +Fallback has two levels. + +Cold fallback: + +- Used when the Rust extension is missing, the language/config is unsupported, the engine build fails, or `use_pink == PinkMode.ALL_FILES`. +- If `rust_fallback == "python"` or `graph_backend == "auto"`, log the reason and build the current Python backend. +- If `rust_fallback == "error"` and `graph_backend == "rust"`, raise a `RustBackendUnavailableError` with the exact unsupported feature or import/build failure. + +Method fallback: + +- Current runtime state: in strict mode, unsupported compact-handle methods raise `RustBackendUnsupportedError` with the method, handle type, reason when available, and guidance to use `CodebaseConfig(graph_backend="python")`. +- Non-strict compact-handle fallback can promote the context to the Python graph backend, clear cached proxy results, and delegate the method to the matching Python object. The first implemented method fallback is `RustCompactFile.replace(is_regex=True)`. +- Read-only, file-local unsupported behavior can materialize one file through the current parser, locate the matching Python object by `(kind, range, name)`, and delegate the method.
+- Graph-wide unsupported behavior, dependency recomputation, and resolver operations that require a populated `PyDiGraph` should promote the whole context to the Python backend unless strict mode is enabled. +- Mutations should initially prefer Python promotion. Direct Rust-handle range edits can come later as patch intents, but structural helpers such as `move_to_file`, `add_import`, `remove_unused_exports`, or usage-based `rename` need Python graph semantics until Rust owns those flows. +- On any promotion, clear Rust handle caches, increment context generation, and make old handles outdated rather than half-valid. + +Strict behavior: + +- Unsupported method access raises `RustBackendUnsupportedError(method=..., handle=..., reason=...)`; `rust_fallback == "error"` additionally makes cold fallback failures raise instead of building the Python backend. +- Tests should run some parity slices in strict mode to catch accidental Python promotion. + +## Packaging Impact + +Current packaging state: + +- `hatch.toml` uses a Hatch Cython hook to compile selected `graph_sitter.compiled` modules. +- `pyproject.toml` uses `hatchling.build`. +- `cibuildwheel` already installs Rust on Linux and macOS, but no PyO3 build hook is configured. + +Recommended packaging path: + +- Add a Rust workspace with `graph_sitter_engine` and `graph_sitter_py`. +- Publish the PyO3 module as `graph_sitter._rust` so the public package namespace stays stable. +- Keep the extension optional at import time. Default `graph_backend="python"` must work without the Rust binary. +- Use a Hatch-compatible Rust build hook or a small custom Hatch hook that invokes `maturin` for the PyO3 crate and adds the built extension to wheel artifacts. +- Add `maturin` or the selected hook to `build-system.requires` and build hook dependencies when implementation starts. 
+- Ensure `sdist` includes `Cargo.toml`, `Cargo.lock` if policy chooses locked builds, crate sources, and any tree-sitter grammar inputs required by Rust. +- Keep Cython modules in place. The Rust handle layer can still import `graph_sitter.compiled.sort`, `autocommit`, and `utils` for the Python backend and fallback paths. +- Start with CPython-version-specific wheels rather than `abi3` unless PyO3 and tree-sitter dependencies are confirmed compatible with `abi3`. +- Add a CI smoke job that imports `graph_sitter._rust`, checks `Engine.version()`, and builds a minimal Python fixture with `CODEBASE_GRAPH_BACKEND=rust`. + +## Initial Tests + +Config and selection: + +- `CodebaseConfig().graph_backend == "python"` keeps current behavior. +- `CODEBASE_GRAPH_BACKEND=rust` selects Rust when the extension is importable. +- `graph_backend="auto"` falls back to Python for unsupported languages/config without changing user-facing `Codebase` construction. +- `graph_backend="rust", rust_fallback="error"` raises on missing extension or unsupported feature. + +Facade parity: + +- Existing small Python fixtures: compare `files`, `symbols`, `classes`, `functions`, and `imports` names, paths, ranges, and sort order between Python and Rust backends. +- Existing small TypeScript fixtures: compare `files`, `symbols`, `classes`, `functions`, `imports`, and `exports` names, paths, ranges, and sort order. +- `get_file`, `has_file`, `get_symbol`, `get_class`, and `get_function` return compatible results. + +Lazy behavior: + +- Rust backend construction does not call eager `SourceFile.__init__`, `Symbol.__init__`, `Import.__init__`, or `Export.__init__`. +- `codebase.files` creates handles only for returned files and does not populate `ctx._graph` with Python file payloads. +- `codebase.symbols` creates top-level symbol handles only, not every parsed AST node. +- `file.symbols`, `file.imports`, and TypeScript `file.exports` only wrap IDs for that file. 
+- Handle properties `name`, `filepath`, `source`, `start_byte`, and `end_byte` do not materialize Python tree-sitter nodes. + +Fallback: + +- Accessing an unsupported file-local method can promote the context to the Python graph backend in non-strict fallback mode. Current runtime coverage includes regex file replacement. +- Accessing an unsupported graph-wide mutation promotes the context to the Python graph backend in non-strict fallback mode. +- The same unsupported accesses raise `RustBackendUnsupportedError` in strict mode. +- Old handles become outdated after promotion or `apply_diffs`. + +Packaging: + +- The wheel build includes both existing Cython extensions and `graph_sitter._rust`. +- Importing `graph_sitter` with `graph_backend="python"` succeeds even if `graph_sitter._rust` is absent. +- Importing `graph_sitter._rust` succeeds in CI wheels for supported Python versions and platforms. diff --git a/rust-rewrite/resolution-algorithms.md b/rust-rewrite/resolution-algorithms.md new file mode 100644 index 000000000..e4829c873 --- /dev/null +++ b/rust-rewrite/resolution-algorithms.md @@ -0,0 +1,316 @@ +# Resolution And Dependency Algorithm Inventory + +## Scope + +This inventory maps the current Python implementation that needs parity in the Rust rewrite.
It focuses on: + +- Import resolution: `src/graph_sitter/core/import_resolution.py`, `src/graph_sitter/python/import_resolution.py`, `src/graph_sitter/typescript/import_resolution.py`, `src/graph_sitter/typescript/ts_config.py` +- Export resolution: `src/graph_sitter/core/export.py`, `src/graph_sitter/typescript/export.py`, `src/graph_sitter/core/statements/export_statement.py`, `src/graph_sitter/typescript/file.py` +- Name/scope and type-frame resolution: `src/graph_sitter/core/file.py`, `src/graph_sitter/core/function.py`, `src/graph_sitter/core/expressions/name.py`, `src/graph_sitter/core/expressions/chained_attribute.py`, `src/graph_sitter/compiled/resolution.pyx` +- Usage metadata and dependency edges: `src/graph_sitter/core/dataclasses/usage.py`, `src/graph_sitter/core/interfaces/importable.py`, `src/graph_sitter/core/interfaces/usable.py` +- Subclass/interface dependencies: `src/graph_sitter/core/interfaces/inherits.py`, `src/graph_sitter/core/symbol_groups/parents.py`, `src/graph_sitter/core/class_definition.py`, `src/graph_sitter/core/interface.py` +- Incremental recomputation: `src/graph_sitter/codebase/codebase_context.py`, especially `_process_diff_files` and `_compute_dependencies` + +## Current Graph Model + +The Python backend stores graph nodes as live Python objects in `rustworkx.PyDiGraph`. 
The resolver/dependency graph uses: + +| Concept | Current node/record | Important fields | +| --- | --- | --- | +| File | `SourceFile` | `node_id`, `filepath`, `_nodes`, `code_block`, `valid_symbol_names`, `valid_import_names` | +| Symbol | `Symbol` subclasses | `name`, `full_name`, `symbol_type`, `parent_symbol`, code ranges, nested `code_block` | +| Import | `Import` subclasses | `module`, `symbol_name`, `alias`, `import_type`, `_unique_node`, `to_file_id` | +| Export | `TSExport` | `name`, `exported_name`, `_declared_symbol`, `_exported_symbol`, `_value_node` | +| External module | `ExternalModule` | module/source name, originating import | +| Usage | `Usage` dataclass | `match`, `usage_symbol`, `imported_by`, `usage_type`, `kind` | + +Graph edges: + +| Edge kind | Direction | Meaning | +| --- | --- | --- | +| `IMPORT_SYMBOL_RESOLUTION` | import -> symbol/file/external | Import path/specifier resolution | +| `EXPORT` | export -> symbol/import/file | Export target resolution | +| `SUBCLASS` | class/interface -> class/interface/external | Resolved inheritance/implements relation | +| `SYMBOL_USAGE` | usage owner -> target | Dependency edge with `Usage` metadata | + +`UsageType` is an `IntFlag` with `DIRECT`, `CHAINED`, `INDIRECT`, and `ALIASED`. `UsageKind` records where the reference came from: subclass, typed parameter, type annotation, body, decorator, return type, type definition, exported symbol, wildcard export, generic, imported, wildcard import, or default value. + +## Build And Recomputation Pipeline + +`CodebaseContext._process_diff_files` is the orchestrator: + +1. Clear caches unless this is an incremental add-only update. +2. Start and wait for dependency manager/language engine if configured. +3. Normalize missing `ADD`/`REPARSE` paths into `DELETE`. +4. For deleted files, remove internal edges, unparse nodes, remove graph nodes, and collect predecessor nodes of removed nodes into `to_resolve`. +5. 
For reparsed files, remove internal edges, unparse children, reparse the same file node from disk, and enqueue the file plus all new nodes. +6. For added files, parse and enqueue the file plus all new nodes. +7. Rebuild directory tree and TypeScript configs. +8. For every import in `to_resolve`, remove old import-resolution edges, add new ones, and append `node.symbol_usages` to `to_resolve`. +9. For every export in `to_resolve`, remove old export edges, compute export edges, and append `node.symbol_usages` to `to_resolve`. +10. For every inherited symbol in `to_resolve`, remove old subclass edges and compute superclass dependencies. +11. Run `_compute_dependencies(to_resolve, incremental)`. + +`_compute_dependencies` is a fixed-point queue over Python objects. Each node recomputes outgoing `SYMBOL_USAGE` edges. In incremental mode, `Importable.recompute` removes old usage edges, calls `_compute_dependencies`, and returns `descendant_symbols + file.get_nodes(sort=False)`. In non-incremental mode, each fixed-point round appends every graph node not yet seen. This is correct enough for the object model, but it fans out far beyond the semantic delta. + +## Import Resolution Algorithms + +Current Rust implementation status: + +- The compact Rust Python index now builds `ImportResolutionRecord` rows for internal imports whose targets are in the selected Python file set. +- Covered forms: `import pkg.mod`, `from pkg.mod import Symbol`, `from .mod import Symbol`, and `from . import mod` when the target file or top-level symbol exists in the compact index. +- External imports intentionally remain unresolved rather than materializing external module records. +- Full parity remains open for configured `import_resolution_paths`, `py_resolve_syspath`, wildcard import expansion, package `valid_import_names`, fallback `src`/`test` roots, and every TypeScript import/export rule. 
+ +### Shared Import Flow + +`Import.add_symbol_resolution_edge` calls the language-specific `resolve_import`: + +- If it returns `None`, the import is unresolved internally and gets an `ExternalModule` target keyed by module/source. +- If it returns `symbol`, add `IMPORT_SYMBOL_RESOLUTION` import -> symbol unless it is a self-loop. +- If it returns `imports_file=True`, add `IMPORT_SYMBOL_RESOLUTION` import -> source file. +- `imported_symbol` follows a direct import-resolution edge and, for exports, follows `EXPORT` edges until a non-export target. +- `resolved_symbol` follows chains of imports and stops on cycles. +- `names` yields one binding for normal imports, expands wildcard imports through the resolved file's `valid_import_names`, and invalidates importer files when wildcard expansion changes. + +### Python + +`PyImport.resolve_import` resolves from `module`, `symbol_name`, `alias`, and `ImportType`: + +1. Pick `base_path` from the first project or an explicit retry. +2. Convert relative dot imports to absolute dotted paths based on the current file directory. +3. For module and wildcard imports, try `base_path/module/path.py`. +4. For named imports, first try `base_path/module/path/symbol.py` to support importing a submodule as the symbol. +5. Try configured `import_resolution_paths` and optionally `sys.path` before the default graph lookup. +6. Try direct file paths, then package `__init__.py`. +7. For `module.py` or `module/__init__.py`, look up `symbol_name` through `get_node_by_name`. +8. If a symbol is missing but a wildcard import chain can provide it, return the file as `imports_file=True`. +9. If unresolved from repo root, retry with `src`, then `test` if those directories exist. +10. Otherwise return `None` and let the shared layer create/reuse an external module node. + +Python `valid_import_names` extends the base file map for `__init__.py`: child files in the package directory are importable by file stem. 
+ +### TypeScript And JavaScript + +`TSImport` parses static imports, re-export imports, side-effect imports, namespace imports, CommonJS `require`, and dynamic `import()` forms into the same import record shape. + +`TSImport.resolve_import`: + +1. Strip quotes from the import source. +2. Translate aliases through the nearest `TSConfig` if available. +3. Mark relative imports, prepend the project base path for non-prefixed sources, and normalize relative paths against the importing file directory. +4. If the path has no extension and an index file exists, prefer `index.ts`, `index.js`, `index.tsx`, then `index.jsx`. +5. Try both the import source and its extensionless stem with extensions: empty, `.ts`, `.d.ts`, `.tsx`, `.d.tsx`, `.js`, `.jsx`. +6. If the target file exists and the import is module-like (`MODULE`, `WILDCARD`, `DEFAULT_EXPORT`, or non-type `SIDE_EFFECT`), resolve to the file. +7. For named imports, resolve to `file.get_export(symbol_name)`. If the export is missing, return the file as `imports_file=True` so module re-export search can resolve later. +8. If no file matches, return `None` for external module handling. + +`TSImport.resolved_symbol` adds TypeScript-specific hops: + +- Default imports can collapse to the single default export's resolved symbol. +- Named imports that initially resolve to a file search module imports in that file with BFS to find re-exported named exports. +- Import chains are followed until a non-import target or a cycle. + +`TSConfig` precomputes alias maps from `extends`, `compilerOptions.baseUrl`, `paths`, `rootDirs`, `outDir`, project `references`, and explicit `import_resolution_overrides`. Alias lookup uses longest-prefix matching and has an optimization to skip non-`@`/`~` imports when all aliases use those prefixes. + +## Export Resolution Algorithms + +Only TypeScript has explicit export nodes. 
+ +`ExportStatement` parses: + +- Declaration exports: exported function, class, variable, interface, type alias, enum, namespace. +- Value exports: `export default value`, `export = value`, object literals, assignment expressions, detached expression values. +- Source re-exports: `export { x as y } from "./m"`, `export * from "./m"`, `export * as ns from "./m"`. +- Local named exports: `export { local as public }`. + +`TSExport.compute_export_dependencies` creates `EXPORT` edges: + +- If the export declared a symbol, export -> declared symbol. +- If it names an existing local/imported symbol, export -> resolved local/import node. +- If it exports a value expression that is `Chainable`, export -> each resolved value target. +- If it is a bare wildcard export, export -> current file. +- Wildcard exports invalidate import-name caches in importer files. + +`TSFile.valid_import_names` is export-centric: + +- A single default export is stored under `default`. +- Each export contributes `export.names`: explicit exported names or expanded wildcard-export names. +- TypeScript imports therefore resolve importable names through file exports, not raw file symbols. + +`TSExport.resolved_symbol` follows export/import chains until it reaches a symbol, file, or external module, while tracking cycles. `TSExport._compute_dependencies` separately records usage edges for exported symbols or exported values using `UsageKind.EXPORTED_SYMBOL`. + +## Name, Scope, And Resolution Frames + +### Lexical Lookup + +The core lookup path is recursive and object-centric: + +- `Name._resolved_types` calls `resolve_name(self.source, self.start_byte)`. +- `Editable.resolve_name` delegates to the parent scope, falling back to the file. +- `SourceFile.resolve_name` looks in `valid_symbol_names`, which combines top-level symbols keyed by full name and imports keyed by import names/wildcards. 
If a candidate starts after the usage byte, it scans previous file symbols backward for the closest visible definition. +- `Function.resolve_name` checks function parameters and descendant symbols in reverse source order before delegating to the parent scope. +- `PyFunction.resolve_name` special-cases method receivers: the first parameter and `super()` resolve to the parent class for non-static methods. +- `TSFunction.resolve_name` special-cases `this` to the parent class. +- `ForLoopStatement.resolve_name` can bind loop variables from the iterable's resolved generic frames. +- `Name.resolve_name` optionally expands conditional-block alternatives when `conditional_type_resolution` is enabled. + +### Resolution Frames + +`Chainable.resolved_type_frames` returns one or more `ResolutionStack` frames with cycle protection. Frames carry: + +- `node`: current target or intermediate node +- `parent_frame`: next target in the chain +- `direct`, `aliased`, `chained`: usage classification flags +- `generics`: generic substitutions discovered along the way + +`ResolutionStack.get_edges` emits `SYMBOL_USAGE` edges from the destination owner to every graph node in the resolution stack. This preserves current API behavior where a symbol can be used by an import/export intermediary and by the final callsite. The edge's `Usage` stores the exact match node, owner symbol, usage type, usage kind, and optional importer. + +### Chained Attributes And Calls + +`ChainedAttribute._resolved_types`: + +- Resolves full names directly from `file.valid_import_names` for module-style imports. +- Otherwise resolves the object, then asks the top target to `resolve_attribute(attribute)`. +- If the top target has no attributes, it still yields the top target as a chained dependency and may adjust dict generics for common methods. +- `_compute_dependencies` records chained usage edges and also computes dependencies for the object unless it is `self` or `this`. 
+ +`FunctionCall._resolved_types` resolves calls through the function name. Constructors resolve to their parent class. Functions with return types resolve to the return type, with generic substitution where possible. Unresolved calls still yield a frame for the call itself so dependency computation can continue. `_compute_dependencies` computes argument dependencies, generic type arguments, and then either adds usages for resolved function definitions or computes the name dependency directly. + +## Subclass And Interface Dependencies + +Classes and interfaces implement `Inherits`. + +- Python classes parse `superclasses` into a `Parents` collection. +- TypeScript classes parse `extends_clause` and `implements_clause` from `class_heritage`. +- TypeScript interfaces parse `extends_type_clause` into `parent_interfaces`. +- `Parents._compute_dependencies` records normal usage dependencies for parent type expressions and generic type arguments. +- `Parents.compute_superclass_dependencies` resolves each parent expression. If exactly one resolved target is on the graph, it adds a `SUBCLASS` edge from the class/interface to that target. Ambiguous or missing parents are logged and do not get `SUBCLASS` edges. +- `Inherits._get_superclasses` and `_get_subclasses` perform BFS over `SUBCLASS` successors/predecessors, matching the current Python MRO-like traversal. + +Parity requires both edge families: `SYMBOL_USAGE` for the inheritance expression and `SUBCLASS` for inheritance traversal APIs. + +## Where The Current Algorithm Fans Out + +The main fan-out points to avoid in Rust are: + +1. `to_resolve.extend(node.symbol_usages)` during import and export passes. A changed import/export pulls all current users of that object into the recompute queue, even if only one name or target changed. +2. `Importable.recompute(incremental=True)` returns `descendant_symbols + file.get_nodes(sort=False)`. Any changed node schedules the whole file's graph nodes plus nested descendants. +3. 
Non-incremental `_compute_dependencies` appends every graph node not yet seen on every fixed-point round. +4. Cache invalidation is coarse: `uncache_all()` and file-level `invalidate()` drop broad Python cached properties instead of specific name/export/import indexes. +5. Wildcard imports and exports invalidate importer files by object traversal, not by changed exported-name sets. +6. TypeScript re-export search uses BFS through module imports at query time, so a missing named export can repeatedly search the same module-import frontier. +7. `valid_symbol_names` and `valid_import_names` are derived from live object lists and can expand wildcard imports into many object wrappers. + +The Rust engine should compute semantic deltas first and only enqueue relations whose inputs changed. + +## Required Rust Tables And Indexes + +### Canonical Records + +| Record | Required fields | +| --- | --- | +| `FileRecord` | `FileId`, path ID, language, content hash, parser generation, tsconfig ID, root range | +| `ScopeRecord` | `ScopeId`, file ID, parent scope, owner node, kind, range, hoist behavior | +| `SymbolRecord` | `SymbolId`, file ID, scope ID, name ID, full-name ID, kind, parent symbol, declaration range, body range | +| `ImportRecord` | `ImportId`, file ID, scope ID, module specifier ID, symbol name ID, alias ID, import type, statement range, specifier range | +| `ExportRecord` | `ExportId`, file ID, export name ID, declared symbol/import ID, local exported symbol name ID, value expression ID, export kind, statement range | +| `UsageSiteRecord` | `UsageSiteId`, file ID, scope ID, owner node ID, expression node ID, name/full-name IDs, match range, usage kind | +| `ExternalModuleRecord` | `ExternalId`, module specifier ID, import name ID | +| `GraphEdge` | source ID, target ID, edge kind, optional usage ID | +| `UsageRecord` | usage site, owner node, target node, imported-by import ID, usage type, usage kind, match range | + +### Lookup Indexes + +| Index | Purpose | +| 
--- | --- | +| `path_to_file` and `module_key_to_file` | O(1) candidate file lookup for Python/TS import paths and package/index files | +| `file_to_nodes`, `file_to_imports`, `file_to_exports`, `file_to_scopes` | Fast deletion/reparse and debug dumps | +| `scope_parent`, `scope_children`, `binding_by_scope_name` | Lexical name lookup without parent object recursion | +| `binding_visibility_by_name` | Resolve nearest visible binding before a usage byte | +| `file_importable_name` | `valid_import_names` equivalent for each file | +| `wildcard_import_expansion` and `wildcard_export_expansion` | Cache expanded names with source file/export generation | +| `import_resolution` | Import -> target file/symbol/export/external and reverse target -> imports | +| `export_target` | Export -> symbol/import/file/external and reverse target -> exports | +| `usage_by_owner`, `usage_by_target`, `usage_by_match` | Dependency queries, usages API, rename callsites | +| `edge_by_source_kind`, `edge_by_target_kind` | Efficient graph deletes and parity dumps | +| `subclass_succ`, `subclass_pred` | Superclass/subclass APIs | +| `tsconfig_for_file`, `alias_prefix_to_imports` | Narrow TypeScript alias invalidation | +| `unresolved_by_name`, `external_by_key` | Revisit unresolved references only when matching names/modules appear | + +## Compact Frontier And Invalidation Rules + +### Semantic Deltas + +For each changed file, compute deltas before invalidating dependents: + +- `PathDelta`: file added/deleted/moved or extension/index/package status changed. +- `ConfigDelta`: nearest tsconfig, alias map, baseUrl, paths, or references changed. +- `ImportDelta`: import specifier/module/type/alias changed, added, or removed. +- `ExportNameDelta`: importable names added/removed/retargeted for a file. +- `BindingDelta`: lexical bindings added/removed/renamed/retargeted by `(scope, name, visibility range)`. 
+- `UsageSiteDelta`: identifier/chained-attribute/function-call sites added/removed/changed owner or range. +- `InheritanceDelta`: parent type expressions or generic args changed. + +### Work Queues + +Use separate queues instead of one object queue: + +1. `ResolveImports`: import IDs whose module candidate set or specifier fields changed. +2. `ResolveExports`: export IDs whose declared/local/import target changed, plus wildcard re-exporters of changed export names. +3. `ResolveNames`: usage sites whose lexical binding candidates changed by name/scope/range. +4. `BuildUsageEdges`: usage sites whose resolution stack changed. +5. `BuildSubclassEdges`: inheritance expressions whose resolved target changed. +6. `PropagateNameExports`: files whose `file_importable_name` set changed. + +### Frontier Rules + +- A changed import spec enqueues only that import for path resolution, then only usage sites bound to that import alias/name. +- A changed file path enqueues imports whose precomputed candidate path set includes the old or new path, plus unresolved imports with matching module suffix. +- A tsconfig change enqueues imports in files covered by that config and imports whose specifier matches changed alias prefixes. +- A file's `ExportNameDelta` enqueues imports targeting that file/name, wildcard imports from that file, and wildcard re-exporters whose expansion includes changed names. +- A `BindingDelta(scope, name)` enqueues usage sites with the same name in descendant scopes whose lookup path crosses the changed scope and whose usage byte is after the binding visibility point. +- A local symbol body change with no binding/import/export/name-set delta only enqueues usage sites inside that symbol owner. +- A parent class/interface expression change enqueues only that class/interface for `SUBCLASS` rebuild and its inheritance-expression usage edges. 
+- A target deletion enqueues reverse dependents from `usage_by_target`, `import_resolution` reverse index, `export_target` reverse index, and `subclass_pred`, but filtered by changed names where possible. + +The Rust fixed point should operate on relation generations: if a queue item recomputes to the same normalized output tuple, do not enqueue its dependents. + +## Rust Port Plan + +1. Extract compact import/export/scope/usage IR alongside the Python backend and produce debug snapshots without changing behavior. +2. Implement Python import path resolution in Rust with a candidate-path trace for parity debugging. +3. Implement TypeScript import path resolution, including tsconfig alias maps, index files, extension permutations, dynamic imports, and external module records. +4. Implement TypeScript export target resolution and file importable-name tables, including wildcard re-export expansion. +5. Implement lexical scope tables and name lookup for file, function, class, parameter, loop, `self`/`super()`, `this`, and conditional-resolution cases. +6. Implement resolution-stack edge emission so normalized `SYMBOL_USAGE` edges include intermediate import/export nodes and the current `UsageType`/`UsageKind`. +7. Implement `SUBCLASS` edge construction from parent/interface expressions and BFS query indexes for superclass/subclass APIs. +8. Add incremental relation generations and the compact work queues above. +9. Expose graph debug dumps through PyO3: nodes, imports, exports, usage sites, resolution stacks, and normalized edges. +10. Keep Python object APIs as wrappers over IDs only after graph edge parity is proven. 
+ +## Edge Parity Tests + +Add Rust-vs-Python golden snapshots using normalized tuples: + +```text +(source_kind, source_file, source_range, source_name, + edge_kind, + target_kind, target_file, target_range, target_name, + usage_type, usage_kind, match_file, match_range, match_text, imported_by_key) +``` + +Required parity categories: + +| Category | Fixtures to cover | +| --- | --- | +| Python imports | module, named, aliased, wildcard, relative dots, package `__init__.py`, custom resolve paths, `src`/`test` fallback, external modules | +| TypeScript imports | default, named, alias, namespace, side-effect, `require`, dynamic import, directory index, extension fallback, tsconfig paths/baseUrl/references, external modules | +| TypeScript exports | declaration exports, default exports, `export =`, object value exports, named local exports, named re-exports, wildcard re-exports, aliased wildcard exports, type-only exports | +| Usage types | direct same-file references, imported references, indirect re-export chains, aliased imports/exports, chained module/class/namespace references | +| Usage kinds | body, decorator, subclass, generic, type annotation, typed parameter, return type, type definition, exported symbol, imported, default value | +| Name/scope | nested functions, parameter shadowing, definitions after usage, class methods, Python `self` and `super()`, TypeScript `this`, loop variables, conditional blocks | +| Subclass/interface | Python class bases, TS `extends`, TS `implements`, interface `extends`, generic parent types, external/ambiguous parents | +| Incremental | add file, delete file, reparse no-op, rename import target, change exported name, wildcard export name delta, tsconfig alias delta | + +Existing tests already cover many behavior assertions under `tests/unit/sdk/python/import_resolution`, `tests/unit/sdk/typescript/import_resolution`, `tests/unit/sdk/typescript/export`, `tests/unit/sdk/python/class_definition/test_class_dependencies.py`, 
`tests/unit/sdk/typescript/class_definition/test_class_dependencies.py`, `tests/unit/sdk/typescript/interface/test_interface_dependencies.py`, `tests/unit/sdk/python/file/test_file_reparse.py`, and `tests/unit/sdk/python/codebase/test_codebase_reset.py`. The Rust parity layer should reuse those fixture shapes and compare graph-edge snapshots directly. diff --git a/rust-rewrite/skill-distribution-plan.md b/rust-rewrite/skill-distribution-plan.md new file mode 100644 index 000000000..b0552d477 --- /dev/null +++ b/rust-rewrite/skill-distribution-plan.md @@ -0,0 +1,146 @@ +# Graph-Sitter Skill Distribution Plan + +## Purpose + +Distribute a Codex skill that helps agents use Graph-sitter as the codebase parsing and transformation layer for large repositories. + +The skill should make the agent choose Graph-sitter when it needs to: + +- parse a Python or TypeScript/JavaScript codebase into files, symbols, imports, exports, references, dependencies, and usage relationships +- inspect a large repository without eagerly materializing a full Python object graph when the Rust backend is supported +- run deterministic transformations through Graph-sitter codemods +- verify transformation output with tests, diffs, and graph-free cache invariants + +The skill is not the public product docs. It is an agent operating guide that points to the library, CLI, docs, and validation commands. + +## Skill Name + +Recommended folder name: `graph-sitter` + +Recommended frontmatter: + +```yaml +--- +name: graph-sitter +description: Use Graph-sitter to parse, query, analyze, and transform Python, TypeScript, JavaScript, and React codebases. Trigger when an agent needs semantic codebase graphs, dependency/import/reference analysis, codemod execution, large-repo Rust-backed parsing, or `uvx graph-sitter ...` workflows for code transformations. +--- +``` + +## Initial Distribution Location + +Do not create the discoverable skill folder until the install location is decided. 
+ +Options: + +- user-local development: `${CODEX_HOME:-$HOME/.codex}/skills/graph-sitter` +- repository artifact for review: `skills/graph-sitter` +- packaged artifact for a marketplace or release bundle: generated from the repository artifact at release time + +Skill-creator guidance prefers asking before initialization. If the user wants this installed immediately, run the system `skill-creator` initialization flow rather than hand-writing the final folder. + +## Proposed Skill Contents + +Recommended minimal tree: + +```text +graph-sitter/ +├── SKILL.md +├── agents/ +│ └── openai.yaml +└── references/ + ├── cli.md + ├── rust-backend.md + └── codemods.md +``` + +Keep `SKILL.md` short and procedural. Put longer details in `references/` so agents only load what they need. + +## SKILL.md Body Outline + +The skill body should include: + +1. Check the user's goal: + - read-only graph query + - code transformation/codemod + - large-repo benchmark or parity proof + - docs/setup troubleshooting +2. Prefer the CLI for simple command-line workflows once available: + - `uvx graph-sitter parse --language auto --backend python --format json` + - `uvx graph-sitter parse --language auto --backend rust --format json` for wheels built from rust-rewrite; published-package availability still depends on release + - `uvx graph-sitter transform MODULE:OBJECT --check` + - `uvx graph-sitter transform MODULE:OBJECT --write` +3. Prefer the Python API for custom analyses: + - `from graph_sitter import Codebase` + - `Codebase(path, config=CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR))` + - fall back to the Python backend only when strict Rust mode reports an unsupported surface and compatibility matters more than memory. +4. For transformations: + - run or create codemods under `.codegen/codemods` + - inspect diffs before applying broad changes + - run the target repo's tests or focused checks after edits +5. 
For large repos: + - avoid broad APIs that intentionally materialize all records unless the user asks for them + - prefer targeted lookups (`get_file`, `get_function`, `get_import`, `find_by_byte_range`, known symbol dependency probes) + - record wall time and RSS when making performance claims + +## Reference Files + +### `references/cli.md` + +Purpose: agent-facing command reference after Lovelace finalizes the CLI. + +Must cover: + +- existing `gs` command status +- implemented `uvx graph-sitter ...` command surface +- parse/index command examples +- transformation command examples +- JSON output contract and exit-code expectations +- backend flags and fallback flags + +Important product direction: promote `uvx graph-sitter ...` as the primary one-shot interface for parsing a codebase and running transformations. Existing `gs` docs remain relevant as the backwards-compatible alias and for initialized workspace workflows. + +### `references/rust-backend.md` + +Purpose: explain current Rust backend status and proof commands. + +Must cover: + +- `GraphBackend.PYTHON`, `GraphBackend.RUST`, and `GraphBackend.AUTO` +- `RustFallbackMode.ERROR` versus `RustFallbackMode.PYTHON` +- strict compact mode behavior: supported APIs stay graph-free, unsupported APIs fail explicitly +- fast check: `rust-rewrite/tools/check_fast.sh` +- large-repo check: `rust-rewrite/tools/check_pinned_large_repos.sh` +- current proof repos: Apache Airflow `2.10.5` and Next.js `v15.0.0` +- caution: parity with the old backend is not absolute semantic correctness + +### `references/codemods.md` + +Purpose: give agents a compact codemod workflow. 
+ +Must cover: + +- `gs init`, `gs create`, and `gs run` for initialized workspace codemods +- `uvx graph-sitter transform MODULE:OBJECT --check|--write` for ad hoc import-path transforms +- `Codebase.commit(sync_graph=False)` for compact Rust-backed mutation proofs +- inspecting changed files with `git diff` +- running focused tests after transformations + +## Validation + +Before distributing the skill: + +- Run the system skill validation script against the final skill folder: + - `scripts/quick_validate.py <skill-folder>` +- Forward-test with at least two fresh agents: + - read-only task: "Use Graph-sitter to list imports and dependencies for this tiny repo." + - mutation task: "Use Graph-sitter to rename a function and update imports in this tiny repo." +- Verify the skill does not imply the Rust backend is universally correct. It should say current claims are compatibility/performance proofs over the supported subset plus selected pinned large-repo parity. + +## Release Gates + +The skill should ship only after: + +- docs/site plan confirms where public setup docs live +- `uvx graph-sitter ...` command surface is implemented for Python-backend parse, Rust-backend parse from built wheels, and import-path transforms +- at least one parse workflow and one transform workflow are documented with commands that pass locally through `uvx --from <source-checkout>` and `uvx --from dist/<wheel>.whl` +- `rust-rewrite/tools/check_fast.sh` passes on the release branch diff --git a/rust-rewrite/skill-prototype/graph-sitter/SKILL.md b/rust-rewrite/skill-prototype/graph-sitter/SKILL.md new file mode 100644 index 000000000..190e9b59c --- /dev/null +++ b/rust-rewrite/skill-prototype/graph-sitter/SKILL.md @@ -0,0 +1,92 @@ +--- +name: graph-sitter +description: Use Graph-sitter to parse, query, analyze, and transform Python, TypeScript, JavaScript, and React codebases. 
Trigger when Codex needs codebase graph APIs, dependency/import/reference/usage analysis, codemod execution, large-repo Rust-backed parsing status, or `uvx graph-sitter ...` / local `graph-sitter` CLI workflows. +--- + +# Graph-sitter + +Use Graph-sitter as the codebase graph and codemod layer. Prefer the Python API for custom analysis and transformations; use the CLI for simple parse summaries, repeatable codemod runs, and import-path transforms. + +## Load References + +- Read `references/cli.md` when using `graph-sitter`, `uvx graph-sitter`, `parse`, `run`, or `transform`. +- Read `references/codemods.md` before writing or running a codemod. +- Read `references/rust-backend.md` before making Rust backend, performance, parity, fallback, or wheel-distribution claims. + +## Choose The Path + +1. For read-only inspection, start with `Codebase` in Python unless a JSON CLI summary is enough. +2. For one-shot command-line summaries, run `graph-sitter parse` locally from the checkout with `uv run`, or use `uvx graph-sitter parse` only when the package is installed or supplied with `uvx --from`. +3. For transformations, prefer `graph-sitter transform MODULE:OBJECT PATH --check` before `--write` when the transform is not already registered under `.codegen/codemods`. +4. For large repositories, avoid broad list materialization unless the user asks for it. Prefer targeted lookups such as `get_file`, `get_function`, `get_import`, `find_by_byte_range`, dependencies, and usage probes. +5. For unsupported Graph-sitter surfaces, fall back to ordinary code inspection or the Python backend. Do not imply the Rust backend has complete semantic coverage. 
+ +## Python API + +Use the Python shell as the primary interface for custom work: + +```python +from graph_sitter import Codebase + +codebase = Codebase("/path/to/repo") +file = codebase.get_file("src/app.py") +symbol = file.get_function("main") +print([dep.name for dep in symbol.dependencies]) +``` + +For strict local Rust-backend checks, import config enums from their current module: + +```python +from graph_sitter import Codebase +from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + +codebase = Codebase( + "/path/to/repo", + config=CodebaseConfig( + graph_backend=GraphBackend.RUST, + rust_fallback=RustFallbackMode.ERROR, + ), +) +``` + +Use strict Rust mode only when the PyO3 extension is built in the local environment and the task stays inside the supported subset. The Python backend remains the default and the primary compatibility shell. + +## CLI Shortcuts + +For local source checkouts: + +```bash +uv run graph-sitter parse /path/to/repo --backend python --format json +uv run graph-sitter transform ./codemod.py:run /path/to/repo --check +``` + +For distributed or installed package flows: + +```bash +uvx graph-sitter parse /path/to/repo --backend python --format json +uvx --from dist/.whl graph-sitter parse /path/to/repo --backend rust --fallback error --format json +uvx graph-sitter transform ./codemod.py:run /path/to/repo --check +``` + +The `parse` command and `transform MODULE:OBJECT` surface are implemented locally in the rust-rewrite branch. Rust backend execution from wheels built on this branch is supported by the wheel smoke; published-package availability still depends on release. + +## Correctness And Claims + +- Say "supported Rust-backend subset" or "selected pinned large-repo parity" when summarizing Rust status. +- Do not claim absolute semantic correctness or graph-wide parity. +- Current large-repo proofs cover selected Airflow and Next.js paths, supported-subset tests, and readiness gates. 
Use the local validation tools before making stronger claims. +- When measuring performance, record backend, command, repo/ref, wall time, max RSS, and whether broad Python-side caches were materialized. + +## Validation + +For Graph-sitter work, validate the exact thing changed: + +```bash +uv run graph-sitter parse /path/to/repo --backend python --format json +uv run graph-sitter transform ./codemod.py:run /path/to/repo --check +git diff -- /path/to/repo +``` + +For Rust-backend branch validation, prefer the repository gates documented in `references/rust-backend.md`; do not use pinned large-repo checks unless the user asks or the change touches Rust backend behavior. + +When maintaining this skill artifact, run the Codex skill validator against the `graph-sitter` skill folder before distribution. diff --git a/rust-rewrite/skill-prototype/graph-sitter/agents/openai.yaml b/rust-rewrite/skill-prototype/graph-sitter/agents/openai.yaml new file mode 100644 index 000000000..dca5b6996 --- /dev/null +++ b/rust-rewrite/skill-prototype/graph-sitter/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "Graph-sitter" + short_description: "Use codebase graphs for codemods" + default_prompt: "Use $graph-sitter to analyze this repository and plan a safe code transformation." diff --git a/rust-rewrite/skill-prototype/graph-sitter/references/cli.md b/rust-rewrite/skill-prototype/graph-sitter/references/cli.md new file mode 100644 index 000000000..9e8bca15e --- /dev/null +++ b/rust-rewrite/skill-prototype/graph-sitter/references/cli.md @@ -0,0 +1,91 @@ +# CLI Reference + +Use this when the user asks for `graph-sitter`, `gs`, `uvx graph-sitter`, `parse`, `run`, or `transform` workflows. + +## Current Local Surface + +The rust-rewrite branch exposes both console scripts: + +```bash +uv run gs --help +uv run graph-sitter --help +``` + +Use `graph-sitter` for new examples and keep `gs` as the compatibility alias. 
`graph_sitter.cli.cli:main` is the public CLI entry point; do not use `graph_sitter.gscli` for the `uvx graph-sitter` surface. + +## Doctor + +Check local/package readiness before parsing or transforming: + +```bash +uv run graph-sitter doctor --json +uv run graph-sitter doctor --backend rust --language python --json +uv run graph-sitter doctor --backend rust --language typescript --json +``` + +For installed package flows, replace `uv run graph-sitter` with `uvx graph-sitter` or +`uvx --from dist/.whl graph-sitter`. `--backend python` reports Python, +package, platform, parser dependencies, and Rust extension availability without +requiring the Rust extension. `--backend rust` also runs a generated tiny-repo +strict Rust parse smoke and fails if the extension or parse path is unavailable. + +## Parse + +Local source checkout: + +```bash +uv run graph-sitter parse [PATH] --backend python --language auto --format json +uv run graph-sitter parse [PATH] --backend python --language auto --subdir src --format json +``` + +Installed/distributed package path: + +```bash +uvx graph-sitter parse [PATH] --backend python --language auto --format json +uvx --from dist/.whl graph-sitter parse [PATH] --backend rust --fallback error --format json +``` + +Supported options in this branch: + +- `--backend python|rust|auto` +- `--fallback python|error` +- `--language auto|python|typescript` +- `--format summary|json` +- `--subdir PATH` repeatable; limits parsing to repository-relative + subdirectories or files and works for both Python and Rust backend discovery. + +The command does not require `.codegen` initialization. Use `--backend python` for published-package examples until a release ships the new wheels; use `uvx --from dist/.whl ... --backend rust --fallback error` for branch-built wheel validation. 
+ +## Transform By Import Path + +Run ad hoc functions or `Codemod.execute` classes/instances without `.codegen/codemods` registration: + +```bash +uv run graph-sitter transform MODULE:OBJECT [PATH] --check +uv run graph-sitter transform ./codemod.py:run [PATH] --check +uv run graph-sitter transform ./codemod.py:MyCodemod [PATH] --write +``` + +For installed package flows, replace `uv run graph-sitter` with `uvx graph-sitter`. + +Useful options: + +- `--check`: run in a temporary copied-repo sandbox, print the diff, leave the target unchanged, and exit non-zero when changes would be produced. +- `--write`: apply changes to the target repo. Import-path `transform` requires either `--check` or `--write`; use explicit modes in instructions. +- `--arguments '{"key":"value"}'`: pass JSON to a transform with an `arguments` parameter; Pydantic models are validated when present. +- `--backend python|rust|auto`, `--fallback python|error`, `--language auto|python|typescript`. + +## Registered Codemods + +For existing `.codegen/codemods` functions: + +```bash +uv run graph-sitter run LABEL [PATH] --check +uv run graph-sitter run LABEL [PATH] --write +``` + +`gs init`, `gs create`, `gs list`, and `gs notebook` remain compatibility commands. + +## Distribution Status + +The `uvx graph-sitter ...` command direction is correct for public one-shot usage, and `parse` plus `transform MODULE:OBJECT` are implemented locally. Branch-built wheels bundle and import the PyO3 extension; published package examples should wait for release validation before promising Rust-backed `uvx`. diff --git a/rust-rewrite/skill-prototype/graph-sitter/references/codemods.md b/rust-rewrite/skill-prototype/graph-sitter/references/codemods.md new file mode 100644 index 000000000..df08233bf --- /dev/null +++ b/rust-rewrite/skill-prototype/graph-sitter/references/codemods.md @@ -0,0 +1,49 @@ +# Codemod Reference + +Use this before writing or running transformations. 
+ +## Import-Path Transform + +Create a small module with a function that accepts `codebase`: + +```python +def run(codebase): + target = codebase.get_function("old_name") + target.rename("new_name") + codebase.commit() +``` + +Check first: + +```bash +uv run graph-sitter transform ./codemod.py:run /path/to/repo --check +``` + +Apply only after reviewing the diff: + +```bash +uv run graph-sitter transform ./codemod.py:run /path/to/repo --write +``` + +The transform loader also accepts `Codemod` subclasses or instances with `execute(codebase)`. + +## Registered Workspace Codemods + +Use the existing `.codegen/codemods` flow when a repo already has registered Graph-sitter functions: + +```bash +uv run graph-sitter run LABEL /path/to/repo --check +uv run graph-sitter run LABEL /path/to/repo --write +``` + +`gs init` and `gs create LABEL` remain available for compatibility. + +## Safety Workflow + +1. Inspect `git status --short` before editing. +2. Run the codemod with `--check` and inspect the diff. +3. Use `--write` only when the diff matches intent. +4. Run focused tests, type checks, or linters for touched files. +5. Inspect `git diff` after the write. + +For strict Rust compact mutation proofs, use `codebase.commit(sync_graph=False)` when the test needs to prove the Python graph stayed unmaterialized. For ordinary user codemods, default `codebase.commit()` is fine unless the repo already uses a stricter pattern. diff --git a/rust-rewrite/skill-prototype/graph-sitter/references/rust-backend.md b/rust-rewrite/skill-prototype/graph-sitter/references/rust-backend.md new file mode 100644 index 000000000..621d364e1 --- /dev/null +++ b/rust-rewrite/skill-prototype/graph-sitter/references/rust-backend.md @@ -0,0 +1,70 @@ +# Rust Backend Reference + +Use this before making Rust backend, performance, fallback, parity, or distribution claims. 
+ +## Backend Modes + +Configure through `CodebaseConfig`: + +```python +from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + +CodebaseConfig(graph_backend=GraphBackend.PYTHON) +CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR) +CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.PYTHON) +CodebaseConfig(graph_backend=GraphBackend.AUTO) +``` + +- `PYTHON` remains the default and primary compatibility backend. +- `RUST` is opt-in compact mode. In strict mode, supported APIs stay graph-free and unsupported APIs raise `RustBackendUnsupportedError`. +- `rust_fallback=PYTHON` is a compatibility escape hatch; unsupported methods may promote to the Python graph. +- `AUTO` is reserved for gradual rollout when language coverage and packaging are stable enough. + +## Current Distribution Status + +Local Rust mode requires the PyO3 extension to be built and importable. Wheels built from the rust-rewrite branch now bundle the top-level `graph_sitter_py` module, and the distribution proof is: + +```bash +rust-rewrite/tools/check_wheel_rust_backend.sh +``` + +Use `--backend python` in published-package examples until a release ships these wheels. Use `uvx --from dist/.whl graph-sitter parse ... --backend rust --fallback error` for branch-built wheel validation. 
+ +## Supported Claims + +Use careful wording: + +- "supported Rust-backend subset" +- "selected pinned large-repo semantic parity" +- "selected Airflow and Next.js readiness proofs" +- "Python backend remains default until rollout gates and published-package validation pass" + +Avoid these claims: + +- absolute semantic correctness +- complete graph-wide parity +- Rust backend is ready as the default +- `uvx --backend rust` works from PyPI before a release ships the Rust-backed wheels + +## Local Validation Gates + +Fast branch validation: + +```bash +rust-rewrite/tools/check_fast.sh +``` + +Large-repo validation when Rust backend behavior or performance claims change: + +```bash +rust-rewrite/tools/check_pinned_large_repos.sh +``` + +Targeted proof tools include: + +- `rust-rewrite/tools/check_supported_subset.py` +- `rust-rewrite/tools/check_p0_parity_coverage.py` +- `rust-rewrite/tools/check_python_rust_parity_fixture.py` +- `rust-rewrite/tools/check_pinned_semantic_parity.py` + +Only run the large pinned checks when needed; they are heavier than the skill prototype validation. diff --git a/rust-rewrite/strategy.md b/rust-rewrite/strategy.md new file mode 100644 index 000000000..fc4646785 --- /dev/null +++ b/rust-rewrite/strategy.md @@ -0,0 +1,612 @@ +# Rust Rewrite Strategy + +## Goal + +Replace the memory-heavy Python object graph with a compact Rust engine while preserving the current Python-facing API. The Python shell should remain the user and codemod interface; Rust should own parsing, indexing, symbol/import/export resolution, dependency graph storage, and eventually incremental invalidation. + +The main problem to solve is not just CPU time. The current architecture eagerly materializes the codebase as many Python objects, keeps tree-sitter nodes and parent/context/file links on those objects, stores the same objects in `rustworkx.PyDiGraph`, and maintains additional per-file node lists and range indexes. 
On very large repos this can inflate into tens of GB of resident memory. + +## Strategy + +Build a Rust core behind the existing Python API: + +1. Keep `Codebase`, `SourceFile`, `Symbol`, `Import`, `Export`, and related Python classes as compatibility handles. +2. Move canonical storage into Rust: + - interned paths, strings, import specifiers, symbol names + - compact `FileId`, `NodeId`, `SymbolId`, `ImportId`, `ExportId`, `EdgeId` + - arena/slotmap-backed records instead of Python objects + - adjacency tables or compressed graph storage instead of `PyDiGraph` payloads + - byte ranges and kind enums instead of persistent Python `tree_sitter.Node` wrappers for every node +3. Create Python wrappers lazily only when user code asks for them. +4. Run graph queries in Rust and return IDs or compact records; Python adapts those into existing objects/lists. +5. Port incrementally behind a backend flag, keeping the Python backend available until parity is proven. + +## Non-Goals + +- Do not rewrite the public codemod API first. +- Do not translate every Python class one-for-one into Rust. +- Do not make Rust own all edit formatting in the first slice. +- Do not remove the current Python backend until large-repo memory and parity targets are met. + +## Current Hot Spots To Replace + +- `CodebaseContext` owns a `rustworkx.PyDiGraph` of Python node payloads. +- `SourceFile` eagerly parses and stores all parsed nodes in `_nodes`. +- `Editable` objects keep `ts_node`, `ctx`, `parent`, and file/node IDs. +- Initial graph build parses every source file and then runs import/export/dependency passes over the aggregate node set. +- Dependency recomputation uses object methods and fixed-point list expansion rather than compact indexed frontiers. +- Public queries such as `codebase.symbols`, `codebase.imports`, and `codebase.files` materialize Python lists by filtering graph nodes. 
+ +## Target Architecture + +### Rust Crates + +- `graph_sitter_engine` + - core data model + - tree-sitter parsing + - compact indexes + - import/export/name/scope resolution + - dependency graph + - incremental invalidation + - debug dumps and benchmark hooks +- `graph_sitter_py` + - PyO3 bindings + - backend facade used by Python `CodebaseContext` + - lazy handle constructors + +### Python Integration + +- Add a backend option such as `CodebaseConfig(graph_backend="python" | "rust")`. +- Introduce an engine facade under `CodebaseContext`. +- Keep current Python objects for compatibility, but make Rust-backed versions hold IDs instead of owning canonical state. +- Keep the existing transaction manager initially; Rust should provide ranges and patch intents, not own all formatting in phase 1. + +### Data Model + +Minimum records for the vertical slice: + +- `FileRecord`: path ID, language, content hash, root range, per-file node ranges +- `SymbolRecord`: file ID, name ID, full-name ID, kind, parent symbol, scope, range, declaration range +- `ImportRecord`: file ID, module/name/alias IDs, import kind, range, statement range +- `ExportRecord`: file ID, exported name, target symbol/import/file, range +- `UsageRecord`: file ID, source node, target node, usage kind/type, match range +- `GraphEdge`: source ID, target ID, edge kind, optional usage ID + +## Multi-Agent Work Convention + +This file is the shared coordination ledger for helper agents. + +- Every task must be represented as a Markdown checkbox line. +- Use `[ ]` for open or claimed work and `[x]` for completed work. +- To claim a task, append `owner: ` to the same checkbox line. +- To mark a task blocked, keep it unchecked and append `BLOCKED: `. +- When completing a task, change `[ ]` to `[x]` and append a short result note. +- Add new tasks under the relevant phase rather than creating a separate tracking file. 
+- Each agent should append a short entry to `Agent Log` when it starts or finishes meaningful work. +- Avoid broad edits to sections owned by another active agent; add notes instead. +- Keep implementation-specific findings near the task they affect. + +Recommended task format: + +```md +- [ ] Short imperative task title. owner: agent-name. Notes: current finding or next action. +- [x] Completed task title. owner: agent-name. Result: concise outcome. +``` + +## Agent Hierarchy + +- [ ] Lead/RFC agent: maintain this strategy, define interfaces, arbitrate scope, and keep phases coherent. +- [ ] Benchmark agent: measure current memory/time by phase on small, medium, and huge repos. +- [ ] API inventory agent: enumerate public APIs and classify P0/P1/P2 compatibility requirements. +- [ ] Rust data-model agent: design compact arenas, IDs, interners, and graph storage. +- [ ] Parser/index agent: implement Rust tree-sitter extraction into compact IR. +- [ ] Resolver agent: port import, export, scope, name, superclass, and dependency resolution. +- [ ] PyO3 binding agent: expose Rust engine operations to the existing Python package. +- [ ] Incremental agent: design file add/reparse/delete invalidation and stable ID behavior. +- [ ] Parity/test agent: run existing tests against both backends and build golden graph snapshots. +- [ ] Packaging/CI agent: integrate Rust builds with the current hatch/Cython packaging and CI. +- [ ] Docs/site agent: define accurate setup docs, landing-page messaging, and Vercel deployment path. +- [ ] CLI/distribution agent: define the `uvx graph-sitter ...` command surface and packaging path. + +## Active Worktrees + +- [x] Benchmarks/profiling. owner: Poincare. Agent: `019edc37-802c-7223-8d37-75a51b65abbd`. Branch: `codex/rust-rewrite-benchmarks`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-benchmarks`. Result: benchmark plan and Python backend harness committed. +- [x] API inventory. owner: Dewey. 
Agent: `019edc37-82ff-7b92-9fac-5364e2d8098b`. Branch: `codex/rust-rewrite-api-inventory`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-api-inventory`. Result: P0/P1/P2 API compatibility inventory committed. +- [x] Rust data model. owner: Pasteur. Agent: `019edc37-859c-71b2-b884-ab7a2bfc707e`. Branch: `codex/rust-rewrite-data-model`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-data-model`. Result: compact Rust-side schema and migration risks committed. +- [x] Parser/index vertical slice. owner: Meitner. Agent: `019edc37-8867-7a83-a18e-b0ec0ca29d11`. Branch: `codex/rust-rewrite-parser-index`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-parser-index`. Result: parser/index extraction plan committed. +- [x] Resolver/dependency algorithms. owner: Gauss. Agent: `019edc37-8c34-7f93-b0ae-746cbd579962`. Branch: `codex/rust-rewrite-resolver`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-resolver`. Result: resolver algorithm inventory and Rust port plan committed. +- [x] Rust engine skeleton. owner: Beauvoir. Agent: `019edc37-8f2d-7dd3-b3ed-a1f9e1b191a7`. Branch: `codex/rust-rewrite-engine-skeleton`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-engine-skeleton`. Result: standalone Cargo workspace and smoke tests committed. +- [x] PyO3/Python compatibility. owner: Wegener. Agent: `019edc4e-72b1-7a00-8644-e43503f0cdc3`. Branch: `codex/rust-rewrite-pyo3-compat`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-pyo3-compat`. Result: compatibility plan committed. +- [x] Docs/site/Vercel strategy. owner: Nash. Agent: `019ee1a7-70e6-7062-bba8-a80918a7123c`. Branch: `codex/docs-site-vercel-worktree`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-docs-site-vercel`. Result: added `rust-rewrite/docs-site-vercel-plan.md`; no Vercel deployment performed. +- [x] `uvx graph-sitter` CLI/distribution strategy. owner: Lovelace. Agent: `019ee1a7-e2cd-7771-a1f0-37c298b91323`. Branch: `codex/uvx-cli-distribution-lovelace`. 
Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-uvx-cli-plan`. Result: added `rust-rewrite/uvx-cli-plan.md`; implementation remains open. +- [x] Docs/landing/Vercel refresh. owner: Huygens. Agent: `019ee1f0-df8d-7851-acde-1102b671acf6`. Branch: `codex/docs-landing-vercel-path`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-docs-landing-vercel`. Result: refreshed landing copy, site README, and `docs-site-vercel-plan.md`; no Vercel deployment performed. +- [x] `uvx graph-sitter` command roadmap. owner: Volta. Agent: `019ee1f0-e4a6-7530-ab64-d34f82b8dadb`. Branch: `codex/uvx-command-roadmap`. Worktree: `/Users/jayhack/CS/CODEGEN/graph-sitter-uvx-command-roadmap`. Result: added `rust-rewrite/uvx-command-roadmap.md` with command contract and multi-agent checklist. + +## Phase 0: Baseline, RFC, And Contracts + +- [x] Add memory benchmark harness for current Python backend. owner: Poincare. Result: added `rust-rewrite/tools/measure_python_backend.py`. +- [x] Measure initial cold parse RSS and wall time for generated fixture and this repo. owner: codex. Result: recorded in `rust-rewrite/benchmarks.md`. +- [x] Add pinned Python repository benchmark harness. owner: codex. Result: added `rust-rewrite/tools/benchmark_pinned_python_repo.py` to clone/fetch a pinned repo, build the PyO3 extension, run Python and Rust `Codebase` measurements, and enforce wall/RSS/file-count gates. +- [x] Measure first canonical huge Python repo cold parse/Rust compact backend baseline. owner: codex. Result: Apache Airflow `2.10.5` at `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf` records 4,789 Python files, 6.218x wall improvement, and 9.882x max-RSS improvement for the current compact Rust slice. +- [ ] Measure cold parse RSS and wall time for additional canonical small, medium, and huge repos. +- [ ] Measure graph node/edge counts, Python object counts, and per-phase allocation peaks. 
+- [x] Document the exact current build phases with timings: file enumeration, parse, directory tree, config parse, import resolution, export resolution, dependency recompute. owner: Poincare. Result: added phase map in `rust-rewrite/benchmarks.md`; representative repo timings remain open. +- [x] Inventory all public `Codebase` properties and methods. owner: Dewey. Result: documented in `rust-rewrite/api-inventory.md`. +- [x] Inventory all public `SourceFile`, `Symbol`, `Import`, `Export`, and `Directory` APIs used by tests/docs. owner: Dewey. Result: documented in `rust-rewrite/api-inventory.md`. +- [x] Define P0 compatibility surface for the first Rust backend slice. owner: Dewey. Result: documented in `rust-rewrite/api-inventory.md`. +- [x] Define large-repo success targets for memory and time. owner: codex. Result: opt-in Rust backend merge gate requires Airflow strict `Codebase` construction to be at least 2x faster, Next.js strict `Codebase` construction to be at least 1.2x faster on hosted CI, and both to be at least 4x lower max RSS than recorded Python baselines; default-backend promotion still requires protected CI plus broader parity. +- [x] Select first pinned large Python repo commit for golden parity and latency benchmarks. owner: codex. Result: Apache Airflow `2.10.5`, upstream `https://github.com/apache/airflow.git`, ref `refs/tags/2.10.5`, commit `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`, measured with Python 3.13.11 on macOS. +- [ ] Select additional pinned large Python repo commits for golden parity and latency benchmarks. +- [x] Build first compact Rust golden graph snapshot for the pinned large Python repo commit. owner: codex. Result: committed `rust-rewrite/golden/apache-airflow-2.10.5-rust-compact.json` with stable files, symbols, imports, import-resolution, reference, and dependency counts/hashes/samples plus integrity checks. 
+- [ ] Compare golden reference/import/dependency graph snapshots against the Python backend semantics for the pinned large Python repo commits. Notes: fixtures should assert file/module records, import graph edges, symbol reference graph edges, dependency graph edges, and deterministic sort order. +- [x] Draft compact Rust data model with module boundaries and Python integration points. owner: Pasteur. Result: documented in `rust-rewrite/data-model.md`. +- [ ] Draft full Rust engine RFC with module boundaries and Python integration points. +- [ ] Decide build tooling: `maturin`, setuptools-rust, or hatch custom hook. + +## Phase 1: Rust Engine Skeleton + +- [x] Add Rust workspace/crate skeleton without changing default behavior. owner: Beauvoir. Result: added standalone Cargo workspace under `crates/`. +- [x] Add PyO3 module import smoke test. owner: codex. Result: built the extension module and imported it from Python, then indexed this repo through `index_python_path`. +- [x] Add `graph_backend` config flag with default `python`. owner: codex. Result: added `GraphBackend` and `RustFallbackMode` to `CodebaseConfig`. +- [x] Add compact Rust index facade that can be constructed from `CodebaseContext`. owner: codex. Result: `ctx.rust_index` builds through the optional PyO3 extension when `graph_backend` is `rust` or `auto`. +- [x] Skip eager Python graph construction in opt-in Rust compact mode. owner: codex. Result: `CodebaseConfig(graph_backend="rust")` leaves the Python graph unbuilt when the Rust compact index succeeds. +- [ ] Add full Rust engine facade object that can back existing `CodebaseContext` graph query APIs. +- [x] Add a minimal debug API returning engine version and enabled features. owner: Beauvoir. Result: added Rust `Engine::debug_info` and feature-gated PyO3 bindings. +- [x] Add CI job that builds the Rust extension on supported Python versions. owner: codex. 
Result: `.github/workflows/rust-rewrite-extension.yml` builds/tests the PyO3 extension on Python 3.12 and 3.13 across Linux/macOS, imports `graph_sitter_py`, runs a compact Python indexing smoke, then builds a wheel and proves `uvx --from dist/<wheel>.whl graph-sitter parse --backend rust --fallback error`. +- [x] Add fast Rust rewrite PR CI lane. owner: codex. Result: `.github/workflows/rust-rewrite-fast.yml` runs `rust-rewrite/tools/check_fast.sh` for full ruff, Rust fmt/tests, PyO3 binding checks, live Python/Rust fixture parity, focused py_compile, and focused Rust-backend pytest without running large pinned repo benchmarks. +- [x] Keep docs-only work out of the Rust fast lane. owner: codex. Result: `.github/workflows/docs-validate.yml` validates Mintlify docs and broken links, while `rust-rewrite-fast.yml` now watches Rust/code/proof inputs instead of all `rust-rewrite/**` markdown ledgers. +- [x] Add tiny CLI smoke CI lane for the `uvx graph-sitter` surface. owner: codex. Result: `.github/workflows/rust-rewrite-cli-smoke.yml` runs `rust-rewrite/tools/check_cli_smoke.sh` for focused ruff/py_compile, command help, and parse/run/transform unit tests when CLI files change. +- [x] Harden Linux PyO3 CI against missing uv libpython loader paths. owner: codex. Result: `check_fast.sh` and `check_extension_build.sh` export the active Python `LIBDIR` through `LD_LIBRARY_PATH` before running linked PyO3 test binaries. +- [x] Fix large-repo workflow validation before job startup. owner: codex. Result: moved `runner.temp`-derived pinned-check env vars from job-level env to the run step where the runner context is valid. +- [x] Add benchmark command comparing Python backend with Rust compact indexer. owner: codex. Result: added `rust-rewrite/tools/compare_rust_python_index.py`. +- [x] Add benchmark command for the Python-facing Rust facade. owner: codex. Result: added `rust-rewrite/tools/measure_rust_facade.py`. 
+- [x] Add benchmark command for real `Codebase` construction with the Rust compact backend. owner: codex. Result: added `rust-rewrite/tools/measure_codebase_rust_backend.py`. +- [x] Add benchmark command that can select full `Codebase` `--backend python|rust` once Rust backend is wired into Python. owner: codex. Result: `benchmark_pinned_python_repo.py` runs Python and Rust `Codebase` measurements in child processes for pinned external repos. + +## Phase 2: Parser And Compact Index Vertical Slice + +- [x] Specify parser/index vertical slice and extraction rules. owner: Meitner. Result: documented in `rust-rewrite/parser-index.md`. +- [x] Implement standalone Rust Python file discovery for the first compact-index slice. owner: codex. Result: recursive repo walk with common generated/cache directory skips. +- [x] Implement Rust file discovery input format from Python repo operator. owner: codex. Result: added selected-file `index_python_paths` API and pass `RepoOperator.iter_files(...)` results from `CodebaseContext`. +- [x] Implement tree-sitter parser setup for Python. owner: codex. Result: `graph-sitter-engine` uses `tree-sitter-python` and indexes Python files. +- [x] Implement tree-sitter parser setup for TypeScript/TSX. owner: codex. Result: `graph-sitter-engine` now uses `tree-sitter-typescript` with the TSX grammar for `.js`, `.jsx`, `.ts`, and `.tsx` compact syntax indexing. +- [x] Extract file records with path, language, content hash, and root ranges. owner: codex. Result: compact Rust `FileRecord` now stores extension-derived language and deterministic FNV-1a content hashes alongside path, module name, byte/line counts, parse-error status, and root ranges; the Python bridge exposes the new fields and remains tolerant of older/fake records. +- [x] Extract TypeScript/JavaScript file records with path, byte length, line count, error status, and root ranges. owner: codex. 
Result: `TypeScriptIndex` records selected TS-like files without entering Python object graph construction. +- [x] Extract file records with path, byte length, line count, error status, and root ranges for Python. owner: codex. +- [x] Extract top-level Python classes and functions. owner: codex. Result: compact `SymbolRecord` extraction for class/function definitions and decorated definitions. +- [x] Extract top-level Python globals. owner: codex. Result: added compact global-variable symbol records for simple top-level assignments and annotated assignments. +- [x] Extract top-level TypeScript classes, functions, interfaces, type aliases, enums, namespaces, and globals. owner: codex. Result: syntax-only `TypeScriptIndex` emits compact symbol records for declaration and top-level variable/function shapes, including arrow-function declarators. +- [x] Extract TypeScript namespace member symbols. owner: codex. Result: compact `TypeScriptIndex` now parents direct and nested namespace member declarations under namespace symbols, including function/interface/type/enum/namespace/global-variable members. +- [x] Extract imports for Python. owner: codex. Result: compact `ImportRecord` extraction for `import`, `from`, and future imports. +- [x] Extract imports and exports for TypeScript. owner: codex. Result: syntax-only `TypeScriptIndex` emits static import, side-effect import, dynamic `require`/`import()` binding, re-export import, and unresolved export records. +- [x] Add syntax-only compact TypeScript/JavaScript index parity before resolver work. owner: codex. Result: fast-lane live parity now exact-compares TypeScript fixture files, symbols, imports, exports, symbol dependency graph, symbol usage graph, import usage graph, resolved dependency targets, and mutation outputs against the Python backend with zero known deltas. +- [x] Build path and string interners. owner: codex. 
Result: compact Rust indexes now store file path/module names plus symbol/import/export/reference string fields as interned `InternedString(Arc<str>)` values while preserving plain-string JSON serialization and existing Rust comparisons; unit coverage proves duplicate external import/reference names share allocation and that construction-only string/export lookup tables are dropped from the returned index. +- [x] Expose compact Python index summary and JSON through PyO3. owner: codex. Result: added `PythonIndex`, `IndexSummary`, `Engine.index_python_path`, and module-level `index_python_path`. +- [x] Expose compact Python file, symbol, import, and import-resolution records through PyO3/Python facade. owner: codex. Result: added record-family JSON methods and typed Python dataclass accessors on `RustIndexBackend`. +- [x] Expose compact TypeScript syntax index summary and JSON through PyO3. owner: codex. Result: added `TypeScriptIndex`, `Engine.index_typescript_path(s)`, module-level `index_typescript_path(s)`, and JSON accessors for files, symbols, imports, and exports. +- [x] Expose `files`, `symbols`, `classes`, `functions`, `imports`, and `exports` ID queries through PyO3. owner: codex. Result: `PythonIndex` and `TypeScriptIndex` now expose compact ID-list methods for files, all/top-level symbols, class/function/global symbols, imports, and TypeScript exports; TypeScript also exposes interface/type/enum/namespace symbol IDs. +- [x] Add golden snapshots for compact IR on small Python fixtures. owner: codex. Result: added deterministic compact graph snapshot covering files, symbols, imports, and import resolutions. +- [x] Add golden snapshots for compact IR on small TypeScript fixtures. owner: codex. Result: added `rust-rewrite/golden/typescript-fixture-rust-compact.json` and a Rust engine snapshot test covering files, symbols, static imports, dynamic imports, exports, re-exports, ranges, and parser-error status. 
+ +## Phase 3: Resolution And Dependency Graph + +- [x] Inventory current resolver/dependency algorithms and Rust relation-table plan. owner: Gauss. Result: documented in `rust-rewrite/resolution-algorithms.md`. +- [ ] Port Python import resolution rules. +- [x] Implement compact Python import-to-file and import-to-symbol resolution for indexed internal modules. owner: codex. Result: Rust now emits `ImportResolutionRecord` rows for direct, absolute `from`, and relative `from` imports when targets are inside the selected file set. +- [x] Port TypeScript relative import resolution rules. owner: codex. Result: `TypeScriptIndex` now emits compact `ImportResolutionRecord` rows for relative TS/JS module specifiers that resolve to selected files via exact, extensionless `.ts/.tsx/.js/.jsx`, and directory `index` candidates, including default/named symbol targets when local exports identify a symbol. +- [x] Port TypeScript config/path alias handling. owner: codex. Result: Rust now parses nearest `tsconfig.json` files with JSONC comments/trailing commas, resolves `compilerOptions.paths` wildcard/exact aliases and `baseUrl` module specifiers, and feeds those resolutions into TypeScript references/dependencies. +- [x] Port first TypeScript barrel re-export symbol propagation. owner: codex. Result: Rust now resolves named re-exports, nested re-exports, and wildcard `export *` propagation into import symbol targets and namespace-member references without materializing wildcard export records. +- [x] Resolve named TypeScript namespace re-export member dependencies. owner: codex. Result: `export * as ns from "./leaf"; import { ns } from "./barrel"; ns.member` now resolves through the namespace export to the leaf module member in Rust records and through the Python compatibility shell. +- [x] Represent external modules compactly. owner: codex. 
Result: Rust now emits `ExternalModuleRecord` rows for unresolved non-relative Python and TypeScript imports, excluding Python future imports and unresolved relative local imports. +- [ ] Implement full import-to-file and import-to-symbol edges for all Python and TypeScript rules. +- [ ] Implement export-to-symbol/import/file edges. +- [ ] Implement lexical scope tables for name resolution. +- [x] Implement first compact Python symbol reference extraction by identifier ranges. owner: codex. Result: records same-file and imported top-level symbol references inside top-level Python classes/functions. +- [x] Attribute compact Python references to nested class/function source symbols. owner: codex. Result: nested Python functions and methods are indexed as non-top-level compact symbols, while public `Codebase.functions` remains top-level-only. +- [x] Exclude compact Python references shadowed by parameters, local assignments, and nested definitions. owner: codex. Result: avoids resolving local bindings to imported/top-level symbols and reduced Airflow compact references from 112,238 to 105,739. +- [x] Exclude compact Python references shadowed by local imports. owner: codex. Result: avoids resolving function-local `import ... as ...`, `import pkg.mod`, and `from ... import ...` bindings to imported/top-level symbols; reduced Airflow compact references from 105,739 to 105,624 and dependencies from 68,927 to 68,869. +- [x] Exclude compact Python references shadowed by control-flow bindings. owner: codex. Result: avoids resolving `for` targets, `with ... as ...` targets, and `except ... as ...` targets to imported/top-level symbols; reduced Airflow compact references from 105,624 to 105,467 and dependencies from 68,869 to 68,848. +- [x] Exclude compact Python references shadowed by comprehension targets and match-pattern captures. owner: codex. 
Result: avoids resolving comprehension loop targets and match capture patterns to imported/top-level symbols; reduced this checkout's compact references from 4,101 to 4,089 and dependencies from 2,950 to 2,949. The pinned Airflow `2.10.5` compact graph stayed at 105,467 references and 68,848 dependencies. +- [x] Scope compact Python comprehension target shadows to comprehension expressions. owner: codex. Result: comprehension loop targets no longer hide later references in the enclosing function; current checkout and pinned Airflow stayed graph-stable at 4,110 and 104,622 references respectively after the attribute-field skip baseline. +- [x] Exclude compact Python references shadowed by lambda parameters. owner: codex. Result: adds range-scoped lambda-body bindings so lambda parameters shadow inside the lambda body without hiding legitimate default-value references such as `lambda local=Base: local`; this checkout and pinned Airflow stayed graph-stable at 4,089 and 105,467 references respectively. +- [x] Preserve compact Python references for `global` declarations. owner: codex. Result: `global` names are removed from the function-local shadow set, so module-level writes and uses remain visible; Airflow compact coverage now emits 105,607 references and 68,917 dependencies. +- [x] Stop treating Python attribute field names as bare compact references. owner: codex. Result: scans the object side of attribute expressions but skips the field-name side; Airflow compact coverage now emits 104,622 references and 68,340 dependencies. +- [x] Resolve compact Python references through imported module attributes. owner: codex. Result: resolves `module.some_func`, `alias.SomeClass`, and `pkg.module.some_func` when the qualifier maps to an indexed internal Python module; Airflow compact coverage now emits 109,282 references and 71,534 dependencies. +- [x] Exclude compact Python references shadowed by `nonlocal` declarations. owner: codex. 
Result: prevents closure variables declared `nonlocal` from resolving to imported/top-level symbols in nested functions; this checkout and pinned Airflow stayed graph-stable at 4,110 and 109,282 references respectively. +- [x] Resolve direct Python package re-export imports. owner: codex. Result: `from pkg import Symbol` follows matching imported bindings in `pkg/__init__.py` to the original internal symbol; Airflow compact coverage now emits 109,655 references and 71,788 dependencies. +- [x] Resolve Python wildcard import and re-export chains. owner: codex. Result: compact exported-name tables now propagate `from module import *` across indexed internal modules and feed named imports, references, and dependency edges; Airflow compact coverage now emits 109,743 references and 71,863 dependencies. +- [x] Resolve nested Python module attribute references. owner: codex. Result: module-prefix bindings now resolve namespace-style chains such as `from a import b; b.c.d()` and `import a.b; a.b.c.d()` to indexed internal module symbols; Airflow compact coverage now emits 109,817 references and 71,932 dependencies. +- [x] Restrict Python wildcard imports with static `__all__` exports. owner: codex. Result: literal top-level `__all__` list/tuple/set assignments now constrain compact wildcard expansion while explicit named imports still resolve; pinned Airflow stayed graph-stable at 109,817 references and 71,932 dependencies. +- [x] Index compact Python function-local imports and local external import dependencies. owner: codex. Result: nested `import` and `from ... import ...` statements are compact import rows, function-local external import uses produce external-reference dependency handles, and Airflow `__getattr__` now reports `importlib`, `sys`, `warnings`, and `__lazy_imports` dependencies; pinned Airflow now emits 45,404 imports, 117,799 references, 78,784 external references, and 77,570 dependencies. 
+- [ ] Expand symbol usage extraction to full lexical shadowing behavior, full attribute/type resolution, and order-sensitive scopes. +- [x] Implement first compact dependency edge construction from usage records. owner: codex. Result: emits de-duplicated Python `DependencyRecord` edges from compact references with contributing reference IDs. +- [x] Implement first compact TypeScript reference and dependency extraction. owner: codex. Result: `TypeScriptIndex` now emits `ReferenceRecord` and `DependencyRecord` rows for same-file top-level symbol uses, direct default/named import uses, and namespace-import member uses, while leaving full TypeScript lexical/type/interface parity open. +- [x] Exclude compact TypeScript references shadowed by scoped loop/catch bindings. owner: codex. Result: range-scoped TypeScript binding scopes now suppress false references for `for...of`/`for...in` bindings and `catch` parameters without hiding later references to imported names outside those scopes. +- [x] Scope compact TypeScript nested callback parameter shadows to callback bodies. owner: codex. Result: arrow/function-expression parameters no longer hide legitimate imported references before or after the callback while still suppressing references inside the callback body. +- [x] Exclude compact TypeScript references shadowed by nested declarations. owner: codex. Result: nested function/class declarations now shadow same-name imports inside the enclosing symbol so local declaration names and uses no longer create false imported-symbol references. +- [x] Handle compact TypeScript destructuring/default and block-scoped lexical binding shadows. owner: codex. Result: destructured/defaulted parameters and local declarations now have focused shadowing coverage, default-value identifiers still resolve as imports, and `let`/`const` declarations shadow from declaration to the nearest lexical boundary instead of hiding imports before the declaration or after nested blocks. 
+- [x] Add compact TypeScript type annotation reference dependencies. owner: codex. Result: Rust now records `type_identifier` and namespace-qualified type references outside import/export/heritage clauses, preserves type-only import dependency handles in the Python shell, and pins Next.js at 62,309 references, 25,323 external references, and 21,639 dependencies. +- [x] Add compact TypeScript nested local assignment dependencies. owner: codex. Result: nested callback assignment records are now parented as non-top-level compact symbols, local reference resolution uses indexed owner/name maps, and selected pinned Next.js `AppRouterAnnouncer.dependencies` now exact-matches Python with no TypeScript known deltas. +- [x] Add compact TypeScript function-call records and read handles. owner: codex. Result: TypeScript call expressions now emit compact `FunctionCallRecord` rows linked to source symbols, resolved target symbols/imports, and source/name ranges; PyO3 exposes targeted call JSON queries, and compact Python file/symbol handles expose read-only `.function_calls` without materializing the Python graph. +- [x] Add compact TypeScript promise-chain records and read handles. owner: codex. Result: outermost `.then/.catch/.finally` chains now emit compact `PromiseChainRecord` rows with stage names, base range, source range, and parent source-symbol IDs; PyO3 exposes targeted chain JSON queries, and compact Python file/symbol handles expose read-only `.promise_chains` without materializing the Python graph. +- [ ] Expand dependency edge construction to full lexical/reference coverage, external modules, and TypeScript. +- [x] Emit compact Python external import reference records for dependency parity. owner: codex. 
Result: `PythonIndex.external_references` now records unresolved external import binding uses, PyO3 exposes `external_references_json`, compact import handles surface external usages, and compact Python symbol `.dependencies` includes external import handles without materializing the Python graph. +- [x] Emit compact TypeScript/JavaScript external import reference records. owner: codex. Result: `TypeScriptIndex.external_references` now records unresolved external default/named/namespace/dynamic import binding uses, PyO3 exposes them, compact TS imports surface external usages, and compact TS symbol `.dependencies` includes external import handles while preserving resolved-symbol dependencies for internal imports. +- [x] Add compact TypeScript import predicate parity. owner: codex. Result: compact TypeScript import handles now answer `is_type_import()`, `is_default_import()`, `is_namespace_import`, `namespace`, and `namespace_imports` for default, type-only, named namespace, namespace-star, and side-effect imports without materializing the Python graph. +- [x] Implement superclass/interface dependency edges. owner: codex. Result: compact TypeScript heritage clauses now emit references/dependencies for interface `extends`, class `implements`, and namespace-qualified implemented types, while class `extends` remains covered through expression references. +- [x] Add compact subclass/implementation edge storage and traversal APIs. owner: codex. Result: `TypeScriptIndex` now emits compact `SubclassRecord` rows from heritage references, PyO3 exposes them through `subclass_edges_json`, and Rust compact symbols answer `.superclasses`, `.subclasses`, `.implementations`, `.parent_classes`, `.parent_interfaces`, `.is_subclass_of(...)`, and `.extends(...)` without materializing the Python graph. +- [x] Add graph debug dump for nodes, edges, and usage metadata. owner: codex. 
Result: `PythonIndex` and `TypeScriptIndex` now expose `debug_graph_json()` through PyO3, `RustIndexBackend`, and `Codebase.rust_debug_graph_json`, with normalized file/symbol/import/export nodes plus containment, resolution, reference, dependency, external-reference, subclass, and export edges carrying compact record IDs. +- [x] Add compact Rust graph debug snapshot for pinned Airflow. owner: codex. Result: `snapshot_pinned_python_repo.py` normalizes compact records by stable paths/symbol keys and emits deterministic counts, hashes, and sample rows for large-repo review. +- [x] Add first parity test comparing Python backend and Rust backend graph edges on fixtures. owner: codex. Result: `check_python_rust_parity_fixture.py` validates live Python/Rust parity for files, symbols, import resolution, external modules, selected dependencies, selected usages, internal import-node dependencies, and external import dependencies with zero known deltas. +- [x] Expand Python/Rust parity fixtures to cover full dependency semantics, imported-export chains, and mutation flows. owner: codex. Result: `check_python_rust_parity_fixture.py` now exact-compares fixture-wide Python and TypeScript imports, symbol dependency graphs, symbol usage graphs, import usage graphs, imported-export chains, external imports, and mutation outputs with zero known deltas while keeping the Rust path off the Python graph. + +## Phase 4: Lazy Python Compatibility Layer + +- [x] Plan Python/PyO3 compatibility layer and lazy handle migration. owner: Wegener. Result: documented in `rust-rewrite/python-compat.md`. +- [x] Add temporary Python compact handle base for Rust record-backed read APIs. owner: codex. Result: added `RustCompactHandle` with stable compact node IDs for files, symbols, and imports. +- [ ] Implement Rust-backed file handles for P0 `SourceFile` APIs. +- [ ] Implement Rust-backed symbol handles for P0 `Symbol`, `Class`, and `Function` APIs. 
+- [ ] Implement Rust-backed import handles for P0 `Import` APIs. +- [x] Implement Rust-backed external-module handles for unresolved imports. owner: codex. Result: `Codebase.external_modules` and compact import `resolved_symbol`/`imported_symbol` now return lightweight external-module handles without materializing the Python graph. +- [ ] Implement Rust-backed export handles for P0 TypeScript `Export` APIs. +- [x] Make `Codebase.files` return compact read handles under the Python Rust backend. owner: codex. +- [x] Make `Codebase.symbols`, `classes`, `functions`, `global_vars`, and `imports` return compact read handles under the Python Rust backend. owner: codex. +- [x] Expose compact Rust file inbound import handles. owner: codex. Result: `RustCompactFile.inbound_imports` and `importers` now read from compact import-resolution records without materializing the Python graph. +- [x] Expose compact Rust file import lookup APIs. owner: codex. Result: `RustCompactFile.import_statements`, `has_import`, and `get_import` now read from compact import records and support alias/module/source lookup without materializing the Python graph. +- [x] Expose compact Rust file topological symbol ordering. owner: codex. Result: `RustCompactFile.symbols_sorted_topologically` now orders file-local symbols from compact dependency records without materializing the Python graph. +- [x] Expose compact Rust file byte-range lookup. owner: codex. Result: `RustCompactFile.get_nodes` and `find_by_byte_range` now return compact symbol/import handles by file-local order and byte-span overlap without materializing the Python graph. +- [x] Expose compact Rust file/import descendant traversal. owner: codex. Result: `RustCompactFile.descendant_symbols` and `RustCompactImport.descendant_symbols` now mirror existing read semantics without materializing the Python graph. +- [x] Expose compact Rust file name-resolution maps. owner: codex. 
Result: `RustCompactFile.valid_symbol_names`, `valid_import_names`, `resolve_name`, `resolve_attribute`, and `get_node_by_name` now resolve compact local symbols/imports without materializing the Python graph. +- [x] Expose compact Rust Python import-string helpers. owner: codex. Result: compact file, symbol, and import handles now answer `import_module_name`, `get_import_module_name_for_file`, and `get_import_string` without materializing the Python graph. +- [x] Expose compact Rust symbol identity parity. owner: codex. Result: nested `RustCompactSymbol.full_name` now composes parent names from Rust parent IDs, and Python compact symbols report `is_exported=True`. +- [x] Expose compact Rust symbol name handles. owner: codex. Result: `RustCompactSymbol.get_name` now returns a read-only compact name handle with `source`, `_source`, `name`, and `full_name` rather than a raw string. +- [x] Expose compact Rust symbol dependency and usage handles. owner: codex. Result: `RustCompactSymbol.dependencies`, `usages`, and `symbol_usages` now read from compact dependency/reference records with property and callable access. +- [x] Expose compact Rust symbol hierarchy queries. owner: codex. Result: `RustCompactSymbol.parent_symbol`, `child_symbols`, and `descendant_symbols` now use Rust parent IDs for nested symbols without materializing the Python graph. +- [x] Expose compact TypeScript namespace lookup helpers. owner: codex. Result: `RustCompactFile.get_namespace`, file `.namespaces`, namespace `.symbols`, `.get_symbol`, `.get_function`, `.get_interface`, `.get_type`, `.get_enum`, `.get_namespace`, and `.get_nested_namespaces` now use parented Rust symbol records without materializing the Python graph. +- [x] Expose compact Rust module-import exports. owner: codex. Result: `RustCompactImport.imported_exports` now returns imported module symbols/imports for file-target module imports instead of the file handle itself. +- [x] Expose compact Rust import attribute resolution. 
owner: codex. Result: `RustCompactImport.resolve_attribute` now resolves names through file-target module imports without materializing the Python graph. +- [x] Expose compact Rust import name handles. owner: codex. Result: `RustCompactImport.get_name` now returns a read-only compact name handle from alias/symbol/module records instead of being absent. +- [x] Expose compact Rust import usage handles. owner: codex. Result: `RustCompactImport.usages` and `symbol_usages` now read from compact references grouped by `import_id` with property and callable access. +- [x] Make TypeScript `Codebase.exports`, `interfaces`, and `types` return lazy handles under Rust backend. owner: codex. Result: compact TS `Codebase.exports`, `file.exports`, `file.get_export`, named/default export filters, and read-only export resolution helpers now use Rust records without materializing the Python graph. +- [x] Make TypeScript `Codebase.files`, `symbols`, `functions`, `interfaces`, `types`, imports, usages, and dependencies use compact Rust handles. owner: codex. Result: `CodebaseConfig(graph_backend="rust")` now builds a TypeScript `RustIndexBackend` through `index_typescript_path(s)` and keeps the Python graph unbuilt for supported compact TS shell queries. +- [x] Preserve existing sorting behavior for public query results. owner: codex. Result: compact file handles now use a deterministic filepath tie-breaker for duplicate basenames, and focused public-query tests cover compact file/class/function ordering without materializing the Python graph. +- [x] Add fallback path to Python backend for unsupported methods. owner: codex. Result: compact handles can now promote the context to the Python graph backend for non-strict unsupported methods, with `RustCompactFile.replace(is_regex=True)` delegating to the Python `SourceFile` implementation while `rust_fallback="error"` still raises `RustBackendUnsupportedError`. 
+- [x] Add tests that verify no full Python object graph is materialized for simple list queries. owner: codex. + +## Phase 5: Incremental Sync And Edits + +- [ ] Define stable ID behavior across file reparse. +- [ ] Implement add file in Rust backend. +- [ ] Implement delete file in Rust backend. +- [ ] Implement reparse changed file in Rust backend. +- [ ] Implement dependency invalidation frontier based on changed imports, exports, symbols, and usages. +- [ ] Integrate Rust backend with existing `apply_diffs`. +- [ ] Integrate Rust backend with existing transaction commit flow. +- [ ] Preserve Python transaction manager as first edit backend. +- [x] Add Rust-backed file-level mutation smoke tests. owner: codex. Result: compact file handles now queue transaction-manager edits for full-file `edit`, simple `replace`, `create_file`, and `remove`, then commit with `sync_graph=False` without materializing the Python graph. +- [x] Add Rust-backed direct symbol rename and string import mutation smoke tests. owner: codex. Result: compact symbols now queue declaration-name edits plus direct usage rewrites from compact reference records, compact files queue string `add_import` insertions, and the focused test commits both in one transaction batch without materializing the Python graph. +- [x] Add Rust-backed symbol-object import and remove mutation smoke tests. owner: codex. Result: compact files now accept `file.add_import(compact_symbol, alias=...)`, and compact symbols/imports queue remove transactions; focused tests commit with `sync_graph=False` without materializing the Python graph. +- [x] Add Rust-backed decorator mutation smoke tests. owner: codex. Result: compact symbols now expose `decorators`, `is_decorated`, `add_decorator`, decorator removal, and class method lookup; focused tests commit class/method decorator edits and decorator removal with `sync_graph=False` without materializing the Python graph. 
+- [x] Add Rust-backed create-file plus move-to-file mutation smoke tests. owner: codex. Result: compact files can append moved symbol source, compact symbols can move into newly created files, dependency symbols are added as imports when dependencies are not moved, and `add_back_edge` inserts a source-file import for remaining same-file usages with `sync_graph=False`. +- [x] Add Rust-backed cross-file move import-update smoke tests. owner: codex. Result: compact `move_to_file(..., strategy="update_all_imports")` now rewrites imported usages in another file to import from the new destination module, and compact import removal now removes the whole import line rather than leaving dangling newlines. +- [x] Add Rust-backed codemod mutation smoke tests for remaining graph-aware symbol/import flows. owner: codex. Result: focused unit tests now run `Codemod.execute(codebase)` against strict Rust compact mode, then commit symbol/import edits and move-to-file import updates without materializing the Python graph. +- [x] Add parity tests for rename/move/add-import flows on Rust backend. owner: codex. Result: Python and TypeScript `Codemod.execute` parity tests now run identical add-import/rename and move-to-file import-update flows against Python and compact Rust backends, with exact file-byte comparisons while keeping the Rust path off the Python graph. +- [x] Decide and enforce exact whitespace parity for move-to-file codemods. owner: codex. Result: compact Rust move output now matches Python backend whitespace for empty source files, empty move targets, and import-retarget consumers in focused Python and TypeScript codemod fixtures. +- [x] Add stress tests for repeated incremental edits. owner: codex. Result: focused Python and TypeScript compact Rust tests now perform multiple commit cycles with the same symbol/import handles, covering repeated rename, repeated import insertion dedupe, repeated import alias/module retargeting, and blocked Python graph access. 
+- [x] Add pinned large-repo Rust codemod proof. owner: codex. Result: `check_pinned_codemods.py` runs real `Codemod` import+rename flows on temporary clones of pinned Airflow and Next.js checkouts, verifies committed file bytes and TypeScript cross-file usage rewrites, asserts the Python graph stays blocked and broad Rust backend caches stay cold, and is wired into the opt-in large-repo check script plus skipped fast-lane pytest wrapper. + +## Phase 6: Hardening And Rollout + +- [x] Run full unit suite with Python backend. owner: codex. Result: `uv run pytest -n auto --timeout 15 tests/unit` passed locally on 2026-06-19 with 2,158 passed, 58 skipped, 12 xfailed in 77.88s. +- [x] Run full unit suite with Rust backend where supported. owner: codex. Result: `rust-rewrite/supported-subset.json` now enumerates seven supported opt-in Rust backend capability groups backed by 50 pytest IDs, and `check_supported_subset.py` validates the manifest against pytest collection before `check_fast.sh` runs those tests. +- [x] Add large-repo memory regression benchmark to CI or nightly. owner: codex. Result: `.github/workflows/rust-rewrite-large-repos.yml` runs pinned Airflow and Next.js compact snapshot plus strict Rust `Codebase` performance/RSS checks on schedule, manual dispatch, and relevant `rust-rewrite` pushes, uploading JSON reports. +- [x] Add pinned large-repo latency/RSS benchmark harness. owner: codex. Result: Airflow `2.10.5` benchmark command emits backend, wall time, max RSS, file count, node/edge counts, compact Rust record counts, mismatch summaries, and pass/fail gates. +- [x] Add opt-in pinned large-repo compact snapshot test. owner: codex. Result: `tests/integration/rust_rewrite/test_pinned_airflow_snapshot.py` runs the committed Airflow compact golden check when `GRAPH_SITTER_RUN_PINNED_AIRFLOW_SNAPSHOT=1`. +- [ ] Add pinned large-repo parity test for reference graph, import graph, dependency graph, and latency/RSS. 
Notes: start with Apache Airflow `2.10.5` at commit `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf`; assert reference graph, import graph, dependency graph, deterministic ordering, known byte-span `find_by_byte_range` lookups, and benchmark wall/RSS against the exact checkout before adding more canonical repos. Airflow and Next.js now have selected opt-in Python-vs-Rust semantic parity checks for shared targeted APIs; full graph-wide semantic comparison remains open, but the selected Next.js TypeScript file/function/export/import/dependency/symbol-usage proof has no known deltas. +- [x] Add selected pinned large-repo semantic parity proof. owner: codex. Result: `check_pinned_semantic_parity.py` runs Python and Rust backends in separate child processes on pinned Airflow and Next.js checkouts, exact-compares shared targeted file/function/import/export/name/dependency semantics, validates expected Rust-enhancement deltas exactly, and fails if Rust is not faster and lower-RSS on the same semantic runs. +- [x] Add opt-in pinned large Python `Codebase` compatibility/performance proof. owner: codex. Result: `check_pinned_python_codebase.py` and `GRAPH_SITTER_RUN_PINNED_AIRFLOW_CODEBASE=1` validate Airflow `2.10.5` strict Rust `Codebase` construction, compact handle counts, 78,784 compact external import references, known `find_by_byte_range` spans and `__getattr__` dependencies in `airflow/__init__.py`, blocked Python graph access, and conservative wall/RSS ceilings against the recorded Python baseline. Fresh local run with the cached extension: 4.939s and 310.8 MB max RSS, a 3.776x wall and 11.167x RSS improvement over the recorded Python baseline. +- [x] Add pinned large TypeScript/JavaScript benchmark proof. owner: codex. 
Result: `benchmark_pinned_typescript_repo.py` pins `vercel/next.js` `v15.0.0` at commit `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`, reuses the PyO3 extension build, compares Python TS parse/object materialization with the standalone Rust TS syntax index, and validates selected-file count parity. +- [x] Add pinned large TypeScript/JavaScript compact snapshot proof. owner: codex. Result: `snapshot_pinned_typescript_repo.py` and `test_pinned_nextjs_snapshot.py` verify Next.js `v15.0.0` at `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3`; committed compact hashes/samples cover 13,688 selected files, 44,855 symbols, 28,210 imports, 16,026 exports, 13,462 import resolutions, 114,462 references, 49,287 dependencies, 25,318 external references, and 160 subclass edges. +- [x] Add opt-in pinned large TypeScript `Codebase` compatibility/performance proof. owner: codex. Result: `check_pinned_typescript_codebase.py` and `GRAPH_SITTER_RUN_PINNED_NEXTJS_CODEBASE=1` validate Next.js `v15.0.0` strict Rust `Codebase` construction, top-level compact handle counts, 25,318 compact external import references, 197,581 compact function-call records, 878 compact Promise-chain records, blocked Python graph access, and conservative wall/RSS ceilings against the recorded Python baseline. Fresh local run measured 10.465s and 435.9 MB max RSS, a 2.385x wall and 7.112x RSS improvement over the recorded Python baseline. +- [x] Add phase-aware current-RSS measurements for Rust `Codebase` proof runs. owner: codex. Result: `measure_codebase_rust_backend.py`, `check_pinned_python_codebase.py`, and `check_pinned_typescript_codebase.py` now report current RSS after Rust construction, graph-block check, summary counts, record materialization, compatibility handles, and known-query probes so retained memory can be distinguished from peak `ru_maxrss`. +- [x] Add no-materialization count APIs for large-repo proof paths. owner: codex. 
Result: compact summaries now include external module, export, external-reference, function-call, Promise-chain, and subclass-edge counts; PyO3 exposes top-level and kind-specific count getters; pinned proof tools use `compact_record_counts()` and `compact_compat_counts()` instead of materializing JSON records or handle lists for counts. Fresh strict runs keep Airflow flat at 299.0 MB RSS through summary, record-count, and compatibility-count phases; Next.js stays flat around 417.9 MB through count phases before targeted call/chain sample queries. +- [x] Add targeted Rust record lookups for large-repo query proofs. owner: codex. Result: PyO3 now exposes filtered file/symbol/import/export/import-resolution/reference/external-reference/dependency/subclass record JSON methods, and the Python compact backend lazily creates handles from those filtered records before falling back to full-list materialization. Fresh Airflow `2.10.5` strict proof stays flat at 255.8 MB RSS through count and byte-range lookup phases, rises only to 261.3 MB after the known dependency query, keeps large Python-side caches unmaterialized, and reports 13.141x lower max RSS than the recorded Python baseline; Next.js `v15.0.0` stays flat at 309.8 MB through count phases with 10.006x lower max RSS. +- [x] Add targeted global exact symbol lookups for compact `Codebase`. owner: codex. Result: `Codebase.has_symbol`, `get_symbols`, `get_symbol`, `get_class`, and `get_function` now use filtered Rust top-level symbol records under compact mode instead of materializing all symbol handles. Unit coverage asserts exact lookups keep `_symbols` and `_symbol_handles` cold, and Airflow `2.10.5` proof validates `codebase.get_function("_create_provider_info_schema_validator")` with RSS still flat at 255.4 MB and large caches unmaterialized. +- [x] Add pinned Next.js exact symbol lookup proof. owner: codex. 
Result: `check_pinned_typescript_codebase.py` now validates `codebase.get_function("AppRouterAnnouncer")` against Next.js `v15.0.0`, records `after_known_global_lookups` RSS, and asserts `_files`, `_symbols`, `_imports`, `_exports`, `_references`, `_external_references`, and `_dependencies` stay unmaterialized. Fresh run stayed flat at 308.7 MB RSS with 10.043x lower max RSS than the recorded Python baseline. +- [x] Add targeted nested child-symbol lookups for compact `Codebase`. owner: codex. Result: PyO3 now exposes `symbols_for_parent_json` for Python and TypeScript indexes, and compact `RustCompactSymbol.child_symbols` uses it before falling back to full symbol-handle materialization. Unit coverage asserts child lookup keeps `_symbols` and `_symbol_handles` cold; fresh Airflow `2.10.5` proof validates `KerberosService.child_symbols` with RSS still flat at 258.5 MB and full record/handle-list caches unmaterialized. +- [x] Add targeted file-local exact symbol lookups for compact files. owner: codex. Result: PyO3 now exposes `symbols_for_file_by_name_json` for Python and TypeScript indexes, and compact `SourceFile.get_symbol`, `get_class`, `get_function`, and `get_global_var` use it before falling back to per-file symbol-list materialization. Unit coverage asserts exact file-local lookups keep `_symbols`, `_symbol_handles`, and `_symbols_by_file_id` cold; fresh Airflow `2.10.5` proof validates `airflow/__init__.py.get_function("__getattr__")` with RSS flat at 203.3 MB before byte-range queries and all full record/handle-list caches unmaterialized. +- [x] Add targeted file-local exact import lookups for compact files. owner: codex. Result: PyO3 now exposes `imports_for_file_by_lookup_json` for Python and TypeScript indexes, and compact `SourceFile.get_import` / `has_import` use it before falling back to per-file import-list materialization. 
Unit coverage asserts exact import lookups keep `_imports`, `_import_handles`, and `_imports_by_file_id` cold; fresh Airflow `2.10.5` proof validates `airflow/__init__.py.get_import("import os")` with RSS flat at 256.0 MB before byte-range queries and all full record/handle-list caches unmaterialized. +- [x] Add targeted file-local exact export lookups for compact TypeScript files. owner: codex. Result: PyO3 now exposes `exports_for_file_by_name_json` for TypeScript indexes, and compact `SourceFile.get_export` uses it before falling back to per-file export-list materialization. Unit coverage asserts exact export lookup keeps `_exports`, `_export_handles`, and `_exports_by_file_id` cold; fresh Next.js `v15.0.0` proof validates `app-router-announcer.tsx.get_export("AppRouterAnnouncer")` with RSS flat at 310.5 MB and all full record/handle-list caches unmaterialized. +- [x] Add targeted file-local byte-range lookups for compact files. owner: codex. Result: PyO3 now exposes `symbols_for_file_by_byte_range_json`, `imports_for_file_by_byte_range_json`, and TypeScript `exports_for_file_by_byte_range_json`; `RustCompactFile.find_by_byte_range` uses those filtered records instead of materializing `get_nodes()`. Unit coverage asserts byte-range lookups keep `_symbols`, `_imports`, `_exports`, full handle lists, and per-file bulk caches cold; fresh Airflow `2.10.5` proof stays flat at 257.1 MB RSS through known byte-range queries and reports 13.079x lower max RSS than the recorded Python baseline. +- [x] Add targeted file-local single-name resolution for compact files. owner: codex. Result: `RustCompactFile.resolve_name` and `resolve_attribute` now use targeted symbol/import record lookups instead of building `valid_symbol_names`; `valid_symbol_names` remains the broad map API. 
Unit coverage asserts name resolution keeps `_symbols`, `_imports`, `_exports`, full handle lists, and per-file bulk caches cold; fresh Airflow `2.10.5` proof validates `resolve_name("__getattr__")`, `resolve_attribute("os")`, and `get_node_by_name("os")` with RSS flat at 256.8 MB through the new name-resolution phase and 13.060x lower max RSS than the recorded Python baseline. +- [x] Add targeted module-import attribute resolution for compact imports. owner: codex. Result: `RustCompactImport.resolve_attribute` now delegates to the imported compact file's targeted single-name resolver instead of reading `valid_import_names`. Unit coverage asserts module import attribute resolution keeps full record/handle lists and per-file bulk caches cold; fresh Airflow `2.10.5` proof validates `import airflow.models` resolving `DagModel` with RSS flat at 259.2 MB through the new module-attribute phase and 13.109x lower max RSS than the recorded Python baseline. +- [x] Keep compact file removal from materializing cold record lists. owner: codex. Result: compact file removal now tombstones removed file IDs/paths in `RustIndexBackend` and updates only already-materialized caches, so `RustCompactFile.remove()` can commit through the transaction manager without loading broad file/symbol/import/export/reference/dependency lists. Unit coverage asserts all major backend record and handle-list caches stay cold after removing an existing file. +- [x] Keep compact file creation and missing-file checks from materializing file lists. owner: codex. Result: compact file creation now assigns in-memory file IDs from the Rust summary, stores created file records in side tables, and updates only already-materialized caches. Targeted missing-path and missing-ID lookups now return `None` instead of falling through to full `file_handles`. 
Unit coverage asserts `create_file(..., sync=False)`, `has_file`, `get_file`, and `file_handle_by_id` for created files keep broad record/handle caches cold while later `codebase.files` still includes created files. +- [x] Keep compact symbol/import/export ID misses from materializing broad handle lists. owner: codex. Result: targeted `symbol_handle_by_id`, `import_handle_by_id`, `external_module_for_import`, and `export_handle_by_id` now return `None` on PyO3 misses instead of falling through to full symbol/import/export/external-module handle construction. Unit coverage asserts missing ID lookups and TypeScript export declared-symbol resolution keep broad record/handle caches cold. +- [x] Keep compact relation ID misses from materializing broad relation records. owner: codex. Result: targeted `import_resolution_for_import` and `reference_by_id` now return `None` on PyO3 misses instead of loading all import-resolution/reference records. Unit coverage asserts missing relation ID lookups keep `_import_resolutions` and `_references` cold. +- [x] Keep compact case-insensitive file lookup from materializing file handles. owner: codex. Result: PyO3 now exposes `file_by_path_ignore_case_json` for Python and TypeScript indexes, and `RustIndexBackend.get_file_handle(..., ignore_case=True)` uses it before broad fallback. Unit coverage asserts existing, missing, and newly-created mixed-case path lookups keep `_files` and `_file_handles` cold. +- [x] Add pinned large-repo proof for compact case-insensitive file lookup. owner: codex. Result: Airflow and Next.js strict Rust `Codebase` proof scripts now validate mixed-case `get_file(..., ignore_case=True)` probes and assert broad file/symbol/import/export/reference/dependency caches remain cold. +- [x] Add aggregate rollout readiness gate. owner: codex. 
Result: `check_rollout_readiness.py` consumes the pinned large-repo JSON reports and fails unless snapshots are structurally valid, Airflow Rust `Codebase` is at least 2x faster, Next.js Rust `Codebase` is at least 1.5x faster on hosted CI, both are at least 4x lower RSS than recorded Python baselines, selected semantic parity meets its 2x wall and 4x RSS ratios, semantic parity has only expected deltas, codemods pass, Python graph access stays blocked, and broad Rust caches stay cold. +- [x] Harden aggregate readiness report contracts. owner: codex. Result: `check_rollout_readiness.py` now rejects stale or weakened Airflow/Next.js reports unless pinned metadata, snapshot schema versions, exact summary/record/compat counts, known lookup results, graph-family counts/hashes, and cache-materialization invariants match the producer scripts' expectations. +- [x] Replay full pinned large-repo readiness gate after hardening. owner: codex. Result: fresh-extension local run of `check_pinned_large_repos.sh` passed on 2026-06-19; Airflow strict Rust `Codebase` measured 4.399s / 266.3 MB for 4.239x wall and 13.029x RSS improvement, Next.js measured 8.276s / 348.1 MB for 3.016x wall and 8.906x RSS improvement, codemods passed, and semantic parity reported one checked known delta for Airflow and zero known deltas for TypeScript. +- [x] Add supported Rust-backend subset manifest. owner: codex. Result: `rust-rewrite/supported-subset.json` lists supported opt-in compact query, Python shell, TypeScript shell, fallback, mutation, codemod, and readiness-contract capabilities; `check_supported_subset.py` fails on missing, duplicate, or unlisted collected tests and is wired into `check_fast.sh`. +- [x] Add P0 parity coverage audit. owner: codex. Result: `rust-rewrite/p0-parity-coverage.json` classifies the current P0 surface as parity-covered, fallback-covered, or open-gap; `check_p0_parity_coverage.py` validates test IDs, tool evidence, and open gaps in `check_fast.sh`.
Current audit has 5 parity-covered groups, 1 fallback-covered group, and 2 open gaps, so default-backend promotion remains blocked. +- [x] Add feature flag documentation. owner: codex. Result: `Rollout And Feature Flag Criteria` below documents `CodebaseConfig(graph_backend=...)`, `rust_fallback`, strict vs fallback behavior, CI gates, and default-backend promotion requirements. +- [x] Add migration notes for unsupported APIs. owner: codex. Result: `python-compat.md` now documents current compact-mode unsupported behavior, the typed `RustBackendUnsupportedError` contract, and the distinction between cold fallback to Python and unsupported method access. +- [x] Decide default backend criteria. owner: codex. Result: keep Python as default until the rollout gate, full Python unit suite, supported Rust-backend subset, and graph-wide parity targets pass on protected CI; Rust remains opt-in or `auto` before that point. +- [ ] Flip default to Rust only after memory, speed, and parity targets are met. +- [ ] Keep Python backend available for one release after Rust becomes default. + +## Phase 7: Docs, Distribution, And Launch + +- [x] Define the public product positioning for graph-sitter. owner: Nash. Result: `docs-site-vercel-plan.md` defines the plain-language message: Graph-sitter lets users write Python programs that understand and safely edit whole codebases by graphing files, symbols, imports, calls, and usages. +- [x] Audit the current documentation setup and choose the docs-site architecture. owner: Nash. Result: keep `docs/` on Mintlify, add a separate Vercel landing app under `site/`, and configure the Vercel Root Directory to `site` after review. +- [x] Add first Vercel landing-page scaffold. owner: codex. Result: added a self-contained Next.js app under `site/` with conservative product copy, future CLI-direction messaging, and local/Vercel run instructions. +- [x] Add landing-site build CI. owner: codex. 
Result: `.github/workflows/site-build.yml` runs `npm ci` and `npm run build` inside `site/` on landing-page changes. +- [x] Tighten docs/site/Vercel setup path. owner: codex. Result: refreshed landing hero copy, Graph-sitter docs branding, local build/validation commands, authenticated Vercel preview instructions, the missing `/cli/expert` docs route, and Mintlify validation blockers while keeping production cutover blocked on integrator approval. +- [x] Draft accurate setup docs for local development and Rust-backed operation. owner: Nash/codex. Result: `docs/introduction/installation.mdx` now covers `uv tool install`, `uvx`, branch-built wheel validation, strict/fallback modes, and `CodebaseConfig`; `docs/cli/uvx.mdx` documents parse/run/transform workflows and release gates. +- [x] Define a Vercel preview and production deployment workflow. owner: Nash. Result: plan keeps current docs production untouched, uses Vercel preview URLs for a future `site/` app, and defers production domain cutover until integrator review. +- [x] Define the `uvx graph-sitter ...` CLI command surface. owner: Lovelace. Result: `uvx-cli-plan.md` proposes `graph-sitter parse`, `graph-sitter run`, future `graph-sitter transform`, backend flags, JSON output, and check/write modes. +- [x] Identify package metadata and entry-point changes needed for `uvx graph-sitter`. owner: Lovelace. Result: add `graph-sitter = "graph_sitter.cli.cli:main"` alongside `gs`, then package the Rust extension before promising published-package `--backend rust` through `uvx`. +- [x] Add initial `graph-sitter` console alias and parse command. owner: codex. Result: `pyproject.toml` now declares `graph-sitter = "graph_sitter.cli.cli:main"`, and `graph-sitter parse [PATH] --backend python|rust|auto --language auto|python|typescript --format summary|json` emits repo summary counts without `.codegen` initialization. +- [x] Add subdirectory scoping to `graph-sitter parse`. owner: codex. 
Result: `graph-sitter parse [PATH] --subdir src --subdir packages/app` threads selected subdirectories through `ProjectConfig` for Python and Rust backend file discovery, reports `subdirectories` in JSON, and is documented in the CLI docs. +- [x] Add CLI smoke tests for parse and transformation flows. owner: codex. Result: focused CLI tests now cover Python parse, TypeScript parse, clean JSON stdout, strict Rust-backend unavailable behavior, path-aware `graph-sitter run LABEL PATH`, typed Pydantic `--arguments`, and rejection of unused arguments. +- [x] Harden the `uvx graph-sitter` parse/transform command surface. owner: codex. Result: `graph-sitter transform MODULE:OBJECT PATH` now requires explicit `--check` or `--write`, CLI version metadata uses `graph-sitter`, and clean `uvx --from SOURCE` Python-backend parse/transform works on Python 3.12 and 3.13. +- [x] Bundle the Rust extension into graph-sitter wheels. owner: codex. Result: enabled the Hatch custom wheel hook to build `graph-sitter-py`, force-include `graph_sitter_py{EXT_SUFFIX}` into platform wheels, added `check_wheel_rust_backend.sh`, and wired CI to prove `uvx --from dist/WHEEL.whl graph-sitter parse --backend rust --fallback error`. +- [x] Add artifact-level `uvx graph-sitter transform` smokes. owner: codex. Result: `check_wheel_rust_backend.sh` now installs the built wheel through `uvx --from dist/WHEEL.whl`, validates wheel contents for `graph_sitter_py` and `codemods`, runs `--help`, Python parse, strict Rust parse, strict Rust `transform --check` without target mutation, and strict Rust `transform --write` with target mutation. +- [x] Add artifact-level TypeScript `uvx graph-sitter` parse smoke. owner: codex. Result: `check_wheel_rust_backend.sh` now also creates a tiny TypeScript repo and proves strict Rust `graph-sitter parse --language typescript --backend rust --fallback error --format json` from the built wheel. +- [x] Add artifact-level TypeScript `uvx graph-sitter transform` smoke. owner: codex.
Result: `check_wheel_rust_backend.sh` now also proves a TypeScript exported function rename with strict Rust `transform --check` and `transform --write` from the built wheel. +- [x] Add artifact-level registered `uvx graph-sitter run` smoke. owner: codex. Result: `check_wheel_rust_backend.sh` now proves a built wheel can resolve target-owned `.codegen/codemods` through `uvx --from dist/WHEEL.whl`, run strict Rust registered codemods with `--check` and `--write`, and preserve `--subdir` scoping in the check sandbox. +- [x] Smoke release-built wheels before upload. owner: codex. Result: `check_wheel_rust_backend.sh` accepts an existing wheel via `--wheel`, and `.github/workflows/release.yml` runs the Python/TypeScript parse, transform, registered run, and wheel-content smoke against each `cibuildwheel` artifact before upload. +- [x] Assert release wheel CLI entry points and source isolation. owner: codex. Result: wheel smokes now verify `graph-sitter` and `gs` console scripts in `entry_points.txt`, invoke both help commands through `uvx --from`, and run all installed-artifact commands from a temporary directory with `PYTHONPATH` unset and a fresh `UV_CACHE_DIR`. +- [x] Assert release wheel doctor, parse-output, and exact transform diff gates. owner: codex. Result: wheel smokes now run `doctor --backend rust` for Python and TypeScript, assert parse JSON schema/backend/language/subdirectory/count/fallback fields, verify `parse --output` writes newline-terminated JSON with empty stdout, and assert exact git diffs for Python, TypeScript, registered, and scoped registered codemod checks. +- [x] Add artifact-level large TypeScript `uvx graph-sitter` parse proof. owner: codex. Result: `check_wheel_pinned_typescript_repo.py` builds or accepts a wheel, runs strict Rust `graph-sitter parse` through `uvx --from dist/WHEEL.whl` against pinned Next.js `v15.0.0`, and compares summary counts with the committed compact TypeScript golden snapshot.
+- [x] Add artifact-level installed-wheel TypeScript performance proof. owner: codex. Result: `check_wheel_pinned_typescript_repo.py --compare-python-backend` samples installed-wheel Python and strict Rust `uvx` process-tree RSS on pinned Next.js; local run measured Rust at 10.352s / 537.5 MB versus Python at 57.956s / 4505.6 MB, a 5.598x parse-elapsed and 8.383x sampled-RSS improvement. +- [x] Add artifact-level large TypeScript `uvx graph-sitter transform` proof. owner: codex. Result: `check_wheel_pinned_typescript_repo.py --run-transform-proof` clones pinned Next.js, runs strict Rust `graph-sitter transform` through `uvx --from dist/<wheel>.whl`, renames `AppRouterAnnouncer`, rewrites the importing usage, and asserts only the expected two files changed; local run measured 11.834s / 525.8 MB sampled RSS for the transform. +- [x] Add artifact-level large Python `uvx graph-sitter` parse/performance proof. owner: codex. Result: `check_wheel_pinned_python_repo.py --compare-python-backend` samples installed-wheel Python and strict Rust `uvx` process-tree RSS on pinned Airflow `2.10.5`; local run measured Rust at 4.913s / 487.0 MB versus Python at 48.242s / 5429.3 MB, a 9.818x parse-elapsed and 11.148x sampled-RSS improvement. +- [x] Add artifact-level large Python `uvx graph-sitter transform` proof. owner: codex. Result: `check_wheel_pinned_python_repo.py --run-transform-proof` clones pinned Airflow, runs strict Rust `graph-sitter transform` through `uvx --from dist/<wheel>.whl`, adds `from typing import Any`, renames `__getattr__` to `__getattr_wheel_proof__`, and asserts only `airflow/__init__.py` changed; local run measured 5.920s / 500.1 MB sampled RSS for the transform. +- [x] Add `graph-sitter doctor` setup diagnostic command. owner: codex. 
Result: `doctor --json` reports Python/package/platform/parser dependency readiness, optional Rust extension metadata, and `doctor --backend rust --language python|typescript --json` runs a generated tiny-repo strict Rust parse smoke for setup docs and future skill validation. +- [x] Publish public benchmark and correctness proof docs. owner: codex. Result: `docs/benchmarks/large-repos.mdx` summarizes Airflow and Next.js Codebase, installed-wheel `uvx`, and codemod proof numbers; `docs/correctness/parity.mdx` documents tested parity scope, known deltas, safety modes, and pre-default gates. +- [x] Define skill distribution plan. owner: codex. Result: added `rust-rewrite/skill-distribution-plan.md` with the proposed skill trigger, folder shape, progressive-disclosure references, `uvx graph-sitter ...` positioning, validation steps, and release gates. +- [x] Refresh the landing/docs launch path for the resurrected product direction. owner: Huygens/codex. Result: landing copy now names parse graphs, reference/import graphs, codemods, Python as the shell, Rust as the scale backend, and the future `uvx graph-sitter` surface; `docs-site-vercel-plan.md` keeps Mintlify docs separate from the Vercel landing app. +- [x] Decompose the public `uvx graph-sitter` command contract for future helper agents. owner: Volta/codex. Result: `uvx-command-roadmap.md` documents parse, run, transform, backend/fallback, distribution-smoke, correctness, benchmark, and release tasks as `[ ]` work items. +- [x] Expand the docs and Vercel launch plan for the resurrected product. owner: Arendt/codex. Result: `docs-site-vercel-plan.md` now separates Mintlify docs from the Vercel landing site, documents preview/prebuilt deployment flow, records local validation commands, and keeps domain cutover blocked on explicit approval; `site/README.md` mirrors the practical Vercel commands. +- [x] Expand the public `uvx graph-sitter` command contract for docs, skills, and release gates. owner: Mendel/codex. 
Result: `uvx-command-roadmap.md` now records parse/run/transform command shapes, Python and TypeScript examples, branch-built wheel proof versus published-package validation, release artifact gates, skill invocation guidance, and open integrator decisions. +- [x] Create repository-local graph-sitter skill prototype. owner: codex. Result: added `rust-rewrite/skill-prototype/graph-sitter/` for review with generated `agents/openai.yaml`, concise references, current CLI/Rust-backend status, and validation guidance; it is not installed into `~/.codex/skills`. +- [x] Add resurrected docs/site/Vercel strategy. owner: Godel/codex. Result: `docs-site-strategy.md` keeps Mintlify as the docs source of truth, keeps `site/` as the Vercel landing app, documents preview deployment flow, and adds a docs/landing/skill task ledger. +- [x] Document the combined `uvx graph-sitter` and Codex skill distribution path. owner: delegated-worker. Result: added `rust-rewrite/uvx-skill-distribution-plan.md` with target parse/transform/init UX, PyPI/wheel prerequisites, skill packaging, staged validation checklists, and release risks. +- [x] Align public CLI docs with the `uvx graph-sitter` command taxonomy. owner: delegated-worker. Result: documented path-aware `run`, explicit check/write safety, one-shot `transform MODULE:OBJECT`, uvx examples, and CLI navigation links while leaving implementation untouched. +- [x] Stabilize parse JSON handoff for `uvx graph-sitter parse`. owner: codex. Result: parse JSON output now includes `schema_version: 1`, supports `--format json --output FILE` with empty stdout, and documents the JSON-only output-file contract for scripts and agents. +- [x] Harden CLI copy and auto-backend disclosure. owner: codex. Result: `graph-sitter run --help` no longer uses legacy "codegen function" wording, and parse CLI tests prove `--backend auto --fallback python` reports actual Python fallback plus `rust_backend_error` when `graph_sitter_py` is unavailable. 
+- [x] Harden transform fallback and no-op check coverage. owner: codex. Result: import-path transform tests now prove Rust strict failure leaves the target repo unchanged, Rust with Python fallback still applies writes, and no-op `--check` exits zero with the expected no-change message. +- [x] Add scoped import-path transforms. owner: codex. Result: `graph-sitter transform` now accepts repeated `--subdir` filters, normalizes them relative to the target repository, and proves write-mode transforms leave unselected files untouched. +- [x] Add scoped registered codemods. owner: codex. Result: `graph-sitter run` now accepts repeated `--subdir` filters for local runs, preserves them in `--check` sandboxes, and documents targeted large-repo usage. +- [ ] Add published-package `uvx graph-sitter` validation transcript to setup docs before release. owner: release/docs agent. Notes: use uploaded `graph-sitter==<version>` artifacts, not branch-built wheels, before claiming PyPI-backed Rust parse or transform. + +## Rollout And Feature Flag Criteria + +Current public control surface: + +- `CodebaseConfig(graph_backend=GraphBackend.PYTHON)` remains the default and builds the existing Python graph. +- `CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR)` is strict compact mode for tests and performance proofs. It must keep `CodebaseContext.nodes` blocked and raise `RustBackendUnsupportedError` for unsupported APIs rather than silently materializing the Python graph. +- `CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.PYTHON)` is the compatibility escape hatch. Supported compact APIs stay on Rust; unsupported methods may promote the context to the Python graph. +- `CodebaseConfig(graph_backend=GraphBackend.AUTO)` is reserved for gradual rollout once language coverage and packaging are stable enough to select Rust only for supported repositories and fall back predictably. 
+ +Merge gate for the opt-in Rust backend: + +- `rust-rewrite/tools/check_fast.sh` must pass locally and in PR CI. +- `rust-rewrite/tools/check_pinned_large_repos.sh` must pass in nightly/manual CI and write `rollout-readiness.json`. +- The readiness gate must show Airflow Rust `Codebase` construction at least 2x faster, Next.js Rust `Codebase` construction at least 1.5x faster on hosted CI, and both at least 4x lower max RSS than recorded Python baselines. +- Pinned semantic parity must have no unexpected mismatches, and the selected semantic runs must meet the configured wall/RSS improvement ratios. Any known delta must be explicitly listed in `check_pinned_semantic_parity.py`. +- Pinned codemod proof must pass for Airflow and Next.js while keeping broad Rust record/handle caches cold. +- Unsupported APIs must either raise `RustBackendUnsupportedError` in strict mode or promote intentionally under `rust_fallback="python"`. + +Default-backend promotion criteria: + +- Full Python-backend unit suite remains green. +- Rust-backend supported subset is explicitly enumerated and green. +- P0 public read APIs, graph queries, and codemod mutation flows have Python-vs-Rust parity coverage for Python and TypeScript. Before flipping the default, `check_p0_parity_coverage.py --require-complete` must pass. +- Pinned large-repo graph snapshots cover files, imports, exports, references, external references, dependencies, and subclass edges where applicable. +- Packaging proves the PyO3 extension on supported Python versions and target OSes. +- Python backend remains available for at least one release after any default flip. + +## Acceptance Targets + +- [x] Cold parse memory on a representative huge repo is less than 25% of current Python backend. owner: codex. Result: latest pinned readiness run shows strict Rust `Codebase` max RSS at 266.3 MB for Airflow and 348.1 MB for Next.js, 13.029x and 8.906x lower than recorded Python baselines respectively. 
+- [x] Cold parse wall time is no slower than current Python backend, with a target of at least 2x faster. owner: codex. Result: latest pinned readiness run shows strict Rust `Codebase` construction at 4.399s for Airflow and 8.276s for Next.js, 4.239x and 3.016x faster than recorded Python baselines respectively. +- [ ] P0 query APIs have parity with current behavior. Notes: `p0-parity-coverage.json` currently validates 5 parity-covered groups and 1 fallback-covered group, but keeps this target open for the broader TypeScript expression/type surface and full graph-wide large-repo semantic parity. +- [x] Existing unit tests pass for Python backend throughout the rewrite. owner: codex. Result: full local `tests/unit` suite passed with 2,158 passed, 58 skipped, 12 xfailed in 77.88s. +- [x] Rust backend has golden snapshots for graph IR and dependency edges. owner: codex. Result: committed Airflow and Next.js compact golden snapshots cover files, symbols, imports, import resolutions, external modules, references, external references, dependencies, plus TypeScript exports/subclass edges; fresh pinned readiness replay verified them. +- [x] Unsupported Python APIs fail explicitly or fall back to Python backend. owner: codex. Result: supported-subset manifest includes strict unsupported API errors, missing-extension strict failure, and `rust_fallback="python"` promotion tests, all run through `check_fast.sh`. +- [x] Docs site accurately explains setup, Rust backend status, and CLI usage. owner: codex. Result: installation plus CLI docs cover local install, `doctor`, `parse`, `run`, `transform`, `uvx`, strict Rust mode, Python fallback, and branch-built wheel caveats. +- [x] `uvx graph-sitter ...` is documented and backed by tested parse and transformation entry points. owner: codex. 
Result: docs now cover `uvx graph-sitter parse`, `run`, and `transform`; wheel-distributed Rust parse/transform/run proofs exist, while published-package validation remains a separate release gate. +- [x] A graph-sitter agent skill distribution plan exists. owner: codex. Result: see `rust-rewrite/skill-distribution-plan.md`; the actual discoverable skill folder remains pending install-location and CLI finalization. + +## Agent Log + +- [x] 2026-06-18: Initial strategy file created on `rust-rewrite` branch. owner: codex. Notes: ready for helper agents to claim phase tasks. +- [x] 2026-06-18: Integrator created seven worktrees and spawned six helper agents; PyO3 compatibility was queued due to agent concurrency limit. owner: codex. +- [x] 2026-06-18: Six completed helper branches reviewed and their artifacts staged for integration. owner: codex. Notes: PyO3 compatibility agent is now running as Wegener. +- [x] 2026-06-19: Spawned launch/distribution helper agents. owner: codex. Notes: Nash owns docs/site/Vercel strategy, and Lovelace owns the future `uvx graph-sitter ...` CLI/distribution path. +- [x] 2026-06-19: Added graph-sitter skill distribution plan. owner: codex. Notes: documented the future Codex skill trigger, references, validation workflow, and release gates without creating a discoverable skill folder before install-location confirmation. +- [x] 2026-06-19: Integrated docs/site/Vercel and `uvx graph-sitter` plans. owner: codex. Notes: merged Nash's `docs-site-vercel-plan.md` and Lovelace's `uvx-cli-plan.md` into `rust-rewrite` and updated Phase 7 status while leaving implementation and smoke-test work open. +- [x] 2026-06-19: Added first `uvx graph-sitter` implementation slice. owner: codex. Notes: added the `graph-sitter` console-script alias, a read-only `parse` command with backend/language/JSON flags, and focused CLI tests for the alias, Python parse summary, and clean Rust-backend unavailable behavior. 
+- [x] 2026-06-19: Added first Vercel landing-page scaffold. owner: codex. Notes: created the isolated `site/` Next.js app and kept deployment cutover as a Vercel project setting decision. +- [x] 2026-06-19: Tightened docs/site/Vercel setup path. owner: codex. Notes: updated landing copy, docs branding, README commands, preview-deploy instructions, missing `/cli/expert`, and Mintlify sample validation warnings; production Vercel cutover remains blocked on docs-domain and integrator approval. +- [x] 2026-06-19: Audited `uvx` Rust-wheel blockers. owner: Hypatia. Notes: `uvx-cli-plan.md` now records that Hatch/Cython packaging does not yet bundle the PyO3 extension, `graph_sitter_py` is imported directly, and installed-wheel `uvx --from dist/<wheel>.whl ...` smoke tests are required before advertising `--backend rust`. +- [x] 2026-06-19: Added path-aware `graph-sitter run` smoke coverage. owner: codex. Notes: `run` now accepts an explicit repo path without active-session lookup, propagates typed Pydantic `--arguments`, keeps active-session behavior as fallback, and has focused CLI tests for Python codemod mutation plus TypeScript parse. +- [x] 2026-06-19: Added sandboxed `graph-sitter run --check`. owner: codex. Notes: check mode copies the target working tree into a temporary Git repo, runs the codemod there, reports the semantic diff, exits non-zero when changes would be produced, and leaves the target repo unchanged; `--write` is now available as an explicit compatibility-preserving write flag. +- [x] 2026-06-19: Added import-path `graph-sitter transform`. owner: codex. Notes: `transform MODULE:OBJECT PATH` now loads file or module objects, supports functions plus `Codemod.execute` subclasses/instances, reuses backend/language/check/write behavior from `run`, and has focused tests for function check mode, class write mode, and argument rejection. +- [x] 2026-06-19: Hardened `uvx graph-sitter` distribution surface. owner: codex. 
Notes: worked in `/Users/jayhack/CS/CODEGEN/graph-sitter-rust-uvx-distribution` on `codex/rust-rewrite-uvx-distribution`; enforced explicit `transform --check|--write`, changed CLI version metadata to `graph-sitter`, packaged `codemods` for clean import-path transform installs, constrained clean-uvx parser/runtime dependencies, aligned the uvx and skill distribution plans with implemented parse/transform commands, and kept the Rust-extension wheel blocker explicit. +- [x] 2026-06-19: Bundled `graph_sitter_py` into wheels. owner: codex. Notes: the Hatch custom wheel hook now invokes Cargo for `graph-sitter-py`, copies the built extension to `graph_sitter_py{EXT_SUFFIX}`, force-includes it in the wheel, and marks wheels platform-specific; `check_wheel_rust_backend.sh` proves installed-wheel Rust parsing through `uvx`. +- [x] 2026-06-19: Created repository-local graph-sitter skill prototype. owner: codex. Notes: initialized with the system skill-creator helper under `rust-rewrite/skill-prototype/graph-sitter/`, kept it non-discoverable globally, and documented Python-shell-first usage, local `parse`/`transform MODULE:OBJECT`, Rust `uvx` wheel blocker, and supported-subset parity wording. +- [x] 2026-06-19: Refreshed docs/landing/Vercel launch plan. owner: Huygens/codex. Notes: integrated the updated `docs-site-vercel-plan.md`, site README notes, landing architecture section, and conservative copy for Rust-backed wheels and `uvx graph-sitter`. +- [x] 2026-06-19: Added installed-wheel Next.js parse gate. owner: codex. Notes: `check_wheel_pinned_typescript_repo.py` validates branch-built wheel distribution against the pinned Next.js compact TypeScript golden through `uvx --from`. +- [x] 2026-06-19: Added installed-wheel Next.js performance comparison. owner: codex. 
Notes: the same wheel gate now supports `--compare-python-backend` and proved branch-built strict Rust `uvx` parse is 5.598x faster and 8.383x lower sampled process-tree RSS than installed-wheel Python on pinned Next.js. +- [x] 2026-06-19: Added installed-wheel Next.js transform proof. owner: codex. Notes: `check_wheel_pinned_typescript_repo.py --run-transform-proof` proved branch-built strict Rust `uvx transform --write` can rename `AppRouterAnnouncer`, update the importing usage, and touch only the two expected pinned Next.js files. +- [x] 2026-06-19: Added standalone `uvx graph-sitter` command roadmap. owner: Volta/codex. Notes: documented the final parse/run/transform command contract, backend/fallback semantics, artifact smoke gates, and multi-agent checklist for implementation, packaging, release, correctness, and benchmarks. +- [x] 2026-06-19: Added artifact-level transform smokes to the wheel proof. owner: codex. Notes: `check_wheel_rust_backend.sh` now proves branch-built wheels can run strict Rust parse and strict Rust import-path transforms with both `--check` and `--write` through `uvx --from dist/<wheel>.whl`. +- [x] 2026-06-19: Spawned and integrated docs/Vercel and `uvx` command workers. owner: codex. Notes: Arendt expanded the Mintlify-versus-Vercel launch plan and site README, while Mendel expanded the public `uvx graph-sitter` command roadmap for parse/run/transform, skill usage, and release validation; production deploy and published-package claims remain intentionally blocked. +- [x] 2026-06-19: Added installed-wheel Airflow parse/performance/transform gate. owner: codex. Notes: `check_wheel_pinned_python_repo.py` proves branch-built strict Rust `uvx graph-sitter parse` matches the committed Airflow compact golden summary, beats installed-wheel Python by 9.818x parse elapsed and 11.148x sampled process-tree RSS, and can run an installed-wheel strict Rust transform that mutates only `airflow/__init__.py`. +- [x] 2026-06-19: Added `graph-sitter doctor`. 
owner: codex. Notes: the CLI now has a setup diagnostic command for Python/package/parser dependency checks, Rust extension availability, machine-readable JSON, and optional generated strict Rust parse smokes for Python or TypeScript. +- [x] 2026-06-19: Added `graph-sitter parse --subdir`. owner: codex. Notes: repeated subdirectory/file filters now scope parse work before backend construction, work through the same selected-file discovery as the Rust compact backend, and are covered by focused CLI tests and docs. +- [x] 2026-06-19: Added installed-wheel TypeScript parse proof. owner: codex. Notes: the wheel smoke now runs a strict Rust TypeScript parse through `uvx --from dist/<wheel>.whl`, so branch-built artifacts prove both Python and TypeScript parse entry points before published-package validation. +- [x] 2026-06-19: Added installed-wheel TypeScript transform proof. owner: codex. Notes: the wheel smoke now runs a strict Rust TypeScript transform through `uvx --from dist/<wheel>.whl`, checks that preview mode does not mutate, and verifies write mode renames an exported function in the target repo. +- [x] 2026-06-19: Added combined `uvx graph-sitter` and skill distribution plan. owner: delegated-worker. Notes: documented the target one-shot parse/transform/init UX, published-package and wheel prerequisites, skill package contents, staged tests, and open risks without touching implementation or Vercel site files. +- [x] 2026-06-19: Aligned CLI docs with `uvx graph-sitter` taxonomy. owner: delegated-worker. Notes: added a dedicated transform docs page, updated run docs for explicit target paths and check/write modes, wired transform into the CLI nav, and expanded the uvx roadmap with command taxonomy plus packaging constraints. +- [x] 2026-06-18: PyO3 compatibility helper completed and its planning artifact was staged for integration. owner: codex. 
+- [x] 2026-06-18: Implemented first Rust Python compact-index slice and benchmark comparison; initial measurements show 9x-22x wall-time improvement and 70x-104x RSS improvement on this repo for the implemented slice. owner: codex. +- [x] 2026-06-18: Exposed the compact Python index through the PyO3 module and verified a Python import smoke against this repo. owner: codex. Notes: extension returned 1127 files, 3117 symbols, and 6414 imports for the current checkout. +- [x] 2026-06-18: Added Python-shell Rust index integration behind `CodebaseConfig(graph_backend=...)`, selected-file PyO3 indexing from `RepoOperator`, and a facade benchmark. owner: codex. Notes: selected-file facade matched Python's 1129-file discovery and ran 4.7x faster with 4.7x lower process max RSS than Python parse/object materialization on this checkout. +- [x] 2026-06-18: Added compact Rust Python import resolution records. owner: codex. Notes: the Python-facing Rust facade now emits 432 internal import-resolution records on this checkout and remains 4.3x faster with 4.6x lower process max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added typed Python facade accessors and a deterministic compact graph snapshot for record-level parity testing. owner: codex. Notes: this prepares the large-repo golden import/reference graph workflow. +- [x] 2026-06-18: Added compact Rust extraction for top-level Python globals and symbol-target import resolution for imported globals. owner: codex. +- [x] 2026-06-18: Made opt-in `CodebaseConfig(graph_backend="rust")` skip eager Python graph construction and expose compact `rust_*` record properties on `Codebase`. owner: codex. Notes: current checkout constructs 4.0x faster with 4.6x lower process max RSS than Python parse/object materialization while blocking lazy Python graph materialization. 
+- [x] 2026-06-18: Added lightweight Rust compact handles for Python `Codebase.files`, `symbols`, `classes`, `functions`, `global_vars`, `imports`, and basic `get_*` queries. owner: codex. Notes: current checkout constructs and exercises public read handles 5.3x faster with 4.6x lower process max RSS than Python parse/object materialization while keeping `CodebaseContext.nodes` blocked. +- [x] 2026-06-18: Added compact Python `ReferenceRecord` extraction for same-file and imported top-level symbol references inside top-level classes/functions. owner: codex. Notes: current checkout emits 3,666 compact references and remains 5.0x faster with 4.1x lower process max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added compact Python `DependencyRecord` construction from references. owner: codex. Notes: current checkout emits 2,020 de-duplicated dependency edges and remains 4.6x faster with 4.1x lower process max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added first pinned large-repo benchmark runner and Airflow baseline. owner: codex. Notes: Apache Airflow `2.10.5` at `b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf` matched 4,789 Python files and measured 6.218x faster wall time with 9.882x lower max RSS for the current compact Rust `Codebase` slice. +- [x] 2026-06-18: Added first pinned Airflow compact graph golden. owner: codex. Notes: committed stable hashes/samples for 4,789 files, 23,663 symbols, 40,580 imports, 19,011 import resolutions, 95,292 references, and 35,489 dependencies; the opt-in pytest wrapper can verify it against the pinned checkout. +- [x] 2026-06-18: Added nested Python function/method compact symbols and innermost reference source attribution. owner: codex. Notes: Airflow compact coverage now emits 52,339 symbols, 112,238 references, and 71,348 dependencies while staying 5.309x faster with 9.418x lower max RSS than Python parse/object materialization. 
+- [x] 2026-06-18: Added first local-binding shadow filter for compact Python references. owner: codex. Notes: parameters, local assignments, and nested definitions no longer resolve to imported/top-level symbols; Airflow compact graph now emits 105,739 references and 68,927 dependencies while staying 5.048x faster with 13.315x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added local-import shadow filtering for compact Python references. owner: codex. Notes: function-local imports no longer resolve later uses to imported/top-level symbols; Airflow compact graph now emits 105,624 references and 68,869 dependencies while staying 4.870x faster with 13.195x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added control-flow binding shadow filtering for compact Python references. owner: codex. Notes: `for`, `with ... as ...`, and `except ... as ...` targets no longer resolve later uses to imported/top-level symbols; Airflow compact graph now emits 105,467 references and 68,848 dependencies while staying 5.232x faster with 13.332x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added comprehension and match-pattern capture shadow filtering for compact Python references. owner: codex. Notes: this checkout now emits 4,089 compact references and 2,949 dependencies; pinned Airflow remained graph-stable at 105,467 references and 68,848 dependencies while staying 5.100x faster with 13.395x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added range-scoped lambda-parameter shadow filtering for compact Python references. owner: codex. Notes: lambda parameters now shadow only inside lambda bodies while default-value references still resolve outward; pinned Airflow stayed graph-stable at 105,467 references and 68,848 dependencies while staying 4.981x faster with 13.456x lower max RSS than Python parse/object materialization. 
+- [x] 2026-06-18: Added `global` declaration handling for compact Python references. owner: codex. Notes: `global` declarations no longer hide module-level symbols behind local assignment shadows; Airflow compact graph now emits 105,607 references and 68,917 dependencies while staying 4.987x faster with 13.393x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Skipped Python attribute field names as bare compact references. owner: codex. Notes: object-side references still resolve, but `obj.helper` no longer creates a false standalone `helper` dependency; Airflow compact graph now emits 104,622 references and 68,340 dependencies while staying 5.023x faster with 15.744x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Scoped comprehension target shadowing to comprehension expressions. owner: codex. Notes: prevents `[Base for Base in items]` from hiding later `Base` references in the enclosing function; Airflow compact graph stayed stable at 104,622 references and 68,340 dependencies while staying 4.899x faster with 15.745x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added imported module member references to the compact Rust graph. owner: codex. Notes: `module.some_func`, `alias.SomeClass`, and exact `pkg.module.some_func` qualifiers now resolve through existing import-resolution rows; Airflow compact graph now emits 109,282 references and 71,534 dependencies while staying 4.781x faster with 13.394x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added `nonlocal` declaration shadowing for compact Python references. owner: codex. Notes: `nonlocal helper` inside nested functions no longer creates a false imported/top-level `helper` reference; Airflow compact graph stayed stable at 109,282 references and 71,534 dependencies while staying 4.663x faster with 13.244x lower max RSS than Python parse/object materialization. 
+- [x] 2026-06-18: Added direct Python package re-export import resolution. owner: codex. Notes: `from pkg import Symbol` now follows matching imported bindings in `pkg/__init__.py`; Airflow compact graph now emits 109,655 references and 71,788 dependencies while staying 4.562x faster with 13.307x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added wildcard import/re-export chain resolution to the compact Rust graph. owner: codex. Notes: fixed-point exported-name tables now propagate `from module import *` across indexed internal modules; Airflow compact graph now emits 109,743 references and 71,863 dependencies while staying 4.806x faster with 13.136x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added nested module-prefix attribute resolution to the compact Rust graph. owner: codex. Notes: `from a import b; b.c.d()` and `import a.b; a.b.c.d()` now resolve through indexed internal module prefixes, including namespace-package-style prefixes without concrete `__init__.py` files; Airflow compact graph now emits 109,817 references and 71,932 dependencies while staying 4.374x faster with 12.940x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added static `__all__` filtering for compact Python wildcard imports. owner: codex. Notes: literal `__all__ = ["Name"]` style assignments now restrict `from module import *` expansion without affecting explicit named imports; pinned Airflow stayed graph-stable at 109,817 references and 71,932 dependencies while staying 4.454x faster with 13.010x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust dependency and usage handles through the Python shell. owner: codex. Notes: compact symbols now answer `dependencies`, `usages`, and `symbol_usages` from Rust records, preparing pinned large-repo parity tests to assert graph APIs instead of only raw record dumps. 
Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.675x faster with 13.099x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust import usage handles through the Python shell. owner: codex. Notes: compact imports now answer `usages` and `symbol_usages` from Rust references grouped by `import_id`, which lets parity tests assert import-graph consumers without materializing the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.735x faster with 12.938x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust file inbound import handles through the Python shell. owner: codex. Notes: compact files now answer `inbound_imports` and `importers` from Rust import-resolution records, moving another P0 `SourceFile` graph query off the Python object graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.570x faster with 12.926x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust file import lookup APIs through the Python shell. owner: codex. Notes: compact files now answer `import_statements`, `has_import`, and `get_import` from Rust import records, including alias/module/source lookup, moving another P0 `SourceFile` query off the Python object graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.570x faster with 12.981x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust topological symbol ordering through the Python shell. owner: codex. Notes: compact files now answer `symbols_sorted_topologically` from Rust dependency records, moving another P0 `SourceFile` graph query off the Python object graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.835x faster with 12.927x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust file byte-range lookup through the Python shell. owner: codex. 
Notes: compact files now answer `get_nodes` and `find_by_byte_range` from Rust symbol/import records, giving future pinned parity tests a cheap way to assert known import/reference spans. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.738x faster with 12.910x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust file name-resolution maps through the Python shell. owner: codex. Notes: compact files now answer `valid_symbol_names`, `valid_import_names`, `resolve_name`, `resolve_attribute`, and `get_node_by_name` for local symbols/imports without materializing the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.634x faster with 12.940x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust Python import-string helpers through the Python shell. owner: codex. Notes: compact files, symbols, and imports now answer `import_module_name`, `get_import_module_name_for_file`, and `get_import_string`, giving parity tests canonical import text without materializing the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.639x faster with 12.905x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust module-import exports through the Python shell. owner: codex. Notes: compact imports now return imported module symbols/imports from `imported_exports` for file-target module imports, matching the Python `PyImport` shape without materializing the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.729x faster with 12.975x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust import attribute resolution through the Python shell. owner: codex. Notes: compact imports now answer `resolve_attribute` through file-target module imports, giving namespace-style parity checks another graph-free shell query. 
Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.789x faster with 13.013x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust symbol hierarchy queries through the Python shell. owner: codex. Notes: compact symbols now answer `parent_symbol`, `child_symbols`, and recursive `descendant_symbols` from Rust parent IDs, so nested method/function parity checks no longer need the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.633x faster with 12.973x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust symbol identity parity through the Python shell. owner: codex. Notes: compact nested symbols now compose `full_name` through parent IDs and Python compact symbols report `is_exported=True`, matching Python symbol defaults. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.644x faster with 12.934x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust symbol name handles through the Python shell. owner: codex. Notes: compact symbols now return a read-only name handle from `get_name`, with `source`, `_source`, `name`, and `full_name` fields for existing HasName-style callers. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.518x faster with 12.953x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust import name handles through the Python shell. owner: codex. Notes: compact imports now return a read-only name handle from `get_name`, filling the same HasName-style shape for import callers. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.489x faster with 12.933x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Exposed compact Rust file/import descendant traversal through the Python shell. owner: codex. 
Notes: compact files and imports now answer `descendant_symbols` with existing read semantics, allowing dependency-style walkers to stay off the Python graph. Refreshed pinned Airflow benchmark: Rust `Codebase` is 4.813x faster with 13.006x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added fast Rust rewrite CI lane and closed current repo ruff failures. owner: codex. Notes: `rust-rewrite/tools/check_fast.sh` passed locally and `.github/workflows/rust-rewrite-fast.yml` wires it into PR/push CI; full `uv run ruff check` is now clean after fixing type-only forward references. Audits confirmed the Rust backend supports only Python for now and has no actual codemod mutation tests yet, so Next.js/TS and mutation parity are now explicit open tracks. +- [x] 2026-06-18: Exposed compact Rust TypeScript exports through the Python shell. owner: codex. Notes: `Codebase.exports`, `file.exports`, `file.get_export`, export statement grouping, named/default filters, exported/resolved symbol links, and import-string helpers now read from compact `ExportRecord` rows without materializing the Python graph. +- [x] 2026-06-18: Added Rust compact codemod mutation smoke tests. owner: codex. Notes: two tests now execute real `Codemod` wrappers for batched symbol/import edits and move-to-file import updates, then commit with the Python graph still blocked. +- [x] 2026-06-18: Added opt-in pinned Next.js Rust `Codebase` compatibility/performance check. owner: codex. Notes: the checker validates 13,688 compact files, 23,957 symbols, 28,210 imports, 16,026 exports, 13,462 import resolutions, 47,676 references, 16,041 dependencies, blocked Python graph access, and conservative wall/RSS ceilings for the Python-facing TypeScript shell. +- [x] 2026-06-18: Added first compact TypeScript/JavaScript syntax index through Rust and PyO3. owner: codex.
Notes: `TypeScriptIndex` now parses TS-like files with TSX grammar and emits syntax-only files, symbols, imports, dynamic import bindings, and exports without resolver/dependency edges. Fast checks and pinned Airflow snapshot passed; refreshed pinned Airflow benchmark stayed 4.637x faster with 13.031x lower max RSS than Python parse/object materialization. +- [x] 2026-06-18: Added TS fixture golden snapshot and pinned Next.js benchmark proof. owner: codex. Notes: Rust now reads source bytes lossily instead of aborting on non-UTF-8 fixtures. Next.js `v15.0.0` at `51bfe3c1863b191f4b039bc230e8ed5c57b0baf3` indexes 13,688 selected TS/JS files in 3.347s with 200.3 MB max RSS versus Python's 24.959s and 3100.1 MB parse/object materialization, a 7.457x wall and 15.475x RSS improvement for the current syntax-only TS slice. +- [x] 2026-06-18: Added first Rust-backend mutation smoke coverage. owner: codex. Notes: compact Rust file handles can now edit, replace, create, and remove files through the existing Python transaction manager with `sync_graph=False`; the test verifies disk content and that the Python graph remains unbuilt. Symbol/import mutation parity remains open. +- [x] 2026-06-18: Added compact Rust symbol rename and string add-import mutation smoke coverage. owner: codex. Notes: compact symbol rename now edits the declaration and direct usage ranges from Rust reference records, while compact `file.add_import("...")` inserts import strings with duplicate suppression. The smoke test commits import insertion plus rename together with `sync_graph=False` and keeps the Python graph unbuilt. +- [x] 2026-06-18: Added compact Rust symbol-object add-import and remove mutation smoke coverage. owner: codex. Notes: compact files accept compact symbols in `add_import(...)`; compact symbols/imports queue remove transactions; focused tests verify disk output with `sync_graph=False` and the Python graph remains unbuilt. 
+- [x] 2026-06-18: Added pinned Next.js compact TypeScript/JavaScript snapshot proof. owner: codex. Notes: new deterministic tool, opt-in integration wrapper, and compact golden pin syntax-only files/symbols/imports/exports with stable hashes and samples; TypeScript resolver/reference/dependency parity remains open. +- [x] 2026-06-18: Added compact Rust decorator mutation smoke coverage. owner: codex. Notes: compact symbols now derive lightweight decorator handles from Rust byte ranges, can add decorators to classes/methods, and can remove decorator lines without building the Python graph. +- [x] 2026-06-18: Added compact Rust move-to-file mutation smoke coverage. owner: codex. Notes: compact files now support `add_symbol`/`add_symbol_from_source`, and compact symbols can move into created files while copying dependency imports or adding source-file back-edge imports without materializing the Python graph. +- [x] 2026-06-18: Added compact Rust cross-file import-update smoke coverage. owner: codex. Notes: a two-file fixture now proves `update_all_imports` moves a symbol into a created module, removes the old consumer import line, and inserts the new destination import without building the Python graph. +- [x] 2026-06-19: Added first compact TypeScript relative import-resolution slice. owner: codex. Notes: Rust TS/JS indexing stores import-resolution rows, exposes them through PyO3, snapshots them in the small TS golden and pinned Next.js tool, and resolves local default/named imports plus namespace/side-effect/re-export file edges. +- [x] 2026-06-19: Added first compact TypeScript reference/dependency graph slice. owner: codex. Notes: Rust TS/JS indexing emits and exposes reference/dependency rows through PyO3, snapshots them in the small TS golden and pinned Next.js proof, and resolves direct imported symbol uses, namespace member uses, and same-file top-level uses. +- [x] 2026-06-19: Wired the TypeScript compact index into the Python Codebase shell. owner: codex. 
Notes: strict Rust backend mode accepts TypeScript codebases, calls the PyO3 `index_typescript_path(s)` APIs, returns compact TS files/symbols/interfaces/types/imports/exports, and serves dependency/usages from Rust records while keeping the Python graph blocked. +- [x] 2026-06-19: Added compact TypeScript tsconfig path-alias resolution. owner: codex. Notes: Rust now resolves nearest tsconfig `compilerOptions.paths` and `baseUrl` specifiers. On pinned Next.js `v15.0.0`, import resolutions increased from 9,424 to 13,462, references from 37,554 to 47,676, and dependency edges from 15,176 to 16,041 while the Python graph remains blocked. +- [x] 2026-06-19: Added compact TypeScript barrel re-export symbol propagation. owner: codex. Notes: fixed-point export maps now let `import { X } from "./barrel"` and `barrel.X` namespace references resolve through named, nested, and wildcard re-exports to original symbols. The focused Rust fixture covers this path; pinned Next.js aggregate counts and hashes stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Added scoped TypeScript loop/catch shadowing. owner: codex. Notes: compact TypeScript references now avoid resolving `for...of`/`for...in` loop locals and `catch` parameters to imported symbols inside their scoped bodies while preserving later import references. The focused Rust fixture covers the false-positive case; pinned Next.js aggregate counts and hashes stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Scoped TypeScript nested callback parameter shadows. owner: codex. Notes: compact TypeScript references now treat arrow/function-expression parameters as body-scoped bindings, preventing callback params from suppressing valid imported references outside the callback. The focused Rust fixture covers the false-negative case; pinned Next.js aggregate counts and hashes stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Added TypeScript nested declaration shadowing. 
owner: codex. Notes: compact TypeScript references now treat nested function/class declaration names as scoped local bindings, avoiding false imported-symbol references for local declarations and later uses. The focused Rust fixture covers this false-positive case; pinned Next.js aggregate counts and hashes stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Added TypeScript destructuring/default and lexical block-scope shadowing. owner: codex. Notes: focused Rust fixtures now prove defaulted object/array destructuring shadows same-name imports without treating default-value expressions as bindings, while `let`/`const` declarations only shadow from the declaration through the nearest lexical boundary. Pinned Next.js aggregate counts and hashes stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Added TypeScript heritage dependency coverage. owner: codex. Notes: compact TypeScript references now include interface `extends`, class `implements`, and namespace-qualified implemented types as source-symbol dependencies. The focused Rust fixture proves the reference/dependency edges; pinned Next.js aggregate counts stayed stable at the current alias-aware baseline. +- [x] 2026-06-19: Audited TypeScript inheritance API parity. owner: Halley. Notes: existing Python tests expect both dependency edges and separate `SUBCLASS` traversal edges for inheritance/implements APIs; compact subclass edge storage remains an explicit follow-up. +- [x] 2026-06-19: Added compact TypeScript subclass traversal edges. owner: codex. Notes: Rust now stores deduplicated subclass/implementation edges alongside references/dependencies and the Python Rust shell walks them for inheritance APIs. `check_fast.sh` passed; rebuilt pinned Next.js `v15.0.0` proof validates 13,688 files, 48,157 references, 16,260 dependencies, and 151 subclass edges, with Rust `Codebase` construction at 6.506s and 492.8 MB max RSS in the cached run. 
+- [x] 2026-06-19: Added subclass edges to the pinned Next.js graph snapshot. owner: codex. Notes: `snapshot_pinned_typescript_repo.py` schema v4 now normalizes subclass-edge rows, verifies source/target/reference integrity, and stores a deterministic hash for 151 internal inheritance/implementation edges. Exact pinned snapshot verification passed in 6.449s with 615.7 MB max RSS; pinned Rust `Codebase` verification passed in 6.448s with 496.0 MB max RSS. +- [x] 2026-06-19: Added compact external-module records and handles. owner: codex. Notes: unresolved non-relative Python/TypeScript imports now produce `ExternalModuleRecord` rows exposed through PyO3, `Codebase.rust_external_modules`, `Codebase.external_modules`, and compact import `resolved_symbol`/`imported_symbol`; focused Rust, PyO3, Python compact backend, and ruff checks passed. +- [x] 2026-06-19: Added external modules to pinned large-repo proof paths. owner: codex. Notes: Airflow snapshot schema v2 now verifies 17,880 compact external-module rows, Next.js snapshot schema v5 verifies 13,525 rows, and the pinned Next.js Rust `Codebase` compatibility/performance check asserts 13,525 `Codebase.external_modules` handles with the Python graph blocked. +- [x] 2026-06-19: Added first live Python/Rust backend parity fixture. owner: codex. Notes: the new checker builds/reuses the PyO3 extension, runs the same fixture through both backends, proves exact parity for files, symbols, import resolution, external modules, `build.dependencies`, `build.symbol_usages`, and symbol-only `Helper` usages, and reports the current `run.dependencies` semantic delta. +- [x] 2026-06-19: Matched Python import-node dependencies for compact Rust Python handles. owner: codex. 
Notes: compact Python symbol `.dependencies` now uses dependency reference IDs to return import handles for import-backed internal references, matching Python backend semantics for internal imports while leaving external dependency references as the remaining parity gap. +- [x] 2026-06-19: Closed the external import dependency parity gap. owner: codex. Notes: Rust now emits Python external import reference records for unresolved external bindings, compact imports expose those usages, and the live parity fixture exact-matches `run.dependencies` with zero known deltas. Pinned Airflow `2.10.5` benchmark passed with Rust `Codebase` at 4.076s and 282.9 MB max RSS versus Python parse/object materialization at 18.649s and 3470.3 MB, a 4.576x wall and 12.267x RSS improvement. +- [x] 2026-06-19: Added TypeScript external import references and pinned Next.js proof coverage. owner: codex. Notes: `TypeScriptIndex.external_references` tracks unresolved external import binding usages, compact TS imports expose those usages, and compact TS symbol dependencies now include external import handles. `check_fast.sh` passed, and pinned Next.js `v15.0.0` strict Rust `Codebase` proof asserts 23,282 external references with Python graph blocked at 6.183s and 528.3 MB max RSS, a 4.037x wall and 5.868x RSS ratio against the recorded Python baseline. +- [x] 2026-06-19: Added external import reference edges to pinned large-repo snapshots. owner: codex. Notes: Airflow snapshot schema v3 now pins 74,583 compact external-reference rows and Next.js snapshot schema v6 pins 23,282 rows, with integrity checks proving every row links to known file/symbol/import IDs. +- [x] 2026-06-19: Added compact TypeScript import mutation parity for codemods. owner: codex. 
Notes: compact TS files/symbols/imports now generate TypeScript import strings, classify compact TS import records into `ImportType`, insert new imports at statement line starts instead of specifier ranges, and run a real `Codemod` smoke that adds TS imports, creates a TS file, and renames a function with the Python graph blocked. +- [x] 2026-06-19: Added compact TypeScript move-to-file codemod parity. owner: codex. Notes: newly-created compact TS/JS files now get derived module names, and a real TS `Codemod` smoke moves a function into a created file with `strategy="update_all_imports"`, rewrites the consumer import to the new module, removes the old import, and commits with the Python graph blocked. +- [x] 2026-06-19: Added compact import-level mutation APIs. owner: codex. Notes: compact Python and TypeScript imports now support `set_import_module`, `set_import_symbol_alias`, and import-level `rename` through line-local transaction edits; focused tests cover Python declaration retargeting plus TypeScript module retargeting, local binding rename, usage rewrite, and blocked Python graph access. +- [x] 2026-06-19: Added Python-vs-Rust compact codemod mutation parity tests. owner: codex. Notes: focused tests now run identical Python and TypeScript `Codemod.execute` flows against both backends, proving exact file-byte parity for add-import/rename edits while keeping the compact Rust path off the Python graph. `check_fast.sh` passed with 26 focused Rust-backend Python tests. +- [x] 2026-06-19: Enforced exact Python-backend whitespace parity for compact move-to-file codemods. owner: codex. Notes: compact Rust moves now preserve Python-compatible newline behavior when moving the sole source symbol, inserting into an empty target, and retargeting an import consumer; move codemod parity tests now compare exact bytes for Python and TypeScript fixtures. +- [x] 2026-06-19: Added repeated incremental edit stress coverage for compact Rust handles. owner: codex. 
Notes: symbol and import mutations now relocate current declaration, reference, and import-statement spans after earlier commits shift byte ranges, so repeated Python and TypeScript rename/import retarget cycles remain graph-free and byte-correct. `check_fast.sh` passed with 28 focused Rust-backend Python tests. +- [x] 2026-06-19: Added typed unsupported-API failures for compact Rust mode. owner: codex. Notes: unsupported compact methods and graph-wide Python object access now raise `RustBackendUnsupportedError` with method/handle metadata and migration guidance instead of generic runtime failures; focused tests prove unsupported calls leave files unchanged and keep the Python graph unbuilt. +- [x] 2026-06-19: Preserved compact public query ordering. owner: codex. Notes: duplicate-stem compact files now sort deterministically by `(name, filepath)`, public query tests cover files/classes/functions in strict Rust compact mode, `uv run ruff check` is clean, and `rust-rewrite/tools/check_fast.sh` passed with 30 focused Rust-backend tests and 4 skipped opt-in integrations. +- [x] 2026-06-19: Added non-strict unsupported-method fallback promotion. owner: codex. Notes: compact handles now share a context promotion hook that disables compact mode, clears cached proxies, builds the Python graph, and delegates regex file replacement to Python when `rust_fallback="python"`; strict fallback still raises typed unsupported errors. `rust-rewrite/tools/check_fast.sh` passed with 31 focused Rust-backend tests and 4 skipped opt-in integrations. +- [x] 2026-06-19: Exposed compact PyO3 ID query lists. owner: codex. Notes: PyO3 `PythonIndex` and `TypeScriptIndex` now return direct `u32` ID vectors for file/symbol/import/export query families so simple callers can avoid deserializing record JSON. Binding tests assert exact IDs for Python and TypeScript fixtures. +- [x] 2026-06-19: Completed compact file record language/hash metadata. owner: codex. 
Notes: Rust file records now include extension-derived source language plus stable raw-byte content hashes for Python and TypeScript/JS files, Python bridge dataclasses expose the metadata, and compact snapshots/bindings assert the new fields. +- [x] 2026-06-19: Added normalized compact graph debug dumps. owner: codex. Notes: Rust indexes now emit `debug_graph_json()` with stable node IDs and edge metadata for files, symbols, imports, external modules, TypeScript exports, import resolutions, references, dependencies, external references, subclass edges, and export links; PyO3 binding tests assert the Python and TypeScript fixture topology. +- [x] 2026-06-19: Exposed compact graph debug dumps through the Python `Codebase` shell. owner: codex. Notes: `codebase.rust_debug_graph_json` now returns the normalized Rust graph payload without materializing the Python graph. Fresh current-checkout measurements: Python `Codebase` construction took 8.687s and 863.5 MB max RSS; Rust compact mode took 1.010s and 132.2 MB max RSS with `CodebaseContext.nodes` blocked, an 8.599x wall-time and 6.530x max-RSS improvement. +- [x] 2026-06-19: Expanded the live Python/Rust parity fixture. owner: codex. Notes: `check_python_rust_parity_fixture.py` now includes an intermediate imported-export chain (`pkg.api` to `pkg.models.Helper`) and a mutation parity subcheck that performs add-import, import removal, and symbol rename through both backends, exact-matching output bytes while the Rust path keeps the Python graph blocked. +- [x] 2026-06-19: Added live TypeScript parity coverage to the Python/Rust fixture. owner: codex. Notes: the parity checker now compares real Python and real Rust TypeScript backends for files, symbols, import targets, export resolution, symbol-only usages, resolved dependency targets, and an add-import plus rename mutation flow, with exact output-byte parity and the Rust path keeping `CodebaseContext.nodes` blocked.
+- [x] 2026-06-19: Matched compact TypeScript import source text. owner: codex. Notes: compact TS/JS imports now return the full import statement line from `.source` and `.import_statement.source`, so live TypeScript parity compares import source text directly while mutation helpers still edit by compact byte ranges. +- [x] 2026-06-19: Added pinned Airflow Rust `Codebase` compatibility/performance proof. owner: codex. Notes: the opt-in checker asserts strict compact record and public handle counts, known byte-span lookup results, blocked Python graph access, and recorded-baseline wall/RSS improvement gates for Apache Airflow `2.10.5`; fresh rebuilt-extension run measured 4.161s and 604.9 MB max RSS. +- [x] 2026-06-19: Indexed compact Python function-local imports. owner: codex. Notes: Rust now records nested import statements and resolves references through function-local external imports without falling back to the Python graph. Refreshed Airflow `2.10.5` proof pins 45,404 imports, 117,799 references, 78,784 external references, and 77,570 dependencies; strict cached-extension run measured 4.923s and 606.0 MB max RSS. +- [x] 2026-06-19: Promoted live Python/Rust parity into the fast lane. owner: codex. Notes: the parity fixture now exact-matches function-local `import importlib` dependencies across Python and Rust, and `check_fast.sh` runs the fixture by reusing the debug PyO3 extension instead of requiring a pinned large-repo job. +- [x] 2026-06-19: Added scheduled large-repo regression checks. owner: codex. Notes: new `check_pinned_large_repos.sh` builds the PyO3 extension once and runs pinned Airflow plus Next.js snapshot and strict `Codebase` performance/RSS checks; `.github/workflows/rust-rewrite-large-repos.yml` runs it outside the fast PR path and uploads JSON reports. +- [x] 2026-06-19: Added supported-Python extension build CI. owner: codex. 
Notes: new `check_extension_build.sh` and `.github/workflows/rust-rewrite-extension.yml` prove the PyO3 extension builds, imports, and indexes a tiny Python repo on supported Python 3.12 and 3.13 for Linux and macOS. +- [x] 2026-06-19: Interned compact Rust record strings. owner: codex. Notes: `FileRecord`, symbol, import/export, external module, reference, and external-reference record strings now share `Arc` allocations through per-index string pools while serializing unchanged; construction-only string/export lookup tables are cleared before returning the index. `cargo test -p graph-sitter-engine`, `uv run ruff check`, `check_fast.sh`, and cached pinned large-repo checks passed with stable hashes. Fresh pinned runs measured Airflow snapshot 4.670s/861.3 MB, Airflow strict `Codebase` 4.533s/686.5 MB, Next.js snapshot 6.259s/741.0 MB, and Next.js strict `Codebase` 6.337s/530.4 MB; the RSS harness records peak construction RSS, so retained-memory cleanup needs a follow-up post-build resident-size metric. +- [x] 2026-06-19: Added phase-aware current RSS to Rust `Codebase` proof reports. owner: codex. Notes: Airflow `2.10.5` strict Rust `Codebase` now reports construction RSS at 258.6 MB, summary-count RSS at 396.6 MB, record-count RSS at 596.6 MB, compatibility-handle RSS at 638.1 MB, and final query RSS at 643.1 MB. Next.js `v15.0.0` reports construction RSS at 373.6 MB, summary-count RSS at 387.9 MB, record-count RSS at 542.6 MB, and compatibility-handle RSS at 585.6 MB. This proves the remaining large-repo memory pressure is primarily Python-side JSON record/handle materialization after Rust construction, not the compact Rust index build itself. +- [x] 2026-06-19: Replaced proof count materialization with compact Rust/PyO3 count APIs. owner: codex. Notes: summary and compatibility count paths now avoid JSON record loads and public handle-list construction. 
Final pinned checks passed with Airflow `2.10.5` at 4.557s and flat 257.8 MB RSS through all count phases, then 654.8 MB after known dependency/reference queries; Next.js `v15.0.0` passed at 6.433s and flat 313.4 MB RSS through all count phases. +- [x] 2026-06-19: Moved known large-repo query paths onto targeted Rust record lookups. owner: codex. Notes: `get_file(...).find_by_byte_range(...)`, file-local function lookup, and direct dependency expansion no longer force full Python-side reference/dependency/handle-list materialization; the Airflow checker now asserts large Python-side caches remain unmaterialized. Fresh Airflow proof: 4.424s, 264.1 MB max RSS, current RSS flat at 255.8 MB through known byte-range lookups and 261.3 MB after dependency query. Fresh Next.js proof: 6.232s and 309.8 MB max RSS. +- [x] 2026-06-19: Moved compact `Codebase` exact symbol lookups onto targeted Rust records. owner: codex. Notes: global `has_symbol`, `get_symbols`, `get_symbol`, `get_class`, and `get_function` no longer require the full top-level symbol handle list in compact mode. `check_fast.sh` passed with 32 focused Rust-backend tests, and the Airflow proof shows global function lookup, known byte-range lookups, and dependency lookup keep `_files`, `_symbols`, `_imports`, `_references`, `_external_references`, and `_dependencies` unmaterialized. +- [x] 2026-06-19: Added pinned Next.js proof for compact global exact lookups. owner: codex. Notes: the Next.js checker now exercises `codebase.get_function("AppRouterAnnouncer")` and asserts large Python-side Rust backend caches remain cold; fresh run passed at 6.466s and 308.7 MB max RSS. +- [x] 2026-06-19: Added compact TypeScript function-call records. owner: codex. Notes: Rust now extracts TypeScript call expressions as compact rows during reference resolution, including recursive calls that intentionally skip ordinary self-reference edges.
PyO3 exposes targeted call queries, compact files/symbols expose read-only `.function_calls`, and focused Rust/Python tests plus the full Rust-backend unit file pass without materializing all call records or symbols. +- [x] 2026-06-19: Added compact TypeScript promise-chain records. owner: codex. Notes: Rust now records outermost Promise chains with `.then`, `.catch`, and `.finally` stages, exposes targeted PyO3 queries, and compact files/symbols answer read-only `.promise_chains` while keeping full promise-chain async conversion as a mutable expression-object gap. +- [x] 2026-06-19: Moved compact child-symbol traversal onto targeted Rust records. owner: codex. Notes: `RustCompactSymbol.child_symbols` now reads filtered parent-child records from PyO3 instead of building every compact symbol handle. Fresh Airflow proof exercises `codebase.get_class("KerberosService").child_symbols`, stays flat at 258.5 MB RSS through the child lookup, keeps `_symbol_handles` false, and reports 13.026x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact file-local exact symbol lookups onto targeted Rust records. owner: codex. Notes: `RustCompactFile.get_symbol`, `get_class`, `get_function`, and `get_global_var` now read filtered file/name records from PyO3 instead of materializing the file's symbol list. Fresh Airflow proof exercises `codebase.get_file("airflow/__init__.py").get_function("__getattr__")`, keeps `_symbols_by_file_id` false before byte-range queries, stays flat at 203.3 MB RSS through the targeted lookup phases, and reports 12.963x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact file-local exact import lookups onto targeted Rust records. owner: codex. Notes: `RustCompactFile.get_import` and `has_import` now read filtered file/import candidates from PyO3 instead of materializing the file's import list. 
Fresh Airflow proof exercises `codebase.get_file("airflow/__init__.py").get_import("import os")`, keeps `_imports_by_file_id` false before byte-range queries, stays flat at 256.0 MB RSS through targeted lookup phases, and reports 13.116x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact TypeScript file-local exact export lookups onto targeted Rust records. owner: codex. Notes: `RustCompactFile.get_export` now reads filtered file/export records from PyO3 instead of materializing the file's export list. Fresh Next.js proof exercises `codebase.get_file("packages/next/src/client/components/app-router-announcer.tsx").get_export("AppRouterAnnouncer")`, keeps `_exports_by_file_id` false, stays flat at 310.5 MB RSS through targeted lookup phases, and reports 9.983x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact file byte-range lookup onto targeted Rust records. owner: codex. Notes: `RustCompactFile.find_by_byte_range` now reads filtered symbol/import/export records from PyO3 instead of calling broad `get_nodes()`. Fresh Airflow proof validates known import/function byte spans, keeps `_symbols_by_file_id`, `_imports_by_file_id`, `_exports_by_file_id`, and all large record/handle-list caches false after byte-range queries, stays flat at 257.1 MB RSS through that phase, and reports 13.079x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact file single-name resolution onto targeted Rust records. owner: codex. Notes: `RustCompactFile.resolve_name` and `resolve_attribute` now resolve one name through filtered symbol/import candidates instead of constructing `valid_symbol_names`; the full-map property remains unchanged. 
Fresh Airflow proof validates `resolve_name("__getattr__")`, `resolve_attribute("os")`, and `get_node_by_name("os")`, keeps `_symbols_by_file_id`, `_imports_by_file_id`, `_exports_by_file_id`, and all large record/handle-list caches false, stays flat at 256.8 MB RSS through that phase, and reports 13.060x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Moved compact module-import attribute resolution onto targeted Rust records. owner: codex. Notes: `RustCompactImport.resolve_attribute` now calls the imported compact file's targeted `resolve_attribute` path rather than materializing `valid_import_names`. Fresh Airflow proof validates `airflow/dag_processing/manager.py` `import airflow.models` resolving `DagModel`, keeps `_symbols_by_file_id`, `_imports_by_file_id`, `_exports_by_file_id`, and all large record/handle-list caches false, stays flat at 259.2 MB RSS through that phase, and reports 13.109x lower max RSS than the recorded Python baseline. +- [x] 2026-06-19: Kept compact file removal from materializing cold record lists. owner: codex. Notes: `RustIndexBackend.unregister_file` now records removed IDs/paths and filters only warm caches, while targeted file lookup methods return `None` for tombstoned paths/IDs. Focused Ruff and mutation tests passed, and `rust-rewrite/tools/check_fast.sh` passed with 37 Rust-backend Python tests plus skipped opt-in pinned integrations. +- [x] 2026-06-19: Kept compact file creation and missing-file checks from materializing file lists. owner: codex. Notes: `RustIndexBackend.register_added_file` now keeps created records in lazy side tables, allocates IDs without reading all file records, and only appends to warm caches. Targeted missing path/ID lookup now returns `None` instead of falling through to `file_handles`. `rust-rewrite/tools/check_fast.sh` passed with 38 Rust-backend Python tests plus skipped opt-in pinned integrations. 
+- [x] 2026-06-19: Kept compact symbol/import/export ID misses from materializing broad handle lists. owner: codex. Notes: targeted ID lookup helpers now return `None` when PyO3 reports no record, rather than loading every symbol/import/export/external-module handle. The fake indexes now expose targeted ID methods for this path, and `rust-rewrite/tools/check_fast.sh` passed with 38 Rust-backend Python tests plus skipped opt-in pinned integrations. +- [x] 2026-06-19: Kept compact relation ID misses from materializing broad relation records. owner: codex. Notes: `import_resolution_for_import` and `reference_by_id` now stop on targeted misses, and fake PyO3 indexes expose those methods for regression coverage. `rust-rewrite/tools/check_fast.sh` passed with 38 Rust-backend Python tests plus skipped opt-in pinned integrations. +- [x] 2026-06-19: Kept compact case-insensitive file lookup from materializing file handles. owner: codex. Notes: `file_by_path_ignore_case_json` is exposed through PyO3 for Python and TypeScript indexes, and compact `get_file(..., ignore_case=True)` uses it for existing, missing, and newly-created paths without loading full file records or handles. `rust-rewrite/tools/check_fast.sh` passed with 39 Rust-backend Python tests plus skipped opt-in pinned integrations. +- [x] 2026-06-19: Added pinned large-repo proof for compact case-insensitive file lookup. owner: codex. Notes: Airflow `2.10.5` now validates `get_file("AIRFLOW/__INIT__.PY", ignore_case=True)` with RSS flat at 258.2 MB through the lookup and 13.035x lower max RSS than the recorded Python baseline. Next.js `v15.0.0` validates `get_file("PACKAGES/NEXT/SRC/CLIENT/COMPONENTS/APP-ROUTER-ANNOUNCER.TSX", ignore_case=True)` with RSS flat at 308.9 MB and 10.035x lower max RSS. `check_fast.sh` also passed with 39 focused Rust-backend tests. +- [x] 2026-06-19: Added pinned large-repo Rust codemod proof. owner: codex. 
Notes: `check_pinned_codemods.py` builds strict Rust `Codebase` shells on temporary clones of Airflow `2.10.5` and Next.js `v15.0.0`, executes real `Codemod` import+rename flows, commits with `sync_graph=False`, and verifies graph-free output changes. Fresh run: Airflow construct 5.269s, codemod 0.004s, 269.2 MB max RSS, one modified file; Next.js construct 6.480s, codemod 0.004s, 369.4 MB max RSS, target plus importing usage file modified. `check_fast.sh` passed with 39 focused Rust-backend tests and 6 skipped opt-in integrations. +- [x] 2026-06-19: Added selected pinned Python-vs-Rust semantic parity proof. owner: codex. Notes: Airflow exact-compares known files, global/file-local function lookup, import lookup/resolution, name resolution, and `__getattr__` dependencies, with one known delta where Rust resolves `airflow.models.DagModel` and Python returns `None`; fresh run measured Python 48.928s/5375.2 MB vs Rust 6.097s/266.9 MB. Next.js exact-compares announcer file/function/export/import resolution and symbol usages, with the selected TypeScript dependency delta still known; final run measured Python 62.725s/4429.0 MB vs Rust 7.310s/315.8 MB. `check_fast.sh` passed with 40 focused Rust-backend tests and 7 skipped opt-in integrations. +- [x] 2026-06-19: Closed selected Next.js TypeScript symbol-usage parity delta. owner: codex. Notes: compact `RustCompactSymbol.symbol_usages` now includes targeted export handles and import handles that expose the symbol, matching Python backend semantics without materializing full import/export/reference lists. The pinned Next.js semantic proof now exact-compares `announcer_symbol_usages`; the remaining selected Next.js known delta is `announcer_dependencies`. +- [x] 2026-06-19: Added compact TypeScript type-reference dependencies. owner: codex. Notes: Rust now emits dependency edges for type annotations and namespace-qualified type references, while preserving type-only import handles for Python-shell parity. 
The pinned Next.js compact proof now validates 62,309 references, 25,323 external references, and 21,639 dependencies at 7.029s and 317.7 MB max RSS. The selected semantic proof exact-compares `announcer_import_dependencies`; remaining `announcer_dependencies` delta is local-variable symbol modeling versus Rust same-file top-level dependency filtering. `check_fast.sh` passed with 40 focused Rust-backend tests and 7 skipped opt-in integrations. +- [x] 2026-06-19: Closed selected Next.js TypeScript dependency parity delta. owner: codex. Notes: compact TypeScript indexing now records nested callback local assignments as non-top-level symbols, resolves local usages through indexed owner/name maps, and moves initializer-only top-level dependencies onto the local assignment symbols. The selected semantic proof now exact-compares `announcer_dependencies` with no known deltas: Python 62.853s/4430.1 MB vs Rust 10.405s/343.7 MB. Strict Next.js `Codebase` proof validates 44,855 symbols, 114,462 references, 49,287 dependencies, 25,318 external references, and 160 subclass edges at 9.045s/340.9 MB, a 2.759x wall and 9.093x RSS improvement over the recorded Python baseline. `check_fast.sh` passed with 40 focused Rust-backend tests and 7 skipped opt-in integrations. +- [x] 2026-06-19: Hardened pinned semantic known-delta proof. owner: codex. Notes: `check_pinned_semantic_parity.py` now fails unless the Airflow module-import attribute delta is exactly Python `null` and Rust `DagModel` from `airflow/models/__init__.py`, so the remaining selected Python semantic difference is a checked Rust enhancement rather than an open-ended tolerated mismatch. Fresh proof measured Airflow Python 54.252s/5375.1 MB vs Rust 6.029s/264.7 MB; the TypeScript suite still has no known deltas. `check_fast.sh` passed with 40 focused Rust-backend tests and 7 skipped opt-in integrations. +- [x] 2026-06-19: Added aggregate rollout readiness gate and backend-default criteria. owner: codex. 
Notes: `check_rollout_readiness.py` now validates the full pinned large-repo report set for structural snapshot integrity, Airflow/Next.js speed and RSS ratios, semantic parity, codemod success, blocked Python graph access, and cold broad caches. `check_pinned_large_repos.sh` runs the gate and writes `rollout-readiness.json`; the strategy now documents opt-in/fallback semantics and default-backend promotion requirements. +- [x] 2026-06-19: Expanded fast-lane fixture graph parity. owner: codex. Notes: compact Python and TypeScript shell APIs now preserve import/export wrapper semantics for incoming usages and imported dependencies, including Python re-export chains and TypeScript export-from imports. The live parity checker exact-compares fixture-wide import rows, symbol dependency rows, symbol usage rows, import usage rows, selected API probes, and mutation outputs with zero known deltas. +- [x] 2026-06-19: Proved the full Python-backend unit suite remains green. owner: codex. Notes: `uv run pytest -n auto --timeout 15 tests/unit` passed locally with 2,158 passed, 58 skipped, 12 xfailed in 77.88s, closing the Python-backend broad-suite merge criterion for this branch state. +- [x] 2026-06-19: Replayed the full pinned large-repo readiness gate with a fresh extension. owner: codex. Notes: `check_pinned_large_repos.sh` passed locally with cached Airflow and Next.js checkouts after rebuilding the PyO3 extension. Aggregate readiness passed: Airflow Rust `Codebase` 4.399s/266.3 MB with 4.239x/13.029x ratios; Next.js Rust `Codebase` 8.276s/348.1 MB with 3.016x/8.906x ratios; pinned codemods passed; semantic parity exact-compared seven keys per suite with one checked Airflow enhancement delta and zero TypeScript deltas. +- [x] 2026-06-19: Enumerated the supported opt-in Rust backend subset. owner: codex. 
Notes: `supported-subset.json` now maps seven capability groups to 50 concrete pytest IDs, and `check_supported_subset.py` validates that manifest against pytest collection before the fast lane runs the tests. This closes the explicit supported-subset proof for current opt-in merge readiness. +- [x] 2026-06-19: Added checked P0 parity coverage audit. owner: codex. Notes: `p0-parity-coverage.json` ties P0 API groups to concrete pytest/tool evidence and records Directory, broad TypeScript expression/type, and full graph-wide large-repo parity as open gaps. The fast lane now validates the audit, while `--require-complete` remains the pre-default-flip gate. +- [x] 2026-06-19: Added source-backed compact Directory handles. owner: codex. Notes: strict Rust compact mode now serves `codebase.directories`, `get_directory`, `has_directory`, source-backed directory traversal, and recursive directory symbol/import/export helpers without building the Python graph. The supported subset now includes the focused Directory cache-invariant test; non-source/all-file directory traversal remains an explicit P0 open gap. +- [x] 2026-06-19: Added compact all-file directory parity. owner: codex. Notes: compact mode now carries all repo file paths alongside source index paths, lazily synthesizes non-source `RustCompactFile` handles for `extensions="*"`, keeps default file/directory listings source-only, and registers empty created directories without building the Python graph. The P0 audit no longer has a Directory open gap. +- [x] 2026-06-19: Kept `Codebase.rust_files` source-only after all-file directory support. owner: codex. Notes: non-source files remain available through `codebase.files(extensions="*")` and directory all-file queries, while noapidoc Rust graph snapshot helpers continue to return indexed source records for stable pinned golden comparisons. +- [x] 2026-06-19: Added semantic parity performance gates. owner: codex. 
Notes: pinned semantic parity reports now include Python/Rust wall and RSS ratios, the semantic checker fails if Rust is not faster and lower-RSS for selected parity runs, and aggregate rollout readiness applies a 2x wall / 4x RSS threshold for semantic parity plus suite-specific Codebase wall gates. +- [x] 2026-06-19: Added compact TypeScript named namespace re-export member dependency parity. owner: codex. Notes: Rust now carries namespace-export target files into reference resolution so named imports of `export * as ns` can resolve `ns.member` to the underlying leaf symbol; the Python Rust shell reports the namespace import's resolved file like the Python backend. The live TypeScript parity fixture exact-compares imports, import usages, dependency graph rows, and resolved dependency targets with zero known deltas. +- [x] 2026-06-19: Added compact TypeScript namespace member lookup coverage. owner: codex. Notes: Rust now extracts direct and nested namespace member symbols as parented records, the TypeScript golden snapshot pins namespace child globals, and compact Python handles answer `file.get_namespace(...)` plus namespace member lookup helpers without building the Python graph. +- [x] 2026-06-19: Added pinned Next.js proof for compact TypeScript calls and Promise chains. owner: codex. Notes: `check_pinned_typescript_codebase.py` now asserts 197,581 compact function-call records and 878 compact Promise-chain records on Next.js `v15.0.0`, validates `packages/next/src/cli/next-lint.ts` file/symbol call and `.then/.catch` chain lookups, and proves full call/chain caches stay cold. Aggregate readiness passes with Airflow Rust `Codebase` 4.939s/310.8 MB and Next.js Rust `Codebase` 10.465s/435.9 MB, above the 2x wall and 4x RSS gates. +- [x] 2026-06-19: Added compact TypeScript import predicate parity. owner: codex. 
Notes: compact TS imports now expose `is_type_import()`, `is_default_import()`, `is_namespace_import`, `namespace`, and `namespace_imports` across default, type-only, named namespace, namespace-star, and side-effect imports. Focused coverage proves the Python graph and broad Rust record/handle caches stay cold. +- [x] 2026-06-19: Expanded fast-lane TypeScript heritage parity. owner: codex. Notes: `check_python_rust_parity_fixture.py` now includes same-file `interface Dog extends Animal` and `class Labrador implements Dog`, exact-compares inherited dependency rows, interface `implementations`, class `superclasses`, and subclass status against the Python backend, and keeps the Rust path graph-free. +- [x] 2026-06-19: Added parse JSON schema version and output files. owner: codex. Notes: `graph-sitter parse --format json` now emits `schema_version: 1`, `--output FILE` writes newline-terminated JSON with empty stdout, docs describe the contract, and the uvx roadmap no longer treats output-file support as a future item. +- [x] 2026-06-19: Added auto-backend fallback disclosure coverage and cleaned run help wording. owner: codex. Notes: focused CLI tests now force an unavailable Rust extension for `parse --backend auto --fallback python` and assert selected-backend disclosure, while `run --help` now says registered codemod instead of codegen function. +- [x] 2026-06-19: Added transform fallback and no-op check coverage. owner: codex. Notes: `transform --backend rust --fallback python --write` is now covered for Python fallback writes, strict Rust fallback errors are covered for non-mutation, and no-op `transform --check` is covered for exit-zero behavior. +- [x] 2026-06-19: Added `transform --subdir` for targeted one-shot codemods. owner: codex. Notes: import-path transforms now expose repeatable subdirectory/file filters, reuse the existing `ProjectConfig.subdirectories` flow, document the command, and prove scoped writes avoid unselected files. 
+- [x] 2026-06-19: Added `run --subdir` for targeted registered codemods. owner: codex. Notes: registered `.codegen/codemods` can now scope local parses to selected paths, `--check` preserves the same override in its sandbox, and the run docs show large-repo/uvx usage. +- [x] 2026-06-19: Spawned docs/site and uvx roadmap refresh agents. owner: codex. Notes: Godel (`019ee271-80a8-7843-808c-945aac708db9`) added `docs-site-strategy.md`; Curie (`019ee271-8675-7853-9c3a-f2c60061676c`) refreshed `uvx-command-roadmap.md`. +- [x] 2026-06-19: Added installed-wheel registered run proof. owner: codex. Notes: the wheel smoke now runs target-owned registered codemods through `uvx --from dist/<wheel>.whl` in strict Rust `--check`, `--write`, and scoped `--subdir --check` modes. +- [x] 2026-06-19: Documented setup and `uvx` workflows. owner: codex. Notes: added `docs/cli/uvx.mdx`, linked it from CLI nav, modernized installation quickstart around `graph-sitter`, and kept PyPI-backed Rust claims blocked on published-package validation. +- [x] 2026-06-19: Published benchmark and correctness docs. owner: codex. Notes: added docs pages for pinned Airflow/Next.js benchmark evidence and parity status, linked them from `uvx` and the overview, and softened broad correctness claims in public copy. +- [x] 2026-06-19: Split docs validation from Rust fast CI. owner: codex. Notes: added a lightweight Mintlify validate/broken-links workflow and narrowed Rust fast-check path filters so docs/strategy-only edits no longer spend the full Rust check budget. +- [x] 2026-06-19: Added landing-site build CI. owner: codex. Notes: added a scoped GitHub Actions workflow for the Vercel landing app that installs from `site/package-lock.json` and runs the production Next.js build. +- [x] 2026-06-19: Added release-built wheel smokes. owner: codex.
Notes: the release workflow now tests each `cibuildwheel` artifact through `uvx --from <wheel>` before upload, using the same Python/TypeScript parse, transform, registered run, and wheel-content smoke as the branch extension workflow. +- [x] 2026-06-19: Hardened wheel CLI smoke isolation. owner: codex. Notes: the wheel smoke now verifies `graph-sitter`/`gs` entry-point metadata, runs both help commands through `uvx --from`, and executes installed-artifact commands from a scratch directory with `PYTHONPATH` unset and a fresh `UV_CACHE_DIR`. +- [x] 2026-06-19: Added installed-wheel doctor and machine-output gates. owner: codex. Notes: the wheel smoke now validates `doctor --backend rust` for Python/TypeScript, parse JSON schema and strict fallback fields, `parse --output` stdout/file behavior, and exact transform diff scopes. +- [x] 2026-06-19: Fixed compact Python parenthesized `from ... import (...)` extraction. owner: codex. Notes: Rust now strips comments and wrapping parentheses before splitting imported names, preventing bogus `(` and `)` import rows in large repos. Fresh Airflow `2.10.5` strict proof validates 44,121 imports, 120,770 references, 79,300 external references, and 79,737 dependencies at 4.529s/310.5 MB with the Python graph blocked; the full pinned large-repo suite also passed Next.js Codebase proof at 9.744s/436.5 MB, large-repo codemod proof, semantic parity, and rollout readiness. +- [x] 2026-06-19: Added TypeScript parser fallback for TS angle-bracket assertions. owner: codex. Notes: TypeScript indexing now uses extension-specific TS/TSX parsers and keeps the parse with fewer syntax errors when the primary grammar fails. Pinned Next.js `v15.0.0` parser-error files dropped from 114 to 113 by recovering `test/integration/typescript/components/angle-bracket-type-assertions.ts`; strict Codebase proof validates 44,871 symbols, 16,027 exports, and 114,464 references at 10.726s/437.8 MB with the Python graph blocked.
+- [x] 2026-06-19: Relaxed hosted Next.js wall-time guard for runner variance. owner: codex. Notes: Hosted CI exact-matched the updated Next.js graph counts and RSS gates but hit 17.299s against the old 15s/1.5x wall guard. The gate now keeps exact correctness and RSS checks strict while using a 25s/1.2x wall guard for hosted Codebase construction; local benchmark evidence remains 10.726s/437.8 MB. diff --git a/rust-rewrite/supported-subset.json b/rust-rewrite/supported-subset.json new file mode 100644 index 000000000..639c12b81 --- /dev/null +++ b/rust-rewrite/supported-subset.json @@ -0,0 +1,158 @@ +{ + "schema_version": 1, + "pytest_roots": [ + "tests/unit/sdk/codebase/test_rust_backend.py", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py" + ], + "capabilities": [ + { + "name": "cold public query paths without broad materialization", + "status": "supported_opt_in", + "scope": [ + "deterministic public query ordering", + "global and file-local exact symbol lookup", + "case-insensitive file lookup", + "file-local byte-range lookup", + "file-local name resolution", + "module-import attribute resolution", + "source-backed directory lookup and traversal", + "non-source directory all-file traversal", + "TypeScript file-local export lookup" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_public_queries_preserve_python_sorting", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_directory_queries_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_directory_all_file_queries_include_non_source_files", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_exact_symbol_lookups_do_not_materialize_all_symbols", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_ignore_case_file_lookup_does_not_materialize_file_lists", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_byte_range_lookups_do_not_materialize_file_nodes", + 
"tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_name_resolution_does_not_materialize_file_maps", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_module_import_attribute_resolution_does_not_materialize_file_maps", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_exact_export_lookups_do_not_materialize_all_exports" + ] + }, + { + "name": "Python compact Codebase shell", + "status": "supported_opt_in", + "scope": [ + "strict Rust compact Codebase construction", + "compact files, symbols, classes, functions, globals, imports", + "import resolution, dependencies, usages, external modules", + "blocked eager Python graph" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_codebase_context_builds_opt_in_rust_index", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_external_modules" + ] + }, + { + "name": "TypeScript compact Codebase shell", + "status": "supported_opt_in", + "scope": [ + "strict Rust compact TypeScript Codebase construction", + "compact files, symbols, functions, interfaces, types, imports, exports", + "file.get_namespace and namespace member lookup helpers", + "read-only file/symbol.function_calls", + "read-only file/symbol.promise_chains", + "TypeScript import type/default/namespace predicates", + "relative and alias import resolution", + "external import dependencies", + "subclass and implementation traversal" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_codebase_context_builds_opt_in_typescript_rust_index", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_symbol_usages_include_import_export_wrappers_without_materializing_indexes", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_external_import_dependencies", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_subclass_traversal", + 
"tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_function_calls_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_promise_chains_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_namespace_lookups_do_not_materialize_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_import_predicates_do_not_materialize_python_graph" + ] + }, + { + "name": "fallback and unsupported API behavior", + "status": "supported_opt_in", + "scope": [ + "missing extension fallback under rust_fallback=python", + "explicit strict-mode unsupported API errors", + "intentional promotion to Python graph under rust_fallback=python", + "strict failure when extension is missing and fallback is disabled" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_missing_rust_extension_falls_back_to_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_unsupported_api_fails_explicitly_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_unsupported_file_method_falls_back_to_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_missing_rust_extension_can_fail_strictly" + ] + }, + { + "name": "Python compact edit and codemod flows", + "status": "supported_opt_in", + "scope": [ + "file edit, create, remove", + "symbol rename", + "add/remove/retarget imports", + "decorator read/write/remove", + "move-to-file and import update", + "Codemod.execute flows without Python graph materialization" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_file_mutations_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_create_file_does_not_materialize_record_lists", + 
"tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_remove_existing_file_does_not_materialize_record_lists", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_symbol_rename_and_add_import_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_add_import_from_symbol_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_symbol_and_import_remove_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_import_mutators_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_repeated_incremental_edits_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_add_decorator_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_decorator_read_and_remove_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_move_function_to_created_file_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_move_class_adds_back_edge_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_move_updates_imported_usages_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_symbol_import_edits_match_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_execute_symbol_import_edits_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_execute_move_updates_imports_without_python_graph" + ] + }, + { + "name": "TypeScript compact edit and codemod flows", + "status": "supported_opt_in", + "scope": [ + "TypeScript add-import and rename parity", + "TypeScript import mutation", + "TypeScript repeated incremental edits", + "TypeScript move-to-file import updates", + "Codemod.execute 
flows without Python graph materialization" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_import_edits_match_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_codemod_move_updates_imports_matches_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_edits_imports_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_import_mutators_commit_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_repeated_incremental_edits_without_python_graph", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_move_updates_imports_matches_python_backend", + "tests/unit/sdk/codebase/test_rust_backend.py::test_rust_compact_typescript_codemod_move_updates_imports_without_python_graph" + ] + }, + { + "name": "rollout readiness contract", + "status": "supported_opt_in", + "scope": [ + "aggregate pinned report acceptance", + "aggregate pinned report stale-count rejection", + "Next.js hosted-CI wall-ratio threshold acceptance", + "semantic parity performance-ratio rejection", + "Next.js compact call/promise proof rejection" + ], + "tests": [ + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py::test_rollout_readiness_accepts_complete_pinned_contract_reports", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py::test_rollout_readiness_uses_nextjs_hosted_ci_wall_floor_by_default", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py::test_rollout_readiness_rejects_stale_codebase_counts", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py::test_rollout_readiness_rejects_slow_semantic_parity", + "tests/unit/sdk/codebase/test_rust_rewrite_readiness.py::test_rollout_readiness_rejects_missing_nextjs_call_proof" + ] + } + ] +} diff --git a/rust-rewrite/tools/benchmark_pinned_python_repo.py 
b/rust-rewrite/tools/benchmark_pinned_python_repo.py new file mode 100644 index 000000000..05528d1d8 --- /dev/null +++ b/rust-rewrite/tools/benchmark_pinned_python_repo.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import platform +import shutil +import subprocess +import sys +import sysconfig +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] + +DEFAULT_REPO_NAME = "apache-airflow-2.10.5" +DEFAULT_REPO_URL = "https://github.com/apache/airflow.git" +DEFAULT_REF = "refs/tags/2.10.5" +DEFAULT_EXPECTED_COMMIT = "b93c3db6b1641b0840bd15ac7d05bc58ff2cccbf" +DEFAULT_CACHE_DIR = Path("/tmp/graph-sitter-pinned-repos") +DEFAULT_EXTENSION_DIR = Path("/tmp/graph_sitter_py_pinned_benchmark") + + +def run(command: list[str], *, cwd: Path, env: dict[str, str] | None = None, timeout: int | None = None) -> subprocess.CompletedProcess[str]: + return subprocess.run(command, cwd=cwd, env=env, timeout=timeout, check=True, capture_output=True, text=True) + + +def parse_json_output(output: str) -> dict[str, Any]: + start = output.find("{") + end = output.rfind("}") + if start == -1 or end == -1 or end < start: + msg = f"command did not emit JSON output:\n{output}" + raise ValueError(msg) + return json.loads(output[start : end + 1]) + + +def git(repo: Path, *args: str, timeout: int | None = None) -> str: + result = run(["git", *args], cwd=repo, timeout=timeout) + return result.stdout.strip() + + +def prepare_pinned_repo(args: argparse.Namespace) -> tuple[Path, str]: + checkout = args.cache_dir / args.name + if args.reset_checkout and checkout.exists(): + shutil.rmtree(checkout) + checkout.parent.mkdir(parents=True, exist_ok=True) + + if not (checkout / ".git").exists(): + checkout.mkdir(parents=True, exist_ok=True) + git(checkout, "init", timeout=args.timeout) + git(checkout, "remote", "add", "origin", args.repo_url, 
timeout=args.timeout) + else: + existing_url = git(checkout, "remote", "get-url", "origin", timeout=args.timeout) + if existing_url != args.repo_url: + git(checkout, "remote", "set-url", "origin", args.repo_url, timeout=args.timeout) + + if not args.skip_fetch: + git(checkout, "fetch", "--depth=1", "origin", args.ref, timeout=args.timeout) + git(checkout, "checkout", "--detach", "FETCH_HEAD", timeout=args.timeout) + actual_commit = git(checkout, "rev-parse", "HEAD", timeout=args.timeout) + if args.expected_commit and actual_commit != args.expected_commit: + msg = f"expected {args.expected_commit} for {args.ref}, got {actual_commit}" + raise RuntimeError(msg) + return checkout, actual_commit + + +def build_rust_extension(extension_dir: Path, *, timeout: int | None) -> Path: + env = os.environ.copy() + env["PYO3_PYTHON"] = sys.executable + if sys.platform == "darwin": + dynamic_lookup_flags = "-C link-arg=-undefined -C link-arg=dynamic_lookup" + env["RUSTFLAGS"] = f"{env.get('RUSTFLAGS', '')} {dynamic_lookup_flags}".strip() + + subprocess.run( + ["cargo", "build", "--release", "-p", "graph-sitter-py", "--features", "extension-module"], + cwd=REPO_ROOT, + env=env, + timeout=timeout, + check=True, + ) + + if sys.platform == "darwin": + source = REPO_ROOT / "target/release/libgraph_sitter_py.dylib" + elif os.name == "nt": + source = REPO_ROOT / "target/release/graph_sitter_py.dll" + else: + source = REPO_ROOT / "target/release/libgraph_sitter_py.so" + if not source.exists(): + msg = f"built extension artifact not found: {source}" + raise FileNotFoundError(msg) + + extension_dir.mkdir(parents=True, exist_ok=True) + target = extension_dir / f"graph_sitter_py{sysconfig.get_config_var('EXT_SUFFIX')}" + shutil.copy2(source, target) + return target + + +def run_python_backend(repo: Path, args: argparse.Namespace) -> dict[str, Any]: + command = [ + sys.executable, + str(TOOLS_DIR / "measure_python_backend.py"), + str(repo), + "--language", + "python", + "--skip-object-counts", 
+ "--sample-interval", + str(args.sample_interval), + "--json", + ] + if args.python_disable_graph: + command.append("--disable-graph") + result = run(command, cwd=REPO_ROOT, timeout=args.timeout) + return parse_json_output(result.stdout) + + +def run_rust_codebase(repo: Path, args: argparse.Namespace) -> dict[str, Any]: + env = os.environ.copy() + pythonpath = env.get("PYTHONPATH") + env["PYTHONPATH"] = str(args.extension_dir) if not pythonpath else f"{args.extension_dir}{os.pathsep}{pythonpath}" + command = [sys.executable, str(TOOLS_DIR / "measure_codebase_rust_backend.py"), str(repo), "--json"] + result = run(command, cwd=REPO_ROOT, env=env, timeout=args.timeout) + return parse_json_output(result.stdout) + + +def ratio(numerator: float, denominator: float) -> float | None: + if denominator <= 0: + return None + return round(numerator / denominator, 3) + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + repo, actual_commit = prepare_pinned_repo(args) + extension_path = None + if not args.skip_build_extension: + extension_path = build_rust_extension(args.extension_dir, timeout=args.timeout) + + python_report = run_python_backend(repo, args) + rust_report = run_rust_codebase(repo, args) + + python_totals = python_report["totals"] + python_graph = python_report["graph"] + rust_totals = rust_report["totals"] + rust_summary = rust_report["summary"] + wall_ratio = ratio(python_totals["wall_seconds"], rust_totals["wall_seconds"]) + rss_ratio = ratio(python_totals["max_rss_mb"], rust_totals["max_rss_mb"]) + + report = { + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "checkout": str(repo), + "python": sys.version, + "platform": platform.platform(), + "python_disable_graph": args.python_disable_graph, + "sample_interval_seconds": args.sample_interval, + "extension_path": str(extension_path) if extension_path else None, + }, + "comparison": { + "python_to_rust_wall_ratio": wall_ratio, + 
"python_to_rust_rss_ratio": rss_ratio, + "python_wall_seconds": python_totals["wall_seconds"], + "rust_wall_seconds": rust_totals["wall_seconds"], + "python_max_rss_mb": python_totals["max_rss_mb"], + "rust_max_rss_mb": rust_totals["max_rss_mb"], + "python_source_files": python_graph["source_files"], + "rust_files": rust_summary["files"], + "file_count_match": python_graph["source_files"] == rust_summary["files"], + "rust_symbols": rust_summary["symbols"], + "rust_imports": rust_summary["imports"], + "rust_import_resolutions": rust_summary["import_resolutions"], + "rust_external_modules": rust_summary["external_modules"], + "rust_references": rust_summary["references"], + "rust_dependencies": rust_summary["dependencies"], + "python_nodes": python_graph["nodes"], + "python_edges": python_graph["edges"], + }, + "python_backend": python_report, + "rust_codebase": rust_report, + } + validate_report(report, args) + return report + + +def validate_report(report: dict[str, Any], args: argparse.Namespace) -> None: + comparison = report["comparison"] + failures = [] + wall_ratio = comparison["python_to_rust_wall_ratio"] + rss_ratio = comparison["python_to_rust_rss_ratio"] + if args.require_file_count_match and not comparison["file_count_match"]: + failures.append(f"file count mismatch: python={comparison['python_source_files']} rust={comparison['rust_files']}") + if wall_ratio is None or wall_ratio < args.min_wall_ratio: + failures.append(f"wall ratio {wall_ratio}x is below required {args.min_wall_ratio}x") + if rss_ratio is None or rss_ratio < args.min_rss_ratio: + failures.append(f"RSS ratio {rss_ratio}x is below required {args.min_rss_ratio}x") + if failures: + raise RuntimeError("; ".join(failures)) + + +def print_human(report: dict[str, Any]) -> None: + metadata = report["metadata"] + comparison = report["comparison"] + print(f"repo: {metadata['name']} {metadata['commit']}") + print(f"checkout: {metadata['checkout']}") + print(f"python disable_graph: 
{metadata['python_disable_graph']}") + print( + "python backend: " + f"wall={comparison['python_wall_seconds']:.3f}s " + f"max_rss={comparison['python_max_rss_mb']:.1f} MB " + f"files={comparison['python_source_files']} nodes={comparison['python_nodes']} edges={comparison['python_edges']}" + ) + print( + "rust Codebase: " + f"wall={comparison['rust_wall_seconds']:.3f}s " + f"max_rss={comparison['rust_max_rss_mb']:.1f} MB " + f"files={comparison['rust_files']} symbols={comparison['rust_symbols']} imports={comparison['rust_imports']} " + f"import_resolutions={comparison['rust_import_resolutions']} external_modules={comparison['rust_external_modules']} " + f"references={comparison['rust_references']} dependencies={comparison['rust_dependencies']}" + ) + print( + "ratios: " + f"wall={comparison['python_to_rust_wall_ratio']}x " + f"rss={comparison['python_to_rust_rss_ratio']}x " + f"file_count_match={comparison['file_count_match']}" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Benchmark a pinned large Python repository against the compact Rust Codebase backend.") + parser.add_argument("--name", default=DEFAULT_REPO_NAME, help="Stable name for the pinned repository checkout.") + parser.add_argument("--repo-url", default=DEFAULT_REPO_URL, help="Git repository URL.") + parser.add_argument("--ref", default=DEFAULT_REF, help="Remote ref or commit to fetch.") + parser.add_argument("--expected-commit", default=DEFAULT_EXPECTED_COMMIT, help="Expected resolved commit SHA. 
Pass an empty string to disable.") + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR, help="Directory for reusable pinned checkouts.") + parser.add_argument("--extension-dir", type=Path, default=DEFAULT_EXTENSION_DIR, help="Directory for the built PyO3 extension module.") + parser.add_argument("--reset-checkout", action="store_true", help="Delete and recreate the cached checkout before running.") + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.") + parser.add_argument("--skip-build-extension", action="store_true", help="Reuse an existing graph_sitter_py extension in --extension-dir.") + parser.add_argument("--python-full-graph", action="store_false", dest="python_disable_graph", help="Measure the full Python graph instead of parse/object materialization only.") + parser.add_argument("--sample-interval", type=float, default=0.01, help="RSS sampling interval for the Python backend harness.") + parser.add_argument("--timeout", type=int, default=900, help="Timeout in seconds for clone/build/benchmark child commands.") + parser.add_argument("--min-wall-ratio", type=float, default=1.0, help="Fail unless Python wall time divided by Rust wall time is at least this value.") + parser.add_argument("--min-rss-ratio", type=float, default=1.0, help="Fail unless Python max RSS divided by Rust max RSS is at least this value.") + parser.add_argument("--allow-file-count-mismatch", action="store_false", dest="require_file_count_match", help="Do not fail if Python and Rust file counts differ.") + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report instead of a human summary.") + parser.set_defaults(python_disable_graph=True, require_file_count_match=True) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.expected_commit 
== "": + args.expected_commit = None + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/benchmark_pinned_typescript_repo.py b/rust-rewrite/tools/benchmark_pinned_typescript_repo.py new file mode 100755 index 000000000..1969b058a --- /dev/null +++ b/rust-rewrite/tools/benchmark_pinned_typescript_repo.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import platform +import sys +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) + +from benchmark_pinned_python_repo import ( # noqa: E402 + DEFAULT_CACHE_DIR, + build_rust_extension, + parse_json_output, + prepare_pinned_repo, + ratio, + run, +) + +DEFAULT_REPO_NAME = "next.js-v15.0.0" +DEFAULT_REPO_URL = "https://github.com/vercel/next.js.git" +DEFAULT_REF = "refs/tags/v15.0.0" +DEFAULT_EXPECTED_COMMIT = "51bfe3c1863b191f4b039bc230e8ed5c57b0baf3" +DEFAULT_EXTENSION_DIR = Path("/tmp/graph_sitter_py_pinned_typescript_benchmark") + + +def run_python_backend(repo: Path, args: argparse.Namespace) -> dict[str, Any]: + command = [ + sys.executable, + str(TOOLS_DIR / "measure_python_backend.py"), + str(repo), + "--language", + "typescript", + "--skip-object-counts", + "--sample-interval", + str(args.sample_interval), + "--json", + ] + if args.python_disable_graph: + command.append("--disable-graph") + result = run(command, cwd=REPO_ROOT, timeout=args.timeout) + return parse_json_output(result.stdout) + + +def run_rust_typescript_index(repo: Path, args: 
argparse.Namespace) -> dict[str, Any]: + env = os.environ.copy() + pythonpath = env.get("PYTHONPATH") + env["PYTHONPATH"] = ( + str(args.extension_dir) + if not pythonpath + else f"{args.extension_dir}{os.pathsep}{pythonpath}" + ) + command = [ + sys.executable, + str(TOOLS_DIR / "measure_typescript_rust_index.py"), + str(repo), + "--json", + ] + if args.raw_rust_walk: + command.append("--raw-rust-walk") + result = run(command, cwd=REPO_ROOT, env=env, timeout=args.timeout) + return parse_json_output(result.stdout) + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + repo, actual_commit = prepare_pinned_repo(args) + extension_path = None + if not args.skip_build_extension: + extension_path = build_rust_extension(args.extension_dir, timeout=args.timeout) + + python_report = run_python_backend(repo, args) + rust_report = run_rust_typescript_index(repo, args) + + python_totals = python_report["totals"] + python_graph = python_report["graph"] + rust_totals = rust_report["totals"] + rust_summary = rust_report["summary"] + rust_selected_files = rust_report["metadata"]["selected_file_count"] + wall_ratio = ratio(python_totals["wall_seconds"], rust_totals["wall_seconds"]) + rss_ratio = ratio(python_totals["max_rss_mb"], rust_totals["max_rss_mb"]) + + report = { + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "checkout": str(repo), + "python": sys.version, + "platform": platform.platform(), + "python_disable_graph": args.python_disable_graph, + "raw_rust_walk": args.raw_rust_walk, + "sample_interval_seconds": args.sample_interval, + "extension_path": str(extension_path) if extension_path else None, + }, + "comparison": { + "python_to_rust_wall_ratio": wall_ratio, + "python_to_rust_rss_ratio": rss_ratio, + "python_wall_seconds": python_totals["wall_seconds"], + "rust_wall_seconds": rust_totals["wall_seconds"], + "python_max_rss_mb": python_totals["max_rss_mb"], + "rust_max_rss_mb": 
rust_totals["max_rss_mb"], + "python_source_files": python_graph["source_files"], + "rust_files": rust_summary["files"], + "rust_selected_files": rust_selected_files, + "selected_file_count_match": rust_selected_files is None + or rust_selected_files == rust_summary["files"], + "python_materialized_file_count_match": python_graph["source_files"] + == rust_summary["files"], + "python_materialized_file_delta": rust_summary["files"] + - python_graph["source_files"], + "rust_symbols": rust_summary["symbols"], + "rust_classes": rust_summary["classes"], + "rust_functions": rust_summary["functions"], + "rust_global_variables": rust_summary["global_variables"], + "rust_imports": rust_summary["imports"], + "rust_import_resolutions": rust_summary["import_resolutions"], + "rust_external_modules": rust_summary["external_modules"], + "rust_exports": rust_summary["exports"], + "rust_references": rust_summary["references"], + "rust_dependencies": rust_summary["dependencies"], + "rust_files_with_errors": rust_summary["files_with_errors"], + "python_nodes": python_graph["nodes"], + "python_edges": python_graph["edges"], + }, + "python_backend": python_report, + "rust_typescript_index": rust_report, + } + validate_report(report, args) + return report + + +def validate_report(report: dict[str, Any], args: argparse.Namespace) -> None: + comparison = report["comparison"] + failures = [] + wall_ratio = comparison["python_to_rust_wall_ratio"] + rss_ratio = comparison["python_to_rust_rss_ratio"] + if args.require_file_count_match and not comparison["selected_file_count_match"]: + failures.append( + "selected file count mismatch: " + f"selected={comparison['rust_selected_files']} rust={comparison['rust_files']}" + ) + if wall_ratio is None or wall_ratio < args.min_wall_ratio: + failures.append( + f"wall ratio {wall_ratio}x is below required {args.min_wall_ratio}x" + ) + if rss_ratio is None or rss_ratio < args.min_rss_ratio: + failures.append(f"RSS ratio {rss_ratio}x is below required 
{args.min_rss_ratio}x") + if failures: + raise RuntimeError("; ".join(failures)) + + +def print_human(report: dict[str, Any]) -> None: + metadata = report["metadata"] + comparison = report["comparison"] + print(f"repo: {metadata['name']} {metadata['commit']}") + print(f"checkout: {metadata['checkout']}") + print(f"python disable_graph: {metadata['python_disable_graph']}") + print(f"raw rust walk: {metadata['raw_rust_walk']}") + print( + "python backend: " + f"wall={comparison['python_wall_seconds']:.3f}s " + f"max_rss={comparison['python_max_rss_mb']:.1f} MB " + f"files={comparison['python_source_files']} " + f"nodes={comparison['python_nodes']} edges={comparison['python_edges']}" + ) + print( + "rust TS index: " + f"wall={comparison['rust_wall_seconds']:.3f}s " + f"max_rss={comparison['rust_max_rss_mb']:.1f} MB " + f"files={comparison['rust_files']} symbols={comparison['rust_symbols']} " + f"imports={comparison['rust_imports']} " + f"import_resolutions={comparison['rust_import_resolutions']} " + f"external_modules={comparison['rust_external_modules']} " + f"exports={comparison['rust_exports']} " + f"references={comparison['rust_references']} " + f"dependencies={comparison['rust_dependencies']} " + f"files_with_errors={comparison['rust_files_with_errors']}" + ) + print( + "ratios: " + f"wall={comparison['python_to_rust_wall_ratio']}x " + f"rss={comparison['python_to_rust_rss_ratio']}x " + f"selected_file_count_match={comparison['selected_file_count_match']} " + f"python_materialized_delta={comparison['python_materialized_file_delta']}" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Benchmark a pinned large TypeScript/JavaScript repository against the compact Rust TS indexer." 
+ ) + parser.add_argument("--name", default=DEFAULT_REPO_NAME, help="Stable name for the pinned repository checkout.") + parser.add_argument("--repo-url", default=DEFAULT_REPO_URL, help="Git repository URL.") + parser.add_argument("--ref", default=DEFAULT_REF, help="Remote ref or commit to fetch.") + parser.add_argument( + "--expected-commit", + default=DEFAULT_EXPECTED_COMMIT, + help="Expected resolved commit SHA. Pass an empty string to disable.", + ) + parser.add_argument( + "--cache-dir", + type=Path, + default=DEFAULT_CACHE_DIR, + help="Directory for reusable pinned checkouts.", + ) + parser.add_argument( + "--extension-dir", + type=Path, + default=DEFAULT_EXTENSION_DIR, + help="Directory for the built PyO3 extension module.", + ) + parser.add_argument( + "--reset-checkout", + action="store_true", + help="Delete and recreate the cached checkout before running.", + ) + parser.add_argument( + "--skip-fetch", + action="store_true", + help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.", + ) + parser.add_argument( + "--skip-build-extension", + action="store_true", + help="Reuse an existing graph_sitter_py extension in --extension-dir.", + ) + parser.add_argument( + "--python-full-graph", + action="store_false", + dest="python_disable_graph", + help="Measure the full Python graph instead of parse/object materialization only.", + ) + parser.add_argument( + "--raw-rust-walk", + action="store_true", + help="Use Rust's raw recursive TS/JS walk instead of Python-selected file paths.", + ) + parser.add_argument( + "--sample-interval", + type=float, + default=0.01, + help="RSS sampling interval for the Python backend harness.", + ) + parser.add_argument( + "--timeout", + type=int, + default=900, + help="Timeout in seconds for clone/build/benchmark child commands.", + ) + parser.add_argument( + "--min-wall-ratio", + type=float, + default=1.0, + help="Fail unless Python wall time divided by Rust wall time is at least this value.", + ) + 
parser.add_argument( + "--min-rss-ratio", + type=float, + default=1.0, + help="Fail unless Python max RSS divided by Rust max RSS is at least this value.", + ) + parser.add_argument( + "--allow-file-count-mismatch", + action="store_false", + dest="require_file_count_match", + help="Do not fail if Rust file count differs from the selected TS/JS file list.", + ) + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument( + "--json", action="store_true", help="Print JSON report instead of a human summary." + ) + parser.set_defaults(python_disable_graph=True, require_file_count_match=True) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.expected_commit == "": + args.expected_commit = None + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text( + json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8" + ) + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_cli_smoke.sh b/rust-rewrite/tools/check_cli_smoke.sh new file mode 100755 index 000000000..36aa07a7c --- /dev/null +++ b/rust-rewrite/tools/check_cli_smoke.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +cd "$ROOT" + +CLI_FILES=( + src/graph_sitter/cli/cli.py + src/graph_sitter/cli/commands/doctor/main.py + src/graph_sitter/cli/commands/parse/main.py + src/graph_sitter/cli/commands/run/main.py + src/graph_sitter/cli/commands/run/run_local.py + src/graph_sitter/cli/commands/transform/main.py + tests/unit/cli/commands/parse/test_parse.py + tests/unit/cli/commands/run/test_run.py + tests/unit/cli/commands/transform/test_transform.py +) + +uv run ruff check "${CLI_FILES[@]}" +uv run python -m py_compile "${CLI_FILES[@]}" + +uv run graph-sitter --help >/dev/null +uv run graph-sitter doctor --help >/dev/null +uv run graph-sitter doctor --json >/dev/null +uv run graph-sitter parse --help >/dev/null +uv run graph-sitter run --help >/dev/null +uv run graph-sitter transform --help >/dev/null + +uv run pytest \ + tests/unit/cli/commands/parse/test_parse.py \ + tests/unit/cli/commands/run/test_run.py \ + tests/unit/cli/commands/transform/test_transform.py \ + -q diff --git a/rust-rewrite/tools/check_extension_build.sh b/rust-rewrite/tools/check_extension_build.sh new file mode 100755 index 000000000..89add9378 --- /dev/null +++ b/rust-rewrite/tools/check_extension_build.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +cd "$ROOT" + +PYTHON_BIN="${PYTHON_BIN:-$(uv run python -c 'import sys; print(sys.executable)')}" +PYTHON_LIBDIR="$("$PYTHON_BIN" - <<'PY' +import sysconfig + +print(sysconfig.get_config_var("LIBDIR") or "") +PY +)" +if [[ -n "$PYTHON_LIBDIR" && -d "$PYTHON_LIBDIR" ]]; then + export LD_LIBRARY_PATH="$PYTHON_LIBDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" +fi + +PYO3_PYTHON="$PYTHON_BIN" cargo check -p graph-sitter-py --features pyo3-bindings +PYO3_PYTHON="$PYTHON_BIN" cargo test -p graph-sitter-py --features pyo3-bindings + +if [[ "$(uname)" == "Darwin" ]]; then + export RUSTFLAGS="${RUSTFLAGS:-} -C link-arg=-undefined -C link-arg=dynamic_lookup" +fi +PYO3_PYTHON="$PYTHON_BIN" cargo build -p graph-sitter-py --features extension-module + +EXTENSION_DIR="${GRAPH_SITTER_EXTENSION_CHECK_DIR:-${TMPDIR:-/tmp}/graph_sitter_py_extension_check}" +EXTENSION_DIR="$EXTENSION_DIR" "$PYTHON_BIN" - <<'PY' +import os +import shutil +import sys +import sysconfig +from pathlib import Path + +root = Path.cwd() +if sys.platform == "darwin": + source = root / "target/debug/libgraph_sitter_py.dylib" +elif os.name == "nt": + source = root / "target/debug/graph_sitter_py.dll" +else: + source = root / "target/debug/libgraph_sitter_py.so" +if not source.exists(): + msg = f"built extension artifact not found: {source}" + raise FileNotFoundError(msg) + +extension_dir = Path(os.environ["EXTENSION_DIR"]) +extension_dir.mkdir(parents=True, exist_ok=True) +target = extension_dir / f"graph_sitter_py{sysconfig.get_config_var('EXT_SUFFIX')}" +shutil.copy2(source, target) +PY + +PYTHONPATH="$EXTENSION_DIR${PYTHONPATH:+:$PYTHONPATH}" "$PYTHON_BIN" - <<'PY' +import tempfile +from pathlib import Path + +import graph_sitter_py + +with tempfile.TemporaryDirectory(prefix="graph-sitter-extension-smoke-") as tmpdir: + repo = Path(tmpdir) + (repo / "pkg").mkdir() + (repo / "pkg" / "__init__.py").write_text("", encoding="utf-8") + (repo / "pkg" / "service.py").write_text( + "import os\n\nclass 
Service:\n pass\n", encoding="utf-8" + ) + index = graph_sitter_py.index_python_path(str(repo)) + summary = index.summary().as_dict() + assert summary["files"] == 2, summary + assert summary["classes"] == 1, summary + assert summary["imports"] == 1, summary + +print(f"graph_sitter_py {graph_sitter_py.engine_version()} extension smoke passed") +PY diff --git a/rust-rewrite/tools/check_fast.sh b/rust-rewrite/tools/check_fast.sh new file mode 100755 index 000000000..2ceee783c --- /dev/null +++ b/rust-rewrite/tools/check_fast.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "$ROOT" + +uv run ruff check + +bash -n rust-rewrite/tools/check_fast.sh +bash -n rust-rewrite/tools/check_extension_build.sh +bash -n rust-rewrite/tools/check_pinned_large_repos.sh + +cargo fmt --all --check +cargo test --workspace --all-targets + +PYTHON_BIN="$(uv run python -c 'import sys; print(sys.executable)')" +PYTHON_LIBDIR="$("$PYTHON_BIN" - <<'PY' +import sysconfig + +print(sysconfig.get_config_var("LIBDIR") or "") +PY +)" +if [[ -n "$PYTHON_LIBDIR" && -d "$PYTHON_LIBDIR" ]]; then + export LD_LIBRARY_PATH="$PYTHON_LIBDIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" +fi +PYO3_PYTHON="$PYTHON_BIN" cargo check -p graph-sitter-py --features pyo3-bindings +PYO3_PYTHON="$PYTHON_BIN" cargo test -p graph-sitter-py --features pyo3-bindings +if [[ "$(uname)" == "Darwin" ]]; then + export RUSTFLAGS="${RUSTFLAGS:-} -C link-arg=-undefined -C link-arg=dynamic_lookup" +fi +PYO3_PYTHON="$PYTHON_BIN" cargo build -p graph-sitter-py --features extension-module + +FAST_EXTENSION_DIR="${TMPDIR:-/tmp}/graph_sitter_py_fast_checks" +FAST_EXTENSION_DIR="$FAST_EXTENSION_DIR" "$PYTHON_BIN" - <<'PY' +import os +import shutil +import sys +import sysconfig +from pathlib import Path + +root = Path.cwd() +if sys.platform == "darwin": + source = root / "target/debug/libgraph_sitter_py.dylib" +elif os.name == "nt": + source = root / 
"target/debug/graph_sitter_py.dll" +else: + source = root / "target/debug/libgraph_sitter_py.so" +if not source.exists(): + msg = f"built extension artifact not found: {source}" + raise FileNotFoundError(msg) + +extension_dir = Path(os.environ["FAST_EXTENSION_DIR"]) +extension_dir.mkdir(parents=True, exist_ok=True) +target = extension_dir / f"graph_sitter_py{sysconfig.get_config_var('EXT_SUFFIX')}" +shutil.copy2(source, target) +PY + +uv run python -m py_compile \ + src/graph_sitter/codebase/rust_backend.py \ + tests/unit/sdk/codebase/test_rust_backend.py \ + tests/unit/sdk/codebase/test_rust_rewrite_readiness.py \ + tests/integration/rust_rewrite/test_pinned_airflow_snapshot.py \ + tests/integration/rust_rewrite/test_pinned_codemods.py \ + tests/integration/rust_rewrite/test_pinned_nextjs_snapshot.py \ + tests/integration/rust_rewrite/test_pinned_semantic_parity.py \ + tests/integration/rust_rewrite/test_python_rust_parity_fixture.py \ + rust-rewrite/tools/check_pinned_codemods.py \ + rust-rewrite/tools/check_p0_parity_coverage.py \ + rust-rewrite/tools/check_pinned_python_codebase.py \ + rust-rewrite/tools/check_wheel_pinned_python_repo.py \ + rust-rewrite/tools/check_wheel_pinned_typescript_repo.py \ + rust-rewrite/tools/check_rollout_readiness.py \ + rust-rewrite/tools/check_supported_subset.py \ + rust-rewrite/tools/check_pinned_semantic_parity.py \ + rust-rewrite/tools/check_python_rust_parity_fixture.py \ + rust-rewrite/tools/check_pinned_typescript_codebase.py \ + rust-rewrite/tools/benchmark_pinned_typescript_repo.py \ + rust-rewrite/tools/benchmark_pinned_python_repo.py \ + rust-rewrite/tools/compare_rust_python_index.py \ + rust-rewrite/tools/measure_codebase_rust_backend.py \ + rust-rewrite/tools/measure_python_backend.py \ + rust-rewrite/tools/measure_rust_facade.py \ + rust-rewrite/tools/measure_typescript_rust_index.py \ + rust-rewrite/tools/snapshot_pinned_typescript_repo.py \ + rust-rewrite/tools/snapshot_pinned_python_repo.py + +uv run python 
rust-rewrite/tools/check_python_rust_parity_fixture.py \ + --skip-build-extension \ + --extension-dir "$FAST_EXTENSION_DIR" + +uv run python rust-rewrite/tools/check_supported_subset.py +uv run python rust-rewrite/tools/check_p0_parity_coverage.py + +uv run pytest \ + tests/unit/sdk/codebase/test_rust_backend.py \ + tests/unit/sdk/codebase/test_rust_rewrite_readiness.py \ + tests/integration/rust_rewrite/test_pinned_airflow_snapshot.py \ + tests/integration/rust_rewrite/test_pinned_codemods.py \ + tests/integration/rust_rewrite/test_pinned_nextjs_snapshot.py \ + tests/integration/rust_rewrite/test_pinned_semantic_parity.py \ + tests/integration/rust_rewrite/test_python_rust_parity_fixture.py \ + -q diff --git a/rust-rewrite/tools/check_p0_parity_coverage.py b/rust-rewrite/tools/check_p0_parity_coverage.py new file mode 100644 index 000000000..9b124533d --- /dev/null +++ b/rust-rewrite/tools/check_p0_parity_coverage.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from collections import Counter +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[2] +DEFAULT_MANIFEST = REPO_ROOT / "rust-rewrite/p0-parity-coverage.json" +ALLOWED_STATUSES = {"parity_covered", "fallback_covered", "open_gap"} +COVERED_STATUSES = {"parity_covered", "fallback_covered"} + + +def load_manifest(path: Path) -> dict[str, Any]: + manifest = json.loads(path.read_text(encoding="utf-8")) + if manifest.get("schema_version") != 1: + msg = f"unsupported p0 parity coverage schema version: {manifest.get('schema_version')!r}" + raise ValueError(msg) + if not isinstance(manifest.get("pytest_roots"), list) or not manifest["pytest_roots"]: + msg = "p0 parity coverage manifest must define non-empty pytest_roots" + raise ValueError(msg) + if not isinstance(manifest.get("groups"), list) or not manifest["groups"]: + msg = "p0 parity coverage manifest must define 
non-empty groups" + raise ValueError(msg) + return manifest + + +def collect_pytest_ids(pytest_roots: list[str]) -> set[str]: + command = [ + sys.executable, + "-m", + "pytest", + "--collect-only", + "-q", + *pytest_roots, + ] + result = subprocess.run( + command, + cwd=REPO_ROOT, + check=False, + capture_output=True, + text=True, + ) + if result.returncode != 0: + msg = ( + f"pytest collection failed with exit code {result.returncode}\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + raise RuntimeError(msg) + return { + line.strip() + for line in result.stdout.splitlines() + if "::test_" in line and line.strip().endswith(tuple("abcdefghijklmnopqrstuvwxyz0123456789_]")) + } + + +def as_string_list(value: object, *, context: str) -> list[str]: + if not isinstance(value, list) or not value: + msg = f"{context} must be a non-empty list" + raise ValueError(msg) + for item in value: + if not isinstance(item, str) or not item: + msg = f"{context} must contain only non-empty strings" + raise ValueError(msg) + return value + + +def evidence_lists(group: dict[str, Any]) -> tuple[list[str], list[str]]: + evidence = group.get("evidence", {}) + if evidence is None: + evidence = {} + if not isinstance(evidence, dict): + msg = f"{group.get('name', '')}: evidence must be an object" + raise ValueError(msg) + + pytest_ids = evidence.get("pytest", []) + tool_paths = evidence.get("tools", []) + if pytest_ids: + pytest_ids = as_string_list(pytest_ids, context=f"{group['name']}: evidence.pytest") + if tool_paths: + tool_paths = as_string_list(tool_paths, context=f"{group['name']}: evidence.tools") + return list(pytest_ids), list(tool_paths) + + +def validate_group(group: dict[str, Any], collected: set[str]) -> tuple[list[str], list[str]]: + name = group.get("name") + status = group.get("status") + if not isinstance(name, str) or not name: + msg = f"group is missing a name: {group!r}" + raise ValueError(msg) + if status not in ALLOWED_STATUSES: + msg = f"{name}: 
status must be one of {sorted(ALLOWED_STATUSES)}" + raise ValueError(msg) + + as_string_list(group.get("api_inventory"), context=f"{name}: api_inventory") + pytest_ids, tool_paths = evidence_lists(group) + + if status in COVERED_STATUSES and not pytest_ids and not tool_paths: + msg = f"{name}: {status} groups require pytest or tool evidence" + raise ValueError(msg) + if status == "open_gap" and not isinstance(group.get("gap"), str): + msg = f"{name}: open_gap groups require a gap string" + raise ValueError(msg) + + missing_tests = sorted(set(pytest_ids) - collected) + if missing_tests: + msg = f"{name}: evidence pytest IDs not collected: {', '.join(missing_tests)}" + raise ValueError(msg) + + missing_tools = sorted(tool for tool in tool_paths if not (REPO_ROOT / tool).exists()) + if missing_tools: + msg = f"{name}: evidence tool paths not found: {', '.join(missing_tools)}" + raise ValueError(msg) + + return pytest_ids, tool_paths + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + manifest = load_manifest(args.manifest) + collected = collect_pytest_ids(manifest["pytest_roots"]) + + status_counts: Counter[str] = Counter() + pytest_evidence: set[str] = set() + tool_evidence: set[str] = set() + open_gaps: list[str] = [] + + for root in manifest["pytest_roots"]: + if not (REPO_ROOT / root).exists(): + msg = f"pytest root not found: {root}" + raise ValueError(msg) + + for group in manifest["groups"]: + pytest_ids, tool_paths = validate_group(group, collected) + status = group["status"] + status_counts[status] += 1 + pytest_evidence.update(pytest_ids) + tool_evidence.update(tool_paths) + if status == "open_gap": + open_gaps.append(group["name"]) + + failures: list[str] = [] + if args.require_complete and open_gaps: + failures.append("open P0 parity gaps remain: " + ", ".join(sorted(open_gaps))) + + report = { + "status": "failed" if failures else "passed", + "manifest": str(args.manifest), + "pytest_roots": manifest["pytest_roots"], + "group_count": 
len(manifest["groups"]), + "status_counts": dict(sorted(status_counts.items())), + "pytest_evidence_count": len(pytest_evidence), + "tool_evidence_count": len(tool_evidence), + "open_gaps": sorted(open_gaps), + "failures": failures, + } + if failures: + msg = "p0 parity coverage check failed: " + "; ".join(failures) + raise RuntimeError(msg) + return report + + +def print_human(report: dict[str, Any]) -> None: + print(f"status: {report['status']}") + print(f"manifest: {report['manifest']}") + print(f"groups: {report['group_count']}") + for status, count in report["status_counts"].items(): + print(f"{status}: {count}") + print(f"pytest evidence: {report['pytest_evidence_count']}") + print(f"tool evidence: {report['tool_evidence_count']}") + if report["open_gaps"]: + print("open gaps:") + for gap in report["open_gaps"]: + print(f" - {gap}") + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Validate the Rust rewrite P0 parity coverage manifest." 
+ ) + parser.add_argument("--manifest", type=Path, default=DEFAULT_MANIFEST) + parser.add_argument( + "--require-complete", + action="store_true", + help="Fail if any P0 group is still marked open_gap.", + ) + parser.add_argument("--json", action="store_true", help="Print JSON instead of a human summary.") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + report = make_report(args) + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_pinned_codemods.py b/rust-rewrite/tools/check_pinned_codemods.py new file mode 100644 index 000000000..7d64b986e --- /dev/null +++ b/rust-rewrite/tools/check_pinned_codemods.py @@ -0,0 +1,438 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import resource +import subprocess +import sys +import tempfile +import time +from pathlib import Path +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from collections.abc import Callable + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +SRC_ROOT = REPO_ROOT / "src" +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) +if str(SRC_ROOT) not in sys.path: + sys.path.insert(0, str(SRC_ROOT)) + +import benchmark_pinned_python_repo as python_benchmark # noqa: E402 +import benchmark_pinned_typescript_repo as typescript_benchmark # noqa: E402 + +from codemods.codemod import Codemod # noqa: E402 + +DEFAULT_CACHE_DIR = Path("/tmp/graph-sitter-pinned-repos") +DEFAULT_EXTENSION_DIR = Path("/tmp/graph_sitter_py_pinned_codemods") + +PYTHON_TARGET_FILE = "airflow/__init__.py" +PYTHON_IMPORTED_LINE = "from typing import Any" +PYTHON_RENAMED_FUNCTION = "__getattr_rust_proof__" + +TYPESCRIPT_TARGET_FILE = "packages/next/src/client/components/app-router-announcer.tsx" +TYPESCRIPT_USAGE_FILE = 
"packages/next/src/client/components/app-router.tsx" +TYPESCRIPT_IMPORTED_LINE = "import { act } from 'react-dom/test-utils';" +TYPESCRIPT_RENAMED_FUNCTION = "AppRouterAnnouncerProof" + + +def run(command: list[str], *, cwd: Path, timeout: int | None) -> subprocess.CompletedProcess[str]: + return subprocess.run( + command, + cwd=cwd, + timeout=timeout, + check=True, + text=True, + capture_output=True, + ) + + +def bytes_to_mb(value: float) -> float: + return value / (1024 * 1024) + + +def max_rss_bytes() -> int: + rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(rss) + return int(rss * 1024) + + +def current_rss_bytes() -> int: + import psutil + + return int(psutil.Process(os.getpid()).memory_info().rss) + + +def memory_sample(label: str) -> dict[str, float | str]: + return { + "label": label, + "rss_mb": round(bytes_to_mb(current_rss_bytes()), 3), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + } + + +def large_cache_materialization_report(backend: Any) -> dict[str, bool]: + return { + "files": backend._files is not None, + "symbols": backend._symbols is not None, + "imports": backend._imports is not None, + "exports": backend._exports is not None, + "references": backend._references is not None, + "external_references": backend._external_references is not None, + "dependencies": backend._dependencies is not None, + "file_handles": backend._file_handles is not None, + "symbol_handles": backend._symbol_handles is not None, + "import_handles": backend._import_handles is not None, + "export_handles": backend._export_handles is not None, + "external_module_handles": backend._external_module_handles is not None, + } + + +def prepare_repo( + *, + name: str, + repo_url: str, + ref: str, + expected_commit: str, + cache_dir: Path, + reset_checkout: bool, + skip_fetch: bool, + timeout: int, + prepare: Callable[[argparse.Namespace], tuple[Path, str]], +) -> tuple[Path, str]: + repo_args = argparse.Namespace( + 
name=name, + repo_url=repo_url, + ref=ref, + expected_commit=expected_commit, + cache_dir=cache_dir, + reset_checkout=reset_checkout, + skip_fetch=skip_fetch, + timeout=timeout, + ) + return prepare(repo_args) + + +def clone_mutable_checkout( + cache_repo: Path, + commit: str, + *, + prefix: str, + repo_url: str, + timeout: int, +) -> tuple[tempfile.TemporaryDirectory[str], Path]: + tempdir = tempfile.TemporaryDirectory(prefix=f"graph-sitter-{prefix}-codemod-") + checkout = Path(tempdir.name) / "repo" + run(["git", "clone", "--shared", "--no-checkout", str(cache_repo), str(checkout)], cwd=REPO_ROOT, timeout=timeout) + run(["git", "remote", "set-url", "origin", repo_url], cwd=checkout, timeout=timeout) + run(["git", "checkout", "--detach", commit], cwd=checkout, timeout=timeout) + return tempdir, checkout + + +def git_status(checkout: Path, *, timeout: int) -> list[str]: + result = run(["git", "status", "--porcelain"], cwd=checkout, timeout=timeout) + return [line for line in result.stdout.splitlines() if line] + + +def python_graph_is_blocked(codebase: Any) -> bool: + try: + len(codebase.ctx.nodes) + except RuntimeError: + return True + return False + + +def build_codebase(checkout: Path, *, language: str) -> Any: + from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + from graph_sitter.core.codebase import Codebase + + config = CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR) + return Codebase(str(checkout), language=language, config=config) + + +def run_airflow_codemod(checkout: Path, *, timeout: int) -> dict[str, Any]: + memory_samples = [memory_sample("python_start")] + + construct_start = time.perf_counter() + codebase = build_codebase(checkout, language="python") + construct_wall = time.perf_counter() - construct_start + memory_samples.append(memory_sample("python_after_codebase_construct")) + + graph_blocked_before = python_graph_is_blocked(codebase) + 
memory_samples.append(memory_sample("python_after_graph_block_check")) + backend = codebase.ctx.rust_index + assert backend is not None + + def execute(mod_codebase: Any) -> None: + target_file = mod_codebase.get_file(PYTHON_TARGET_FILE) + target_file.add_import(PYTHON_IMPORTED_LINE) + target_file.get_function("__getattr__").rename(PYTHON_RENAMED_FUNCTION) + + codemod = Codemod(name="rust-pinned-airflow-import-rename", execute=execute) + codemod_start = time.perf_counter() + codemod.execute(codebase) + codebase.commit(sync_graph=False) + codemod_wall = time.perf_counter() - codemod_start + memory_samples.append(memory_sample("python_after_codemod_commit")) + + graph_blocked_after = python_graph_is_blocked(codebase) + modified_content = (checkout / PYTHON_TARGET_FILE).read_text(encoding="utf-8") + status = git_status(checkout, timeout=timeout) + cache_materialization = large_cache_materialization_report(backend) + + assertions = { + "added_import": PYTHON_IMPORTED_LINE in modified_content, + "renamed_declaration": f"def {PYTHON_RENAMED_FUNCTION}(name: str):" in modified_content, + "removed_original_declaration": "def __getattr__(name: str):" not in modified_content, + "only_target_file_modified": status == [f" M {PYTHON_TARGET_FILE}"], + "python_graph_blocked_before": graph_blocked_before, + "python_graph_blocked_after": graph_blocked_after, + "large_caches_cold": not any(cache_materialization.values()), + } + + return { + "suite": "python", + "repo": "apache-airflow-2.10.5", + "target_file": PYTHON_TARGET_FILE, + "codemod": codemod.name, + "timings": { + "codebase_construct_wall_seconds": round(construct_wall, 6), + "codemod_commit_wall_seconds": round(codemod_wall, 6), + }, + "rss_samples": memory_samples, + "git_status": status, + "assertions": assertions, + "large_cache_materialization": cache_materialization, + } + + +def run_nextjs_codemod(checkout: Path, *, timeout: int) -> dict[str, Any]: + memory_samples = [memory_sample("typescript_start")] + + 
construct_start = time.perf_counter() + codebase = build_codebase(checkout, language="typescript") + construct_wall = time.perf_counter() - construct_start + memory_samples.append(memory_sample("typescript_after_codebase_construct")) + + graph_blocked_before = python_graph_is_blocked(codebase) + memory_samples.append(memory_sample("typescript_after_graph_block_check")) + backend = codebase.ctx.rust_index + assert backend is not None + + def execute(mod_codebase: Any) -> None: + target_file = mod_codebase.get_file(TYPESCRIPT_TARGET_FILE) + target_file.add_import(TYPESCRIPT_IMPORTED_LINE) + target_file.get_function("AppRouterAnnouncer").rename(TYPESCRIPT_RENAMED_FUNCTION) + + codemod = Codemod(name="rust-pinned-nextjs-import-rename", execute=execute) + codemod_start = time.perf_counter() + codemod.execute(codebase) + codebase.commit(sync_graph=False) + codemod_wall = time.perf_counter() - codemod_start + memory_samples.append(memory_sample("typescript_after_codemod_commit")) + + graph_blocked_after = python_graph_is_blocked(codebase) + modified_content = (checkout / TYPESCRIPT_TARGET_FILE).read_text(encoding="utf-8") + usage_content = (checkout / TYPESCRIPT_USAGE_FILE).read_text(encoding="utf-8") + status = git_status(checkout, timeout=timeout) + cache_materialization = large_cache_materialization_report(backend) + expected_status = { + f" M {TYPESCRIPT_TARGET_FILE}", + f" M {TYPESCRIPT_USAGE_FILE}", + } + + assertions = { + "added_import": TYPESCRIPT_IMPORTED_LINE in modified_content, + "renamed_declaration": f"export function {TYPESCRIPT_RENAMED_FUNCTION}" in modified_content, + "removed_original_declaration": "export function AppRouterAnnouncer(" not in modified_content, + "rewrote_importing_usage": TYPESCRIPT_RENAMED_FUNCTION in usage_content, + "only_expected_files_modified": set(status) == expected_status, + "python_graph_blocked_before": graph_blocked_before, + "python_graph_blocked_after": graph_blocked_after, + "large_caches_cold": not 
any(cache_materialization.values()), + } + + return { + "suite": "typescript", + "repo": "next.js-v15.0.0", + "target_file": TYPESCRIPT_TARGET_FILE, + "codemod": codemod.name, + "timings": { + "codebase_construct_wall_seconds": round(construct_wall, 6), + "codemod_commit_wall_seconds": round(codemod_wall, 6), + }, + "rss_samples": memory_samples, + "git_status": status, + "assertions": assertions, + "large_cache_materialization": cache_materialization, + } + + +def run_python_suite(args: argparse.Namespace) -> dict[str, Any]: + cache_repo, commit = prepare_repo( + name=python_benchmark.DEFAULT_REPO_NAME, + repo_url=python_benchmark.DEFAULT_REPO_URL, + ref=python_benchmark.DEFAULT_REF, + expected_commit=python_benchmark.DEFAULT_EXPECTED_COMMIT, + cache_dir=args.cache_dir, + reset_checkout=args.reset_checkout, + skip_fetch=args.skip_fetch, + timeout=args.timeout, + prepare=python_benchmark.prepare_pinned_repo, + ) + tempdir, checkout = clone_mutable_checkout( + cache_repo, + commit, + prefix="airflow", + repo_url=python_benchmark.DEFAULT_REPO_URL, + timeout=args.timeout, + ) + with tempdir: + report = run_airflow_codemod(checkout, timeout=args.timeout) + report["metadata"] = { + "repo_url": python_benchmark.DEFAULT_REPO_URL, + "ref": python_benchmark.DEFAULT_REF, + "commit": commit, + "cache_checkout": str(cache_repo), + "mutable_checkout": str(checkout), + } + return report + + +def run_typescript_suite(args: argparse.Namespace) -> dict[str, Any]: + cache_repo, commit = prepare_repo( + name=typescript_benchmark.DEFAULT_REPO_NAME, + repo_url=typescript_benchmark.DEFAULT_REPO_URL, + ref=typescript_benchmark.DEFAULT_REF, + expected_commit=typescript_benchmark.DEFAULT_EXPECTED_COMMIT, + cache_dir=args.cache_dir, + reset_checkout=args.reset_checkout, + skip_fetch=args.skip_fetch, + timeout=args.timeout, + prepare=typescript_benchmark.prepare_pinned_repo, + ) + tempdir, checkout = clone_mutable_checkout( + cache_repo, + commit, + prefix="nextjs", + 
repo_url=typescript_benchmark.DEFAULT_REPO_URL, + timeout=args.timeout, + ) + with tempdir: + report = run_nextjs_codemod(checkout, timeout=args.timeout) + report["metadata"] = { + "repo_url": typescript_benchmark.DEFAULT_REPO_URL, + "ref": typescript_benchmark.DEFAULT_REF, + "commit": commit, + "cache_checkout": str(cache_repo), + "mutable_checkout": str(checkout), + } + return report + + +def validate_report(report: dict[str, Any], args: argparse.Namespace) -> None: + failures: list[str] = [] + for suite in report["suites"]: + failed_assertions = [name for name, passed in suite["assertions"].items() if not passed] + if failed_assertions: + failures.append(f"{suite['suite']} assertions failed: {', '.join(failed_assertions)}") + timings = suite["timings"] + if timings["codebase_construct_wall_seconds"] > args.max_construct_wall_seconds: + failures.append( + f"{suite['suite']} construct wall {timings['codebase_construct_wall_seconds']}s " + f"exceeds {args.max_construct_wall_seconds}s" + ) + if timings["codemod_commit_wall_seconds"] > args.max_codemod_wall_seconds: + failures.append( + f"{suite['suite']} codemod wall {timings['codemod_commit_wall_seconds']}s " + f"exceeds {args.max_codemod_wall_seconds}s" + ) + max_rss = max(float(sample["max_rss_mb"]) for sample in suite["rss_samples"]) + if max_rss > args.max_rss_mb: + failures.append(f"{suite['suite']} max RSS {max_rss} MB exceeds {args.max_rss_mb} MB") + + if failures: + raise RuntimeError("; ".join(failures)) + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + extension_path = None + if not args.skip_build_extension: + extension_path = python_benchmark.build_rust_extension(args.extension_dir, timeout=args.timeout) + if str(args.extension_dir) not in sys.path: + sys.path.insert(0, str(args.extension_dir)) + + suites: list[dict[str, Any]] = [] + if args.suite in {"all", "python"}: + suites.append(run_python_suite(args)) + if args.suite in {"all", "typescript"}: + 
suites.append(run_typescript_suite(args)) + + report = { + "metadata": { + "suite": args.suite, + "extension_path": str(extension_path) if extension_path else None, + "extension_dir": str(args.extension_dir), + "cache_dir": str(args.cache_dir), + }, + "suites": suites, + } + validate_report(report, args) + return report + + +def print_human(report: dict[str, Any]) -> None: + print(f"suite: {report['metadata']['suite']}") + print(f"extension_dir: {report['metadata']['extension_dir']}") + for suite in report["suites"]: + timings = suite["timings"] + max_rss = max(float(sample["max_rss_mb"]) for sample in suite["rss_samples"]) + print( + f"{suite['suite']} {suite['repo']}: " + f"construct={timings['codebase_construct_wall_seconds']:.3f}s " + f"codemod={timings['codemod_commit_wall_seconds']:.3f}s " + f"max_rss={max_rss:.1f} MB " + f"modified={', '.join(suite['git_status'])}" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Run graph-free compact Rust codemod smoke checks on pinned Airflow and Next.js checkouts." 
+ ) + parser.add_argument("--suite", choices=["all", "python", "typescript"], default="all") + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR, help="Directory for reusable pinned checkouts.") + parser.add_argument("--extension-dir", type=Path, default=DEFAULT_EXTENSION_DIR, help="Directory for the built PyO3 extension module.") + parser.add_argument("--reset-checkout", action="store_true", help="Delete and recreate cached pinned checkouts before running.") + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.") + parser.add_argument("--skip-build-extension", action="store_true", help="Reuse an existing graph_sitter_py extension in --extension-dir.") + parser.add_argument("--timeout", type=int, default=900, help="Timeout in seconds for clone/build/check child commands.") + parser.add_argument("--max-construct-wall-seconds", type=float, default=20.0, help="Fail if any Rust Codebase construction exceeds this ceiling.") + parser.add_argument("--max-codemod-wall-seconds", type=float, default=10.0, help="Fail if any codemod execute+commit exceeds this ceiling.") + parser.add_argument("--max-rss-mb", type=float, default=1000.0, help="Fail if process max RSS exceeds this ceiling.") + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report instead of a human summary.") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git 
a/rust-rewrite/tools/check_pinned_large_repos.sh b/rust-rewrite/tools/check_pinned_large_repos.sh new file mode 100755 index 000000000..042e0605c --- /dev/null +++ b/rust-rewrite/tools/check_pinned_large_repos.sh @@ -0,0 +1,68 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "$ROOT" + +OUTPUT_DIR="${GRAPH_SITTER_PINNED_OUTPUT_DIR:-$ROOT/rust-rewrite/reports}" +CACHE_DIR="${GRAPH_SITTER_PINNED_CACHE_DIR:-/tmp/graph-sitter-pinned-repos}" +EXTENSION_DIR="${GRAPH_SITTER_PINNED_EXTENSION_DIR:-/tmp/graph_sitter_py_large_repo_checks}" +TIMEOUT="${GRAPH_SITTER_PINNED_TIMEOUT:-900}" + +COMMON_ARGS=( + --cache-dir "$CACHE_DIR" + --extension-dir "$EXTENSION_DIR" + --timeout "$TIMEOUT" +) +if [[ "${GRAPH_SITTER_PINNED_SKIP_FETCH:-0}" == "1" ]]; then + COMMON_ARGS+=(--skip-fetch) +fi + +AIRFLOW_SNAPSHOT_ARGS=("${COMMON_ARGS[@]}") +if [[ "${GRAPH_SITTER_PINNED_SKIP_BUILD_EXTENSION:-0}" == "1" ]]; then + AIRFLOW_SNAPSHOT_ARGS+=(--skip-build-extension) +fi + +mkdir -p "$OUTPUT_DIR" + +echo "Checking pinned Airflow compact snapshot" +uv run python rust-rewrite/tools/snapshot_pinned_python_repo.py \ + "${AIRFLOW_SNAPSHOT_ARGS[@]}" \ + --output "$OUTPUT_DIR/airflow-rust-compact-snapshot.json" + +echo "Checking pinned Airflow Rust Codebase proof" +uv run python rust-rewrite/tools/check_pinned_python_codebase.py \ + "${COMMON_ARGS[@]}" \ + --skip-build-extension \ + --output "$OUTPUT_DIR/airflow-rust-codebase.json" + +echo "Checking pinned Next.js compact snapshot" +uv run python rust-rewrite/tools/snapshot_pinned_typescript_repo.py \ + "${COMMON_ARGS[@]}" \ + --skip-build-extension \ + --output "$OUTPUT_DIR/nextjs-rust-compact-snapshot.json" + +echo "Checking pinned Next.js Rust Codebase proof" +uv run python rust-rewrite/tools/check_pinned_typescript_codebase.py \ + "${COMMON_ARGS[@]}" \ + --skip-build-extension \ + --output "$OUTPUT_DIR/nextjs-rust-codebase.json" + +echo "Checking pinned large-repo Rust codemod proof" +uv 
run python rust-rewrite/tools/check_pinned_codemods.py \ + "${COMMON_ARGS[@]}" \ + --skip-build-extension \ + --output "$OUTPUT_DIR/pinned-rust-codemods.json" + +echo "Checking pinned large-repo semantic parity" +uv run python rust-rewrite/tools/check_pinned_semantic_parity.py \ + "${COMMON_ARGS[@]}" \ + --skip-build-extension \ + --output "$OUTPUT_DIR/pinned-semantic-parity.json" + +echo "Checking aggregate rollout readiness" +uv run python rust-rewrite/tools/check_rollout_readiness.py \ + --report-dir "$OUTPUT_DIR" \ + --output "$OUTPUT_DIR/rollout-readiness.json" + +echo "Pinned large-repo checks wrote reports to $OUTPUT_DIR" diff --git a/rust-rewrite/tools/check_pinned_python_codebase.py b/rust-rewrite/tools/check_pinned_python_codebase.py new file mode 100644 index 000000000..eaa91f317 --- /dev/null +++ b/rust-rewrite/tools/check_pinned_python_codebase.py @@ -0,0 +1,712 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import resource +import sys +import time +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +SRC_ROOT = REPO_ROOT / "src" +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) +if str(SRC_ROOT) not in sys.path: + sys.path.insert(0, str(SRC_ROOT)) + +from benchmark_pinned_python_repo import ( # noqa: E402 + DEFAULT_CACHE_DIR, + DEFAULT_EXPECTED_COMMIT, + DEFAULT_EXTENSION_DIR, + DEFAULT_REF, + DEFAULT_REPO_NAME, + DEFAULT_REPO_URL, + build_rust_extension, + prepare_pinned_repo, + ratio, +) + +EXPECTED_SUMMARY = { + "files": 4789, + "symbols": 52339, + "classes": 5665, + "functions": 34535, + "global_variables": 12139, + "imports": 44121, + "import_resolutions": 20887, + "external_modules": 19545, + "references": 120770, + "external_references": 79300, + "dependencies": 79737, + "bytes": 36617627, + "lines": 924514, + "files_with_errors": 0, +} + +EXPECTED_RECORDS = { + "rust_files": 4789, + 
"rust_symbols": 52339, + "rust_classes": 5665, + "rust_functions": 34535, + "rust_global_vars": 12139, + "rust_imports": 44121, + "rust_import_resolutions": 20887, + "rust_external_modules": 19545, + "rust_exports": 0, + "rust_references": 120770, + "rust_external_references": 79300, + "rust_function_calls": 0, + "rust_promise_chains": 0, + "rust_dependencies": 79737, + "rust_subclass_edges": 0, +} + +EXPECTED_COMPAT_HANDLES = { + "files": 4789, + "symbols": 23663, + "classes": 5379, + "functions": 6145, + "global_vars": 12139, + "interfaces": 0, + "types": 0, + "imports": 44121, + "external_modules": 19545, +} + +EXPECTED_KNOWN_LOOKUPS = { + "airflow_init_import_os": [ + { + "handle": "RustCompactImport", + "kind": "import", + "name": "os", + "source": "import os", + } + ], + "airflow_init_getattr_name": [ + { + "handle": "RustCompactSymbol", + "kind": "function", + "name": "__getattr__", + } + ], + "airflow_init_lazy_imports_reference_container": [ + { + "handle": "RustCompactSymbol", + "kind": "function", + "name": "__getattr__", + } + ], +} + +EXPECTED_KNOWN_GLOBAL_LOOKUPS = { + "provider_info_schema_validator": { + "filepath": "airflow/providers_manager.py", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "_create_provider_info_schema_validator", + } +} + +EXPECTED_KNOWN_CHILD_LOOKUPS = { + "kerberos_service_children": [ + { + "filepath": "airflow/api/auth/backend/kerberos_auth.py", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "__init__", + } + ] +} + +EXPECTED_KNOWN_FILE_LOCAL_LOOKUPS = { + "airflow_init_getattr": { + "filepath": "airflow/__init__.py", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "__getattr__", + } +} + +EXPECTED_KNOWN_FILE_LOCAL_IMPORT_LOOKUPS = { + "airflow_init_import_os": { + "filepath": "airflow/__init__.py", + "handle": "RustCompactImport", + "kind": "import", + "name": "os", + "source": "import os", + } +} + +EXPECTED_KNOWN_FILE_LOCAL_NAME_RESOLUTION = { + 
"airflow_init_resolve_getattr": [ + { + "filepath": "airflow/__init__.py", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "__getattr__", + } + ], + "airflow_init_resolve_import_os": { + "filepath": "airflow/__init__.py", + "handle": "RustCompactImport", + "kind": "import", + "name": "os", + "source": "import os", + }, + "airflow_init_get_node_import_os": { + "filepath": "airflow/__init__.py", + "handle": "RustCompactImport", + "kind": "import", + "name": "os", + "source": "import os", + }, +} + +EXPECTED_KNOWN_MODULE_IMPORT_ATTRIBUTE_RESOLUTION = { + "dag_processing_manager_airflow_models_dagmodel": { + "filepath": "airflow/models/__init__.py", + "handle": "RustCompactImport", + "kind": "from_import", + "name": "DagModel", + "source": "from airflow.models.dag import DAG, DagModel, DagTag", + } +} + +EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS = { + "airflow_init": { + "filepath": "airflow/__init__.py", + "handle": "RustCompactFile", + "name": "__init__", + } +} + +EXPECTED_TARGETED_CACHE_MATERIALIZATION = { + "files": False, + "symbols": False, + "imports": False, + "references": False, + "external_references": False, + "dependencies": False, + "file_handles": False, + "symbol_handles": False, + "import_handles": False, + "external_module_handles": False, + "symbols_by_file": False, + "imports_by_file": False, +} + +EXPECTED_BYTE_RANGE_CACHE_MATERIALIZATION = { + **EXPECTED_TARGETED_CACHE_MATERIALIZATION, + "exports": False, + "export_handles": False, + "exports_by_file": False, +} + +EXPECTED_KNOWN_DEPENDENCIES = { + "airflow_init_getattr_dependencies": [ + { + "filepath": "airflow/__init__.py", + "name": "importlib", + "node_type": "IMPORT", + "source": "import importlib", + }, + { + "filepath": "airflow/__init__.py", + "name": "sys", + "node_type": "IMPORT", + "source": "import sys", + }, + { + "filepath": "airflow/__init__.py", + "name": "warnings", + "node_type": "IMPORT", + "source": "import warnings", + }, + { + "filepath": 
"airflow/__init__.py", + "name": "__lazy_imports", + "node_type": "SYMBOL", + }, + ], +} + +EXPECTED_LARGE_CACHE_MATERIALIZATION = { + "files": False, + "symbols": False, + "imports": False, + "references": False, + "external_references": False, + "dependencies": False, + "file_handles": False, + "symbol_handles": False, + "import_handles": False, + "external_module_handles": False, +} + +RECORDED_PYTHON_BASELINE = { + "wall_seconds": 18.649, + "max_rss_mb": 3470.3, +} + + +def bytes_to_mb(value: float) -> float: + return value / (1024 * 1024) + + +def max_rss_bytes() -> int: + rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(rss) + return int(rss * 1024) + + +def current_rss_bytes() -> int: + import psutil + + return int(psutil.Process(os.getpid()).memory_info().rss) + + +def memory_sample(label: str) -> dict[str, float | str]: + return { + "label": label, + "rss_mb": round(bytes_to_mb(current_rss_bytes()), 3), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + } + + +def handle_signature(handle: Any) -> dict[str, Any]: + signature = { + "handle": type(handle).__name__, + "name": handle.name, + } + record = getattr(handle, "record", None) + if record is not None and hasattr(record, "kind"): + signature["kind"] = record.kind + if type(handle).__name__ == "RustCompactImport": + signature["source"] = handle.source + return signature + + +def file_signature(file: Any) -> dict[str, Any]: + return { + "filepath": file.filepath, + "handle": type(file).__name__, + "name": file.name, + } + + +def known_lookup_report(codebase: Any) -> dict[str, list[dict[str, Any]]]: + init_file = codebase.get_file("airflow/__init__.py") + return { + "airflow_init_import_os": [ + handle_signature(handle) + for handle in init_file.find_by_byte_range({"start_byte": 847, "end_byte": 856}) + ], + "airflow_init_getattr_name": [ + handle_signature(handle) + for handle in init_file.find_by_byte_range({"start_byte": 4048, "end_byte": 4059}) 
+ ], + "airflow_init_lazy_imports_reference_container": [ + handle_signature(handle) + for handle in init_file.find_by_byte_range({"start_byte": 4169, "end_byte": 4183}) + ], + } + + +def known_global_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + function = codebase.get_function("_create_provider_info_schema_validator") + signature = handle_signature(function) + signature["filepath"] = function.filepath + return { + "provider_info_schema_validator": signature, + } + + +def known_child_lookup_report(codebase: Any) -> dict[str, list[dict[str, Any]]]: + service = codebase.get_class("KerberosService") + return { + "kerberos_service_children": [ + { + **handle_signature(child), + "filepath": child.filepath, + } + for child in service.child_symbols + ], + } + + +def known_file_local_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + function = codebase.get_file("airflow/__init__.py").get_function("__getattr__") + signature = handle_signature(function) + signature["filepath"] = function.filepath + return { + "airflow_init_getattr": signature, + } + + +def known_file_local_import_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + import_handle = codebase.get_file("airflow/__init__.py").get_import("import os") + signature = handle_signature(import_handle) + signature["filepath"] = import_handle.filepath + return { + "airflow_init_import_os": signature, + } + + +def known_file_local_name_resolution_report(codebase: Any) -> dict[str, Any]: + init_file = codebase.get_file("airflow/__init__.py") + resolved_import = init_file.resolve_attribute("os") + node_import = init_file.get_node_by_name("os") + import_signature = handle_signature(resolved_import) + import_signature["filepath"] = resolved_import.filepath + node_signature = handle_signature(node_import) + node_signature["filepath"] = node_import.filepath + return { + "airflow_init_resolve_getattr": [ + { + **handle_signature(handle), + "filepath": handle.filepath, + } + for handle in 
init_file.resolve_name("__getattr__") + ], + "airflow_init_resolve_import_os": import_signature, + "airflow_init_get_node_import_os": node_signature, + } + + +def known_module_import_attribute_resolution_report(codebase: Any) -> dict[str, dict[str, Any]]: + module_import = codebase.get_file("airflow/dag_processing/manager.py").get_import("airflow.models") + resolved = module_import.resolve_attribute("DagModel") + signature = handle_signature(resolved) + signature["filepath"] = resolved.filepath + return { + "dag_processing_manager_airflow_models_dagmodel": signature, + } + + +def known_ignore_case_file_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + return { + "airflow_init": file_signature(codebase.get_file("AIRFLOW/__INIT__.PY", ignore_case=True)), + } + + +def dependency_signature(handle: Any) -> dict[str, Any]: + node_type = getattr(handle, "node_type", None) + signature = { + "filepath": handle.filepath, + "name": handle.name, + "node_type": getattr(node_type, "name", str(node_type)), + } + if signature["node_type"] == "IMPORT": + signature["source"] = handle.source + return signature + + +def known_dependency_report(codebase: Any) -> dict[str, list[dict[str, Any]]]: + getattr_function = codebase.get_file("airflow/__init__.py").get_function("__getattr__") + return { + "airflow_init_getattr_dependencies": sorted( + (dependency_signature(handle) for handle in getattr_function.dependencies), + key=lambda item: (item["node_type"], item["name"], item["filepath"], item.get("source", "")), + ) + } + + +def large_cache_materialization_report(backend: Any) -> dict[str, bool]: + return { + "files": backend._files is not None, + "symbols": backend._symbols is not None, + "imports": backend._imports is not None, + "references": backend._references is not None, + "external_references": backend._external_references is not None, + "dependencies": backend._dependencies is not None, + "file_handles": backend._file_handles is not None, + "symbol_handles": 
backend._symbol_handles is not None, + "import_handles": backend._import_handles is not None, + "external_module_handles": backend._external_module_handles is not None, + } + + +def targeted_cache_materialization_report(backend: Any) -> dict[str, bool]: + report = large_cache_materialization_report(backend) + report["symbols_by_file"] = backend._symbols_by_file_id is not None + report["imports_by_file"] = backend._imports_by_file_id is not None + return report + + +def byte_range_cache_materialization_report(backend: Any) -> dict[str, bool]: + report = targeted_cache_materialization_report(backend) + report["exports"] = backend._exports is not None + report["export_handles"] = backend._export_handles is not None + report["exports_by_file"] = backend._exports_by_file_id is not None + return report + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + memory_samples = [memory_sample("start")] + repo, actual_commit = prepare_pinned_repo(args) + extension_path = None + if not args.skip_build_extension: + extension_path = build_rust_extension(args.extension_dir, timeout=args.timeout) + if str(args.extension_dir) not in sys.path: + sys.path.insert(0, str(args.extension_dir)) + + from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + from graph_sitter.core.codebase import Codebase + + start = time.perf_counter() + config = CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR) + codebase = Codebase(str(repo), language="python", config=config) + wall = time.perf_counter() - start + memory_samples.append(memory_sample("after_codebase_construct")) + + python_graph_blocked = False + try: + len(codebase.ctx.nodes) + except RuntimeError: + python_graph_blocked = True + memory_samples.append(memory_sample("after_python_graph_block_check")) + + backend = codebase.ctx.rust_index + assert backend is not None + summary = codebase.rust_index_summary + summary_counts = { + "files": summary.files, + 
"symbols": summary.symbols, + "classes": summary.classes, + "functions": summary.functions, + "global_variables": summary.global_variables, + "imports": summary.imports, + "import_resolutions": summary.import_resolutions, + "external_modules": summary.external_modules, + "references": summary.references, + "external_references": summary.external_references, + "dependencies": summary.dependencies, + "bytes": summary.bytes, + "lines": summary.lines, + "files_with_errors": summary.files_with_errors, + } + memory_samples.append(memory_sample("after_summary_counts")) + record_counts = backend.compact_record_counts() + memory_samples.append(memory_sample("after_record_counts")) + compat_counts = backend.compact_compat_counts() + memory_samples.append(memory_sample("after_compat_handles")) + known_global_lookups = known_global_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_global_lookups")) + known_child_lookups = known_child_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_child_lookups")) + known_file_local_lookups = known_file_local_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_file_local_lookups")) + known_file_local_import_lookups = known_file_local_import_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_file_local_import_lookups")) + known_file_local_name_resolution = known_file_local_name_resolution_report(codebase) + memory_samples.append(memory_sample("after_known_file_local_name_resolution")) + known_module_import_attribute_resolution = known_module_import_attribute_resolution_report(codebase) + memory_samples.append(memory_sample("after_known_module_import_attribute_resolution")) + known_ignore_case_file_lookups = known_ignore_case_file_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_ignore_case_file_lookups")) + targeted_cache_materialization = targeted_cache_materialization_report(backend) + known_lookups = 
known_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_lookups")) + byte_range_cache_materialization = byte_range_cache_materialization_report(backend) + known_dependencies = known_dependency_report(codebase) + memory_samples.append(memory_sample("after_known_dependencies")) + large_cache_materialization = large_cache_materialization_report(backend) + + totals = { + "wall_seconds": round(wall, 6), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + "current_rss_mb": memory_samples[-1]["rss_mb"], + } + comparison = { + "recorded_python_wall_seconds": RECORDED_PYTHON_BASELINE["wall_seconds"], + "recorded_python_max_rss_mb": RECORDED_PYTHON_BASELINE["max_rss_mb"], + "recorded_python_to_rust_wall_ratio": ratio( + RECORDED_PYTHON_BASELINE["wall_seconds"], totals["wall_seconds"] + ), + "recorded_python_to_rust_rss_ratio": ratio( + RECORDED_PYTHON_BASELINE["max_rss_mb"], totals["max_rss_mb"] + ), + } + report = { + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "checkout": str(repo), + "extension_path": str(extension_path) if extension_path else None, + "python_graph_blocked": python_graph_blocked, + }, + "totals": totals, + "rss_samples": memory_samples, + "summary": summary_counts, + "records": record_counts, + "compat_handles": compat_counts, + "known_global_lookups": known_global_lookups, + "known_child_lookups": known_child_lookups, + "known_file_local_lookups": known_file_local_lookups, + "known_file_local_import_lookups": known_file_local_import_lookups, + "known_file_local_name_resolution": known_file_local_name_resolution, + "known_module_import_attribute_resolution": known_module_import_attribute_resolution, + "known_ignore_case_file_lookups": known_ignore_case_file_lookups, + "targeted_cache_materialization": targeted_cache_materialization, + "known_lookups": known_lookups, + "byte_range_cache_materialization": byte_range_cache_materialization, + 
"known_dependencies": known_dependencies, + "large_cache_materialization": large_cache_materialization, + "comparison": comparison, + } + validate_report(report, args) + return report + + +def compare_counts(name: str, observed: dict[str, int], expected: dict[str, int], failures: list[str]) -> None: + for key, expected_value in expected.items(): + observed_value = observed.get(key) + if observed_value != expected_value: + failures.append(f"{name}.{key}: expected {expected_value}, got {observed_value}") + + +def validate_report(report: dict[str, Any], args: argparse.Namespace) -> None: + failures: list[str] = [] + if not report["metadata"]["python_graph_blocked"]: + failures.append("Python graph was materialized") + if not args.allow_count_drift: + compare_counts("summary", report["summary"], EXPECTED_SUMMARY, failures) + compare_counts("records", report["records"], EXPECTED_RECORDS, failures) + compare_counts("compat_handles", report["compat_handles"], EXPECTED_COMPAT_HANDLES, failures) + if report["known_global_lookups"] != EXPECTED_KNOWN_GLOBAL_LOOKUPS: + failures.append("known global lookup results drifted") + if report["known_child_lookups"] != EXPECTED_KNOWN_CHILD_LOOKUPS: + failures.append("known child lookup results drifted") + if report["known_file_local_lookups"] != EXPECTED_KNOWN_FILE_LOCAL_LOOKUPS: + failures.append("known file-local lookup results drifted") + if report["known_file_local_import_lookups"] != EXPECTED_KNOWN_FILE_LOCAL_IMPORT_LOOKUPS: + failures.append("known file-local import lookup results drifted") + if report["known_file_local_name_resolution"] != EXPECTED_KNOWN_FILE_LOCAL_NAME_RESOLUTION: + failures.append("known file-local name resolution results drifted") + if report["known_module_import_attribute_resolution"] != EXPECTED_KNOWN_MODULE_IMPORT_ATTRIBUTE_RESOLUTION: + failures.append("known module import attribute resolution results drifted") + if report["known_ignore_case_file_lookups"] != EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS: + 
failures.append("known ignore-case file lookup results drifted") + if report["targeted_cache_materialization"] != EXPECTED_TARGETED_CACHE_MATERIALIZATION: + failures.append("targeted lookup caches were materialized before byte-range queries") + if report["known_lookups"] != EXPECTED_KNOWN_LOOKUPS: + failures.append("known byte-range lookup results drifted") + if report["byte_range_cache_materialization"] != EXPECTED_BYTE_RANGE_CACHE_MATERIALIZATION: + failures.append("byte-range lookup caches were materialized") + if report["known_dependencies"] != EXPECTED_KNOWN_DEPENDENCIES: + failures.append("known dependency results drifted") + if report["large_cache_materialization"] != EXPECTED_LARGE_CACHE_MATERIALIZATION: + failures.append("large Rust backend caches were materialized during known queries") + + totals = report["totals"] + comparison = report["comparison"] + if totals["wall_seconds"] > args.max_wall_seconds: + failures.append(f"wall time {totals['wall_seconds']}s exceeds allowed {args.max_wall_seconds}s") + if totals["max_rss_mb"] > args.max_rss_mb: + failures.append(f"max RSS {totals['max_rss_mb']} MB exceeds allowed {args.max_rss_mb} MB") + wall_ratio = comparison["recorded_python_to_rust_wall_ratio"] + rss_ratio = comparison["recorded_python_to_rust_rss_ratio"] + if wall_ratio is None or wall_ratio < args.min_recorded_wall_ratio: + failures.append( + f"recorded Python/Rust wall ratio {wall_ratio}x is below {args.min_recorded_wall_ratio}x" + ) + if rss_ratio is None or rss_ratio < args.min_recorded_rss_ratio: + failures.append( + f"recorded Python/Rust RSS ratio {rss_ratio}x is below {args.min_recorded_rss_ratio}x" + ) + + if failures: + raise RuntimeError("; ".join(failures)) + + +def print_human(report: dict[str, Any]) -> None: + metadata = report["metadata"] + totals = report["totals"] + summary = report["summary"] + records = report["records"] + compat = report["compat_handles"] + comparison = report["comparison"] + print(f"repo: {metadata['name']} 
{metadata['commit']}") + print(f"checkout: {metadata['checkout']}") + print(f"python graph blocked: {metadata['python_graph_blocked']}") + print( + f"rust Codebase: wall={totals['wall_seconds']:.3f}s " + f"max_rss={totals['max_rss_mb']:.1f} MB current_rss={totals['current_rss_mb']:.1f} MB" + ) + print( + "rss samples: " + + " -> ".join(f"{sample['label']}={sample['rss_mb']:.1f} MB" for sample in report["rss_samples"]) + ) + print( + "summary: " + f"files={summary['files']} symbols={summary['symbols']} imports={summary['imports']} " + f"import_resolutions={summary['import_resolutions']} external_modules={summary['external_modules']} " + f"references={summary['references']} external_references={summary['external_references']} " + f"dependencies={summary['dependencies']}" + ) + print( + "compat handles: " + f"files={compat['files']} symbols={compat['symbols']} classes={compat['classes']} " + f"functions={compat['functions']} global_vars={compat['global_vars']} imports={compat['imports']} " + f"external_modules={compat['external_modules']}" + ) + print( + "records: " + f"references={records['rust_references']} external_references={records['rust_external_references']} " + f"dependencies={records['rust_dependencies']}" + ) + print( + "recorded baseline ratios: " + f"wall={comparison['recorded_python_to_rust_wall_ratio']}x " + f"rss={comparison['recorded_python_to_rust_rss_ratio']}x" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Check pinned Airflow Rust Codebase construction, compatibility handles, byte-range lookups, and performance ceilings." 
+ ) + parser.add_argument("--name", default=DEFAULT_REPO_NAME, help="Stable name for the pinned repository checkout.") + parser.add_argument("--repo-url", default=DEFAULT_REPO_URL, help="Git repository URL.") + parser.add_argument("--ref", default=DEFAULT_REF, help="Remote ref or commit to fetch.") + parser.add_argument("--expected-commit", default=DEFAULT_EXPECTED_COMMIT, help="Expected resolved commit SHA. Pass an empty string to disable.") + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR, help="Directory for reusable pinned checkouts.") + parser.add_argument("--extension-dir", type=Path, default=DEFAULT_EXTENSION_DIR, help="Directory for the built PyO3 extension module.") + parser.add_argument("--reset-checkout", action="store_true", help="Delete and recreate the cached checkout before running.") + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.") + parser.add_argument("--skip-build-extension", action="store_true", help="Reuse an existing graph_sitter_py extension in --extension-dir.") + parser.add_argument("--timeout", type=int, default=900, help="Timeout in seconds for clone/build/benchmark child commands.") + parser.add_argument("--allow-count-drift", action="store_true", help="Do not fail if compact record or compatibility-handle counts differ from the pinned expectations.") + parser.add_argument("--max-wall-seconds", type=float, default=10.0, help="Fail if Rust Codebase construction is slower than this ceiling.") + parser.add_argument("--max-rss-mb", type=float, default=700.0, help="Fail if process max RSS exceeds this ceiling.") + parser.add_argument("--min-recorded-wall-ratio", type=float, default=2.0, help="Fail unless the recorded Python baseline divided by Rust wall time is at least this value.") + parser.add_argument("--min-recorded-rss-ratio", type=float, default=5.0, help="Fail unless the recorded Python baseline divided by Rust 
max RSS is at least this value.") + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report instead of a human summary.") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.expected_commit == "": + args.expected_commit = None + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_pinned_semantic_parity.py b/rust-rewrite/tools/check_pinned_semantic_parity.py new file mode 100644 index 000000000..83188dc27 --- /dev/null +++ b/rust-rewrite/tools/check_pinned_semantic_parity.py @@ -0,0 +1,557 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import resource +import subprocess +import sys +import time +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +SRC_ROOT = REPO_ROOT / "src" +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) +if str(SRC_ROOT) not in sys.path: + sys.path.insert(0, str(SRC_ROOT)) + +import benchmark_pinned_python_repo as python_benchmark # noqa: E402 +import benchmark_pinned_typescript_repo as typescript_benchmark # noqa: E402 +from benchmark_pinned_python_repo import parse_json_output # noqa: E402 + +DEFAULT_CACHE_DIR = Path("/tmp/graph-sitter-pinned-repos") +DEFAULT_EXTENSION_DIR = Path("/tmp/graph_sitter_py_pinned_semantic_parity") + +AIRFLOW_INIT_FILE = "airflow/__init__.py" +AIRFLOW_MANAGER_FILE = "airflow/dag_processing/manager.py" +NEXTJS_ANNOUNCER_FILE = 
"packages/next/src/client/components/app-router-announcer.tsx" + + +def run( + command: list[str], + *, + cwd: Path, + env: dict[str, str] | None = None, + timeout: int | None = None, +) -> subprocess.CompletedProcess[str]: + result = subprocess.run( + command, + cwd=cwd, + env=env, + timeout=timeout, + check=False, + capture_output=True, + text=True, + ) + if result.returncode != 0: + command_text = " ".join(command) + msg = ( + f"command failed with exit code {result.returncode}: {command_text}\n" + f"stdout:\n{result.stdout}\n" + f"stderr:\n{result.stderr}" + ) + raise RuntimeError(msg) + return result + + +def bytes_to_mb(value: float) -> float: + return value / (1024 * 1024) + + +def ratio(numerator: float | int | None, denominator: float | int | None) -> float | None: + if numerator is None or denominator is None or denominator <= 0: + return None + return round(float(numerator) / float(denominator), 3) + + +def ratio_at_least(value: Any, minimum: float) -> bool: + return isinstance(value, int | float) and value >= minimum + + +def max_rss_bytes() -> int: + rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(rss) + return int(rss * 1024) + + +def current_rss_bytes() -> int: + import psutil + + return int(psutil.Process(os.getpid()).memory_info().rss) + + +def memory_sample(label: str) -> dict[str, float | str]: + return { + "label": label, + "rss_mb": round(bytes_to_mb(current_rss_bytes()), 3), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + } + + +def node_type_name(value: Any) -> str: + return str(getattr(value, "name", value)) + + +def node_signature(node: Any) -> dict[str, Any] | None: + if node is None: + return None + node_type = node_type_name(getattr(node, "node_type", type(node).__name__)) + signature = { + "filepath": getattr(node, "filepath", None), + "name": getattr(node, "name", None), + "node_type": node_type, + } + source = getattr(node, "source", None) + if node_type in {"IMPORT", 
"EXPORT", "EXTERNAL"} and source is not None: + signature["source"] = source + return signature + + +def file_signature(file: Any) -> dict[str, Any]: + return { + "filepath": file.filepath, + "name": file.name, + "node_type": node_type_name(getattr(file, "node_type", type(file).__name__)), + } + + +def import_signature(imp: Any) -> dict[str, Any]: + return { + "filepath": imp.filepath, + "from_file": None if imp.from_file is None else imp.from_file.filepath, + "name": imp.name, + "resolved_symbol": node_signature(imp.resolved_symbol), + "source": imp.source, + } + + +def find_import(file: Any, lookup: str) -> Any: + import_handle = file.get_import(lookup) + if import_handle is not None: + return import_handle + for candidate in file.imports: + if candidate.source == lookup or candidate.name == lookup: + return candidate + msg = f"could not find import {lookup!r} in {file.filepath}" + raise RuntimeError(msg) + + +def export_signature(export: Any) -> dict[str, Any]: + return { + "declared_symbol": node_signature(export.declared_symbol), + "exported_symbol": node_signature(export.exported_symbol), + "filepath": export.filepath, + "is_default": export.is_default_export(), + "is_reexport": export.is_reexport(), + "name": export.name, + "resolved_symbol": node_signature(export.resolved_symbol), + } + + +def dependency_signature(handle: Any) -> dict[str, Any] | None: + if node_type_name(getattr(handle, "node_type", None)) in {"IMPORT", "EXPORT"}: + resolved = getattr(handle, "resolved_symbol", None) + if resolved is not None: + signature = node_signature(handle) + assert signature is not None + signature["resolved_symbol"] = node_signature(resolved) + return signature + return node_signature(handle) + + +def sort_key(item: dict[str, Any] | None) -> tuple[str, str, str, str]: + if item is None: + return ("", "", "", "") + return ( + item.get("filepath") or "", + item.get("node_type") or "", + item.get("name") or "", + item.get("source") or "", + ) + + +def 
sorted_signatures(items: list[dict[str, Any] | None]) -> list[dict[str, Any] | None]: + return sorted(items, key=sort_key) + + +def unique_sorted_signatures(items: list[dict[str, Any] | None]) -> list[dict[str, Any] | None]: + seen: set[str] = set() + unique = [] + for item in items: + key = json.dumps(item, sort_keys=True) + if key in seen: + continue + seen.add(key) + unique.append(item) + return sorted_signatures(unique) + + +def max_sample_rss_mb(report: dict[str, Any]) -> float | None: + samples = report.get("rss_samples", []) + if not isinstance(samples, list) or not samples: + return None + values = [ + float(sample["max_rss_mb"]) + for sample in samples + if isinstance(sample, dict) and "max_rss_mb" in sample + ] + return max(values) if values else None + + +def graph_is_blocked(codebase: Any) -> bool: + try: + len(codebase.ctx.nodes) + except RuntimeError: + return True + return False + + +def build_codebase(repo: Path, *, backend: str, language: str, extension_dir: Path) -> Any: + if backend == "rust" and str(extension_dir) not in sys.path: + sys.path.insert(0, str(extension_dir)) + + from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + from graph_sitter.core.codebase import Codebase + + graph_backend = GraphBackend.PYTHON if backend == "python" else GraphBackend.RUST + config = CodebaseConfig(graph_backend=graph_backend, rust_fallback=RustFallbackMode.ERROR) + return Codebase(str(repo), language=language, config=config) + + +def collect_airflow_report(repo: Path, *, backend: str, extension_dir: Path) -> dict[str, Any]: + memory_samples = [memory_sample("start")] + start = time.perf_counter() + codebase = build_codebase(repo, backend=backend, language="python", extension_dir=extension_dir) + wall_seconds = time.perf_counter() - start + memory_samples.append(memory_sample("after_codebase_construct")) + + python_graph_blocked = graph_is_blocked(codebase) + 
memory_samples.append(memory_sample("after_graph_block_check")) + + init_file = codebase.get_file(AIRFLOW_INIT_FILE) + manager_file = codebase.get_file(AIRFLOW_MANAGER_FILE) + getattr_function = init_file.get_function("__getattr__") + provider_validator = codebase.get_function("_create_provider_info_schema_validator") + airflow_models_import = manager_file.get_import("airflow.models") + + report = { + "backend": backend, + "python_graph_blocked": python_graph_blocked, + "timings": {"codebase_construct_wall_seconds": round(wall_seconds, 6)}, + "rss_samples": memory_samples, + "known_files": { + "airflow_init": file_signature(init_file), + "dag_processing_manager": file_signature(manager_file), + }, + "global_function": node_signature(provider_validator), + "airflow_init_import_os": import_signature(find_import(init_file, "import os")), + "airflow_init_resolve_getattr": sorted_signatures( + [node_signature(node) for node in init_file.resolve_name("__getattr__")] + ), + "airflow_init_resolve_os": node_signature(init_file.resolve_attribute("os")), + "airflow_init_get_node_os": node_signature(init_file.get_node_by_name("os")), + "module_import_attribute_resolution": node_signature( + airflow_models_import.resolve_attribute("DagModel") + ), + "getattr_dependencies": sorted_signatures( + [dependency_signature(handle) for handle in getattr_function.dependencies] + ), + } + if backend == "rust" and not python_graph_blocked: + msg = "expected Rust Airflow semantic parity run to keep Python graph blocked" + raise RuntimeError(msg) + return report + + +def collect_nextjs_report(repo: Path, *, backend: str, extension_dir: Path) -> dict[str, Any]: + memory_samples = [memory_sample("start")] + start = time.perf_counter() + codebase = build_codebase(repo, backend=backend, language="typescript", extension_dir=extension_dir) + wall_seconds = time.perf_counter() - start + memory_samples.append(memory_sample("after_codebase_construct")) + + python_graph_blocked = 
graph_is_blocked(codebase) + memory_samples.append(memory_sample("after_graph_block_check")) + + announcer_file = codebase.get_file(NEXTJS_ANNOUNCER_FILE) + announcer = codebase.get_function("AppRouterAnnouncer") + announcer_export = announcer_file.get_export("AppRouterAnnouncer") + + report = { + "backend": backend, + "python_graph_blocked": python_graph_blocked, + "timings": {"codebase_construct_wall_seconds": round(wall_seconds, 6)}, + "rss_samples": memory_samples, + "announcer_file": file_signature(announcer_file), + "announcer_function": node_signature(announcer), + "announcer_export": export_signature(announcer_export), + "announcer_imports": sorted( + (import_signature(imp) for imp in announcer_file.imports), + key=lambda item: (item["source"], item["name"] or ""), + ), + "announcer_dependencies": unique_sorted_signatures( + [dependency_signature(handle) for handle in announcer.dependencies] + ), + "announcer_import_dependencies": unique_sorted_signatures( + [ + dependency_signature(handle) + for handle in announcer.dependencies + if node_type_name(getattr(handle, "node_type", None)) == "IMPORT" + ] + ), + "announcer_symbol_usages": unique_sorted_signatures( + [node_signature(handle) for handle in announcer.symbol_usages] + ), + } + if backend == "rust" and not python_graph_blocked: + msg = "expected Rust Next.js semantic parity run to keep Python graph blocked" + raise RuntimeError(msg) + return report + + +def collect_report(args: argparse.Namespace) -> dict[str, Any]: + repo = Path(args.repo_path) + if args.suite == "python": + return collect_airflow_report(repo, backend=args.backend, extension_dir=args.extension_dir) + if args.suite == "typescript": + return collect_nextjs_report(repo, backend=args.backend, extension_dir=args.extension_dir) + msg = f"unsupported collect suite: {args.suite}" + raise ValueError(msg) + + +def prepare_repo( + *, + args: argparse.Namespace, + suite: str, +) -> tuple[Path, str]: + if suite == "python": + repo_args = 
argparse.Namespace( + name=python_benchmark.DEFAULT_REPO_NAME, + repo_url=python_benchmark.DEFAULT_REPO_URL, + ref=python_benchmark.DEFAULT_REF, + expected_commit=python_benchmark.DEFAULT_EXPECTED_COMMIT, + cache_dir=args.cache_dir, + reset_checkout=args.reset_checkout, + skip_fetch=args.skip_fetch, + timeout=args.timeout, + ) + return python_benchmark.prepare_pinned_repo(repo_args) + repo_args = argparse.Namespace( + name=typescript_benchmark.DEFAULT_REPO_NAME, + repo_url=typescript_benchmark.DEFAULT_REPO_URL, + ref=typescript_benchmark.DEFAULT_REF, + expected_commit=typescript_benchmark.DEFAULT_EXPECTED_COMMIT, + cache_dir=args.cache_dir, + reset_checkout=args.reset_checkout, + skip_fetch=args.skip_fetch, + timeout=args.timeout, + ) + return typescript_benchmark.prepare_pinned_repo(repo_args) + + +def collect_backend_report( + *, + suite: str, + backend: str, + repo: Path, + args: argparse.Namespace, +) -> dict[str, Any]: + command = [ + sys.executable, + str(Path(__file__).resolve()), + "--collect", + "--suite", + suite, + "--backend", + backend, + "--repo-path", + str(repo), + "--extension-dir", + str(args.extension_dir), + "--json", + ] + result = run(command, cwd=REPO_ROOT, timeout=args.timeout) + return parse_json_output(result.stdout) + + +def compare_suite(python_report: dict[str, Any], rust_report: dict[str, Any], *, suite: str) -> dict[str, Any]: + if suite == "python": + exact_keys = [ + "known_files", + "global_function", + "airflow_init_import_os", + "airflow_init_resolve_getattr", + "airflow_init_resolve_os", + "airflow_init_get_node_os", + "getattr_dependencies", + ] + known_delta_keys = ["module_import_attribute_resolution"] + expected_known_deltas = { + "module_import_attribute_resolution": { + "python": None, + "rust": { + "filepath": "airflow/models/__init__.py", + "name": "DagModel", + "node_type": "IMPORT", + "source": "from airflow.models.dag import DAG, DagModel, DagTag", + }, + } + } + else: + exact_keys = [ + "announcer_file", + 
"announcer_function", + "announcer_export", + "announcer_imports", + "announcer_dependencies", + "announcer_import_dependencies", + "announcer_symbol_usages", + ] + known_delta_keys = [] + expected_known_deltas = {} + mismatches = [key for key in exact_keys if python_report.get(key) != rust_report.get(key)] + known_deltas = { + key: { + "python": python_report.get(key), + "rust": rust_report.get(key), + } + for key in known_delta_keys + if python_report.get(key) != rust_report.get(key) + } + if known_deltas != expected_known_deltas: + mismatches.append("known_deltas") + python_timing = python_report.get("timings", {}).get("codebase_construct_wall_seconds") + rust_timing = rust_report.get("timings", {}).get("codebase_construct_wall_seconds") + performance = { + "wall_ratio": ratio(python_timing, rust_timing), + "rss_ratio": ratio(max_sample_rss_mb(python_report), max_sample_rss_mb(rust_report)), + } + return { + "exact_keys": exact_keys, + "expected_known_deltas": expected_known_deltas, + "known_deltas": known_deltas, + "mismatches": mismatches, + "performance": performance, + } + + +def run_suite(args: argparse.Namespace, suite: str) -> dict[str, Any]: + repo, commit = prepare_repo(args=args, suite=suite) + python_report = collect_backend_report(suite=suite, backend="python", repo=repo, args=args) + rust_report = collect_backend_report(suite=suite, backend="rust", repo=repo, args=args) + comparison = compare_suite(python_report, rust_report, suite=suite) + if comparison["mismatches"]: + msg = f"{suite} pinned semantic parity mismatches: " + ", ".join(comparison["mismatches"]) + raise RuntimeError(msg) + performance = comparison["performance"] + failures = [] + if not ratio_at_least(performance["wall_ratio"], args.min_wall_ratio): + failures.append( + f"wall ratio {performance['wall_ratio']}x is below {args.min_wall_ratio}x" + ) + if not ratio_at_least(performance["rss_ratio"], args.min_rss_ratio): + failures.append(f"RSS ratio {performance['rss_ratio']}x is below 
{args.min_rss_ratio}x") + if failures: + msg = f"{suite} pinned semantic parity performance failed: " + "; ".join(failures) + raise RuntimeError(msg) + return { + "suite": suite, + "metadata": { + "repo": str(repo), + "commit": commit, + }, + "python": python_report, + "rust": rust_report, + "comparison": comparison, + } + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + extension_path = None + if not args.skip_build_extension: + extension_path = python_benchmark.build_rust_extension(args.extension_dir, timeout=args.timeout) + if str(args.extension_dir) not in sys.path: + sys.path.insert(0, str(args.extension_dir)) + + suites = [] + if args.suite in {"all", "python"}: + suites.append(run_suite(args, "python")) + if args.suite in {"all", "typescript"}: + suites.append(run_suite(args, "typescript")) + + return { + "metadata": { + "suite": args.suite, + "extension_path": str(extension_path) if extension_path else None, + "extension_dir": str(args.extension_dir), + "cache_dir": str(args.cache_dir), + }, + "suites": suites, + } + + +def print_human(report: dict[str, Any]) -> None: + print(f"suite: {report['metadata']['suite']}") + print(f"extension_dir: {report['metadata']['extension_dir']}") + for suite in report["suites"]: + python_timing = suite["python"]["timings"]["codebase_construct_wall_seconds"] + rust_timing = suite["rust"]["timings"]["codebase_construct_wall_seconds"] + python_max_rss = max(float(sample["max_rss_mb"]) for sample in suite["python"]["rss_samples"]) + rust_max_rss = max(float(sample["max_rss_mb"]) for sample in suite["rust"]["rss_samples"]) + print( + f"{suite['suite']}: exact={', '.join(suite['comparison']['exact_keys'])} " + f"known_deltas={len(suite['comparison']['known_deltas'])} " + f"python={python_timing:.3f}s/{python_max_rss:.1f} MB " + f"rust={rust_timing:.3f}s/{rust_max_rss:.1f} MB " + f"ratios={suite['comparison']['performance']['wall_ratio']}x/" + f"{suite['comparison']['performance']['rss_ratio']}x" + ) + + +def 
parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Compare selected pinned Airflow and Next.js graph semantics between Python and compact Rust backends." + ) + parser.add_argument("--suite", choices=["all", "python", "typescript"], default="all") + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR, help="Directory for reusable pinned checkouts.") + parser.add_argument("--extension-dir", type=Path, default=DEFAULT_EXTENSION_DIR, help="Directory for the built PyO3 extension module.") + parser.add_argument("--reset-checkout", action="store_true", help="Delete and recreate cached pinned checkouts before running.") + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.") + parser.add_argument("--skip-build-extension", action="store_true", help="Reuse an existing graph_sitter_py extension in --extension-dir.") + parser.add_argument("--min-wall-ratio", type=float, default=1.0, help="Fail unless Python wall time divided by Rust wall time is at least this value.") + parser.add_argument("--min-rss-ratio", type=float, default=1.0, help="Fail unless Python max RSS divided by Rust max RSS is at least this value.") + parser.add_argument("--timeout", type=int, default=900, help="Timeout in seconds for clone/build/check child commands.") + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report instead of a human summary.") + parser.add_argument("--collect", action="store_true", help=argparse.SUPPRESS) + parser.add_argument("--backend", choices=["python", "rust"], help=argparse.SUPPRESS) + parser.add_argument("--repo-path", type=Path, help=argparse.SUPPRESS) + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.collect: + if args.backend is None or args.repo_path is None: + msg = "--collect 
requires --backend and --repo-path" + raise ValueError(msg) + report = collect_report(args) + else: + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_pinned_typescript_codebase.py b/rust-rewrite/tools/check_pinned_typescript_codebase.py new file mode 100644 index 000000000..3a1de8171 --- /dev/null +++ b/rust-rewrite/tools/check_pinned_typescript_codebase.py @@ -0,0 +1,573 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import os +import resource +import sys +import time +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +SRC_ROOT = REPO_ROOT / "src" +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) +if str(SRC_ROOT) not in sys.path: + sys.path.insert(0, str(SRC_ROOT)) + +from benchmark_pinned_typescript_repo import ( # noqa: E402 + DEFAULT_CACHE_DIR, + DEFAULT_EXPECTED_COMMIT, + DEFAULT_EXTENSION_DIR, + DEFAULT_REF, + DEFAULT_REPO_NAME, + DEFAULT_REPO_URL, + build_rust_extension, + prepare_pinned_repo, + ratio, +) + +EXPECTED_SUMMARY = { + "files": 13688, + "symbols": 44871, + "classes": 502, + "functions": 13497, + "global_variables": 28742, + "imports": 28210, + "import_resolutions": 13462, + "external_modules": 13525, + "references": 114464, + "dependencies": 49287, + "bytes": 25421217, + "lines": 634891, + "files_with_errors": 113, +} + +EXPECTED_RECORDS = { + "rust_files": 13688, + "rust_symbols": 44871, + "rust_classes": 502, + "rust_functions": 13497, + "rust_global_vars": 28742, + "rust_imports": 28210, + "rust_import_resolutions": 13462, + 
"rust_external_modules": 13525, + "rust_exports": 16027, + "rust_references": 114464, + "rust_external_references": 25317, + "rust_function_calls": 197581, + "rust_promise_chains": 878, + "rust_dependencies": 49287, + "rust_subclass_edges": 160, +} + +EXPECTED_COMPAT_HANDLES = { + "files": 13688, + "symbols": 23981, + "classes": 502, + "functions": 13497, + "global_vars": 7867, + "interfaces": 516, + "types": 1570, + "imports": 28210, + "external_modules": 13525, + "exports": 16027, +} + +EXPECTED_KNOWN_GLOBAL_LOOKUPS = { + "app_router_announcer": { + "filepath": "packages/next/src/client/components/app-router-announcer.tsx", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "AppRouterAnnouncer", + } +} + +EXPECTED_KNOWN_FILE_LOCAL_EXPORT_LOOKUPS = { + "app_router_announcer_export": { + "filepath": "packages/next/src/client/components/app-router-announcer.tsx", + "handle": "RustCompactExport", + "kind": "named", + "name": "AppRouterAnnouncer", + } +} + +EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS = { + "app_router_announcer": { + "filepath": "packages/next/src/client/components/app-router-announcer.tsx", + "handle": "RustCompactFile", + "name": "app-router-announcer", + } +} + +EXPECTED_KNOWN_FILE_LOCAL_CALL_LOOKUPS = { + "next_lint_file": { + "filepath": "packages/next/src/cli/next-lint.ts", + "function_call_count": 27, + "first_function_call_names": [ + "getProjectDir", + "existsSync", + "printAndExit", + "loadConfig", + "reduce", + "isAbsolute", + "join", + "existsSync", + ], + "promise_chain_count": 1, + "promise_chain_base_lines": ["runLintCheck(baseDir, pathsToLint, {"], + "promise_chain_stage_names": [["then", "catch"]], + "promise_chain_has_catch": [True], + "promise_chain_has_finally": [False], + }, + "next_lint_symbol": { + "filepath": "packages/next/src/cli/next-lint.ts", + "handle": "RustCompactSymbol", + "kind": "function", + "name": "nextLint", + "function_call_count": 16, + "first_function_call_names": [ + "existsSync", + "printAndExit", + 
"verifyTypeScriptSetup", + "filter", + "catch", + "then", + "runLintCheck", + "eslintOptions", + "record", + "eventLintCheckCompleted", + "flush", + "printAndExit", + ], + "promise_chain_count": 1, + "promise_chain_base_lines": ["runLintCheck(baseDir, pathsToLint, {"], + "promise_chain_stage_names": [["then", "catch"]], + "promise_chain_has_catch": [True], + "promise_chain_has_finally": [False], + }, +} + +EXPECTED_TARGETED_CACHE_MATERIALIZATION = { + "files": False, + "symbols": False, + "imports": False, + "exports": False, + "references": False, + "external_references": False, + "function_calls": False, + "promise_chains": False, + "dependencies": False, + "file_handles": False, + "symbol_handles": False, + "import_handles": False, + "export_handles": False, + "function_call_handles": False, + "promise_chain_handles": False, + "function_call_handles_by_id": True, + "promise_chain_handles_by_id": True, + "exports_by_file": False, + "function_calls_by_file": True, + "promise_chains_by_file": True, + "function_calls_by_symbol": True, + "promise_chains_by_symbol": True, +} + +EXPECTED_LARGE_CACHE_MATERIALIZATION = { + "files": False, + "symbols": False, + "imports": False, + "exports": False, + "references": False, + "external_references": False, + "function_calls": False, + "promise_chains": False, + "dependencies": False, +} + +RECORDED_PYTHON_BASELINE = { + "wall_seconds": 24.959, + "max_rss_mb": 3100.1, +} + + +def bytes_to_mb(value: float) -> float: + return value / (1024 * 1024) + + +def max_rss_bytes() -> int: + rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(rss) + return int(rss * 1024) + + +def current_rss_bytes() -> int: + import psutil + + return int(psutil.Process(os.getpid()).memory_info().rss) + + +def memory_sample(label: str) -> dict[str, float | str]: + return { + "label": label, + "rss_mb": round(bytes_to_mb(current_rss_bytes()), 3), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + } + 
+ +def handle_signature(handle: Any) -> dict[str, Any]: + signature = { + "handle": type(handle).__name__, + "name": handle.name, + } + record = getattr(handle, "record", None) + if record is not None and hasattr(record, "kind"): + signature["kind"] = record.kind + return signature + + +def file_signature(file: Any) -> dict[str, Any]: + return { + "filepath": file.filepath, + "handle": type(file).__name__, + "name": file.name, + } + + +def known_global_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + function = codebase.get_function("AppRouterAnnouncer") + signature = handle_signature(function) + signature["filepath"] = function.filepath + return { + "app_router_announcer": signature, + } + + +def known_file_local_export_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + export = codebase.get_file( + "packages/next/src/client/components/app-router-announcer.tsx" + ).get_export("AppRouterAnnouncer") + signature = handle_signature(export) + signature["filepath"] = export.filepath + return { + "app_router_announcer_export": signature, + } + + +def known_ignore_case_file_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + return { + "app_router_announcer": file_signature( + codebase.get_file( + "PACKAGES/NEXT/SRC/CLIENT/COMPONENTS/APP-ROUTER-ANNOUNCER.TSX", + ignore_case=True, + ) + ), + } + + +def promise_chain_signatures(chains: list[Any]) -> dict[str, Any]: + return { + "promise_chain_count": len(chains), + "promise_chain_base_lines": [chain.base_source.splitlines()[0] for chain in chains], + "promise_chain_stage_names": [chain.stage_names for chain in chains], + "promise_chain_has_catch": [chain.has_catch_call for chain in chains], + "promise_chain_has_finally": [chain.has_finally_call for chain in chains], + } + + +def known_file_local_call_lookup_report(codebase: Any) -> dict[str, dict[str, Any]]: + file = codebase.get_file("packages/next/src/cli/next-lint.ts") + file_calls = file.function_calls + file_chains = file.promise_chains + 
symbol = file.get_function("nextLint") + symbol_calls = symbol.function_calls + symbol_chains = symbol.promise_chains + symbol_signature = handle_signature(symbol) + symbol_signature["filepath"] = symbol.filepath + return { + "next_lint_file": { + "filepath": file.filepath, + "function_call_count": len(file_calls), + "first_function_call_names": [call.name for call in file_calls[:8]], + **promise_chain_signatures(file_chains), + }, + "next_lint_symbol": { + **symbol_signature, + "function_call_count": len(symbol_calls), + "first_function_call_names": [call.name for call in symbol_calls[:12]], + **promise_chain_signatures(symbol_chains), + }, + } + + +def large_cache_materialization_report(backend: Any) -> dict[str, bool]: + return { + "files": backend._files is not None, + "symbols": backend._symbols is not None, + "imports": backend._imports is not None, + "exports": backend._exports is not None, + "references": backend._references is not None, + "external_references": backend._external_references is not None, + "function_calls": backend._function_calls is not None, + "promise_chains": backend._promise_chains is not None, + "dependencies": backend._dependencies is not None, + } + + +def targeted_cache_materialization_report(backend: Any) -> dict[str, bool]: + report = large_cache_materialization_report(backend) + report["file_handles"] = backend._file_handles is not None + report["symbol_handles"] = backend._symbol_handles is not None + report["import_handles"] = backend._import_handles is not None + report["export_handles"] = backend._export_handles is not None + report["function_call_handles"] = backend._function_call_handles is not None + report["promise_chain_handles"] = backend._promise_chain_handles is not None + report["function_call_handles_by_id"] = backend._function_call_handles_by_id is not None + report["promise_chain_handles_by_id"] = backend._promise_chain_handles_by_id is not None + report["exports_by_file"] = backend._exports_by_file_id is not None 
+ report["function_calls_by_file"] = backend._function_calls_by_file_id is not None + report["promise_chains_by_file"] = backend._promise_chains_by_file_id is not None + report["function_calls_by_symbol"] = backend._function_calls_by_source_symbol_id is not None + report["promise_chains_by_symbol"] = backend._promise_chains_by_source_symbol_id is not None + return report + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + memory_samples = [memory_sample("start")] + repo, actual_commit = prepare_pinned_repo(args) + extension_path = None + if not args.skip_build_extension: + extension_path = build_rust_extension(args.extension_dir, timeout=args.timeout) + if str(args.extension_dir) not in sys.path: + sys.path.insert(0, str(args.extension_dir)) + + from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode + from graph_sitter.core.codebase import Codebase + + start = time.perf_counter() + config = CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR) + codebase = Codebase(str(repo), language="typescript", config=config) + wall = time.perf_counter() - start + memory_samples.append(memory_sample("after_codebase_construct")) + + python_graph_blocked = False + try: + len(codebase.ctx.nodes) + except RuntimeError: + python_graph_blocked = True + memory_samples.append(memory_sample("after_python_graph_block_check")) + + backend = codebase.ctx.rust_index + assert backend is not None + summary = codebase.rust_index_summary + summary_counts = { + "files": summary.files, + "symbols": summary.symbols, + "classes": summary.classes, + "functions": summary.functions, + "global_variables": summary.global_variables, + "imports": summary.imports, + "import_resolutions": summary.import_resolutions, + "external_modules": summary.external_modules, + "references": summary.references, + "dependencies": summary.dependencies, + "bytes": summary.bytes, + "lines": summary.lines, + "files_with_errors": 
summary.files_with_errors, + } + memory_samples.append(memory_sample("after_summary_counts")) + record_counts = backend.compact_record_counts() + memory_samples.append(memory_sample("after_record_counts")) + compat_counts = backend.compact_compat_counts() + memory_samples.append(memory_sample("after_compat_handles")) + known_global_lookups = known_global_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_global_lookups")) + known_file_local_export_lookups = known_file_local_export_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_file_local_export_lookups")) + known_ignore_case_file_lookups = known_ignore_case_file_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_ignore_case_file_lookups")) + known_file_local_call_lookups = known_file_local_call_lookup_report(codebase) + memory_samples.append(memory_sample("after_known_file_local_call_lookups")) + targeted_cache_materialization = targeted_cache_materialization_report(backend) + large_cache_materialization = large_cache_materialization_report(backend) + + totals = { + "wall_seconds": round(wall, 6), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + "current_rss_mb": memory_samples[-1]["rss_mb"], + } + comparison = { + "recorded_python_wall_seconds": RECORDED_PYTHON_BASELINE["wall_seconds"], + "recorded_python_max_rss_mb": RECORDED_PYTHON_BASELINE["max_rss_mb"], + "recorded_python_to_rust_wall_ratio": ratio( + RECORDED_PYTHON_BASELINE["wall_seconds"], totals["wall_seconds"] + ), + "recorded_python_to_rust_rss_ratio": ratio( + RECORDED_PYTHON_BASELINE["max_rss_mb"], totals["max_rss_mb"] + ), + } + report = { + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "checkout": str(repo), + "extension_path": str(extension_path) if extension_path else None, + "python_graph_blocked": python_graph_blocked, + }, + "totals": totals, + "rss_samples": memory_samples, + "summary": 
summary_counts, + "records": record_counts, + "compat_handles": compat_counts, + "known_global_lookups": known_global_lookups, + "known_file_local_export_lookups": known_file_local_export_lookups, + "known_ignore_case_file_lookups": known_ignore_case_file_lookups, + "known_file_local_call_lookups": known_file_local_call_lookups, + "targeted_cache_materialization": targeted_cache_materialization, + "large_cache_materialization": large_cache_materialization, + "comparison": comparison, + } + validate_report(report, args) + return report + + +def compare_counts(name: str, observed: dict[str, int], expected: dict[str, int], failures: list[str]) -> None: + for key, expected_value in expected.items(): + observed_value = observed.get(key) + if observed_value != expected_value: + failures.append(f"{name}.{key}: expected {expected_value}, got {observed_value}") + + +def validate_report(report: dict[str, Any], args: argparse.Namespace) -> None: + failures: list[str] = [] + if not report["metadata"]["python_graph_blocked"]: + failures.append("Python graph was materialized") + if not args.allow_count_drift: + compare_counts("summary", report["summary"], EXPECTED_SUMMARY, failures) + compare_counts("records", report["records"], EXPECTED_RECORDS, failures) + compare_counts("compat_handles", report["compat_handles"], EXPECTED_COMPAT_HANDLES, failures) + if report["known_global_lookups"] != EXPECTED_KNOWN_GLOBAL_LOOKUPS: + failures.append("known global lookup results drifted") + if report["known_file_local_export_lookups"] != EXPECTED_KNOWN_FILE_LOCAL_EXPORT_LOOKUPS: + failures.append("known file-local export lookup results drifted") + if report["known_ignore_case_file_lookups"] != EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS: + failures.append("known ignore-case file lookup results drifted") + if report["known_file_local_call_lookups"] != EXPECTED_KNOWN_FILE_LOCAL_CALL_LOOKUPS: + failures.append("known file-local call lookup results drifted") + if 
report["targeted_cache_materialization"] != EXPECTED_TARGETED_CACHE_MATERIALIZATION: + failures.append("targeted lookup caches were materialized during known queries") + if report["large_cache_materialization"] != EXPECTED_LARGE_CACHE_MATERIALIZATION: + failures.append("large Rust backend caches were materialized during known queries") + + totals = report["totals"] + comparison = report["comparison"] + if totals["wall_seconds"] > args.max_wall_seconds: + failures.append( + f"wall time {totals['wall_seconds']}s exceeds allowed {args.max_wall_seconds}s" + ) + if totals["max_rss_mb"] > args.max_rss_mb: + failures.append( + f"max RSS {totals['max_rss_mb']} MB exceeds allowed {args.max_rss_mb} MB" + ) + wall_ratio = comparison["recorded_python_to_rust_wall_ratio"] + rss_ratio = comparison["recorded_python_to_rust_rss_ratio"] + if wall_ratio is None or wall_ratio < args.min_recorded_wall_ratio: + failures.append( + f"recorded Python/Rust wall ratio {wall_ratio}x is below {args.min_recorded_wall_ratio}x" + ) + if rss_ratio is None or rss_ratio < args.min_recorded_rss_ratio: + failures.append( + f"recorded Python/Rust RSS ratio {rss_ratio}x is below {args.min_recorded_rss_ratio}x" + ) + + if failures: + raise RuntimeError("; ".join(failures)) + + +def print_human(report: dict[str, Any]) -> None: + metadata = report["metadata"] + totals = report["totals"] + summary = report["summary"] + compat = report["compat_handles"] + comparison = report["comparison"] + print(f"repo: {metadata['name']} {metadata['commit']}") + print(f"checkout: {metadata['checkout']}") + print(f"python graph blocked: {metadata['python_graph_blocked']}") + print( + f"rust Codebase: wall={totals['wall_seconds']:.3f}s " + f"max_rss={totals['max_rss_mb']:.1f} MB current_rss={totals['current_rss_mb']:.1f} MB" + ) + print( + "rss samples: " + + " -> ".join(f"{sample['label']}={sample['rss_mb']:.1f} MB" for sample in report["rss_samples"]) + ) + print( + "summary: " + f"files={summary['files']} 
symbols={summary['symbols']} imports={summary['imports']} " + f"external_modules={summary['external_modules']} exports={compat['exports']} " + f"references={summary['references']} dependencies={summary['dependencies']} " + f"external_references={report['records']['rust_external_references']} " + f"function_calls={report['records']['rust_function_calls']} " + f"promise_chains={report['records']['rust_promise_chains']} " + f"subclass_edges={report['records']['rust_subclass_edges']}" + ) + print( + "compat handles: " + f"files={compat['files']} symbols={compat['symbols']} interfaces={compat['interfaces']} " + f"types={compat['types']} imports={compat['imports']} exports={compat['exports']}" + f" external_modules={compat['external_modules']}" + ) + print( + "recorded baseline ratios: " + f"wall={comparison['recorded_python_to_rust_wall_ratio']}x " + f"rss={comparison['recorded_python_to_rust_rss_ratio']}x" + ) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Check pinned Next.js Rust Codebase construction, compatibility handles, and performance ceilings." + ) + parser.add_argument("--name", default=DEFAULT_REPO_NAME, help="Stable name for the pinned repository checkout.") + parser.add_argument("--repo-url", default=DEFAULT_REPO_URL, help="Git repository URL.") + parser.add_argument("--ref", default=DEFAULT_REF, help="Remote ref or commit to fetch.") + parser.add_argument("--expected-commit", default=DEFAULT_EXPECTED_COMMIT, help="Expected resolved commit SHA. 
Pass an empty string to disable.") + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR, help="Directory for reusable pinned checkouts.") + parser.add_argument("--extension-dir", type=Path, default=DEFAULT_EXTENSION_DIR, help="Directory for the built PyO3 extension module.") + parser.add_argument("--reset-checkout", action="store_true", help="Delete and recreate the cached checkout before running.") + parser.add_argument("--skip-fetch", action="store_true", help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.") + parser.add_argument("--skip-build-extension", action="store_true", help="Reuse an existing graph_sitter_py extension in --extension-dir.") + parser.add_argument("--timeout", type=int, default=900, help="Timeout in seconds for clone/build/benchmark child commands.") + parser.add_argument("--allow-count-drift", action="store_true", help="Do not fail if compact record or compatibility-handle counts differ from the pinned expectations.") + parser.add_argument("--max-wall-seconds", type=float, default=25.0, help="Fail if Rust Codebase construction is slower than this ceiling.") + parser.add_argument("--max-rss-mb", type=float, default=1000.0, help="Fail if process max RSS exceeds this ceiling.") + parser.add_argument("--min-recorded-wall-ratio", type=float, default=1.2, help="Fail unless the recorded Python baseline divided by Rust wall time is at least this value.") + parser.add_argument("--min-recorded-rss-ratio", type=float, default=3.0, help="Fail unless the recorded Python baseline divided by Rust max RSS is at least this value.") + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report instead of a human summary.") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + if args.expected_commit == "": + args.expected_commit = None + report = make_report(args) + if 
args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_python_rust_parity_fixture.py b/rust-rewrite/tools/check_python_rust_parity_fixture.py new file mode 100644 index 000000000..bf98bbfbc --- /dev/null +++ b/rust-rewrite/tools/check_python_rust_parity_fixture.py @@ -0,0 +1,703 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import shutil +import sys +import tempfile +from pathlib import Path +from typing import Any + +TOOLS_DIR = Path(__file__).resolve().parent +REPO_ROOT = TOOLS_DIR.parents[1] +SRC_ROOT = REPO_ROOT / "src" +if str(TOOLS_DIR) not in sys.path: + sys.path.insert(0, str(TOOLS_DIR)) +if str(SRC_ROOT) not in sys.path: + sys.path.insert(0, str(SRC_ROOT)) + +from benchmark_pinned_python_repo import build_rust_extension # noqa: E402 + +DEFAULT_EXTENSION_DIR = Path("/tmp/graph_sitter_py_parity_fixture") + +FIXTURE_FILES = { + "pkg/__init__.py": "from .api import ApiHelper as public_helper\n", + "pkg/api.py": "from .models import Helper as ApiHelper\n", + "pkg/models.py": "class Helper:\n pass\n\n\ndef build():\n return Helper()\n", + "pkg/service.py": ( + "import requests\n" + "import pkg.models as models\n" + "from pkg.models import Helper\n" + "from pkg.api import ApiHelper\n" + "from pkg import public_helper\n" + "\n\n" + "def run():\n" + " item = Helper()\n" + " api = ApiHelper()\n" + " other = models.build()\n" + " public = public_helper()\n" + " return public, api, item, other, requests.get\n" + "\n\n" + "def load_plugin(name):\n" + " import importlib\n" + " return importlib.import_module(name)\n" + ), +} + +MUTATION_FILES = { + "pkg/service.py": "import os\nimport pkg.service\n\nclass 
Service:\n def run(self):\n return os.getcwd()\n\ndef helper():\n return Service()\n", +} +MUTATION_OUTPUT_PATHS = ["pkg/service.py"] + +TYPESCRIPT_FIXTURE_FILES = { + "src/base.ts": ( + "export interface Animal {}\n" + "export interface Dog extends Animal {}\n" + "export class Labrador implements Dog {}\n" + ), + "src/util.ts": "export function helper(value: number) { return value; }\n", + "src/index.ts": ( + "export { helper as publicHelper } from './util';\n" + "export * as utilNamespace from './util';\n" + ), + "src/legacy.ts": "class Legacy {}\nexport = Legacy;\n", + "src/app.ts": ( + "import { helper } from './util';\n" + "import { publicHelper, utilNamespace } from './index';\n" + "import Legacy from './legacy';\n" + "\n" + "export function run() {\n" + " const legacy = Legacy;\n" + " return helper(publicHelper(1)) + utilNamespace.helper(2);\n" + "}\n" + ), +} +TYPESCRIPT_MUTATION_FILES = { + "src/util.ts": "export function helper(value: number) { return value; }\n", + "src/app.ts": "import { helper } from './util';\n\nexport function run() {\n return helper(1);\n}\n", +} +TYPESCRIPT_MUTATION_OUTPUT_PATHS = ["src/app.ts"] + + +def node_type_name(value: Any) -> str: + return str(getattr(value, "name", value)) + + +def node_signature(node: Any) -> dict[str, Any] | None: + if node is None: + return None + signature = { + "node_type": node_type_name(getattr(node, "node_type", type(node).__name__)), + "name": getattr(node, "name", None), + "filepath": getattr(node, "filepath", None), + } + if signature["node_type"] == "EXTERNAL": + signature["source"] = getattr(node, "source", None) + return signature + + +def sorted_signatures(nodes: list[Any]) -> list[dict[str, Any]]: + return sorted( + (node_signature(node) for node in nodes), + key=lambda item: ( + item["filepath"] or "", + item["node_type"], + item["name"] or "", + item.get("source") or "", + ), + ) + + +def sorted_rows(rows: list[dict[str, Any]]) -> list[dict[str, Any]]: + return sorted(rows, key=lambda 
row: json.dumps(row, sort_keys=True)) + + +def import_signature(imp: Any) -> dict[str, Any]: + return { + "filepath": imp.filepath, + "source": imp.source, + "name": imp.name, + "from_file": None if imp.from_file is None else imp.from_file.filepath, + "resolved_symbol": node_signature(imp.resolved_symbol), + } + + +def import_target_signature(imp: Any) -> dict[str, Any]: + return { + "filepath": imp.filepath, + "source": imp.source, + "name": imp.name, + "from_file": None if imp.from_file is None else imp.from_file.filepath, + "resolved_symbol": node_signature(imp.resolved_symbol), + } + + +def export_signature(export: Any) -> dict[str, Any]: + return { + "filepath": export.filepath, + "name": export.name, + "declared_symbol": node_signature(export.declared_symbol), + "exported_symbol": node_signature(export.exported_symbol), + "resolved_symbol": node_signature(export.resolved_symbol), + "is_default": export.is_default_export(), + "is_reexport": export.is_reexport(), + } + + +def resolved_target_signature(node: Any) -> dict[str, Any] | None: + if node_type_name(getattr(node, "node_type", None)) in {"IMPORT", "EXPORT"}: + resolved_symbol = getattr(node, "resolved_symbol", None) + if resolved_symbol is not None: + return node_signature(resolved_symbol) + return node_signature(node) + + +def unique_sorted_signatures(items: list[dict[str, Any] | None]) -> list[dict[str, Any] | None]: + seen: set[str] = set() + unique = [] + for item in items: + key = json.dumps(item, sort_keys=True) + if key in seen: + continue + seen.add(key) + unique.append(item) + return sorted( + unique, + key=lambda item: ( + "" if item is None else item.get("filepath") or "", + "" if item is None else item.get("node_type") or "", + "" if item is None else item.get("name") or "", + ), + ) + + +def import_resolves_to_external(imp: Any) -> bool: + resolved = imp.resolved_symbol + return node_type_name(getattr(resolved, "node_type", None)) == "EXTERNAL" + + +def get_symbol(codebase: Any, name: str) 
-> Any | None: + return codebase.get_symbol(name, optional=True) + + +def symbol_dependency_graph(symbols: list[Any]) -> list[dict[str, Any]]: + return sorted_rows( + [ + { + "symbol": node_signature(symbol), + "dependencies": unique_sorted_signatures( + [node_signature(dependency) for dependency in symbol.dependencies] + ), + } + for symbol in symbols + ] + ) + + +def symbol_usage_graph(symbols: list[Any]) -> list[dict[str, Any]]: + return sorted_rows( + [ + { + "symbol": node_signature(symbol), + "symbol_usages": unique_sorted_signatures( + [node_signature(usage) for usage in symbol.symbol_usages] + ), + } + for symbol in symbols + ] + ) + + +def import_usage_graph(imports: list[Any]) -> list[dict[str, Any]]: + return sorted_rows( + [ + { + "import": import_signature(imp), + "symbol_usages": unique_sorted_signatures( + [node_signature(usage) for usage in imp.symbol_usages] + ), + } + for imp in imports + ] + ) + + +def relation_signatures(value: Any) -> list[dict[str, Any]]: + if callable(value): + value = value() + return sorted_signatures(list(value)) + + +def collect_report(codebase: Any, *, expect_blocked_graph: bool) -> dict[str, Any]: + python_graph_blocked = False + try: + len(codebase.ctx.nodes) + except RuntimeError: + python_graph_blocked = True + + service = codebase.get_file("pkg/service.py") + helper = get_symbol(codebase, "Helper") + build = get_symbol(codebase, "build") + run = get_symbol(codebase, "run") + load_plugin = get_symbol(codebase, "load_plugin") + + if helper is None or build is None or run is None or load_plugin is None: + missing = [ + name + for name, symbol in ( + ("Helper", helper), + ("build", build), + ("run", run), + ("load_plugin", load_plugin), + ) + if symbol is None + ] + msg = "missing expected symbols: " + ", ".join(missing) + raise RuntimeError(msg) + + helper_symbol_usages = [ + usage + for usage in helper.symbol_usages + if node_type_name(getattr(usage, "node_type", None)) == "SYMBOL" + ] + run_dependencies = 
list(run.dependencies) + run_internal_dependencies = [ + dependency + for dependency in run_dependencies + if not ( + node_type_name(getattr(dependency, "node_type", None)) == "IMPORT" + and import_resolves_to_external(dependency) + ) + ] + + report = { + "python_graph_blocked": python_graph_blocked, + "files": sorted(file.filepath for file in codebase.files), + "symbols": sorted( + ( + { + "filepath": symbol.filepath, + "name": symbol.name, + "node_type": node_type_name(symbol.node_type), + } + for symbol in codebase.symbols + ), + key=lambda item: (item["filepath"], item["node_type"], item["name"]), + ), + "imports": sorted_rows([import_signature(imp) for imp in codebase.imports]), + "service_imports": sorted( + (import_signature(imp) for imp in service.imports), + key=lambda item: item["source"], + ), + "external_modules": sorted_signatures(codebase.external_modules), + "symbol_dependency_graph": symbol_dependency_graph(codebase.symbols), + "symbol_usage_graph": symbol_usage_graph(codebase.symbols), + "import_usage_graph": import_usage_graph(codebase.imports), + "build_dependencies": sorted_signatures(build.dependencies), + "build_symbol_usages": sorted_signatures(build.symbol_usages), + "helper_symbol_usages_symbols_only": sorted_signatures(helper_symbol_usages), + "run_internal_dependencies": sorted_signatures(run_internal_dependencies), + "run_dependencies": sorted_signatures(run_dependencies), + "load_plugin_dependencies": sorted_signatures(load_plugin.dependencies), + } + if expect_blocked_graph and not python_graph_blocked: + msg = "expected compact Rust backend to block Python graph materialization" + raise RuntimeError(msg) + return report + + +def make_codebase_report(files: dict[str, str], *, backend: str) -> dict[str, Any]: + from graph_sitter.codebase.factory.get_session import get_codebase_session + from graph_sitter.configs.models.codebase import ( + CodebaseConfig, + GraphBackend, + RustFallbackMode, + ) + + tmpdir = 
Path(tempfile.mkdtemp(prefix=f"graph-sitter-parity-{backend}-")) + try: + graph_backend = GraphBackend.PYTHON if backend == "python" else GraphBackend.RUST + config = CodebaseConfig( + graph_backend=graph_backend, + rust_fallback=RustFallbackMode.ERROR, + ) + with get_codebase_session( + tmpdir=tmpdir, + files=files, + config=config, + verify_input=False, + verify_output=False, + ) as codebase: + return collect_report(codebase, expect_blocked_graph=backend == "rust") + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + +def typescript_heritage_graph(codebase: Any) -> dict[str, Any]: + animal = get_symbol(codebase, "Animal") + dog = get_symbol(codebase, "Dog") + labrador = get_symbol(codebase, "Labrador") + if animal is None or dog is None or labrador is None: + missing = [ + name + for name, symbol in ( + ("Animal", animal), + ("Dog", dog), + ("Labrador", labrador), + ) + if symbol is None + ] + msg = "missing expected TypeScript heritage symbols: " + ", ".join(missing) + raise RuntimeError(msg) + + return { + "animal_implementations": relation_signatures(animal.implementations), + "dog_dependencies": sorted_signatures(list(dog.dependencies)), + "dog_implementations": relation_signatures(dog.implementations), + "labrador_dependencies": sorted_signatures(list(labrador.dependencies)), + "labrador_is_subclass": bool(labrador.is_subclass), + "labrador_superclasses": relation_signatures(labrador.superclasses), + } + + +def collect_typescript_report(codebase: Any, *, expect_blocked_graph: bool) -> dict[str, Any]: + python_graph_blocked = False + try: + len(codebase.ctx.nodes) + except RuntimeError: + python_graph_blocked = True + + app = codebase.get_file("src/app.ts") + helper = get_symbol(codebase, "helper") + run = get_symbol(codebase, "run") + if helper is None or run is None: + missing = [ + name + for name, symbol in (("helper", helper), ("run", run)) + if symbol is None + ] + msg = "missing expected TypeScript symbols: " + ", ".join(missing) + raise 
RuntimeError(msg) + + helper_symbol_usages = [ + usage + for usage in helper.symbol_usages + if node_type_name(getattr(usage, "node_type", None)) == "SYMBOL" + ] + report = { + "python_graph_blocked": python_graph_blocked, + "files": sorted(file.filepath for file in codebase.files), + "symbols": sorted_signatures(codebase.symbols), + "imports": sorted_rows([import_target_signature(imp) for imp in codebase.imports]), + "app_import_targets": sorted( + (import_target_signature(imp) for imp in app.imports), + key=lambda item: (item["from_file"] or "", item["name"] or ""), + ), + "exports": sorted( + (export_signature(export) for export in codebase.exports), + key=lambda item: (item["filepath"], item["name"] or ""), + ), + "symbol_dependency_graph": symbol_dependency_graph(codebase.symbols), + "symbol_usage_graph": symbol_usage_graph(codebase.symbols), + "import_usage_graph": import_usage_graph(codebase.imports), + "helper_symbol_usages_symbols_only": sorted_signatures(helper_symbol_usages), + "run_resolved_dependency_targets": unique_sorted_signatures( + [resolved_target_signature(dependency) for dependency in run.dependencies] + ), + "typescript_heritage_graph": typescript_heritage_graph(codebase), + } + if expect_blocked_graph and not python_graph_blocked: + msg = "expected compact Rust backend to block Python graph materialization" + raise RuntimeError(msg) + return report + + +def make_typescript_codebase_report(files: dict[str, str], *, backend: str) -> dict[str, Any]: + from graph_sitter.codebase.factory.get_session import get_codebase_session + from graph_sitter.configs.models.codebase import ( + CodebaseConfig, + GraphBackend, + RustFallbackMode, + ) + from graph_sitter.shared.enums.programming_language import ProgrammingLanguage + + tmpdir = Path(tempfile.mkdtemp(prefix=f"graph-sitter-ts-parity-{backend}-")) + try: + graph_backend = GraphBackend.PYTHON if backend == "python" else GraphBackend.RUST + config = CodebaseConfig( + graph_backend=graph_backend, + 
rust_fallback=RustFallbackMode.ERROR, + ) + with get_codebase_session( + tmpdir=tmpdir, + programming_language=ProgrammingLanguage.TYPESCRIPT, + files=files, + config=config, + verify_input=False, + verify_output=False, + ) as codebase: + return collect_typescript_report(codebase, expect_blocked_graph=backend == "rust") + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + +def read_outputs(root: Path, paths: list[str]) -> dict[str, str]: + return {path: (root / path).read_text(encoding="utf-8") for path in paths} + + +def make_mutation_report(files: dict[str, str], *, backend: str) -> dict[str, Any]: + from graph_sitter.codebase.factory.get_session import get_codebase_session + from graph_sitter.configs.models.codebase import ( + CodebaseConfig, + GraphBackend, + RustFallbackMode, + ) + + tmpdir = Path(tempfile.mkdtemp(prefix=f"graph-sitter-mutation-{backend}-")) + try: + graph_backend = GraphBackend.PYTHON if backend == "python" else GraphBackend.RUST + config = CodebaseConfig( + graph_backend=graph_backend, + rust_fallback=RustFallbackMode.ERROR, + ) + with get_codebase_session( + tmpdir=tmpdir, + files=files, + config=config, + sync_graph=False, + verify_input=False, + verify_output=False, + ) as codebase: + service_file = codebase.get_file("pkg/service.py") + service_file.add_import("from typing import Any") + codebase.imports[0].remove() + codebase.get_class("Service").rename("Worker") + codebase.commit(sync_graph=False) + + python_graph_blocked = False + try: + len(codebase.ctx.nodes) + except RuntimeError: + python_graph_blocked = True + if backend == "rust" and not python_graph_blocked: + msg = "expected compact Rust mutation flow to keep Python graph blocked" + raise RuntimeError(msg) + + return { + "python_graph_blocked": python_graph_blocked, + "outputs": read_outputs(tmpdir, MUTATION_OUTPUT_PATHS), + } + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + +def make_typescript_mutation_report(files: dict[str, str], *, backend: str) -> 
def _mismatched_keys(
    python_report: dict[str, Any],
    rust_report: dict[str, Any],
    exact_keys: list[str],
) -> list[str]:
    """Return the keys from ``exact_keys`` whose values are not proven equal.

    A key that is absent from either report counts as a mismatch. Comparing
    two ``.get()`` results would treat "missing on both sides" as equal and
    let a dropped or misspelled key pass the parity check unnoticed.
    """
    absent = object()  # sentinel: distinguishes "missing" from a stored None
    mismatches: list[str] = []
    for key in exact_keys:
        python_value = python_report.get(key, absent)
        rust_value = rust_report.get(key, absent)
        if python_value is absent or rust_value is absent or python_value != rust_value:
            mismatches.append(key)
    return mismatches


def compare_reports(python_report: dict[str, Any], rust_report: dict[str, Any]) -> dict[str, Any]:
    """Compare the Python-fixture reports produced by the two backends.

    Args:
        python_report: report collected through the Python backend.
        rust_report: report collected through the Rust backend.

    Returns:
        A dict with ``exact_keys`` (the keys that must match exactly),
        ``mismatches`` (the subset that differ or are missing from either
        report, in ``exact_keys`` order) and ``known_deltas`` (currently
        always empty).
    """
    exact_keys = [
        "files",
        "symbols",
        "imports",
        "service_imports",
        "external_modules",
        "symbol_dependency_graph",
        "symbol_usage_graph",
        "import_usage_graph",
        "build_dependencies",
        "build_symbol_usages",
        "helper_symbol_usages_symbols_only",
        "run_internal_dependencies",
        "run_dependencies",
        "load_plugin_dependencies",
    ]
    known_deltas: dict[str, Any] = {}
    return {
        "exact_keys": exact_keys,
        "mismatches": _mismatched_keys(python_report, rust_report, exact_keys),
        "known_deltas": known_deltas,
    }


def compare_typescript_reports(python_report: dict[str, Any], rust_report: dict[str, Any]) -> dict[str, Any]:
    """Compare the TypeScript-fixture reports produced by the two backends.

    Args:
        python_report: report collected through the Python backend.
        rust_report: report collected through the Rust backend.

    Returns:
        A dict with ``exact_keys``, ``mismatches`` (keys that differ or are
        missing from either report, in ``exact_keys`` order) and an empty
        ``known_deltas`` mapping.
    """
    exact_keys = [
        "files",
        "symbols",
        "imports",
        "app_import_targets",
        "exports",
        "symbol_dependency_graph",
        "symbol_usage_graph",
        "import_usage_graph",
        "helper_symbol_usages_symbols_only",
        "run_resolved_dependency_targets",
        "typescript_heritage_graph",
    ]
    return {
        "exact_keys": exact_keys,
        "mismatches": _mismatched_keys(python_report, rust_report, exact_keys),
        "known_deltas": {},
    }
backend="rust") + typescript_mutation_mismatch = python_typescript_mutation_report["outputs"] != rust_typescript_mutation_report["outputs"] + report = { + "metadata": { + "extension_path": str(extension_path) if extension_path else None, + "fixture_files": sorted(FIXTURE_FILES), + "mutation_files": sorted(MUTATION_FILES), + "typescript_fixture_files": sorted(TYPESCRIPT_FIXTURE_FILES), + "typescript_mutation_files": sorted(TYPESCRIPT_MUTATION_FILES), + }, + "python": python_report, + "rust": rust_report, + "python_typescript": python_typescript_report, + "rust_typescript": rust_typescript_report, + "python_mutation": python_mutation_report, + "rust_mutation": rust_mutation_report, + "python_typescript_mutation": python_typescript_mutation_report, + "rust_typescript_mutation": rust_typescript_mutation_report, + "comparison": comparison, + "typescript_comparison": typescript_comparison, + } + if comparison["mismatches"]: + msg = "Python/Rust parity fixture mismatches: " + ", ".join( + comparison["mismatches"] + ) + raise RuntimeError(msg) + if typescript_comparison["mismatches"]: + msg = "Python/Rust TypeScript parity fixture mismatches: " + ", ".join( + typescript_comparison["mismatches"] + ) + raise RuntimeError(msg) + if mutation_mismatch: + msg = "Python/Rust mutation parity fixture mismatched file outputs" + raise RuntimeError(msg) + if typescript_mutation_mismatch: + msg = "Python/Rust TypeScript mutation parity fixture mismatched file outputs" + raise RuntimeError(msg) + return report + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Compare a representative Python fixture through the Python backend and compact Rust backend." 
+ ) + parser.add_argument( + "--extension-dir", + type=Path, + default=DEFAULT_EXTENSION_DIR, + help="Directory for the built PyO3 extension module.", + ) + parser.add_argument( + "--skip-build-extension", + action="store_true", + help="Reuse an existing graph_sitter_py extension in --extension-dir.", + ) + parser.add_argument("--timeout", type=int, default=900) + parser.add_argument("--output", type=Path, help="Optional path to write JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON report.") + return parser.parse_args() + + +def print_human(report: dict[str, Any]) -> None: + comparison = report["comparison"] + typescript_comparison = report["typescript_comparison"] + print("Python/Rust parity fixture passed") + print(f"exact keys: {', '.join(comparison['exact_keys'])}") + print(f"external modules: {len(report['rust']['external_modules'])}") + print(f"service imports: {len(report['rust']['service_imports'])}") + print(f"mutation outputs: {len(report['rust_mutation']['outputs'])}") + print(f"typescript exact keys: {', '.join(typescript_comparison['exact_keys'])}") + print(f"typescript exports: {len(report['rust_typescript']['exports'])}") + print(f"typescript mutation outputs: {len(report['rust_typescript_mutation']['outputs'])}") + print(f"known deltas: {len(comparison['known_deltas'])}") + + +def main() -> int: + args = parse_args() + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text( + json.dumps(report, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_rollout_readiness.py b/rust-rewrite/tools/check_rollout_readiness.py new file mode 100644 index 000000000..d2207951c --- /dev/null +++ b/rust-rewrite/tools/check_rollout_readiness.py @@ -0,0 
+1,647 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +from pathlib import Path +from typing import Any + +import benchmark_pinned_python_repo as airflow_benchmark +import benchmark_pinned_typescript_repo as nextjs_benchmark +import check_pinned_python_codebase as airflow_codebase +import check_pinned_typescript_codebase as nextjs_codebase +import snapshot_pinned_python_repo as airflow_snapshot +import snapshot_pinned_typescript_repo as nextjs_snapshot + +REPO_ROOT = Path(__file__).resolve().parents[2] +DEFAULT_REPORT_DIR = REPO_ROOT / "rust-rewrite/reports" +DEFAULT_MIN_AIRFLOW_WALL_RATIO = 2.0 +DEFAULT_MIN_NEXTJS_WALL_RATIO = 1.2 +DEFAULT_MIN_SEMANTIC_WALL_RATIO = 2.0 +DEFAULT_MIN_RSS_RATIO = 4.0 + +REQUIRED_REPORTS = { + "airflow_snapshot": "airflow-rust-compact-snapshot.json", + "airflow_codebase": "airflow-rust-codebase.json", + "nextjs_snapshot": "nextjs-rust-compact-snapshot.json", + "nextjs_codebase": "nextjs-rust-codebase.json", + "codemods": "pinned-rust-codemods.json", + "semantic_parity": "pinned-semantic-parity.json", +} + +AIRFLOW_EXPECTED_SNAPSHOT_SUMMARY = airflow_codebase.EXPECTED_SUMMARY +NEXTJS_EXPECTED_SNAPSHOT_SUMMARY = { + **nextjs_codebase.EXPECTED_SUMMARY, + "exports": nextjs_codebase.EXPECTED_RECORDS["rust_exports"], + "external_references": nextjs_codebase.EXPECTED_RECORDS["rust_external_references"], + "subclass_edges": nextjs_codebase.EXPECTED_RECORDS["rust_subclass_edges"], +} + +AIRFLOW_EXPECTED_METADATA = { + "name": airflow_benchmark.DEFAULT_REPO_NAME, + "repo_url": airflow_benchmark.DEFAULT_REPO_URL, + "ref": airflow_benchmark.DEFAULT_REF, + "commit": airflow_benchmark.DEFAULT_EXPECTED_COMMIT, +} +NEXTJS_EXPECTED_METADATA = { + "name": nextjs_benchmark.DEFAULT_REPO_NAME, + "repo_url": nextjs_benchmark.DEFAULT_REPO_URL, + "ref": nextjs_benchmark.DEFAULT_REF, + "commit": nextjs_benchmark.DEFAULT_EXPECTED_COMMIT, +} + + +def load_json(path: Path) -> dict[str, Any]: + if not path.exists(): 
+ msg = f"missing required report: {path}" + raise FileNotFoundError(msg) + return json.loads(path.read_text(encoding="utf-8")) + + +def ratio_at_least(value: Any, minimum: float) -> bool: + return isinstance(value, int | float) and value >= minimum + + +def ratio(numerator: float | int | None, denominator: float | int | None) -> float | None: + if numerator is None or denominator is None or denominator <= 0: + return None + return round(float(numerator) / float(denominator), 3) + + +def resolve_threshold(value: float | None, common: float | None, default: float) -> float: + if value is not None: + return value + if common is not None: + return common + return default + + +def assert_metadata(name: str, metadata: dict[str, Any], expected: dict[str, Any], failures: list[str]) -> None: + for key, expected_value in expected.items(): + observed_value = metadata.get(key) + if observed_value != expected_value: + failures.append(f"{name}: metadata.{key} expected {expected_value!r}, got {observed_value!r}") + + +def assert_exact_counts(name: str, observed: dict[str, Any], expected: dict[str, int], failures: list[str]) -> None: + if not isinstance(observed, dict): + failures.append(f"{name}: missing count mapping") + return + for key, expected_value in expected.items(): + observed_value = observed.get(key) + if observed_value != expected_value: + failures.append(f"{name}: {key} expected {expected_value}, got {observed_value}") + + +def assert_exact_mapping(name: str, observed: Any, expected: Any, failures: list[str]) -> None: + if observed != expected: + failures.append(f"{name}: drifted") + + +def assert_cache_contract(name: str, observed: dict[str, Any], expected: dict[str, bool], failures: list[str]) -> None: + if not isinstance(observed, dict): + failures.append(f"{name}: missing cache materialization report") + return + assert_exact_mapping(name, observed, expected, failures) + + +def assert_no_integrity_failures(name: str, snapshot: dict[str, Any], failures: 
def assert_no_integrity_failures(name: str, snapshot: dict[str, Any], failures: list[str]) -> None:
    """Require every counter in ``snapshot["integrity"]`` to be zero.

    Appends one failure when the integrity mapping is missing, or one failure
    listing every non-zero entry.
    """
    integrity = snapshot.get("integrity")
    if not isinstance(integrity, dict):
        failures.append(f"{name}: missing integrity report")
        return
    drift = {key: value for key, value in integrity.items() if value != 0}
    if drift:
        failures.append(f"{name}: integrity drifted: {drift}")


def assert_nonempty_graphs(
    name: str,
    snapshot: dict[str, Any],
    failures: list[str],
    *,
    required_graphs: list[str],
) -> None:
    """Require each required graph to have a positive count and a sha256.

    Args:
        name: report label used as the failure-message prefix.
        snapshot: snapshot report; its ``"graphs"`` entry maps graph names to
            ``{"count": ..., "sha256": ...}`` records.
        failures: list that failure messages are appended to.
        required_graphs: graph names that must be present and non-empty.
    """
    graphs = snapshot.get("graphs")
    if not isinstance(graphs, dict):
        failures.append(f"{name}: missing graph hashes")
        return
    for graph_name in required_graphs:
        graph = graphs.get(graph_name)
        if not isinstance(graph, dict):
            failures.append(f"{name}: missing graph hash for {graph_name}")
            continue
        count = graph.get("count", 0)
        # A null or non-numeric count (possible in hand-edited or truncated
        # JSON) is reported as empty rather than raising TypeError on `<=`.
        if not isinstance(count, (int, float)) or count <= 0:
            failures.append(f"{name}: graph {graph_name} is empty")
        if not graph.get("sha256"):
            failures.append(f"{name}: graph {graph_name} is missing sha256")
summary_count is not None and graph_count != summary_count: + failures.append( + f"{name}: graph {graph_name} count expected {summary_count}, got {graph_count}" + ) + + +def assert_codebase_report( + name: str, + report: dict[str, Any], + *, + min_wall_ratio: float, + min_rss_ratio: float, + failures: list[str], +) -> dict[str, Any]: + metadata = report.get("metadata", {}) + comparison = report.get("comparison", {}) + totals = report.get("totals", {}) + large_caches = report.get("large_cache_materialization", {}) + + if not metadata.get("python_graph_blocked"): + failures.append(f"{name}: Python graph was materialized") + wall_ratio = comparison.get("recorded_python_to_rust_wall_ratio") + rss_ratio = comparison.get("recorded_python_to_rust_rss_ratio") + if not ratio_at_least(wall_ratio, min_wall_ratio): + failures.append(f"{name}: wall ratio {wall_ratio}x is below {min_wall_ratio}x") + if not ratio_at_least(rss_ratio, min_rss_ratio): + failures.append(f"{name}: RSS ratio {rss_ratio}x is below {min_rss_ratio}x") + materialized = [key for key, value in large_caches.items() if value] + if materialized: + failures.append(f"{name}: large Rust caches were materialized: {', '.join(materialized)}") + + return { + "wall_seconds": totals.get("wall_seconds"), + "max_rss_mb": totals.get("max_rss_mb"), + "wall_ratio": wall_ratio, + "rss_ratio": rss_ratio, + } + + +def assert_airflow_codebase_contract(report: dict[str, Any], failures: list[str]) -> None: + assert_metadata("airflow_codebase", report.get("metadata", {}), AIRFLOW_EXPECTED_METADATA, failures) + assert_exact_counts( + "airflow_codebase.summary", + report.get("summary", {}), + airflow_codebase.EXPECTED_SUMMARY, + failures, + ) + assert_exact_counts( + "airflow_codebase.records", + report.get("records", {}), + airflow_codebase.EXPECTED_RECORDS, + failures, + ) + assert_exact_counts( + "airflow_codebase.compat_handles", + report.get("compat_handles", {}), + airflow_codebase.EXPECTED_COMPAT_HANDLES, + failures, + ) + 
assert_exact_mapping( + "airflow_codebase.known_global_lookups", + report.get("known_global_lookups"), + airflow_codebase.EXPECTED_KNOWN_GLOBAL_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_child_lookups", + report.get("known_child_lookups"), + airflow_codebase.EXPECTED_KNOWN_CHILD_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_file_local_lookups", + report.get("known_file_local_lookups"), + airflow_codebase.EXPECTED_KNOWN_FILE_LOCAL_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_file_local_import_lookups", + report.get("known_file_local_import_lookups"), + airflow_codebase.EXPECTED_KNOWN_FILE_LOCAL_IMPORT_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_file_local_name_resolution", + report.get("known_file_local_name_resolution"), + airflow_codebase.EXPECTED_KNOWN_FILE_LOCAL_NAME_RESOLUTION, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_module_import_attribute_resolution", + report.get("known_module_import_attribute_resolution"), + airflow_codebase.EXPECTED_KNOWN_MODULE_IMPORT_ATTRIBUTE_RESOLUTION, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_ignore_case_file_lookups", + report.get("known_ignore_case_file_lookups"), + airflow_codebase.EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_lookups", + report.get("known_lookups"), + airflow_codebase.EXPECTED_KNOWN_LOOKUPS, + failures, + ) + assert_exact_mapping( + "airflow_codebase.known_dependencies", + report.get("known_dependencies"), + airflow_codebase.EXPECTED_KNOWN_DEPENDENCIES, + failures, + ) + assert_cache_contract( + "airflow_codebase.targeted_cache_materialization", + report.get("targeted_cache_materialization", {}), + airflow_codebase.EXPECTED_TARGETED_CACHE_MATERIALIZATION, + failures, + ) + assert_cache_contract( + "airflow_codebase.byte_range_cache_materialization", + 
report.get("byte_range_cache_materialization", {}), + airflow_codebase.EXPECTED_BYTE_RANGE_CACHE_MATERIALIZATION, + failures, + ) + assert_cache_contract( + "airflow_codebase.large_cache_materialization", + report.get("large_cache_materialization", {}), + airflow_codebase.EXPECTED_LARGE_CACHE_MATERIALIZATION, + failures, + ) + + +def assert_nextjs_codebase_contract(report: dict[str, Any], failures: list[str]) -> None: + assert_metadata("nextjs_codebase", report.get("metadata", {}), NEXTJS_EXPECTED_METADATA, failures) + assert_exact_counts( + "nextjs_codebase.summary", + report.get("summary", {}), + nextjs_codebase.EXPECTED_SUMMARY, + failures, + ) + assert_exact_counts( + "nextjs_codebase.records", + report.get("records", {}), + nextjs_codebase.EXPECTED_RECORDS, + failures, + ) + assert_exact_counts( + "nextjs_codebase.compat_handles", + report.get("compat_handles", {}), + nextjs_codebase.EXPECTED_COMPAT_HANDLES, + failures, + ) + assert_exact_mapping( + "nextjs_codebase.known_global_lookups", + report.get("known_global_lookups"), + nextjs_codebase.EXPECTED_KNOWN_GLOBAL_LOOKUPS, + failures, + ) + assert_exact_mapping( + "nextjs_codebase.known_file_local_export_lookups", + report.get("known_file_local_export_lookups"), + nextjs_codebase.EXPECTED_KNOWN_FILE_LOCAL_EXPORT_LOOKUPS, + failures, + ) + assert_exact_mapping( + "nextjs_codebase.known_ignore_case_file_lookups", + report.get("known_ignore_case_file_lookups"), + nextjs_codebase.EXPECTED_KNOWN_IGNORE_CASE_FILE_LOOKUPS, + failures, + ) + assert_exact_mapping( + "nextjs_codebase.known_file_local_call_lookups", + report.get("known_file_local_call_lookups"), + nextjs_codebase.EXPECTED_KNOWN_FILE_LOCAL_CALL_LOOKUPS, + failures, + ) + assert_cache_contract( + "nextjs_codebase.targeted_cache_materialization", + report.get("targeted_cache_materialization", {}), + nextjs_codebase.EXPECTED_TARGETED_CACHE_MATERIALIZATION, + failures, + ) + assert_cache_contract( + "nextjs_codebase.large_cache_materialization", + 
report.get("large_cache_materialization", {}), + nextjs_codebase.EXPECTED_LARGE_CACHE_MATERIALIZATION, + failures, + ) + + +def assert_codemods(report: dict[str, Any], failures: list[str]) -> list[dict[str, Any]]: + summaries = [] + for suite in report.get("suites", []): + suite_name = suite.get("suite", "") + failed_assertions = [ + name for name, passed in suite.get("assertions", {}).items() if not passed + ] + if failed_assertions: + failures.append(f"codemods.{suite_name}: failed assertions: {', '.join(failed_assertions)}") + caches = suite.get("large_cache_materialization", {}) + materialized = [name for name, value in caches.items() if value] + if materialized: + failures.append( + f"codemods.{suite_name}: large Rust caches were materialized: {', '.join(materialized)}" + ) + timings = suite.get("timings", {}) + max_rss = max((float(sample["max_rss_mb"]) for sample in suite.get("rss_samples", [])), default=None) + summaries.append( + { + "suite": suite_name, + "construct_wall_seconds": timings.get("codebase_construct_wall_seconds"), + "codemod_commit_wall_seconds": timings.get("codemod_commit_wall_seconds"), + "max_rss_mb": max_rss, + } + ) + if not summaries: + failures.append("codemods: no suites were reported") + return summaries + + +def assert_semantic_parity( + report: dict[str, Any], + failures: list[str], + *, + min_wall_ratio: float, + min_rss_ratio: float, +) -> list[dict[str, Any]]: + summaries = [] + for suite in report.get("suites", []): + suite_name = suite.get("suite", "") + comparison = suite.get("comparison", {}) + mismatches = comparison.get("mismatches", []) + if mismatches: + failures.append(f"semantic_parity.{suite_name}: mismatches: {', '.join(mismatches)}") + if comparison.get("known_deltas") != comparison.get("expected_known_deltas"): + failures.append(f"semantic_parity.{suite_name}: known deltas do not match expectations") + rust_report = suite.get("rust", {}) + if not rust_report.get("python_graph_blocked"): + 
failures.append(f"semantic_parity.{suite_name}: Rust run materialized the Python graph") + + python_timing = suite.get("python", {}).get("timings", {}).get("codebase_construct_wall_seconds") + rust_timing = rust_report.get("timings", {}).get("codebase_construct_wall_seconds") + python_rss = max( + (float(sample["max_rss_mb"]) for sample in suite.get("python", {}).get("rss_samples", [])), + default=None, + ) + rust_rss = max( + (float(sample["max_rss_mb"]) for sample in rust_report.get("rss_samples", [])), + default=None, + ) + performance = comparison.get("performance", {}) + wall_ratio = performance.get("wall_ratio") if isinstance(performance, dict) else None + rss_ratio = performance.get("rss_ratio") if isinstance(performance, dict) else None + wall_ratio = wall_ratio if wall_ratio is not None else ratio(python_timing, rust_timing) + rss_ratio = rss_ratio if rss_ratio is not None else ratio(python_rss, rust_rss) + if not ratio_at_least(wall_ratio, min_wall_ratio): + failures.append( + f"semantic_parity.{suite_name}: wall ratio {wall_ratio}x is below {min_wall_ratio}x" + ) + if not ratio_at_least(rss_ratio, min_rss_ratio): + failures.append( + f"semantic_parity.{suite_name}: RSS ratio {rss_ratio}x is below {min_rss_ratio}x" + ) + summaries.append( + { + "suite": suite_name, + "exact_keys": comparison.get("exact_keys", []), + "known_delta_count": len(comparison.get("known_deltas", {})), + "python_wall_seconds": python_timing, + "rust_wall_seconds": rust_timing, + "python_max_rss_mb": python_rss, + "rust_max_rss_mb": rust_rss, + "wall_ratio": wall_ratio, + "rss_ratio": rss_ratio, + } + ) + if not summaries: + failures.append("semantic_parity: no suites were reported") + return summaries + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + report_dir = args.report_dir + common_wall_ratio = getattr(args, "min_wall_ratio", None) + min_airflow_wall_ratio = resolve_threshold( + getattr(args, "min_airflow_wall_ratio", None), + common_wall_ratio, + 
DEFAULT_MIN_AIRFLOW_WALL_RATIO, + ) + min_nextjs_wall_ratio = resolve_threshold( + getattr(args, "min_nextjs_wall_ratio", None), + common_wall_ratio, + DEFAULT_MIN_NEXTJS_WALL_RATIO, + ) + min_semantic_wall_ratio = resolve_threshold( + getattr(args, "min_semantic_wall_ratio", None), + common_wall_ratio, + DEFAULT_MIN_SEMANTIC_WALL_RATIO, + ) + min_rss_ratio = getattr(args, "min_rss_ratio", DEFAULT_MIN_RSS_RATIO) + reports = { + key: load_json(report_dir / filename) + for key, filename in REQUIRED_REPORTS.items() + } + + failures: list[str] = [] + assert_snapshot_contract( + "airflow_snapshot", + reports["airflow_snapshot"], + expected_schema_version=airflow_snapshot.SNAPSHOT_SCHEMA_VERSION, + expected_metadata=AIRFLOW_EXPECTED_METADATA, + expected_summary=AIRFLOW_EXPECTED_SNAPSHOT_SUMMARY, + required_graphs=[ + "files", + "symbols", + "imports", + "import_resolutions", + "external_modules", + "references", + "external_references", + "dependencies", + ], + failures=failures, + ) + assert_snapshot_contract( + "nextjs_snapshot", + reports["nextjs_snapshot"], + expected_schema_version=nextjs_snapshot.SNAPSHOT_SCHEMA_VERSION, + expected_metadata={ + **NEXTJS_EXPECTED_METADATA, + "raw_rust_walk": False, + "selected_file_count": nextjs_codebase.EXPECTED_SUMMARY["files"], + }, + expected_summary=NEXTJS_EXPECTED_SNAPSHOT_SUMMARY, + required_graphs=[ + "files", + "symbols", + "imports", + "import_resolutions", + "external_modules", + "exports", + "references", + "external_references", + "dependencies", + "subclass_edges", + ], + failures=failures, + ) + + codebase_summary = { + "airflow": assert_codebase_report( + "airflow_codebase", + reports["airflow_codebase"], + min_wall_ratio=min_airflow_wall_ratio, + min_rss_ratio=min_rss_ratio, + failures=failures, + ), + "nextjs": assert_codebase_report( + "nextjs_codebase", + reports["nextjs_codebase"], + min_wall_ratio=min_nextjs_wall_ratio, + min_rss_ratio=min_rss_ratio, + failures=failures, + ), + } + 
def _format_measure(value: Any, spec: str, unit: str) -> str:
    """Format a numeric measurement with its unit, or ``"n/a"`` when absent.

    The summaries may legitimately hold ``None`` (a report without timings or
    RSS samples); applying a float format spec to ``None`` raises TypeError.
    """
    if not isinstance(value, (int, float)):
        return "n/a"
    return f"{value:{spec}}{unit}"


def print_human(report: dict[str, Any]) -> None:
    """Print a readable summary of a rollout-readiness report to stdout.

    Emits the status, the thresholds in force, and one line per codebase
    report, codemod suite and semantic-parity suite. Measurements that were
    not recorded are shown as ``n/a`` instead of aborting the summary.
    """
    print(f"status: {report['status']}")
    thresholds = report["thresholds"]
    print(
        "thresholds: "
        f"airflow_wall>={thresholds['min_airflow_wall_ratio']}x "
        f"nextjs_wall>={thresholds['min_nextjs_wall_ratio']}x "
        f"semantic_wall>={thresholds['min_semantic_wall_ratio']}x "
        f"rss>={thresholds['min_rss_ratio']}x"
    )
    for name, summary in report["codebase"].items():
        wall = _format_measure(summary["wall_seconds"], ".3f", "s")
        rss = _format_measure(summary["max_rss_mb"], ".1f", " MB")
        print(
            f"{name}: wall={wall} "
            f"rss={rss} "
            f"ratios={summary['wall_ratio']}x/{summary['rss_ratio']}x"
        )
    for summary in report["codemods"]:
        construct = _format_measure(summary["construct_wall_seconds"], ".3f", "s")
        commit = _format_measure(summary["codemod_commit_wall_seconds"], ".3f", "s")
        max_rss = _format_measure(summary["max_rss_mb"], ".1f", " MB")
        print(
            f"codemod {summary['suite']}: construct={construct} "
            f"commit={commit} "
            f"max_rss={max_rss}"
        )
    for summary in report["semantic_parity"]:
        python_wall = _format_measure(summary["python_wall_seconds"], ".3f", "s")
        python_rss = _format_measure(summary["python_max_rss_mb"], ".1f", " MB")
        rust_wall = _format_measure(summary["rust_wall_seconds"], ".3f", "s")
        rust_rss = _format_measure(summary["rust_max_rss_mb"], ".1f", " MB")
        print(
            f"semantic {summary['suite']}: exact={len(summary['exact_keys'])} "
            f"known_deltas={summary['known_delta_count']} "
            f"python={python_wall}/{python_rss} "
            f"rust={rust_wall}/{rust_rss} "
            f"ratios={summary['wall_ratio']}x/{summary['rss_ratio']}x"
        )
Defaults to {DEFAULT_MIN_SEMANTIC_WALL_RATIO}x.", + ) + parser.add_argument( + "--min-rss-ratio", + type=float, + default=DEFAULT_MIN_RSS_RATIO, + help="Fail unless recorded Python max RSS divided by Rust max RSS is at least this value.", + ) + parser.add_argument("--output", type=Path, help="Optional path to write the readiness JSON report.") + parser.add_argument("--json", action="store_true", help="Print JSON instead of a human summary.") + return parser.parse_args() + + +def main() -> int: + args = parse_args() + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n", encoding="utf-8") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/tools/check_supported_subset.py b/rust-rewrite/tools/check_supported_subset.py new file mode 100644 index 000000000..93afc2e2a --- /dev/null +++ b/rust-rewrite/tools/check_supported_subset.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import json +import subprocess +import sys +from pathlib import Path +from typing import Any + +REPO_ROOT = Path(__file__).resolve().parents[2] +DEFAULT_MANIFEST = REPO_ROOT / "rust-rewrite/supported-subset.json" + + +def load_manifest(path: Path) -> dict[str, Any]: + manifest = json.loads(path.read_text(encoding="utf-8")) + if manifest.get("schema_version") != 1: + msg = f"unsupported supported-subset schema version: {manifest.get('schema_version')!r}" + raise ValueError(msg) + if not isinstance(manifest.get("pytest_roots"), list) or not manifest["pytest_roots"]: + msg = "supported-subset manifest must define non-empty pytest_roots" + raise ValueError(msg) + if not isinstance(manifest.get("capabilities"), list) or not manifest["capabilities"]: + msg = "supported-subset manifest 
def manifest_test_ids(manifest: dict[str, Any]) -> list[str]:
    """Validate every capability entry and return all listed test ids.

    Args:
        manifest: Parsed manifest; ``manifest["capabilities"]`` is iterated.

    Returns:
        The test ids of all capabilities, flattened in manifest order.
        Duplicates are preserved so the caller can report them.

    Raises:
        ValueError: If a capability is not a mapping, has no non-empty string
            ``name``, has a ``status`` other than ``"supported_opt_in"``, has
            an empty or non-list ``scope`` or ``tests``, or lists a test id
            that is not a non-empty string.
    """
    test_ids: list[str] = []
    for capability in manifest["capabilities"]:
        # A scalar or list entry would otherwise fail on ``.get`` with an
        # AttributeError that says nothing about the manifest.
        if not isinstance(capability, dict):
            msg = f"capability must be an object: {capability!r}"
            raise ValueError(msg)
        name = capability.get("name")
        tests = capability.get("tests")
        if not isinstance(name, str) or not name:
            msg = f"capability is missing a name: {capability!r}"
            raise ValueError(msg)
        if capability.get("status") != "supported_opt_in":
            msg = f"{name}: status must be supported_opt_in"
            raise ValueError(msg)
        if not isinstance(capability.get("scope"), list) or not capability["scope"]:
            msg = f"{name}: scope must be non-empty"
            raise ValueError(msg)
        if not isinstance(tests, list) or not tests:
            msg = f"{name}: tests must be non-empty"
            raise ValueError(msg)
        # Reject malformed ids here, where the capability name is known,
        # instead of letting them break set/join operations downstream.
        invalid = [test for test in tests if not isinstance(test, str) or not test]
        if invalid:
            msg = f"{name}: tests must be non-empty strings, got {invalid!r}"
            raise ValueError(msg)
        test_ids.extend(tests)
    return test_ids
def print_human(report: dict[str, Any]) -> None:
    """Print the report's status, manifest path, capability count and test count.

    Each value is written on its own ``label: value`` line, in that order.
    """
    labelled_fields = (
        ("status", "status"),
        ("manifest", "manifest"),
        ("capabilities", "capability_count"),
        ("tests", "test_count"),
    )
    for label, report_key in labelled_fields:
        print(f"{label}: {report[report_key]}")
def load_expected_summary(path: Path) -> dict[str, int]:
    """Load the golden snapshot at ``path`` and return its gated summary counts.

    Args:
        path: JSON snapshot file with a top-level ``"summary"`` object.

    Returns:
        A dict holding exactly the ``SUMMARY_KEYS`` entries of the summary.

    Raises:
        KeyError: If the snapshot has no ``"summary"`` or the summary lacks
            any of ``SUMMARY_KEYS``; the message lists every missing key.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    snapshot = json.loads(path.read_text(encoding="utf-8"))
    summary = snapshot["summary"]
    missing = [key for key in SUMMARY_KEYS if key not in summary]
    if missing:
        msg = f"{path}: snapshot summary is missing keys: {', '.join(missing)}"
        raise KeyError(msg)
    return {key: summary[key] for key in SUMMARY_KEYS}
def run_wheel_parse(
    repo: Path,
    wheel: Path,
    args: argparse.Namespace,
    *,
    backend: str,
    env: dict[str, str],
) -> tuple[dict[str, Any], SampledRun]:
    """Run ``graph-sitter parse`` on ``repo`` as Python and return its JSON payload.

    The Rust backend is always run with ``--fallback error``; any other
    backend uses ``args.python_backend_fallback``.

    Returns:
        ``(payload, sampled)``: the parsed JSON from the command's stdout and
        the sampled-run record returned by ``run_sampled``.
    """
    if backend == "rust":
        fallback_mode = "error"
    else:
        fallback_mode = args.python_backend_fallback

    parse_options = [
        "--language",
        "python",
        "--backend",
        backend,
        "--fallback",
        fallback_mode,
        "--format",
        "json",
    ]
    argv = graph_sitter_command(wheel, args, "parse", str(repo), *parse_options)
    sampled_run = run_sampled(
        argv,
        cwd=REPO_ROOT,
        env=env,
        sample_interval=args.sample_interval,
        timeout=args.timeout,
    )
    payload = parse_json_output(sampled_run.stdout)
    return payload, sampled_run
def validate_python_payload(payload: dict[str, Any], expected_summary: dict[str, int]) -> dict[str, Any]:
    """Check that a parse payload really came from the Python backend.

    Only the backend and language markers are gated. The file count is
    reported next to the Rust golden count for information and is never a
    failure condition.

    Args:
        payload: Parsed JSON output of the Python-backend parse run.
        expected_summary: Golden Rust summary; only ``"files"`` is read.

    Returns:
        A validation record. ``python_to_rust_file_delta`` is the payload's
        file count minus the expected Rust file count, or ``None`` when the
        payload carries no numeric ``files`` value.

    Raises:
        RuntimeError: If ``backend_requested``, ``backend`` or ``language``
            do not match the Python backend parsing Python.
    """
    failures = []
    if payload.get("backend_requested") != "python":
        failures.append(f"expected backend_requested=python, got {payload.get('backend_requested')}")
    if payload.get("backend") != "python":
        failures.append(f"expected backend=python, got {payload.get('backend')}")
    if payload.get("language") != "python":
        failures.append(f"expected language=python, got {payload.get('language')}")
    if failures:
        raise RuntimeError("; ".join(failures))

    python_files = payload.get("files")
    expected_files = expected_summary["files"]
    # A missing or null count must neither crash on ``None - int`` nor be
    # silently treated as zero files; report the delta as unknown instead.
    if isinstance(python_files, (int, float)) and not isinstance(python_files, bool):
        file_delta = python_files - expected_files
    else:
        file_delta = None
    return {
        "status": "passed",
        "validated_keys": ["backend_requested", "backend", "language"],
        "expected_rust_files": expected_files,
        "python_files": python_files,
        "python_to_rust_file_delta": file_delta,
    }
python_payload["elapsed_seconds"], + "rust_parse_elapsed_seconds": rust_payload["elapsed_seconds"], + "python_outer_wall_seconds": round(python_sampled.wall_seconds, 6), + "rust_outer_wall_seconds": round(rust_sampled.wall_seconds, 6), + "python_sampled_rss_peak_mb": round(python_sampled.rss_peak_mb, 3), + "rust_sampled_rss_peak_mb": round(rust_sampled.rss_peak_mb, 3), + "min_parse_elapsed_ratio": args.min_parse_elapsed_ratio, + "min_sampled_rss_ratio": args.min_sampled_rss_ratio, + } + failures = [] + if ( + comparison["python_to_rust_parse_elapsed_ratio"] is None + or comparison["python_to_rust_parse_elapsed_ratio"] < args.min_parse_elapsed_ratio + ): + failures.append( + "parse elapsed ratio " + f"{comparison['python_to_rust_parse_elapsed_ratio']}x is below required " + f"{args.min_parse_elapsed_ratio}x" + ) + if ( + comparison["python_to_rust_sampled_rss_ratio"] is None + or comparison["python_to_rust_sampled_rss_ratio"] < args.min_sampled_rss_ratio + ): + failures.append( + "sampled RSS ratio " + f"{comparison['python_to_rust_sampled_rss_ratio']}x is below required " + f"{args.min_sampled_rss_ratio}x" + ) + if failures: + raise RuntimeError("; ".join(failures)) + comparison["status"] = "passed" + return comparison + + +def make_report(args: argparse.Namespace) -> dict[str, Any]: + repo, actual_commit = python_benchmark.prepare_pinned_repo(args) + wheel = build_wheel(args) + expected_summary = load_expected_summary(args.expected_snapshot) + + with tempfile.TemporaryDirectory(prefix="graph-sitter-uvx-airflow-") as scratch: + env = os.environ.copy() + uv_cache_dir = Path(scratch) / "uv-cache" + uv_cache_dir.mkdir() + env["UV_CACHE_DIR"] = str(uv_cache_dir) + + run(graph_sitter_command(wheel, args, "--help"), cwd=REPO_ROOT, env=env, timeout=args.timeout) + payload, rust_sampled = run_wheel_parse(repo, wheel, args, backend="rust", env=env) + python_payload = None + python_sampled = None + python_validation = None + comparison = None + transform_report = None + if 
args.compare_python_backend: + python_payload, python_sampled = run_wheel_parse(repo, wheel, args, backend="python", env=env) + python_validation = validate_python_payload(python_payload, expected_summary) + comparison = make_comparison( + rust_payload=payload, + rust_sampled=rust_sampled, + python_payload=python_payload, + python_sampled=python_sampled, + args=args, + ) + if args.run_transform_proof: + mutable_checkout = clone_mutable_checkout( + repo, + actual_commit, + destination=Path(scratch), + repo_url=args.repo_url, + timeout=args.timeout, + ) + transform = Path(scratch) / "rename_airflow.py" + write_airflow_transform(transform, renamed_function=args.transform_new_name) + transform_sampled = run_wheel_transform(mutable_checkout, transform, wheel, args, env=env) + transform_validation = validate_transform(mutable_checkout, transform_sampled, args) + transform_report = { + "process": transform_sampled.as_report(), + "renamed_function": args.transform_new_name, + "validation": transform_validation, + } + + validation = validate_rust_payload( + payload=payload, + expected_summary=expected_summary, + expected_commit=args.expected_commit, + actual_commit=actual_commit, + ) + report = { + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "checkout": str(repo), + "wheel": str(wheel), + "expected_snapshot": str(args.expected_snapshot), + "uvx_python_version": args.python_version, + "python": sys.version, + "platform": platform.platform(), + "sample_interval_seconds": args.sample_interval, + }, + "timings": { + "parse_elapsed_seconds": payload["elapsed_seconds"], + "uvx_outer_wall_seconds": round(rust_sampled.wall_seconds, 6), + "uvx_sampled_rss_peak_mb": round(rust_sampled.rss_peak_mb, 3), + }, + "parse": payload, + "rust_process": rust_sampled.as_report(), + "expected_summary": expected_summary, + "validation": validation, + } + if python_payload is not None and python_sampled is not None: + 
def print_human(report: dict[str, Any]) -> None:
    """Print a human-readable summary of an installed-wheel Airflow report.

    Always prints the repo, checkout, wheel, parse timings, selected summary
    counts and the validation line. The ratio line and the transform line
    are printed only when the report has a ``"comparison"`` or
    ``"transform"`` section respectively.
    """
    meta = report["metadata"]
    timing = report["timings"]
    counts = report["validation"]["actual_summary"]

    print(f"repo: {meta['name']} {meta['commit']}")
    print(f"checkout: {meta['checkout']}")
    print(f"wheel: {meta['wheel']}")

    parse_line = (
        f"uvx parse: elapsed={timing['parse_elapsed_seconds']:.3f}s"
        f" outer_wall={timing['uvx_outer_wall_seconds']:.3f}s"
        f" rss_peak={timing['uvx_sampled_rss_peak_mb']:.1f} MB"
    )
    print(parse_line)

    shown_counts = ("files", "symbols", "imports", "references", "dependencies", "files_with_errors")
    print("counts: " + " ".join(f"{key}={counts[key]}" for key in shown_counts))
    print("validation: matched committed Airflow Python golden summary")

    ratios = report.get("comparison")
    if ratios is not None:
        ratio_line = (
            f"installed-wheel ratios: parse_elapsed={ratios['python_to_rust_parse_elapsed_ratio']}x"
            f" outer_wall={ratios['python_to_rust_outer_wall_ratio']}x"
            f" sampled_rss={ratios['python_to_rust_sampled_rss_ratio']}x"
        )
        print(ratio_line)

    transform_section = report.get("transform")
    if transform_section is not None:
        proc = transform_section["process"]
        changed = ", ".join(transform_section["validation"]["git_status"])
        print(
            f"uvx transform: wall={proc['wall_seconds']:.3f}s"
            f" rss_peak={proc['rss_peak_mb']:.1f} MB"
            f" modified={changed}"
        )
Python golden snapshot." + ) + ) + parser.add_argument("--name", default=python_benchmark.DEFAULT_REPO_NAME) + parser.add_argument("--repo-url", default=python_benchmark.DEFAULT_REPO_URL) + parser.add_argument("--ref", default=python_benchmark.DEFAULT_REF) + parser.add_argument("--expected-commit", default=python_benchmark.DEFAULT_EXPECTED_COMMIT) + parser.add_argument("--cache-dir", type=Path, default=DEFAULT_CACHE_DIR) + parser.add_argument("--reset-checkout", action="store_true") + parser.add_argument("--skip-fetch", action="store_true") + parser.add_argument("--timeout", type=int, default=900) + parser.add_argument("--python-version", default=os.environ.get("PYTHON_VERSION", "3.13")) + parser.add_argument("--wheel", type=Path) + parser.add_argument("--expected-snapshot", type=Path, default=DEFAULT_EXPECTED_SNAPSHOT) + parser.add_argument("--sample-interval", type=float, default=0.02) + parser.add_argument("--compare-python-backend", action="store_true") + parser.add_argument("--python-backend-fallback", choices=["error", "python"], default="error") + parser.add_argument("--min-parse-elapsed-ratio", type=float, default=1.0) + parser.add_argument("--min-sampled-rss-ratio", type=float, default=1.0) + parser.add_argument("--run-transform-proof", action="store_true") + parser.add_argument("--transform-new-name", default=PYTHON_RENAMED_FUNCTION) + parser.add_argument("--output", type=Path) + parser.add_argument("--json", action="store_true") + return parser.parse_args() + + +def main() -> None: + args = parse_args() + report = make_report(args) + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text(json.dumps(report, indent=2, sort_keys=True) + "\n") + if args.json: + print(json.dumps(report, indent=2, sort_keys=True)) + else: + print_human(report) + + +if __name__ == "__main__": + main() diff --git a/rust-rewrite/tools/check_wheel_pinned_typescript_repo.py b/rust-rewrite/tools/check_wheel_pinned_typescript_repo.py new file 
@dataclass
class SampledRun:
    """Outcome of one subprocess run together with its resource measurements."""

    # argv of the command that was executed
    command: list[str]
    # elapsed wall-clock time of the run, in seconds
    wall_seconds: float
    # peak resident set size observed for the run, in MB
    rss_peak_mb: float
    # captured standard output of the command
    stdout: str
    # captured standard error of the command
    stderr: str

    def as_report(self) -> dict[str, Any]:
        """Return the JSON-friendly view of this run.

        The command is joined with single spaces, timings are rounded
        (6 decimals for seconds, 3 for MB) and stderr is stripped of
        surrounding whitespace. Standard output is not included.
        """
        rendered_command = " ".join(self.command)
        trimmed_stderr = self.stderr.strip()
        report: dict[str, Any] = {"command": rendered_command}
        report["wall_seconds"] = round(self.wall_seconds, 6)
        report["rss_peak_mb"] = round(self.rss_peak_mb, 3)
        report["stderr"] = trimmed_stderr
        return report
"dist").glob("graph_sitter-*.whl"): + wheel.unlink() + run(["uv", "build", "--wheel"], cwd=REPO_ROOT, timeout=args.timeout) + wheels = sorted( + (REPO_ROOT / "dist").glob("graph_sitter-*.whl"), + key=lambda path: path.stat().st_mtime, + reverse=True, + ) + if not wheels: + msg = "uv build --wheel did not produce a graph-sitter wheel" + raise FileNotFoundError(msg) + return wheels[0].resolve() + + +def load_expected_summary(path: Path) -> dict[str, int]: + snapshot = json.loads(path.read_text()) + summary = snapshot["summary"] + return {key: summary[key] for key in SUMMARY_KEYS} + + +def git(command: list[str], *, cwd: Path, timeout: int) -> str: + result = run(["git", *command], cwd=cwd, timeout=timeout) + return result.stdout.strip() + + +def clone_mutable_checkout(cache_repo: Path, commit: str, *, destination: Path, repo_url: str, timeout: int) -> Path: + checkout = destination / "nextjs-transform-repo" + git(["clone", "--shared", "--no-checkout", str(cache_repo), str(checkout)], cwd=REPO_ROOT, timeout=timeout) + git(["remote", "set-url", "origin", repo_url], cwd=checkout, timeout=timeout) + git(["checkout", "--detach", commit], cwd=checkout, timeout=timeout) + return checkout + + +def git_status(checkout: Path, *, timeout: int) -> list[str]: + return [line for line in git(["status", "--porcelain"], cwd=checkout, timeout=timeout).splitlines() if line] + + +def process_tree_rss(process: Any) -> int: + import psutil + + rss = 0 + processes = [process] + try: + processes.extend(process.children(recursive=True)) + except psutil.Error: + pass + for candidate in processes: + try: + rss += int(candidate.memory_info().rss) + except psutil.Error: + continue + return rss + + +def kill_process_tree(process: Any) -> None: + import psutil + + try: + children = process.children(recursive=True) + except psutil.Error: + children = [] + for child in children: + try: + child.kill() + except psutil.Error: + continue + try: + process.kill() + except psutil.Error: + pass + + +def 
def run_sampled(
    command: list[str],
    *,
    cwd: Path,
    env: dict[str, str],
    sample_interval: float,
    timeout: int,
) -> SampledRun:
    """Run ``command`` to completion while sampling the RSS of its process tree.

    Args:
        command: argv of the process to start.
        cwd: Working directory for the child.
        env: Complete environment for the child.
        sample_interval: Seconds to sleep between RSS samples.
        timeout: Wall-clock budget in seconds; the process tree is killed
            once it is exceeded.

    Returns:
        A ``SampledRun`` with the captured output, the elapsed wall time and
        the highest sampled RSS converted to MB.

    Raises:
        subprocess.TimeoutExpired: If the command outlives ``timeout``. The
            output captured so far is attached.
        RuntimeError: If the command exits with a non-zero status.
    """
    import psutil

    # Output is spooled to anonymous temporary files rather than pipes.
    # Nothing reads the child's output while the loop below is sampling, so
    # with pipes a child that writes more than the OS pipe buffer would block
    # forever and be reported as a timeout. Text mode with default settings
    # decodes with the locale encoding and universal newlines, the same as
    # ``text=True`` does for pipes.
    with tempfile.TemporaryFile(mode="w+") as stdout_spool, tempfile.TemporaryFile(mode="w+") as stderr_spool:
        started = time.perf_counter()
        process = subprocess.Popen(
            command,
            cwd=cwd,
            env=env,
            stdout=stdout_spool,
            stderr=stderr_spool,
        )
        ps_process = psutil.Process(process.pid)
        rss_peak = 0
        timed_out = False
        while process.poll() is None:
            rss_peak = max(rss_peak, process_tree_rss(ps_process))
            if time.perf_counter() - started > timeout:
                kill_process_tree(ps_process)
                # Reap the killed child so its return code is collected.
                process.wait()
                timed_out = True
                break
            time.sleep(sample_interval)
        rss_peak = max(rss_peak, process_tree_rss(ps_process))
        wall_seconds = time.perf_counter() - started

        # The child wrote through the shared file descriptors; rewind before
        # reading back what it produced.
        stdout_spool.seek(0)
        stderr_spool.seek(0)
        stdout = stdout_spool.read()
        stderr = stderr_spool.read()

    if timed_out:
        raise subprocess.TimeoutExpired(command, timeout, output=stdout, stderr=stderr)
    if process.returncode != 0:
        msg = (
            f"command failed with exit {process.returncode}: {' '.join(command)}\n"
            f"stdout:\n{stdout}\n"
            f"stderr:\n{stderr}"
        )
        raise RuntimeError(msg)
    return SampledRun(
        command=command,
        wall_seconds=wall_seconds,
        rss_peak_mb=bytes_to_mb(rss_peak),
        stdout=stdout,
        stderr=stderr,
    )
def run_wheel_transform(
    repo: Path,
    transform: Path,
    wheel: Path,
    args: argparse.Namespace,
    *,
    env: dict[str, str],
) -> SampledRun:
    """Apply the ``rename`` function of ``transform`` to ``repo`` through the wheel.

    The transform runs as TypeScript on the Rust backend with fallback
    disabled (``--fallback error``) and with ``--write``.

    Returns:
        The sampled-run record returned by ``run_sampled``.
    """
    entry_point = f"{transform}:rename"
    transform_options = [
        "--language",
        "typescript",
        "--backend",
        "rust",
        "--fallback",
        "error",
        "--write",
    ]
    argv = graph_sitter_command(wheel, args, "transform", entry_point, str(repo), *transform_options)
    sampled_run = run_sampled(
        argv,
        cwd=REPO_ROOT,
        env=env,
        sample_interval=args.sample_interval,
        timeout=args.timeout,
    )
    return sampled_run
def validate_payload(
    *,
    payload: dict[str, Any],
    expected_summary: dict[str, int],
    expected_commit: str,
    actual_commit: str,
) -> dict[str, Any]:
    """Check a Rust-backend TypeScript parse payload against the golden summary.

    All problems are collected before failing so one run reports every
    mismatch at once.

    Returns:
        A validation record with the ``SUMMARY_KEYS`` that were matched and
        the actual summary values read from ``payload``.

    Raises:
        RuntimeError: If the commit, backend markers, language, backend
            error field or any expected summary count does not match. The
            message joins all problems with ``"; "``.
    """
    problems: list[str] = []
    # An empty expected commit disables the commit check.
    if expected_commit and actual_commit != expected_commit:
        problems.append(f"expected commit {expected_commit}, got {actual_commit}")

    required_markers = (
        ("backend_requested", "rust"),
        ("backend", "rust"),
        ("language", "typescript"),
    )
    for field, wanted in required_markers:
        found = payload.get(field)
        if found != wanted:
            problems.append(f"expected {field}={wanted}, got {found}")

    backend_error = payload.get("rust_backend_error")
    if backend_error is not None:
        problems.append(f"expected no rust_backend_error, got {backend_error}")

    actual_summary = {}
    for key in SUMMARY_KEYS:
        actual_summary[key] = payload.get(key)

    count_mismatches = {}
    for key, expected in expected_summary.items():
        actual = actual_summary[key]
        if actual != expected:
            count_mismatches[key] = {"expected": expected, "actual": actual}
    if count_mismatches:
        problems.append(f"summary count mismatches: {count_mismatches}")

    if problems:
        raise RuntimeError("; ".join(problems))

    return {
        "status": "passed",
        "matched_summary_keys": list(SUMMARY_KEYS),
        "actual_summary": actual_summary,
    }
def make_comparison(
    *,
    rust_payload: dict[str, Any],
    rust_sampled: SampledRun,
    python_payload: dict[str, Any],
    python_sampled: SampledRun,
    args: argparse.Namespace,
) -> dict[str, Any]:
    """Compare the Python and Rust runs and enforce the minimum ratio gates.

    Three Python-to-Rust ratios are computed with ``ratio``: parse elapsed
    time, outer wall time and sampled peak RSS. Only the parse-elapsed and
    RSS ratios are gated; the outer-wall ratio is reported without a gate.

    Returns:
        The comparison record, with ``"status": "passed"`` added.

    Raises:
        RuntimeError: If a gated ratio is ``None`` or below its minimum from
            ``args``.
    """
    python_elapsed = python_payload["elapsed_seconds"]
    rust_elapsed = rust_payload["elapsed_seconds"]
    elapsed_ratio = ratio(python_elapsed, rust_elapsed)
    wall_ratio = ratio(python_sampled.wall_seconds, rust_sampled.wall_seconds)
    rss_ratio = ratio(python_sampled.rss_peak_mb, rust_sampled.rss_peak_mb)

    comparison = {
        "python_to_rust_parse_elapsed_ratio": elapsed_ratio,
        "python_to_rust_outer_wall_ratio": wall_ratio,
        "python_to_rust_sampled_rss_ratio": rss_ratio,
        "python_parse_elapsed_seconds": python_elapsed,
        "rust_parse_elapsed_seconds": rust_elapsed,
        "python_outer_wall_seconds": round(python_sampled.wall_seconds, 6),
        "rust_outer_wall_seconds": round(rust_sampled.wall_seconds, 6),
        "python_sampled_rss_peak_mb": round(python_sampled.rss_peak_mb, 3),
        "rust_sampled_rss_peak_mb": round(rust_sampled.rss_peak_mb, 3),
        "min_parse_elapsed_ratio": args.min_parse_elapsed_ratio,
        "min_sampled_rss_ratio": args.min_sampled_rss_ratio,
    }

    gates = (
        ("parse elapsed ratio", elapsed_ratio, args.min_parse_elapsed_ratio),
        ("sampled RSS ratio", rss_ratio, args.min_sampled_rss_ratio),
    )
    failures = []
    for label, observed, minimum in gates:
        # A ratio of None is treated as a failed gate.
        if observed is None or observed < minimum:
            failures.append(f"{label} {observed}x is below required {minimum}x")
    if failures:
        raise RuntimeError("; ".join(failures))

    comparison["status"] = "passed"
    return comparison
def print_human(report: dict[str, Any]) -> None:
    """Print a short plain-text digest of a report to stdout.

    Always prints the repo, checkout, wheel, parse timings, summary counts and
    the validation line. The ratio line is printed only when the report has a
    ``comparison`` section, and the transform line only when it has a
    ``transform`` section.
    """
    meta = report["metadata"]
    timing = report["timings"]
    counts = report["validation"]["actual_summary"]

    print("repo: {} {}".format(meta["name"], meta["commit"]))
    print("checkout: {}".format(meta["checkout"]))
    print("wheel: {}".format(meta["wheel"]))

    parse_fields = [
        f"elapsed={timing['parse_elapsed_seconds']:.3f}s",
        f"outer_wall={timing['uvx_outer_wall_seconds']:.3f}s",
        f"rss_peak={timing['uvx_sampled_rss_peak_mb']:.1f} MB",
    ]
    print("uvx parse: " + " ".join(parse_fields))

    count_keys = (
        "files",
        "symbols",
        "imports",
        "exports",
        "references",
        "dependencies",
        "files_with_errors",
    )
    print("counts: " + " ".join(f"{key}={counts[key]}" for key in count_keys))
    print("validation: matched committed Next.js TypeScript golden summary")

    ratios = report.get("comparison")
    if ratios is not None:
        # (label shown to the user, key in the comparison section)
        ratio_fields = (
            ("parse_elapsed", "python_to_rust_parse_elapsed_ratio"),
            ("outer_wall", "python_to_rust_outer_wall_ratio"),
            ("sampled_rss", "python_to_rust_sampled_rss_ratio"),
        )
        print(
            "installed-wheel ratios: "
            + " ".join(f"{label}={ratios[key]}x" for label, key in ratio_fields)
        )

    transform = report.get("transform")
    if transform is None:
        return
    proc = transform["process"]
    outcome = transform["validation"]
    print(
        f"uvx transform: wall={proc['wall_seconds']:.3f}s "
        f"rss_peak={proc['rss_peak_mb']:.1f} MB "
        f"modified={', '.join(outcome['git_status'])}"
    )
Pass an empty string to disable.", + ) + parser.add_argument( + "--cache-dir", + type=Path, + default=DEFAULT_CACHE_DIR, + help="Directory for reusable pinned checkouts.", + ) + parser.add_argument( + "--reset-checkout", + action="store_true", + help="Delete and recreate the cached checkout before running.", + ) + parser.add_argument( + "--skip-fetch", + action="store_true", + help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.", + ) + parser.add_argument( + "--timeout", + type=int, + default=900, + help="Timeout in seconds for clone/build/uvx child commands.", + ) + parser.add_argument( + "--python-version", + default=os.environ.get("PYTHON_VERSION", "3.13"), + help="Python version passed to uvx.", + ) + parser.add_argument( + "--wheel", + type=Path, + help="Existing wheel to test. If omitted, the script builds one with uv build --wheel.", + ) + parser.add_argument( + "--expected-snapshot", + type=Path, + default=DEFAULT_EXPECTED_SNAPSHOT, + help="Committed compact TypeScript golden snapshot to compare summary counts against.", + ) + parser.add_argument( + "--sample-interval", + type=float, + default=0.02, + help="RSS sampling interval for uvx process-tree measurements.", + ) + parser.add_argument( + "--compare-python-backend", + action="store_true", + help="Also run the installed wheel with --backend python and compare wall/RSS against strict Rust.", + ) + parser.add_argument( + "--python-backend-fallback", + choices=["error", "python"], + default="error", + help="Fallback flag passed to the Python backend parse baseline.", + ) + parser.add_argument( + "--min-parse-elapsed-ratio", + type=float, + default=1.0, + help="Minimum Python/Rust parse elapsed ratio when --compare-python-backend is enabled.", + ) + parser.add_argument( + "--min-sampled-rss-ratio", + type=float, + default=1.0, + help="Minimum Python/Rust sampled process-tree RSS ratio when --compare-python-backend is enabled.", + ) + parser.add_argument( + 
def main() -> None:
    """CLI entry point: build the report, optionally save it, then print it.

    The report is written to ``--output`` when given (parent directories are
    created as needed). It is then printed as JSON when ``--json`` is set, or
    as the human-readable summary otherwise.
    """
    args = parse_args()
    report = make_report(args)

    # Serialize once; both the file and the --json output use the same text.
    rendered = json.dumps(report, indent=2, sort_keys=True) if (args.output or args.json) else None

    if args.output:
        args.output.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the artifact does not depend on the locale.
        args.output.write_text(rendered + "\n", encoding="utf-8")
    if args.json:
        print(rendered)
    else:
        print_human(report)
-f "$WHEEL" ]]; then + echo "Wheel does not exist: $WHEEL" >&2 + exit 1 + fi +else + rm -f dist/graph_sitter-*.whl + uv build --wheel + WHEEL="$(ls -t dist/graph_sitter-*.whl | head -n 1)" +fi + +if [[ -z "$WHEEL" ]]; then + echo "No graph-sitter wheel was built" >&2 + exit 1 +fi +WHEEL="$(run_python -c 'from pathlib import Path; import sys; print(Path(sys.argv[1]).resolve())' "$WHEEL")" + +SCRATCH="$(mktemp -d)" +trap 'rm -rf "$SCRATCH"' EXIT + +UV_CACHE_DIR="$SCRATCH/uv-cache" +mkdir -p "$UV_CACHE_DIR" +export UV_CACHE_DIR +INVOKE_DIR="$SCRATCH/invoke" +mkdir -p "$INVOKE_DIR" + +run_graph_sitter() { + (cd "$INVOKE_DIR" && env -u PYTHONPATH uvx --python "$PYTHON_VERSION" --from "$WHEEL" graph-sitter "$@") +} + +run_gs() { + (cd "$INVOKE_DIR" && env -u PYTHONPATH uvx --python "$PYTHON_VERSION" --from "$WHEEL" gs "$@") +} + +run_python - "$WHEEL" <<'PY' +import configparser +from pathlib import Path +import sys +import zipfile + +wheel = sys.argv[1] +with zipfile.ZipFile(wheel) as archive: + names = archive.namelist() + if not any(name.startswith("graph_sitter_py") and name.endswith((".so", ".pyd")) for name in names): + msg = f"wheel does not include graph_sitter_py extension: {wheel}" + raise AssertionError(msg) + if "codemods/codemod.py" not in names: + msg = f"wheel does not include codemods package: {wheel}" + raise AssertionError(msg) + entrypoint_names = [name for name in names if name.endswith(".dist-info/entry_points.txt")] + if len(entrypoint_names) != 1: + msg = f"wheel does not include exactly one entry_points.txt file: {wheel}" + raise AssertionError(msg) + entrypoints = configparser.ConfigParser() + entrypoints.read_string(archive.read(entrypoint_names[0]).decode()) + console_scripts = entrypoints["console_scripts"] + expected_script = "graph_sitter.cli.cli:main" + for script_name in ["graph-sitter", "gs"]: + if console_scripts.get(script_name) != expected_script: + msg = f"wheel console script {script_name!r} does not point at {expected_script}: 
{wheel}" + raise AssertionError(msg) + wheel_metadata_names = [name for name in names if name.endswith(".dist-info/WHEEL")] + if len(wheel_metadata_names) != 1: + msg = f"wheel does not include exactly one WHEEL metadata file: {wheel}" + raise AssertionError(msg) + wheel_metadata = archive.read(wheel_metadata_names[0]).decode() + tags = [ + line.removeprefix("Tag: ").strip() + for line in wheel_metadata.splitlines() + if line.startswith("Tag: ") + ] + if "Root-Is-Purelib: false" not in wheel_metadata: + msg = f"wheel metadata still marks the Rust-backed artifact pure: {wheel}" + raise AssertionError(msg) + if not tags or any(tag.endswith("-none-any") for tag in tags): + msg = f"wheel metadata includes misleading pure-Python tags {tags}: {wheel}" + raise AssertionError(msg) + if Path(wheel).name.endswith("-none-any.whl"): + msg = f"wheel filename includes a pure-Python tag despite graph_sitter_py: {wheel}" + raise AssertionError(msg) +PY + +REPO="$SCRATCH/repo" +git init "$REPO" >/dev/null +git -C "$REPO" config user.email test@example.com +git -C "$REPO" config user.name "Test User" +mkdir -p "$REPO/pkg" +printf '' > "$REPO/pkg/__init__.py" +cat > "$REPO/pkg/service.py" <<'PY' +import os + + +class Service: + pass + + +def run(): + return os.getcwd() +PY +git -C "$REPO" add . 
+git -C "$REPO" commit -m initial >/dev/null + +run_graph_sitter --help >/dev/null +run_gs --help >/dev/null + +DOCTOR_PYTHON_OUTPUT="$(run_graph_sitter doctor --backend rust --language python --json)" +run_python - "$DOCTOR_PYTHON_OUTPUT" <<'PY' +import json +import sys + +payload = json.loads(sys.argv[1]) +assert payload["ok"] is True, payload +assert payload["backend_requested"] == "rust", payload +assert payload["language_requested"] == "python", payload +assert payload["rust_extension"]["ok"] is True, payload +smoke = payload["rust_parse_smoke"] +assert smoke["ok"] is True, payload +assert smoke["backend"] == "rust", payload +assert smoke["language"] == "python", payload +assert smoke["files"] == 1, payload +assert smoke["symbols"] == 1, payload +assert smoke["files_with_errors"] == 0, payload +assert smoke["rust_backend_error"] in (None, ""), payload +PY + +DOCTOR_TYPESCRIPT_OUTPUT="$(run_graph_sitter doctor --backend rust --language typescript --json)" +run_python - "$DOCTOR_TYPESCRIPT_OUTPUT" <<'PY' +import json +import sys + +payload = json.loads(sys.argv[1]) +assert payload["ok"] is True, payload +assert payload["backend_requested"] == "rust", payload +assert payload["language_requested"] == "typescript", payload +assert payload["rust_extension"]["ok"] is True, payload +smoke = payload["rust_parse_smoke"] +assert smoke["ok"] is True, payload +assert smoke["backend"] == "rust", payload +assert smoke["language"] == "typescript", payload +assert smoke["files"] == 1, payload +assert smoke["symbols"] == 1, payload +assert smoke["files_with_errors"] == 0, payload +assert smoke["rust_backend_error"] in (None, ""), payload +PY + +PYTHON_OUTPUT="$(run_graph_sitter parse "$REPO" --language python --backend python --format json)" +run_python - "$PYTHON_OUTPUT" <<'PY' +import json +import sys + +payload = json.loads(sys.argv[1]) +assert payload["backend"] == "python", payload +assert payload["backend_requested"] == "python", payload +assert payload["files"] == 2, 
payload +assert payload["classes"] == 1, payload +assert payload["functions"] == 1, payload +assert payload["imports"] == 1, payload +PY + +OUTPUT="$(run_graph_sitter parse "$REPO" --language python --backend rust --fallback error --format json)" +run_python - "$OUTPUT" "$REPO" <<'PY' +import json +from pathlib import Path +import sys + +payload = json.loads(sys.argv[1]) +repo = Path(sys.argv[2]).resolve() +assert payload["schema_version"] == 1, payload +assert Path(payload["path"]).resolve() == repo, payload +assert payload["backend"] == "rust", payload +assert payload["backend_requested"] == "rust", payload +assert payload["language"] == "python", payload +assert isinstance(payload["elapsed_seconds"], float), payload +assert payload["elapsed_seconds"] >= 0, payload +assert payload["subdirectories"] is None, payload +assert payload["files"] == 2, payload +assert payload["classes"] == 1, payload +assert payload["functions"] == 1, payload +assert payload["imports"] == 1, payload +assert payload["rust_backend_error"] in (None, ""), payload +PY + +PARSE_OUTPUT_FILE="$SCRATCH/python-rust-parse.json" +PARSE_STDOUT="$(run_graph_sitter parse "$REPO" --language python --backend rust --fallback error --format json --subdir pkg --output "$PARSE_OUTPUT_FILE")" +if [[ -n "$PARSE_STDOUT" ]]; then + echo "Expected parse --output stdout to be empty" >&2 + echo "$PARSE_STDOUT" >&2 + exit 1 +fi +run_python - "$PARSE_OUTPUT_FILE" "$REPO" <<'PY' +import json +from pathlib import Path +import sys + +output_path = Path(sys.argv[1]) +repo = Path(sys.argv[2]).resolve() +raw = output_path.read_bytes() +assert raw.endswith(b"\n"), raw +payload = json.loads(raw) +assert payload["schema_version"] == 1, payload +assert Path(payload["path"]).resolve() == repo, payload +assert payload["backend_requested"] == "rust", payload +assert payload["backend"] == "rust", payload +assert payload["language"] == "python", payload +assert isinstance(payload["elapsed_seconds"], float), payload +assert 
payload["elapsed_seconds"] >= 0, payload +assert payload["subdirectories"] == ["pkg/"], payload +assert payload["files"] == 2, payload +assert payload["classes"] == 1, payload +assert payload["functions"] == 1, payload +assert payload["imports"] == 1, payload +assert payload["rust_backend_error"] in (None, ""), payload +PY + +TRANSFORM="$SCRATCH/rename_transform.py" +cat > "$TRANSFORM" <<'PY' +def rename(codebase): + function = codebase.get_function("run") + function.rename("renamed") + codebase.commit() +PY + +set +e +CHECK_OUTPUT="$(run_graph_sitter transform "${TRANSFORM}:rename" "$REPO" --language python --backend rust --fallback error --check 2>&1)" +CHECK_STATUS=$? +set -e +if [[ "$CHECK_STATUS" -ne 1 ]]; then + echo "Expected transform --check to exit 1 when changes would be produced; got $CHECK_STATUS" >&2 + echo "$CHECK_OUTPUT" >&2 + exit 1 +fi +if [[ "$CHECK_OUTPUT" != *"Codemod would produce changes"* ]]; then + echo "Expected transform --check output to mention produced changes" >&2 + echo "$CHECK_OUTPUT" >&2 + exit 1 +fi +if ! git -C "$REPO" diff --quiet; then + echo "transform --check mutated the target repository" >&2 + git -C "$REPO" diff --name-only >&2 + exit 1 +fi +if ! grep -q "def run():" "$REPO/pkg/service.py"; then + echo "transform --check mutated the target repository" >&2 + exit 1 +fi + +WRITE_OUTPUT="$(run_graph_sitter transform "${TRANSFORM}:rename" "$REPO" --language python --backend rust --fallback error --write)" +if [[ "$WRITE_OUTPUT" != *"Changes have been applied"* ]]; then + echo "Expected transform --write output to mention applied changes" >&2 + echo "$WRITE_OUTPUT" >&2 + exit 1 +fi +if ! 
grep -q "def renamed():" "$REPO/pkg/service.py"; then + echo "transform --write did not update the target repository" >&2 + exit 1 +fi +WRITE_CHANGED_FILES="$(git -C "$REPO" diff --name-only)" +if [[ "$WRITE_CHANGED_FILES" != "pkg/service.py" ]]; then + echo "transform --write changed unexpected files" >&2 + echo "$WRITE_CHANGED_FILES" >&2 + exit 1 +fi + +REGISTERED_REPO="$SCRATCH/registered-repo" +git init "$REGISTERED_REPO" >/dev/null +git -C "$REGISTERED_REPO" config user.email test@example.com +git -C "$REGISTERED_REPO" config user.name "Test User" +mkdir -p "$REGISTERED_REPO/pkg" "$REGISTERED_REPO/.codegen/codemods/rename" +printf '' > "$REGISTERED_REPO/pkg/__init__.py" +cat > "$REGISTERED_REPO/pkg/app.py" <<'PY' +def target(): + return 1 +PY +cat > "$REGISTERED_REPO/.codegen/codemods/rename/rename.py" <<'PY' +import graph_sitter + + +@graph_sitter.function("rename-target") +def run(codebase): + function = codebase.get_function("target") + function.rename("renamed_target") + codebase.commit() +PY +git -C "$REGISTERED_REPO" add . +git -C "$REGISTERED_REPO" commit -m initial >/dev/null + +set +e +RUN_CHECK_OUTPUT="$(run_graph_sitter run rename-target "$REGISTERED_REPO" --language python --backend rust --fallback error --check 2>&1)" +RUN_CHECK_STATUS=$? +set -e +if [[ "$RUN_CHECK_STATUS" -ne 1 ]]; then + echo "Expected registered run --check to exit 1 when changes would be produced; got $RUN_CHECK_STATUS" >&2 + echo "$RUN_CHECK_OUTPUT" >&2 + exit 1 +fi +if [[ "$RUN_CHECK_OUTPUT" != *"Codemod would produce changes"* ]]; then + echo "Expected registered run --check output to mention produced changes" >&2 + echo "$RUN_CHECK_OUTPUT" >&2 + exit 1 +fi +if ! git -C "$REGISTERED_REPO" diff --quiet; then + echo "registered run --check mutated the target repository" >&2 + git -C "$REGISTERED_REPO" diff --name-only >&2 + exit 1 +fi +if ! 
grep -q "def target():" "$REGISTERED_REPO/pkg/app.py"; then + echo "registered run --check mutated the target repository" >&2 + exit 1 +fi + +RUN_WRITE_OUTPUT="$(run_graph_sitter run rename-target "$REGISTERED_REPO" --language python --backend rust --fallback error --write)" +if [[ "$RUN_WRITE_OUTPUT" != *"Changes have been applied"* ]]; then + echo "Expected registered run --write output to mention applied changes" >&2 + echo "$RUN_WRITE_OUTPUT" >&2 + exit 1 +fi +if ! grep -q "def renamed_target():" "$REGISTERED_REPO/pkg/app.py"; then + echo "registered run --write did not update the target repository" >&2 + exit 1 +fi +RUN_WRITE_CHANGED_FILES="$(git -C "$REGISTERED_REPO" diff --name-only)" +if [[ "$RUN_WRITE_CHANGED_FILES" != "pkg/app.py" ]]; then + echo "registered run --write changed unexpected files" >&2 + echo "$RUN_WRITE_CHANGED_FILES" >&2 + exit 1 +fi + +REGISTERED_SUBDIR_REPO="$SCRATCH/registered-subdir-repo" +git init "$REGISTERED_SUBDIR_REPO" >/dev/null +git -C "$REGISTERED_SUBDIR_REPO" config user.email test@example.com +git -C "$REGISTERED_SUBDIR_REPO" config user.name "Test User" +mkdir -p "$REGISTERED_SUBDIR_REPO/src" "$REGISTERED_SUBDIR_REPO/tests" "$REGISTERED_SUBDIR_REPO/.codegen/codemods/scoped" +cat > "$REGISTERED_SUBDIR_REPO/src/app.py" <<'PY' +def target(): + return 1 +PY +cat > "$REGISTERED_SUBDIR_REPO/tests/test_app.py" <<'PY' +def target(): + return 2 +PY +cat > "$REGISTERED_SUBDIR_REPO/.codegen/codemods/scoped/scoped.py" <<'PY' +import graph_sitter + + +@graph_sitter.function("assert-scoped") +def run(codebase): + filepaths = [file.filepath for file in codebase.files] + if any(filepath.endswith("tests/test_app.py") for filepath in filepaths): + raise AssertionError(f"unscoped parse: {filepaths}") + function = codebase.get_function("target") + function.rename("renamed_target") + codebase.commit() +PY +git -C "$REGISTERED_SUBDIR_REPO" add . 
+git -C "$REGISTERED_SUBDIR_REPO" commit -m initial >/dev/null + +set +e +RUN_SUBDIR_CHECK_OUTPUT="$(run_graph_sitter run assert-scoped "$REGISTERED_SUBDIR_REPO" --language python --backend rust --fallback error --subdir src --check 2>&1)" +RUN_SUBDIR_CHECK_STATUS=$? +set -e +if [[ "$RUN_SUBDIR_CHECK_STATUS" -ne 1 ]]; then + echo "Expected registered run --subdir --check to exit 1 when changes would be produced; got $RUN_SUBDIR_CHECK_STATUS" >&2 + echo "$RUN_SUBDIR_CHECK_OUTPUT" >&2 + exit 1 +fi +if [[ "$RUN_SUBDIR_CHECK_OUTPUT" != *"Codemod would produce changes"* ]]; then + echo "Expected registered run --subdir --check output to mention produced changes" >&2 + echo "$RUN_SUBDIR_CHECK_OUTPUT" >&2 + exit 1 +fi +if [[ "$RUN_SUBDIR_CHECK_OUTPUT" == *"unscoped parse"* ]]; then + echo "registered run --subdir --check did not preserve scoped parsing in the sandbox" >&2 + echo "$RUN_SUBDIR_CHECK_OUTPUT" >&2 + exit 1 +fi +if ! git -C "$REGISTERED_SUBDIR_REPO" diff --quiet; then + echo "registered run --subdir --check mutated the target repository" >&2 + git -C "$REGISTERED_SUBDIR_REPO" diff --name-only >&2 + exit 1 +fi +if ! grep -q "def target():" "$REGISTERED_SUBDIR_REPO/src/app.py"; then + echo "registered run --subdir --check mutated the selected target file" >&2 + exit 1 +fi +if ! grep -q "def target():" "$REGISTERED_SUBDIR_REPO/tests/test_app.py"; then + echo "registered run --subdir --check mutated the unselected target file" >&2 + exit 1 +fi + +TS_REPO="$SCRATCH/typescript-repo" +git init "$TS_REPO" >/dev/null +git -C "$TS_REPO" config user.email test@example.com +git -C "$TS_REPO" config user.name "Test User" +mkdir -p "$TS_REPO/src" +cat > "$TS_REPO/src/util.ts" <<'TS' +export function helper() { + return 1; +} +TS +cat > "$TS_REPO/src/app.ts" <<'TS' +import { helper } from './util'; + +export function run() { + return helper(); +} +TS +git -C "$TS_REPO" add . 
+git -C "$TS_REPO" commit -m initial >/dev/null + +TS_OUTPUT="$(run_graph_sitter parse "$TS_REPO" --language typescript --backend rust --fallback error --format json)" +run_python - "$TS_OUTPUT" "$TS_REPO" <<'PY' +import json +from pathlib import Path +import sys + +payload = json.loads(sys.argv[1]) +repo = Path(sys.argv[2]).resolve() +assert payload["schema_version"] == 1, payload +assert Path(payload["path"]).resolve() == repo, payload +assert payload["backend"] == "rust", payload +assert payload["backend_requested"] == "rust", payload +assert payload["language"] == "typescript", payload +assert isinstance(payload["elapsed_seconds"], float), payload +assert payload["elapsed_seconds"] >= 0, payload +assert payload["subdirectories"] is None, payload +assert payload["files"] == 2, payload +assert payload["symbols"] == 2, payload +assert payload["classes"] == 0, payload +assert payload["functions"] == 2, payload +assert payload["imports"] == 1, payload +assert payload["exports"] == 2, payload +assert payload["references"] == 1, payload +assert payload["dependencies"] == 1, payload +assert payload["files_with_errors"] == 0, payload +assert payload["rust_backend_error"] in (None, ""), payload +PY + +TS_TRANSFORM="$SCRATCH/rename_ts_transform.py" +cat > "$TS_TRANSFORM" <<'PY' +def rename(codebase): + function = codebase.get_function("run") + function.rename("renamedRun") + codebase.commit() +PY + +set +e +TS_CHECK_OUTPUT="$(run_graph_sitter transform "${TS_TRANSFORM}:rename" "$TS_REPO" --language typescript --backend rust --fallback error --check 2>&1)" +TS_CHECK_STATUS=$? 
+set -e +if [[ "$TS_CHECK_STATUS" -ne 1 ]]; then + echo "Expected TypeScript transform --check to exit 1 when changes would be produced; got $TS_CHECK_STATUS" >&2 + echo "$TS_CHECK_OUTPUT" >&2 + exit 1 +fi +if [[ "$TS_CHECK_OUTPUT" != *"Codemod would produce changes"* ]]; then + echo "Expected TypeScript transform --check output to mention produced changes" >&2 + echo "$TS_CHECK_OUTPUT" >&2 + exit 1 +fi +if ! git -C "$TS_REPO" diff --quiet; then + echo "TypeScript transform --check mutated the target repository" >&2 + git -C "$TS_REPO" diff --name-only >&2 + exit 1 +fi +if ! grep -q "export function run()" "$TS_REPO/src/app.ts"; then + echo "TypeScript transform --check mutated the target repository" >&2 + exit 1 +fi + +TS_WRITE_OUTPUT="$(run_graph_sitter transform "${TS_TRANSFORM}:rename" "$TS_REPO" --language typescript --backend rust --fallback error --write)" +if [[ "$TS_WRITE_OUTPUT" != *"Changes have been applied"* ]]; then + echo "Expected TypeScript transform --write output to mention applied changes" >&2 + echo "$TS_WRITE_OUTPUT" >&2 + exit 1 +fi +if ! 
def sample_process(command: list[str], *, cwd: Path, sample_interval: float) -> SampledProcess:
    """Run ``command`` to completion while sampling its resident set size.

    The child's stdout and stderr go to temporary files rather than pipes. A
    pipe that is not read while this function polls would block the child
    forever once it filled the pipe buffer; files have no such limit.

    Args:
        command: Program and arguments to execute.
        cwd: Working directory for the child process.
        sample_interval: Seconds to sleep between RSS samples.

    Returns:
        A ``SampledProcess`` with the wall time (measured from just before the
        child is spawned), the peak sampled RSS in MB rounded to 3 decimals,
        and the captured stdout/stderr text.

    Raises:
        RuntimeError: If the child exits with a non-zero status; the message
            includes the exit code, the command and the captured stderr.
    """
    import psutil

    with tempfile.TemporaryFile(mode="w+") as stdout_file, tempfile.TemporaryFile(mode="w+") as stderr_file:
        start = time.perf_counter()
        process = subprocess.Popen(command, cwd=cwd, stdout=stdout_file, stderr=stderr_file)
        try:
            ps_process = psutil.Process(process.pid)
            rss_peak = 0
            while process.poll() is None:
                try:
                    rss_peak = max(rss_peak, int(ps_process.memory_info().rss))
                except psutil.NoSuchProcess:
                    # The child disappeared between poll() and the sample.
                    break
                time.sleep(sample_interval)
            process.wait()
            # One last best-effort sample, kept from the original flow.
            try:
                rss_peak = max(rss_peak, int(ps_process.memory_info().rss))
            except psutil.NoSuchProcess:
                pass
            wall = time.perf_counter() - start
        finally:
            # Do not leave the child running if sampling was interrupted
            # (for example by KeyboardInterrupt or an unexpected psutil error).
            if process.poll() is None:
                process.kill()
                process.wait()

        stdout_file.seek(0)
        stdout = stdout_file.read()
        stderr_file.seek(0)
        stderr = stderr_file.read()

    if process.returncode != 0:
        msg = f"command failed with exit {process.returncode}: {' '.join(command)}\n{stderr}"
        raise RuntimeError(msg)
    return SampledProcess(
        command=command,
        wall_seconds=wall,
        rss_peak_mb=round(bytes_to_mb(rss_peak), 3),
        stdout=stdout,
        stderr=stderr,
    )
def ratio(numerator: float, denominator: float) -> float | None:
    """Return ``numerator / denominator`` rounded to 3 decimals.

    Returns ``None`` when ``denominator`` is zero or negative.
    """
    return None if denominator <= 0 else round(numerator / denominator, 3)
def print_human(report: dict[str, Any]) -> None:
    """Print a five-line plain-text summary of a comparison report to stdout.

    The lines are: repo path, the Python ``disable_graph`` setting, Python
    backend figures, Rust index figures, and the Python/Rust ratios.
    """
    meta = report["metadata"]
    figures = report["comparison"]
    graph = report["python_backend"]["graph"]
    index = report["rust_index"]["summary"]

    print("repo: {}".format(meta["repo_path"]))
    print("python disable_graph: {}".format(meta["python_disable_graph"]))

    python_parts = [
        f"wall={figures['python_wall_seconds']:.3f}s",
        f"max_rss={figures['python_max_rss_mb']:.1f} MB",
        f"nodes={graph['nodes']}",
        f"edges={graph['edges']}",
        f"file_nodes={graph['source_file_nodes_total']}",
    ]
    print("python backend: " + " ".join(python_parts))

    rust_parts = [
        f"wall={figures['rust_index_wall_seconds']:.3f}s",
        f"process_wall={figures['rust_process_wall_seconds']:.3f}s",
        f"rss_peak={figures['rust_sampled_rss_peak_mb']:.1f} MB",
    ]
    summary_keys = (
        "files",
        "symbols",
        "global_variables",
        "imports",
        "import_resolutions",
        "references",
        "dependencies",
    )
    rust_parts.extend(f"{key}={index[key]}" for key in summary_keys)
    print("rust index: " + " ".join(rust_parts))

    # (label shown to the user, key in the comparison section)
    ratio_fields = (
        ("wall", "python_to_rust_wall_ratio"),
        ("process_wall", "python_to_rust_process_wall_ratio"),
        ("rss", "python_to_rust_peak_rss_ratio"),
    )
    print("ratios: " + " ".join(f"{label}={figures[key]}x" for label, key in ratio_fields))
def main() -> int:
    """Run the Python-vs-Rust comparison and emit its report.

    The report is written as JSON to ``--output`` when that option is given,
    then printed to stdout either as JSON (``--json``) or as the human
    summary. Always returns ``0``.
    """
    options = parse_args()
    report = make_report(options)

    def as_json() -> str:
        return json.dumps(report, indent=2, sort_keys=True)

    target = options.output
    if target:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(as_json() + "\n", encoding="utf-8")

    if not options.json:
        print_human(report)
    else:
        print(as_json())
    return 0
def max_rss_bytes() -> int:
    """Return this process's peak resident set size in bytes.

    ``ru_maxrss`` from ``getrusage(RUSAGE_SELF)`` is used unchanged when
    ``sys.platform`` is ``"darwin"`` and multiplied by 1024 everywhere else.
    """
    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    scale = 1 if sys.platform == "darwin" else 1024
    return int(peak * scale)
def parse_args() -> argparse.Namespace:
    """Parse the command line for the Rust-backend Codebase measurement.

    Options are declared in a table and registered in order: an optional
    positional ``repo`` (default ``"."``), ``--language``, ``--extension-dir``,
    ``--output`` and ``--json``.
    """
    option_table = (
        (("repo",), {"nargs": "?", "default": ".", "help": "Path to the repository to index."}),
        (
            ("--language",),
            {"choices": ["python", "typescript"], "default": "python", "help": "Codebase language to index."},
        ),
        (
            ("--extension-dir",),
            {"type": Path, "help": "Optional directory containing a built graph_sitter_py extension module."},
        ),
        (("--output",), {"type": Path, "help": "Optional path to write JSON report."}),
        (("--json",), {"action": "store_true", "help": "Print JSON report instead of a human summary."}),
    )
    cli = argparse.ArgumentParser(
        description="Measure Codebase construction with the opt-in compact Rust backend."
    )
    for names, settings in option_table:
        cli.add_argument(*names, **settings)
    return cli.parse_args()
def main() -> int:
    """Measure Codebase construction with the Rust backend and emit the report.

    When ``--extension-dir`` is given, its resolved path is put at the front
    of ``sys.path`` before the report is built. The report is written as JSON
    to ``--output`` when requested, then printed as JSON (``--json``) or as
    the human summary. Always returns ``0``.
    """
    cli = parse_args()

    extension_dir = cli.extension_dir
    if extension_dir is not None:
        sys.path.insert(0, str(extension_dir.resolve()))

    repo_root = Path(cli.repo).expanduser().resolve()
    report = make_report(repo_root, language=cli.language)

    if cli.output:
        destination = cli.output
        destination.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(report, indent=2, sort_keys=True) + "\n"
        destination.write_text(serialized, encoding="utf-8")

    if cli.json:
        print(json.dumps(report, indent=2, sort_keys=True))
        return 0
    print_human(report)
    return 0
def patch_iter_files(recorder: Recorder, patches: list[tuple[Any, str, Any]], repo_operator_cls: Any) -> None:
    """Replace ``repo_operator_cls.iter_files`` with a timed wrapper.

    Each ``next()`` on the underlying iterator runs inside
    ``recorder.measure("repo_iter_files")``, so only the time spent producing
    an item is attributed to that phase, not the time the consumer spends
    between items. The number of items handed out is reported through
    ``recorder.add_counter("repo_iter_files", "items_yielded", ...)``.

    Args:
        recorder: object providing ``measure(phase)`` (a context manager) and
            ``add_counter(phase, key, value)``.
        patches: list that receives ``(owner, attribute, original)`` so the
            patch can be undone later.
        repo_operator_cls: class whose ``iter_files`` attribute is replaced.
    """
    original = repo_operator_cls.iter_files

    @wraps(original)
    def wrapped(self, *args, **kwargs):
        # iter() is a no-op for generators and lets next() work if the
        # original ever returns a plain iterable such as a list.
        iterator = iter(original(self, *args, **kwargs))

        def measured_iterator():
            yielded = 0
            try:
                while True:
                    try:
                        with recorder.measure("repo_iter_files"):
                            item = next(iterator)
                    except StopIteration:
                        break
                    yielded += 1
                    yield item
            finally:
                # Record the counter even when the consumer stops early
                # (generator closed) or the underlying iterator raises.
                recorder.add_counter("repo_iter_files", "items_yielded", yielded)

        return measured_iterator()

    repo_operator_cls.iter_files = wrapped
    patches.append((repo_operator_cls, "iter_files", original))
def restore_patches(patches: list[tuple[Any, str, Any]]) -> None:
    """Undo recorded monkeypatches, most recently recorded first.

    Each entry is ``(owner, attribute_name, original_value)``; the original
    value is put back with ``setattr``. The list itself is not modified.
    """
    position = len(patches)
    while position > 0:
        position -= 1
        target, attribute, saved = patches[position]
        setattr(target, attribute, saved)
def summarize_objects(skip: bool) -> dict[str, Any] | None:
    """Count live, gc-tracked objects whose class module starts with ``graph_sitter``.

    Returns ``None`` when ``skip`` is true. Otherwise a garbage collection is
    forced first, then the result holds the total count under
    ``"graph_sitter_objects"`` and the 30 most common
    ``module.qualname`` entries under ``"top_classes"``.
    """
    if skip:
        return None
    gc.collect()

    def owned_class_names():
        for tracked in gc.get_objects():
            kind = type(tracked)
            origin = getattr(kind, "__module__", "")
            # Some types expose a non-string __module__; those are ignored.
            if isinstance(origin, str) and origin.startswith("graph_sitter"):
                yield f"{origin}.{kind.__qualname__}"

    tally: Counter[str] = Counter(owned_class_names())
    return {
        "graph_sitter_objects": sum(tally.values()),
        "top_classes": tally.most_common(30),
    }
def make_report(args: argparse.Namespace) -> dict[str, Any]:
    """Build a Codebase under instrumentation and return the measurement report.

    Instrumentation is installed and the RSS sampler started before
    ``build_codebase`` runs inside the ``codebase_construct`` phase. The
    sampler is stopped and the patches are restored even if construction
    fails. The report holds run metadata, wall/RSS totals, per-phase
    statistics, a graph summary and (unless skipped) object counts.

    When ``build_codebase`` generated a fixture in a temporary directory, that
    directory is removed in a ``finally`` block, so it is also cleaned up when
    assembling the report raises.
    """
    recorder = Recorder(sample_interval=args.sample_interval)
    patches = install_instrumentation(recorder)
    rss_start = current_rss_bytes()
    start = time.perf_counter()
    recorder.start()
    temp_dir = None
    try:
        try:
            with recorder.measure("codebase_construct"):
                codebase, repo_path, generated_fixture, temp_dir = build_codebase(args)
        finally:
            recorder.stop()
            restore_patches(patches)
        wall = time.perf_counter() - start
        rss_end = current_rss_bytes()

        return {
            "metadata": {
                "repo_path": str(repo_path),
                "generated_fixture": generated_fixture,
                "language": args.language,
                "disable_graph": args.disable_graph,
                "python": sys.version,
                "platform": platform.platform(),
                "sample_interval_seconds": args.sample_interval,
                "command": " ".join(sys.argv),
            },
            "totals": {
                "wall_seconds": round(wall, 6),
                "rss_start_mb": round(bytes_to_mb(rss_start), 3),
                "rss_end_mb": round(bytes_to_mb(rss_end), 3),
                "rss_peak_sampled_mb": round(bytes_to_mb(recorder.rss_peak_bytes), 3),
                "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3),
            },
            "phases": recorder.as_jsonable(),
            "graph": summarize_graph(codebase),
            "objects": summarize_objects(args.skip_object_counts),
        }
    finally:
        # The fixture must outlive summarize_graph/summarize_objects, so it is
        # removed only after the report is assembled (or that assembly failed).
        if temp_dir is not None:
            temp_dir.cleanup()
def main() -> int:
    """Measure the Python backend and emit the resulting report.

    The report is written as JSON to ``--output`` when that option is given,
    then printed as JSON (``--json``) or as the human summary. Always returns
    ``0``.
    """
    namespace = parse_args()
    report = make_report(namespace)
    pretty = {"indent": 2, "sort_keys": True}

    if namespace.output:
        out_path = namespace.output
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(json.dumps(report, **pretty) + "\n", encoding="utf-8")

    if namespace.json:
        print(json.dumps(report, **pretty))
        return 0
    print_human(report)
    return 0
def discover_python_files(repo: Path) -> tuple[Path, list[str]]:
    """List the Python source files that the project's repo operator yields.

    Builds a ``ProjectConfig`` for ``repo`` and iterates its repo operator's
    ``iter_files`` with the Python file class's extensions and
    ``GLOBAL_FILE_IGNORE_LIST``.

    Returns:
        ``(repo_root, file_paths)``: the resolved ``repo_path`` of the repo
        operator, and each yielded file path converted to ``str``.
    """
    language = ProgrammingLanguage.PYTHON
    project = ProjectConfig.from_path(str(repo), programming_language=language)
    suffixes = get_node_classes(language).file_cls.get_extensions()

    selected: list[str] = []
    candidates = project.repo_operator.iter_files(
        subdirs=project.subdirectories,
        extensions=suffixes,
        ignore_list=GLOBAL_FILE_IGNORE_LIST,
    )
    for found_path, _ in candidates:
        selected.append(str(found_path))

    return Path(project.repo_operator.repo_path).resolve(), selected
def parse_args() -> argparse.Namespace:
    """Parse the command line for the Rust facade measurement script.

    Accepts an optional positional ``repo`` (default ``"."``), the
    ``--raw-rust-walk`` and ``--json`` switches, and an ``--output`` path.
    """
    cli = argparse.ArgumentParser(
        description="Measure the Python-facing Rust compact index facade.",
    )

    def switch(flag: str, text: str) -> None:
        # Boolean options: absent -> False, present -> True.
        cli.add_argument(flag, action="store_true", help=text)

    cli.add_argument("repo", nargs="?", default=".", help="Path to the Python repository to index.")
    switch("--raw-rust-walk", "Use Rust's recursive file walk instead of Python RepoOperator file discovery.")
    cli.add_argument("--output", type=Path, help="Optional path to write JSON report.")
    switch("--json", "Print JSON report instead of a human summary.")
    return cli.parse_args()
def make_report(repo: Path, *, raw_rust_walk: bool) -> dict[str, Any]:
    """Index a TypeScript/JavaScript repository through ``graph_sitter_py`` and report on it.

    With ``raw_rust_walk`` the extension indexes ``repo`` itself via
    ``index_typescript_path``. Otherwise the files come from
    ``discover_typescript_files`` and are passed to ``index_typescript_paths``.
    File discovery, when used, is included in the measured wall time.

    Returns a dict with ``metadata``, ``totals``, ``summary`` and ``records``
    sections.
    """
    import graph_sitter_py

    began = time.perf_counter()
    if not raw_rust_walk:
        repo_root, file_paths = discover_typescript_files(repo)
        selected_file_count = len(file_paths)
        index = graph_sitter_py.index_typescript_paths(str(repo_root), file_paths)
    else:
        repo_root = repo
        selected_file_count = None
        index = graph_sitter_py.index_typescript_path(str(repo_root))
    elapsed = time.perf_counter() - began
    summary = summary_dict(index.summary())

    metadata = {
        "repo_path": str(repo),
        "repo_root": str(repo_root),
        "raw_rust_walk": raw_rust_walk,
        "selected_file_count": selected_file_count,
        "python": sys.version,
        "platform": platform.platform(),
        "engine_version": graph_sitter_py.engine_version(),
    }
    totals = {
        "wall_seconds": round(elapsed, 6),
        "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3),
    }
    merged_summary = dict(summary)
    merged_summary["external_modules"] = index.external_module_count
    merged_summary["exports"] = index.export_count

    record_attributes = (
        ("files", "file_count"),
        ("symbols", "symbol_count"),
        ("imports", "import_count"),
        ("import_resolutions", "import_resolution_count"),
        ("external_modules", "external_module_count"),
        ("exports", "export_count"),
        ("references", "reference_count"),
        ("dependencies", "dependency_count"),
    )
    records = {label: getattr(index, attribute) for label, attribute in record_attributes}

    return {
        "metadata": metadata,
        "totals": totals,
        "summary": merged_summary,
        "records": records,
    }
def print_human(report: dict[str, Any]) -> None:
    """Print a human-readable summary of a TypeScript index report.

    Writes seven lines to stdout: five metadata lines, one line with wall time
    and peak RSS, and one line listing the summary counters.
    """
    totals = report["totals"]
    summary = report["summary"]
    metadata = report["metadata"]

    labelled_metadata = (
        ("repo", "repo_path"),
        ("repo root", "repo_root"),
        ("engine", "engine_version"),
        ("raw rust walk", "raw_rust_walk"),
        ("selected files", "selected_file_count"),
    )
    for label, key in labelled_metadata:
        print(f"{label}: {metadata[key]}")

    wall_text = f"wall={totals['wall_seconds']:.3f}s"
    rss_text = f"max_rss={totals['max_rss_mb']:.1f} MB"
    print(f"rust TS index: {wall_text} {rss_text}")

    summary_fields = (
        "files",
        "symbols",
        "classes",
        "functions",
        "global_variables",
        "imports",
        "import_resolutions",
        "external_modules",
        "exports",
        "references",
        "dependencies",
        "files_with_errors",
    )
    print("summary: " + " ".join(f"{field}={summary[field]}" for field in summary_fields))
def symbol_key(symbol: Any, file_by_id: dict[int, Any]) -> str:
    """Build a position-based string key for a symbol record.

    Args:
        symbol: Record exposing ``file_id``, ``kind``, ``name`` and
            ``name_range.start_byte``.
        file_by_id: File records indexed by id; the entry for
            ``symbol.file_id`` must expose ``path``.

    Returns:
        ``"<path>:<kind>:<name>@<start byte of the name range>"``.
    """
    owner = file_by_id[symbol.file_id]
    location = f"{owner.path}:{symbol.kind}:{symbol.name}"
    return f"{location}@{symbol.name_range.start_byte}"
def make_file_rows(codebase: Any) -> list[dict[str, Any]]:
    """Flatten ``codebase.rust_files`` into plain dict rows ordered by path.

    Args:
        codebase: Object exposing ``rust_files``, an iterable of records with
            the attributes copied below.

    Returns:
        One dict per file, sorted by its ``path`` value.
    """
    copied_fields = ("path", "module_name", "byte_len", "line_count", "has_error")
    rows = []
    for record in codebase.rust_files:
        rows.append({field: getattr(record, field) for field in copied_fields})
    # list.sort is stable, like sorted(), so rows with equal paths keep input order.
    rows.sort(key=lambda row: row["path"])
    return rows
def make_import_resolution_rows(
    codebase: Any,
    file_by_id: dict[int, Any],
    symbol_by_id: dict[int, Any],
    import_by_id: dict[int, Any],
) -> list[dict[str, Any]]:
    """Describe every import resolution with id-free, string-keyed rows.

    Args:
        codebase: Object exposing ``rust_import_resolutions``.
        file_by_id: File records indexed by id.
        symbol_by_id: Symbol records indexed by id.
        import_by_id: Import records indexed by id.

    Returns:
        Rows sorted by source file, import key, target file, then target
        symbol key (a missing target symbol sorts as the empty string).
    """

    def describe(resolution: Any) -> dict[str, Any]:
        # The target symbol is optional; look it up before building the row.
        target_id = resolution.target_symbol_id
        target = symbol_by_id[target_id] if target_id is not None else None
        return {
            "import": import_key(import_by_id[resolution.import_id], file_by_id),
            "source_file": file_by_id[resolution.source_file_id].path,
            "target_file": file_by_id[resolution.target_file_id].path,
            "target_symbol": symbol_key(target, file_by_id) if target is not None else None,
        }

    def ordering(row: dict[str, Any]) -> tuple[Any, ...]:
        return (
            row["source_file"],
            row["import"],
            row["target_file"],
            row["target_symbol"] or "",
        )

    rows = [describe(resolution) for resolution in codebase.rust_import_resolutions]
    rows.sort(key=ordering)
    return rows
def validate_integrity(codebase: Any) -> dict[str, int]:
    """Count dangling id links and inconsistent dependency records in a graph.

    Each record family is checked against the ids found in
    ``codebase.rust_files``, ``rust_symbols``, ``rust_imports`` and
    ``rust_references``. Optional links (``None``) are never counted.

    Args:
        codebase: Object exposing the ``rust_*`` record collections read below.

    Returns:
        Mapping from check name to the number of violations found.
    """
    known_files = {record.id for record in codebase.rust_files}
    known_symbols = {record.id for record in codebase.rust_symbols}
    known_imports = {record.id for record in codebase.rust_imports}
    references = {record.id: record for record in codebase.rust_references}

    def dangling(candidate: Any, known: set[Any]) -> int:
        # 1 when a required link points at an unknown id, else 0.
        return 0 if candidate in known else 1

    def dangling_optional(candidate: Any, known: set[Any]) -> int:
        # Same as dangling(), but an absent (None) link is not an error.
        return 0 if candidate is None else dangling(candidate, known)

    external_module_gaps = sum(
        dangling(module.import_id, known_imports) + dangling(module.file_id, known_files)
        for module in codebase.rust_external_modules
    )

    import_resolution_gaps = sum(
        dangling(resolution.import_id, known_imports)
        + dangling(resolution.source_file_id, known_files)
        + dangling(resolution.target_file_id, known_files)
        + dangling_optional(resolution.target_symbol_id, known_symbols)
        for resolution in codebase.rust_import_resolutions
    )

    reference_gaps = sum(
        dangling(reference.source_file_id, known_files)
        + dangling(reference.target_symbol_id, known_symbols)
        + dangling_optional(reference.source_symbol_id, known_symbols)
        + dangling_optional(reference.import_id, known_imports)
        for reference in codebase.rust_references
    )

    external_reference_gaps = sum(
        dangling(reference.source_file_id, known_files)
        + dangling(reference.import_id, known_imports)
        + dangling_optional(reference.source_symbol_id, known_symbols)
        for reference in codebase.rust_external_references
    )

    dependency_gaps = 0
    count_mismatches = 0
    endpoint_mismatches = 0
    for dependency in codebase.rust_dependencies:
        dependency_gaps += (
            dangling(dependency.source_file_id, known_files)
            + dangling(dependency.target_file_id, known_files)
            + dangling(dependency.source_symbol_id, known_symbols)
            + dangling(dependency.target_symbol_id, known_symbols)
        )
        # The stored count must agree with the number of listed reference ids.
        if len(dependency.reference_ids) != dependency.reference_count:
            count_mismatches += 1
        for linked_id in dependency.reference_ids:
            linked = references.get(linked_id)
            if linked is None:
                # An unknown reference id counts as a missing dependency link.
                dependency_gaps += 1
                continue
            same_source = linked.source_symbol_id == dependency.source_symbol_id
            same_target = linked.target_symbol_id == dependency.target_symbol_id
            if not (same_source and same_target):
                endpoint_mismatches += 1

    return {
        "missing_external_module_links": external_module_gaps,
        "missing_import_resolution_links": import_resolution_gaps,
        "missing_reference_links": reference_gaps,
        "missing_external_reference_links": external_reference_gaps,
        "missing_dependency_links": dependency_gaps,
        "bad_dependency_reference_counts": count_mismatches,
        "bad_dependency_reference_targets": endpoint_mismatches,
    }
import CodebaseConfig, GraphBackend, RustFallbackMode + from graph_sitter.core.codebase import Codebase + + start = time.perf_counter() + config = CodebaseConfig(graph_backend=GraphBackend.RUST, rust_fallback=RustFallbackMode.ERROR) + codebase = Codebase(str(repo), language="python", config=config) + wall = time.perf_counter() - start + + file_by_id = {file.id: file for file in codebase.rust_files} + symbol_by_id = {symbol.id: symbol for symbol in codebase.rust_symbols} + import_by_id = {import_record.id: import_record for import_record in codebase.rust_imports} + + file_rows = make_file_rows(codebase) + symbol_rows = make_symbol_rows(codebase, file_by_id) + import_rows = make_import_rows(codebase, file_by_id) + import_resolution_rows = make_import_resolution_rows(codebase, file_by_id, symbol_by_id, import_by_id) + external_module_rows = make_external_module_rows(codebase, file_by_id, import_by_id) + reference_rows = make_reference_rows(codebase, file_by_id, symbol_by_id, import_by_id) + external_reference_rows = make_external_reference_rows(codebase, file_by_id, symbol_by_id, import_by_id) + dependency_rows = make_dependency_rows(codebase, file_by_id, symbol_by_id) + integrity = validate_integrity(codebase) + assert_integrity(integrity) + + summary = codebase.rust_index_summary + snapshot = { + "schema_version": SNAPSHOT_SCHEMA_VERSION, + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + }, + "summary": { + "files": summary.files, + "symbols": summary.symbols, + "classes": summary.classes, + "functions": summary.functions, + "global_variables": summary.global_variables, + "imports": summary.imports, + "import_resolutions": summary.import_resolutions, + "external_modules": len(codebase.rust_external_modules), + "references": summary.references, + "external_references": len(codebase.rust_external_references), + "dependencies": summary.dependencies, + "bytes": summary.bytes, + "lines": summary.lines, + 
def compare_snapshot(actual: dict[str, Any], expected_path: Path) -> None:
    """Raise if ``actual`` differs from the golden snapshot at ``expected_path``.

    The error message names every differing section so a failure is
    actionable: each top-level key (including ``schema_version`` and any key
    present on only one side) and each graph family as ``graphs.<name>``,
    including graph families that exist only in the expected file.

    Args:
        actual: Snapshot produced by the current run.
        expected_path: Path to the golden snapshot JSON file.

    Raises:
        AssertionError: If the two snapshots are not equal.
    """
    expected = json.loads(expected_path.read_text(encoding="utf-8"))
    if actual == expected:
        return

    if not isinstance(expected, dict):
        # A golden file holding e.g. a list or scalar cannot be compared by section.
        msg = f"snapshot mismatch against {expected_path}: expected snapshot is not a JSON object"
        raise AssertionError(msg)

    # Sentinel that distinguishes "key absent" from "key present with value None".
    missing = object()
    mismatches = []

    # Well-known sections first, then any other top-level key from either side.
    sections = ["schema_version", "metadata", "summary", "integrity"]
    extras = (set(actual) | set(expected)) - set(sections) - {"graphs"}
    sections.extend(sorted(extras, key=str))
    for key in sections:
        if actual.get(key, missing) != expected.get(key, missing):
            mismatches.append(str(key))

    actual_graphs = actual.get("graphs", missing)
    expected_graphs = expected.get("graphs", missing)
    if isinstance(actual_graphs, dict) and isinstance(expected_graphs, dict):
        # Keep the observed order, then add families only the golden file has.
        graph_names = list(actual_graphs)
        graph_names.extend(sorted(set(expected_graphs) - set(actual_graphs), key=str))
        for graph_name in graph_names:
            if actual_graphs.get(graph_name, missing) != expected_graphs.get(graph_name, missing):
                mismatches.append(f"graphs.{graph_name}")
    elif actual_graphs != expected_graphs:
        # One side lacks the section entirely or holds a non-mapping value.
        mismatches.append("graphs")

    msg = f"snapshot mismatch against {expected_path}: {', '.join(mismatches)}"
    raise AssertionError(msg)
def print_human(snapshot: dict[str, Any], observation: dict[str, Any], expected: Path) -> None:
    """Print a plain-text digest of a snapshot run to stdout.

    Args:
        snapshot: Snapshot mapping; its ``summary``, ``metadata`` and
            ``graphs`` sections are read.
        observation: Per-run measurements (``checkout``, ``wall_seconds``,
            ``max_rss_mb``).
        expected: Path of the golden snapshot, echoed in the output.
    """
    counts = snapshot["summary"]
    identity = snapshot["metadata"]

    print(f"repo: {identity['name']} {identity['commit']}")
    print(f"expected: {expected}")
    print(f"checkout: {observation['checkout']}")

    wall = f"wall={observation['wall_seconds']:.3f}s"
    peak = f"max_rss={observation['max_rss_mb']:.1f} MB"
    print(f"rust snapshot: {wall} {peak}")

    count_fields = (
        "files",
        "symbols",
        "imports",
        "import_resolutions",
        "external_modules",
        "references",
        "external_references",
        "dependencies",
    )
    print("summary: " + " ".join(f"{field}={counts[field]}" for field in count_fields))

    # Only these graph families have their digest echoed.
    hashed_graphs = (
        "files",
        "imports",
        "external_modules",
        "references",
        "external_references",
        "dependencies",
    )
    graphs = snapshot["graphs"]
    print("hashes: " + " ".join(f"{name}={graphs[name]['sha256']}" for name in hashed_graphs))
def range_list(record: dict[str, Any], name: str = "range") -> list[int]:
    """Return the range stored under ``record[name]`` in list form.

    A value that is already a list is returned unchanged (same object).
    Any other value is indexed by field name and expanded to
    ``[start_byte, end_byte, start_row, start_column, end_row, end_column]``.

    Args:
        record: Mapping holding the range.
        name: Key of the range inside ``record``.
    """
    span = record[name]
    if not isinstance(span, list):
        field_order = (
            "start_byte",
            "end_byte",
            "start_row",
            "start_column",
            "end_row",
            "end_column",
        )
        span = [span[field] for field in field_order]
    return span
dict[str, Any]], +) -> str: + return f"{import_key(import_by_id[external_module['import_id']], file_by_id)}:{external_module['name']}" + + +def import_resolution_key( + resolution: dict[str, Any], + file_by_id: dict[int, dict[str, Any]], + symbol_by_id: dict[int, dict[str, Any]], + import_by_id: dict[int, dict[str, Any]], +) -> str: + import_record = import_by_id[resolution["import_id"]] + target_file = file_by_id[resolution["target_file_id"]]["path"] + target_symbol = ( + "" + if resolution["target_symbol_id"] is None + else symbol_key(symbol_by_id[resolution["target_symbol_id"]], file_by_id) + ) + return f"{import_key(import_record, file_by_id)}->{target_file}:{target_symbol}" + + +def export_key(export: dict[str, Any], file_by_id: dict[int, dict[str, Any]]) -> str: + file = file_by_id[export["file_id"]] + name = export["name"] or "" + local_name = export["local_name"] or "" + source_module = export["source_module"] or "" + return f"{file['path']}:{export['kind']}:{name}:{local_name}:{source_module}@{range_list(export)[0]}" + + +def reference_key( + reference: dict[str, Any], + file_by_id: dict[int, dict[str, Any]], + symbol_by_id: dict[int, dict[str, Any]], + import_by_id: dict[int, dict[str, Any]], +) -> str: + source_symbol = ( + "" + if reference["source_symbol_id"] is None + else symbol_key(symbol_by_id[reference["source_symbol_id"]], file_by_id) + ) + import_record = ( + "" + if reference["import_id"] is None + else import_key(import_by_id[reference["import_id"]], file_by_id) + ) + target_symbol = symbol_key(symbol_by_id[reference["target_symbol_id"]], file_by_id) + source_file = file_by_id[reference["source_file_id"]]["path"] + return f"{source_file}:{source_symbol}->{target_symbol}:{import_record}:{reference['name']}@{range_list(reference)[0]}" + + +def external_reference_key( + reference: dict[str, Any], + file_by_id: dict[int, dict[str, Any]], + symbol_by_id: dict[int, dict[str, Any]], + import_by_id: dict[int, dict[str, Any]], +) -> str: + 
def make_file_rows(files: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Project file records onto the snapshot columns, ordered by path.

    Args:
        files: File records holding ``path``, ``byte_len``, ``line_count``,
            ``has_error`` and ``root_range``.

    Returns:
        One row per file, sorted by ``path``; ``root_range`` is normalised
        through ``range_list``.
    """
    copied_fields = ("path", "byte_len", "line_count", "has_error")
    rows = []
    for entry in files:
        row = {field: entry[field] for field in copied_fields}
        row["root_range"] = range_list(entry, "root_range")
        rows.append(row)
    rows.sort(key=lambda row: row["path"])
    return rows
def make_import_rows(
    imports: list[dict[str, Any]],
    file_by_id: dict[int, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn import records into id-free rows in a deterministic order.

    Args:
        imports: Import records to describe.
        file_by_id: File records indexed by id.

    Returns:
        Rows sorted by file path, range, kind, module, name and alias, with
        ``None`` text fields ordered as empty strings.
    """

    def describe(entry: dict[str, Any]) -> dict[str, Any]:
        row = {
            "key": import_key(entry, file_by_id),
            "file": file_by_id[entry["file_id"]]["path"],
        }
        for field in ("kind", "module", "name", "alias"):
            row[field] = entry[field]
        row["range"] = range_list(entry)
        return row

    def ordering(row: dict[str, Any]) -> tuple[Any, ...]:
        return (
            row["file"],
            row["range"],
            row["kind"],
            row["module"] or "",
            row["name"] or "",
            row["alias"] or "",
        )

    rows = [describe(entry) for entry in imports]
    rows.sort(key=ordering)
    return rows
def make_export_rows(
    exports: list[dict[str, Any]],
    file_by_id: dict[int, dict[str, Any]],
    symbol_by_id: dict[int, dict[str, Any]],
    import_by_id: dict[int, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn export records into id-free rows in a deterministic order.

    Args:
        exports: Export records; ``symbol_id`` and ``import_id`` may be
            ``None``, in which case the matching row value is ``None``.
        file_by_id: File records indexed by id.
        symbol_by_id: Symbol records indexed by id.
        import_by_id: Import records indexed by id.

    Returns:
        Rows sorted by file path, range, kind, name, local name and source
        module, with ``None`` text fields ordered as empty strings.
    """

    def describe(export: dict[str, Any]) -> dict[str, Any]:
        symbol_id = export["symbol_id"]
        if symbol_id is None:
            exported_symbol = None
        else:
            exported_symbol = symbol_key(symbol_by_id[symbol_id], file_by_id)

        import_id = export["import_id"]
        if import_id is None:
            exported_import = None
        else:
            exported_import = import_key(import_by_id[import_id], file_by_id)

        return {
            "key": export_key(export, file_by_id),
            "file": file_by_id[export["file_id"]]["path"],
            "kind": export["kind"],
            "name": export["name"],
            "local_name": export["local_name"],
            "source_module": export["source_module"],
            "symbol": exported_symbol,
            "import": exported_import,
            "range": range_list(export),
        }

    def ordering(row: dict[str, Any]) -> tuple[Any, ...]:
        return (
            row["file"],
            row["range"],
            row["kind"],
            row["name"] or "",
            row["local_name"] or "",
            row["source_module"] or "",
        )

    rows = [describe(export) for export in exports]
    rows.sort(key=ordering)
    return rows
def make_external_reference_rows(
    external_references: list[dict[str, Any]],
    file_by_id: dict[int, dict[str, Any]],
    symbol_by_id: dict[int, dict[str, Any]],
    import_by_id: dict[int, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn external reference records into id-free rows in a stable order.

    Args:
        external_references: Records to describe; ``source_symbol_id`` may be
            ``None``, giving a ``None`` ``source_symbol`` in the row.
        file_by_id: File records indexed by id.
        symbol_by_id: Symbol records indexed by id.
        import_by_id: Import records indexed by id.

    Returns:
        Rows sorted by source file, range, name, source symbol key (``None``
        ordered as the empty string) and import key.
    """

    def describe(reference: dict[str, Any]) -> dict[str, Any]:
        origin_id = reference["source_symbol_id"]
        if origin_id is None:
            origin = None
        else:
            origin = symbol_key(symbol_by_id[origin_id], file_by_id)
        return {
            "key": external_reference_key(reference, file_by_id, symbol_by_id, import_by_id),
            "source_file": file_by_id[reference["source_file_id"]]["path"],
            "source_symbol": origin,
            "import": import_key(import_by_id[reference["import_id"]], file_by_id),
            "name": reference["name"],
            "range": range_list(reference),
        }

    def ordering(row: dict[str, Any]) -> tuple[Any, ...]:
        return (
            row["source_file"],
            row["range"],
            row["name"],
            row["source_symbol"] or "",
            row["import"],
        )

    rows = [describe(reference) for reference in external_references]
    rows.sort(key=ordering)
    return rows
def make_subclass_edge_rows(
    subclass_edges: list[dict[str, Any]],
    file_by_id: dict[int, dict[str, Any]],
    symbol_by_id: dict[int, dict[str, Any]],
    import_by_id: dict[int, dict[str, Any]],
    reference_by_id: dict[int, dict[str, Any]],
) -> list[dict[str, Any]]:
    """Turn subclass edge records into id-free rows in a stable order.

    Args:
        subclass_edges: Edge records linking a source symbol to a target
            symbol through a reference id.
        file_by_id: File records indexed by id.
        symbol_by_id: Symbol records indexed by id.
        import_by_id: Import records indexed by id.
        reference_by_id: Reference records indexed by id.

    Returns:
        Rows sorted by source symbol key, target symbol key and reference key.
    """

    def symbol_of(symbol_id: int) -> str:
        return symbol_key(symbol_by_id[symbol_id], file_by_id)

    def path_of(file_id: int) -> str:
        return file_by_id[file_id]["path"]

    def describe(edge: dict[str, Any]) -> dict[str, Any]:
        return {
            "key": subclass_edge_key(edge, file_by_id, symbol_by_id, import_by_id, reference_by_id),
            "source_symbol": symbol_of(edge["source_symbol_id"]),
            "target_symbol": symbol_of(edge["target_symbol_id"]),
            "source_file": path_of(edge["source_file_id"]),
            "target_file": path_of(edge["target_file_id"]),
            "reference": reference_key(
                reference_by_id[edge["reference_id"]],
                file_by_id,
                symbol_by_id,
                import_by_id,
            ),
        }

    rows = [describe(edge) for edge in subclass_edges]
    rows.sort(key=lambda row: (row["source_symbol"], row["target_symbol"], row["reference"]))
    return rows
list[dict[str, Any]], + selected_file_count: int | None, +) -> dict[str, int]: + file_ids = {file["id"] for file in files} + symbol_ids = {symbol["id"] for symbol in symbols} + import_ids = {import_record["id"] for import_record in imports} + reference_ids = {reference["id"] for reference in references} + + missing_symbol_file_links = sum( + int(symbol["file_id"] not in file_ids) for symbol in symbols + ) + missing_import_file_links = sum( + int(import_record["file_id"] not in file_ids) for import_record in imports + ) + missing_external_module_file_links = sum( + int(external_module["file_id"] not in file_ids) + for external_module in external_modules + ) + missing_external_module_import_links = sum( + int(external_module["import_id"] not in import_ids) + for external_module in external_modules + ) + missing_export_file_links = sum( + int(export["file_id"] not in file_ids) for export in exports + ) + missing_export_symbol_links = sum( + int(export["symbol_id"] is not None and export["symbol_id"] not in symbol_ids) + for export in exports + ) + missing_export_import_links = sum( + int(export["import_id"] is not None and export["import_id"] not in import_ids) + for export in exports + ) + missing_resolution_import_links = sum( + int(resolution["import_id"] not in import_ids) + for resolution in import_resolutions + ) + missing_resolution_source_file_links = sum( + int(resolution["source_file_id"] not in file_ids) + for resolution in import_resolutions + ) + missing_resolution_target_file_links = sum( + int(resolution["target_file_id"] not in file_ids) + for resolution in import_resolutions + ) + missing_resolution_target_symbol_links = sum( + int( + resolution["target_symbol_id"] is not None + and resolution["target_symbol_id"] not in symbol_ids + ) + for resolution in import_resolutions + ) + missing_reference_source_file_links = sum( + int(reference["source_file_id"] not in file_ids) for reference in references + ) + missing_reference_source_symbol_links = sum( + 
int( + reference["source_symbol_id"] is not None + and reference["source_symbol_id"] not in symbol_ids + ) + for reference in references + ) + missing_reference_target_symbol_links = sum( + int(reference["target_symbol_id"] not in symbol_ids) for reference in references + ) + missing_reference_import_links = sum( + int(reference["import_id"] is not None and reference["import_id"] not in import_ids) + for reference in references + ) + missing_external_reference_source_file_links = sum( + int(reference["source_file_id"] not in file_ids) + for reference in external_references + ) + missing_external_reference_source_symbol_links = sum( + int( + reference["source_symbol_id"] is not None + and reference["source_symbol_id"] not in symbol_ids + ) + for reference in external_references + ) + missing_external_reference_import_links = sum( + int(reference["import_id"] not in import_ids) + for reference in external_references + ) + missing_dependency_source_symbol_links = sum( + int(dependency["source_symbol_id"] not in symbol_ids) + for dependency in dependencies + ) + missing_dependency_target_symbol_links = sum( + int(dependency["target_symbol_id"] not in symbol_ids) + for dependency in dependencies + ) + missing_dependency_source_file_links = sum( + int(dependency["source_file_id"] not in file_ids) for dependency in dependencies + ) + missing_dependency_target_file_links = sum( + int(dependency["target_file_id"] not in file_ids) for dependency in dependencies + ) + missing_dependency_reference_links = sum( + int(reference_id not in reference_ids) + for dependency in dependencies + for reference_id in dependency["reference_ids"] + ) + missing_subclass_edge_source_symbol_links = sum( + int(edge["source_symbol_id"] not in symbol_ids) for edge in subclass_edges + ) + missing_subclass_edge_target_symbol_links = sum( + int(edge["target_symbol_id"] not in symbol_ids) for edge in subclass_edges + ) + missing_subclass_edge_source_file_links = sum( + int(edge["source_file_id"] not 
in file_ids) for edge in subclass_edges + ) + missing_subclass_edge_target_file_links = sum( + int(edge["target_file_id"] not in file_ids) for edge in subclass_edges + ) + missing_subclass_edge_reference_links = sum( + int(edge["reference_id"] not in reference_ids) for edge in subclass_edges + ) + reference_by_id = {reference["id"]: reference for reference in references} + mismatched_subclass_edge_references = sum( + int( + edge["reference_id"] in reference_by_id + and ( + reference_by_id[edge["reference_id"]]["source_symbol_id"] + != edge["source_symbol_id"] + or reference_by_id[edge["reference_id"]]["target_symbol_id"] + != edge["target_symbol_id"] + or reference_by_id[edge["reference_id"]]["source_file_id"] + != edge["source_file_id"] + ) + ) + for edge in subclass_edges + ) + + selected_file_count_delta = ( + 0 if selected_file_count is None else len(files) - selected_file_count + ) + return { + "missing_symbol_file_links": missing_symbol_file_links, + "missing_import_file_links": missing_import_file_links, + "missing_external_module_file_links": missing_external_module_file_links, + "missing_external_module_import_links": missing_external_module_import_links, + "missing_export_file_links": missing_export_file_links, + "missing_export_symbol_links": missing_export_symbol_links, + "missing_export_import_links": missing_export_import_links, + "missing_resolution_import_links": missing_resolution_import_links, + "missing_resolution_source_file_links": missing_resolution_source_file_links, + "missing_resolution_target_file_links": missing_resolution_target_file_links, + "missing_resolution_target_symbol_links": missing_resolution_target_symbol_links, + "missing_reference_source_file_links": missing_reference_source_file_links, + "missing_reference_source_symbol_links": missing_reference_source_symbol_links, + "missing_reference_target_symbol_links": missing_reference_target_symbol_links, + "missing_reference_import_links": missing_reference_import_links, + 
"missing_external_reference_source_file_links": missing_external_reference_source_file_links, + "missing_external_reference_source_symbol_links": missing_external_reference_source_symbol_links, + "missing_external_reference_import_links": missing_external_reference_import_links, + "missing_dependency_source_symbol_links": missing_dependency_source_symbol_links, + "missing_dependency_target_symbol_links": missing_dependency_target_symbol_links, + "missing_dependency_source_file_links": missing_dependency_source_file_links, + "missing_dependency_target_file_links": missing_dependency_target_file_links, + "missing_dependency_reference_links": missing_dependency_reference_links, + "missing_subclass_edge_source_symbol_links": missing_subclass_edge_source_symbol_links, + "missing_subclass_edge_target_symbol_links": missing_subclass_edge_target_symbol_links, + "missing_subclass_edge_source_file_links": missing_subclass_edge_source_file_links, + "missing_subclass_edge_target_file_links": missing_subclass_edge_target_file_links, + "missing_subclass_edge_reference_links": missing_subclass_edge_reference_links, + "mismatched_subclass_edge_references": mismatched_subclass_edge_references, + "selected_file_count_delta": selected_file_count_delta, + } + + +def assert_integrity(integrity: dict[str, int]) -> None: + failures = [f"{name}={value}" for name, value in integrity.items() if value != 0] + if failures: + msg = "compact TypeScript snapshot integrity check failed: " + ", ".join(failures) + raise RuntimeError(msg) + + +def summary_dict(summary: Any) -> dict[str, int]: + return dict(summary.as_dict()) + + +def make_snapshot(args: argparse.Namespace) -> tuple[dict[str, Any], dict[str, Any]]: + repo, actual_commit = prepare_pinned_repo(args) + extension_path = None + if not args.skip_build_extension: + extension_path = build_rust_extension(args.extension_dir, timeout=args.timeout) + if str(args.extension_dir) not in sys.path: + sys.path.insert(0, str(args.extension_dir)) + + 
import graph_sitter_py + + start = time.perf_counter() + if args.raw_rust_walk: + repo_root = repo + selected_file_count = None + index = graph_sitter_py.index_typescript_path(str(repo_root)) + else: + repo_root, file_paths = discover_typescript_files(repo) + selected_file_count = len(file_paths) + index = graph_sitter_py.index_typescript_paths(str(repo_root), file_paths) + wall = time.perf_counter() - start + + files = json.loads(index.files_json()) + symbols = json.loads(index.symbols_json()) + imports = json.loads(index.imports_json()) + import_resolutions = json.loads(index.import_resolutions_json()) + external_modules = json.loads(index.external_modules_json()) + exports = json.loads(index.exports_json()) + references = json.loads(index.references_json()) + external_references = json.loads(index.external_references_json()) + dependencies = json.loads(index.dependencies_json()) + subclass_edges = json.loads(index.subclass_edges_json()) + + file_by_id = {file["id"]: file for file in files} + symbol_by_id = {symbol["id"]: symbol for symbol in symbols} + import_by_id = {import_record["id"]: import_record for import_record in imports} + reference_by_id = {reference["id"]: reference for reference in references} + + file_rows = make_file_rows(files) + symbol_rows = make_symbol_rows(symbols, file_by_id, symbol_by_id) + import_rows = make_import_rows(imports, file_by_id) + import_resolution_rows = make_import_resolution_rows( + import_resolutions, file_by_id, symbol_by_id, import_by_id + ) + external_module_rows = make_external_module_rows( + external_modules, file_by_id, import_by_id + ) + export_rows = make_export_rows(exports, file_by_id, symbol_by_id, import_by_id) + reference_rows = make_reference_rows(references, file_by_id, symbol_by_id, import_by_id) + external_reference_rows = make_external_reference_rows( + external_references, file_by_id, symbol_by_id, import_by_id + ) + dependency_rows = make_dependency_rows( + dependencies, file_by_id, symbol_by_id, 
import_by_id, reference_by_id + ) + subclass_edge_rows = make_subclass_edge_rows( + subclass_edges, file_by_id, symbol_by_id, import_by_id, reference_by_id + ) + integrity = validate_integrity( + files=files, + symbols=symbols, + imports=imports, + import_resolutions=import_resolutions, + external_modules=external_modules, + exports=exports, + references=references, + external_references=external_references, + dependencies=dependencies, + subclass_edges=subclass_edges, + selected_file_count=selected_file_count, + ) + assert_integrity(integrity) + + summary = summary_dict(index.summary()) + snapshot = { + "schema_version": SNAPSHOT_SCHEMA_VERSION, + "metadata": { + "name": args.name, + "repo_url": args.repo_url, + "ref": args.ref, + "commit": actual_commit, + "raw_rust_walk": args.raw_rust_walk, + "selected_file_count": selected_file_count, + }, + "summary": { + **summary, + "external_modules": index.external_module_count, + "exports": index.export_count, + "external_references": len(external_references), + "subclass_edges": index.subclass_edge_count, + }, + "graphs": { + "files": compact_record_set(file_rows, sample_size=args.sample_size), + "symbols": compact_record_set(symbol_rows, sample_size=args.sample_size), + "imports": compact_record_set(import_rows, sample_size=args.sample_size), + "import_resolutions": compact_record_set( + import_resolution_rows, sample_size=args.sample_size + ), + "external_modules": compact_record_set( + external_module_rows, sample_size=args.sample_size + ), + "exports": compact_record_set(export_rows, sample_size=args.sample_size), + "references": compact_record_set(reference_rows, sample_size=args.sample_size), + "external_references": compact_record_set( + external_reference_rows, sample_size=args.sample_size + ), + "dependencies": compact_record_set( + dependency_rows, sample_size=args.sample_size + ), + "subclass_edges": compact_record_set( + subclass_edge_rows, sample_size=args.sample_size + ), + }, + "integrity": integrity, + } 
+ observation = { + "checkout": str(repo), + "repo_root": str(repo_root), + "extension_path": str(extension_path) if extension_path else None, + "wall_seconds": round(wall, 6), + "max_rss_mb": round(bytes_to_mb(max_rss_bytes()), 3), + } + return snapshot, observation + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Create or verify a deterministic compact Rust syntax snapshot for a pinned TypeScript/JavaScript repository." + ) + parser.add_argument( + "--name", + default=DEFAULT_REPO_NAME, + help="Stable name for the pinned repository checkout.", + ) + parser.add_argument("--repo-url", default=DEFAULT_REPO_URL, help="Git repository URL.") + parser.add_argument("--ref", default=DEFAULT_REF, help="Remote ref or commit to fetch.") + parser.add_argument( + "--expected-commit", + default=DEFAULT_EXPECTED_COMMIT, + help="Expected resolved commit SHA. Pass an empty string to disable.", + ) + parser.add_argument( + "--cache-dir", + type=Path, + default=DEFAULT_CACHE_DIR, + help="Directory for reusable pinned checkouts.", + ) + parser.add_argument( + "--extension-dir", + type=Path, + default=DEFAULT_EXTENSION_DIR, + help="Directory for the built PyO3 extension module.", + ) + parser.add_argument( + "--expected", + type=Path, + default=DEFAULT_EXPECTED_SNAPSHOT, + help="Expected compact snapshot JSON path.", + ) + parser.add_argument("--output", type=Path, help="Optional path to write the observed snapshot JSON.") + parser.add_argument( + "--update", + action="store_true", + help="Write the observed snapshot to --expected instead of comparing.", + ) + parser.add_argument( + "--reset-checkout", + action="store_true", + help="Delete and recreate the cached checkout before running.", + ) + parser.add_argument( + "--skip-fetch", + action="store_true", + help="Do not fetch before checkout; useful for offline reruns with FETCH_HEAD present.", + ) + parser.add_argument( + "--skip-build-extension", + action="store_true", + help="Reuse 
an existing graph_sitter_py extension in --extension-dir.", + ) + parser.add_argument( + "--raw-rust-walk", + action="store_true", + help="Use Rust's raw recursive TS/JS walk instead of Python-selected file paths.", + ) + parser.add_argument( + "--sample-size", + type=int, + default=20, + help="Number of sorted sample rows stored for each graph family.", + ) + parser.add_argument( + "--timeout", + type=int, + default=900, + help="Timeout in seconds for clone/build child commands.", + ) + parser.add_argument("--json", action="store_true", help="Print the snapshot JSON instead of a human summary.") + return parser.parse_args() + + +def print_human(snapshot: dict[str, Any], observation: dict[str, Any], expected: Path) -> None: + summary = snapshot["summary"] + print(f"repo: {snapshot['metadata']['name']} {snapshot['metadata']['commit']}") + print(f"expected: {expected}") + print(f"checkout: {observation['checkout']}") + print(f"repo root: {observation['repo_root']}") + print(f"raw rust walk: {snapshot['metadata']['raw_rust_walk']}") + print(f"selected files: {snapshot['metadata']['selected_file_count']}") + print( + f"rust TS snapshot: wall={observation['wall_seconds']:.3f}s " + f"max_rss={observation['max_rss_mb']:.1f} MB" + ) + print( + "summary: " + f"files={summary['files']} symbols={summary['symbols']} imports={summary['imports']} " + f"import_resolutions={summary['import_resolutions']} " + f"external_modules={summary['external_modules']} " + f"exports={summary['exports']} references={summary['references']} " + f"external_references={summary['external_references']} " + f"dependencies={summary['dependencies']} subclass_edges={summary['subclass_edges']} " + f"files_with_errors={summary['files_with_errors']}" + ) + print( + "hashes: " + f"files={snapshot['graphs']['files']['sha256']} " + f"symbols={snapshot['graphs']['symbols']['sha256']} " + f"imports={snapshot['graphs']['imports']['sha256']} " + 
f"import_resolutions={snapshot['graphs']['import_resolutions']['sha256']} " + f"external_modules={snapshot['graphs']['external_modules']['sha256']} " + f"exports={snapshot['graphs']['exports']['sha256']} " + f"references={snapshot['graphs']['references']['sha256']} " + f"external_references={snapshot['graphs']['external_references']['sha256']} " + f"dependencies={snapshot['graphs']['dependencies']['sha256']} " + f"subclass_edges={snapshot['graphs']['subclass_edges']['sha256']}" + ) + + +def main() -> int: + args = parse_args() + if args.expected_commit == "": + args.expected_commit = None + snapshot, observation = make_snapshot(args) + + if args.output: + args.output.parent.mkdir(parents=True, exist_ok=True) + args.output.write_text( + json.dumps(snapshot, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + if args.update: + args.expected.parent.mkdir(parents=True, exist_ok=True) + args.expected.write_text( + json.dumps(snapshot, indent=2, sort_keys=True) + "\n", + encoding="utf-8", + ) + else: + compare_snapshot(snapshot, args.expected) + + if args.json: + print( + json.dumps( + {"observation": observation, "snapshot": snapshot}, + indent=2, + sort_keys=True, + ) + ) + else: + print_human(snapshot, observation, args.expected) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/rust-rewrite/uvx-cli-plan.md b/rust-rewrite/uvx-cli-plan.md new file mode 100644 index 000000000..152819038 --- /dev/null +++ b/rust-rewrite/uvx-cli-plan.md @@ -0,0 +1,257 @@ +# `uvx graph-sitter` CLI And Distribution Plan + +## Scope + +Own the package entry points and user-facing CLI path for: + +- parsing/indexing a local codebase +- running local transformations +- exposing the Rust compact backend through distributed wheels when it is available + +This plan does not change docs/site content. + +## Current State + +- The package distribution name is already `graph-sitter`. 
+- The current branch exposes both the historical `gs` console script and the canonical `graph-sitter` script: + + ```toml + [project.scripts] + gs = "graph_sitter.cli.cli:main" + graph-sitter = "graph_sitter.cli.cli:main" + ``` + +- `uv run gs --help` and `uv run graph-sitter --help` work. +- `uvx --from graph-sitter --help` resolves to the packaged console script once the checkout is installable in uv's temporary environment. +- Local `uvx --from graph-sitter parse --backend python --format json` now works on Python 3.12 and 3.13 after constraining parser/runtime dependencies to the lock-compatible ranges. +- `uvx --from dist/.whl graph-sitter ...` is now the release-wheel smoke path: the Hatch custom wheel hook builds and bundles `graph_sitter_py`, and `rust-rewrite/tools/check_wheel_rust_backend.sh` proves the installed wheel can run `--help`, parse through both Python and strict Rust backends, run import-path transforms in strict Rust `--check` and `--write` modes, and run target-owned registered codemods in strict Rust `--check`, `--write`, and scoped `--subdir --check` modes. +- `graph-sitter parse [PATH] --backend python --format json` works without `.codegen` initialization and emits stable summary JSON. +- `graph-sitter run LABEL PATH --arguments '{"key":"value"}' --backend python` resolves decorated functions under the target repo's `.codegen/codemods`, validates typed Pydantic arguments, and runs without an active `gs init` session. +- `graph-sitter run LABEL PATH --check` runs in a temporary copied-repo sandbox, reports the semantic diff, and leaves the target repo unchanged. +- `graph-sitter transform MODULE:OBJECT PATH --check|--write` loads ad hoc file or module transforms, supports plain functions plus `Codemod.execute` classes/instances, requires explicit `--check` or `--write`, and uses the same backend/language/check/write path as `run`. 
+- The Hatch wheel package list now includes both `src/graph_sitter` and `src/codemods` so `codemods.codemod.Codemod` is importable in clean `uvx` environments. +- `graph_sitter.cli.cli:main` is the public CLI. `graph_sitter.gscli` appears to be an internal generation CLI and should not be used for the `uvx graph-sitter` surface. +- The current `run` path executes decorated functions found under `.codegen/codemods`: + - `gs init` creates/persists a session for a git repo. + - `gs create ` scaffolds a `@graph_sitter.function("")` function. + - `gs run