diff --git a/docs/cli/about.mdx b/docs/cli/about.mdx
index 719e75f46..397ff5107 100644
--- a/docs/cli/about.mdx
+++ b/docs/cli/about.mdx
@@ -8,6 +8,7 @@ iconType: "solid"
The Graph-sitter CLI helps you:
- Parse a local repository into graph summary data
+- Diagnose parse time, memory use, and graph size for a local repository
- Initialize Graph-sitter in your repository
- Create and run codemods
- Run one-shot transformations by import path
@@ -35,13 +36,19 @@ uvx --python 3.13 graph-sitter parse . --format json
See [uvx workflows](/cli/uvx) for branch-built wheel validation and release
gate details.
-2. **Initialize Graph-sitter** in your repository:
+2. **Diagnose a repository** with timing and memory stats:
+
+```bash
+uvx --python 3.13 graph-sitter diagnose .
+```
+
+3. **Initialize Graph-sitter** in your repository:
```bash
graph-sitter init
```
-3. **Create your first codemod**:
+4. **Create your first codemod**:
```bash
graph-sitter create my-codemod --description "What you want to accomplish"
@@ -64,6 +71,9 @@ The `--description` flag enables AI assistance to help generate your codemod. Be
Parse a repository and print graph summary counts.
+
+ Report parse time, memory use, and graph size for a repository.
+
Create new codemods with optional AI assistance.
diff --git a/docs/cli/diagnose.mdx b/docs/cli/diagnose.mdx
new file mode 100644
index 000000000..37287b55b
--- /dev/null
+++ b/docs/cli/diagnose.mdx
@@ -0,0 +1,70 @@
+---
+title: "Diagnose Command"
+sidebarTitle: "diagnose"
+icon: "gauge"
+iconType: "solid"
+---
+
+The `diagnose` command parses a local repository and reports performance,
+memory, and graph-size diagnostics.
+
+```bash
+graph-sitter diagnose .
+```
+
+## Usage
+
+```bash
+graph-sitter diagnose [PATH] [OPTIONS]
+```
+
+`PATH` defaults to the current directory. The command does not require
+`.codegen` initialization or an active session.
+
+## Options
+
+- `--backend python|rust|auto`: Choose the graph backend. Defaults to `auto`.
+- `--fallback python|error`: Choose fallback behavior when the Rust backend is
+ unavailable. Defaults to `python`.
+- `--language auto|python|typescript`: Choose the repository language. Defaults
+ to `auto`.
+- `--json`: Print machine-readable diagnostics.
+- `--output FILE`: Write JSON diagnostics to a file. Requires `--json`.
+- `--subdir PATH`: Limit parsing to a repository-relative subdirectory or file.
+ Pass this option more than once to include multiple paths.
+
+## Output
+
+Human-readable output includes:
+
+- Parse time
+- File count
+- Memory after parse
+- Peak memory
+- Memory delta (resident memory after graph stats are collected minus resident memory at start)
+- Core graph counts such as symbols, imports, exports, and dependencies
+
+Use JSON output in CI or agent workflows:
+
+```bash
+graph-sitter diagnose . --language python --backend rust --fallback error --json
+```
+
+The JSON payload includes `schema_version`, requested and selected backend,
+language, parse time, selected subdirectories, graph count fields, and a
+structured `memory` object with RSS samples.
+
+## With uvx
+
+Published package form:
+
+```bash
+uvx --python 3.13 graph-sitter diagnose .
+uvx --python 3.13 graph-sitter diagnose . --json --output graph-sitter-diagnostics.json
+```
+
+Strict Rust validation form:
+
+```bash
+uvx --python 3.13 graph-sitter diagnose . --backend rust --fallback error --json
+```
diff --git a/src/graph_sitter/cli/cli.py b/src/graph_sitter/cli/cli.py
index 9a77fc6c7..2ee211482 100644
--- a/src/graph_sitter/cli/cli.py
+++ b/src/graph_sitter/cli/cli.py
@@ -4,6 +4,7 @@
# Removed reference to non-existent agent module
from graph_sitter.cli.commands.config.main import config_command
from graph_sitter.cli.commands.create.main import create_command
+from graph_sitter.cli.commands.diagnose.main import diagnose_command
from graph_sitter.cli.commands.doctor.main import doctor_command
from graph_sitter.cli.commands.init.main import init_command
from graph_sitter.cli.commands.list.main import list_command
@@ -31,6 +32,7 @@ def main():
# Removed reference to non-existent agent_command
main.add_command(init_command)
main.add_command(doctor_command)
+main.add_command(diagnose_command)
main.add_command(parse_command)
main.add_command(run_command)
main.add_command(transform_command)
diff --git a/src/graph_sitter/cli/commands/diagnose/main.py b/src/graph_sitter/cli/commands/diagnose/main.py
new file mode 100644
index 000000000..b529bcb29
--- /dev/null
+++ b/src/graph_sitter/cli/commands/diagnose/main.py
@@ -0,0 +1,153 @@
+from __future__ import annotations
+
+import json
+import logging
+import os
+import resource
+import sys
+import time
+from pathlib import Path
+from typing import Any
+
+import psutil
+import rich
+import rich_click as click
+from rich.table import Table
+
+from graph_sitter.cli.commands.parse.main import _base_payload, _parse_language, _project_for_parse, _suppress_parse_logs
+from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode
+from graph_sitter.core.codebase import Codebase
+
+# Emitted as the `schema_version` field of the diagnostics payload.
+# NOTE(review): presumably bumped whenever the JSON layout changes incompatibly — confirm policy.
+DIAGNOSTICS_JSON_SCHEMA_VERSION = 1
+
+
+def _bytes_to_mb(value: int) -> float:
+    """Convert a byte count to megabytes, where one megabyte is 1024 * 1024 bytes."""
+    bytes_per_mb = 1024 * 1024
+    return value / bytes_per_mb
+
+
+def _current_rss_bytes() -> int:
+    """Return the resident set size psutil currently reports for this process."""
+    this_process = psutil.Process(os.getpid())
+    memory = this_process.memory_info()
+    return int(memory.rss)
+
+
+def _max_rss_bytes() -> int:
+    """Return this process's peak resident set size, normalised to bytes.
+
+    The raw ``ru_maxrss`` value is treated as bytes when ``sys.platform`` is
+    ``"darwin"`` and as kilobytes (multiplied by 1024) on every other platform.
+
+    NOTE(review): ``resource`` is a Unix-only standard-library module and is
+    imported at module level, so this module presumably cannot be imported on
+    Windows — confirm whether Windows needs to be supported.
+    """
+    peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
+    scale = 1 if sys.platform == "darwin" else 1024
+    return int(peak * scale)
+
+
+def _memory_sample(label: str) -> dict[str, float | str]:
+    """Take one labelled memory reading.
+
+    Returns a dict with the given ``label`` plus the current RSS (``rss_mb``)
+    and the peak RSS (``max_rss_mb``), both in MB rounded to three decimals.
+    """
+    current_mb = round(_bytes_to_mb(_current_rss_bytes()), 3)
+    peak_mb = round(_bytes_to_mb(_max_rss_bytes()), 3)
+    return {"label": label, "rss_mb": current_mb, "max_rss_mb": peak_mb}
+
+
+def _memory_payload(samples: list[dict[str, float | str]]) -> dict[str, float | list[dict[str, float | str]]]:
+    """Summarise a sequence of memory samples into the ``memory`` payload object.
+
+    The first sample is taken as the starting reading, the second as the
+    post-parse reading, and the last as the post-stats reading, so at least two
+    samples are required. The delta is last-minus-first; the peak is the
+    largest ``max_rss_mb`` across all samples. The samples themselves are
+    included unchanged under ``samples``.
+    """
+
+    def rss_at(index: int) -> float:
+        return float(samples[index]["rss_mb"])
+
+    at_start, after_parse, after_stats = rss_at(0), rss_at(1), rss_at(-1)
+    highest_peak = max(float(entry["max_rss_mb"]) for entry in samples)
+
+    summary: dict[str, float | list[dict[str, float | str]]] = {
+        "rss_start_mb": round(at_start, 3),
+        "rss_after_parse_mb": round(after_parse, 3),
+        "rss_after_stats_mb": round(after_stats, 3),
+        "rss_delta_mb": round(after_stats - at_start, 3),
+        "peak_rss_mb": round(highest_peak, 3),
+        "samples": samples,
+    }
+    return summary
+
+
+def _write_json_payload(payload: dict[str, Any], output: Path | None) -> None:
+    """Emit *payload* as one line of key-sorted JSON followed by a newline.
+
+    When *output* is ``None`` the JSON goes to stdout via ``click.echo``;
+    otherwise it is written to the *output* file.
+
+    Raises:
+        click.ClickException: if writing the file fails with an ``OSError``.
+    """
+    serialized = json.dumps(payload, sort_keys=True) + "\n"
+
+    if output is not None:
+        try:
+            output.write_text(serialized)
+        except OSError as error:
+            msg = f"Could not write diagnostics JSON output to {output}: {error}"
+            raise click.ClickException(msg) from error
+        return
+
+    # The trailing newline is already part of the text, so suppress echo's own.
+    click.echo(serialized, nl=False)
+
+
+def _print_summary(payload: dict[str, Any]) -> None:
+    """Print the human-readable diagnostics report with Rich.
+
+    Reads ``backend``, ``language``, ``path``, ``subdirectories``,
+    ``parse_seconds``, ``files``, ``symbols``, ``imports``, ``exports`` and
+    ``dependencies`` from *payload*, plus the ``rss_after_parse_mb``,
+    ``peak_rss_mb`` and ``rss_delta_mb`` entries of ``payload["memory"]``.
+    If ``rust_backend_error`` is present and truthy it is shown as a warning.
+    """
+    # `rich` is already a dependency of this module; only the escape helper is new here.
+    from rich.markup import escape
+
+    memory = payload["memory"]
+    rich.print(f"[bold]Graph-sitter diagnostics[/bold] ({payload['backend']}, {payload['language']})")
+    # The path, subdirectory list and error text are arbitrary runtime data. Without
+    # escaping, Rich treats bracketed segments such as "app/[slug]" as markup tags,
+    # silently dropping them or raising MarkupError on an unmatched closing tag.
+    rich.print(f"Path: {escape(str(payload['path']))}")
+    rich.print(f"Subdirectories: {escape(str(payload['subdirectories'] or 'ALL'))}")
+
+    table = Table(show_header=True, header_style="bold", box=None)
+    table.add_column("Metric")
+    table.add_column("Value", justify="right")
+    table.add_row("Parse time", f"{payload['parse_seconds']:.3f}s")
+    table.add_row("Files", str(payload["files"]))
+    table.add_row("Memory after parse", f"{memory['rss_after_parse_mb']:.1f} MB")
+    table.add_row("Peak memory", f"{memory['peak_rss_mb']:.1f} MB")
+    # Explicit sign so a shrinking footprint is visibly negative.
+    table.add_row("Memory delta", f"{memory['rss_delta_mb']:+.1f} MB")
+    table.add_row("Symbols", str(payload["symbols"]))
+    table.add_row("Imports", str(payload["imports"]))
+    table.add_row("Exports", str(payload["exports"]))
+    table.add_row("Dependencies", str(payload["dependencies"]))
+    rich.print(table)
+
+    backend_error = payload.get("rust_backend_error")
+    if backend_error:
+        rich.print(f"[yellow]Rust backend fallback:[/yellow] {escape(str(backend_error))}")
+
+
+@click.command(name="diagnose")
+@click.argument("path", type=click.Path(path_type=Path, exists=True, file_okay=False), default=Path("."), required=False)
+@click.option("--backend", type=click.Choice(["python", "rust", "auto"]), default="auto", show_default=True, help="Graph backend to use.")
+@click.option("--fallback", type=click.Choice(["python", "error"]), default="python", show_default=True, help="Fallback behavior when the Rust backend is unavailable.")
+@click.option("--language", type=click.Choice(["auto", "python", "typescript"]), default="auto", show_default=True, help="Project language.")
+@click.option("--json", "as_json", is_flag=True, help="Print machine-readable diagnostics.")
+@click.option("--output", type=click.Path(path_type=Path, dir_okay=False), help="Write JSON diagnostics to this file. Requires --json.")
+@click.option("--subdir", "subdirectories", multiple=True, help="Limit parsing to a repository-relative subdirectory or file. Can be passed more than once.")
+def diagnose_command(
+    path: Path,
+    backend: str,
+    fallback: str,
+    language: str,
+    as_json: bool,
+    output: Path | None,
+    subdirectories: tuple[str, ...],
+) -> None:
+    """Parse a codebase and report timing, memory, and graph diagnostics."""
+    # The docstring above doubles as the command's --help text, so it stays terse.
+    # Reject --output without --json before any parsing work is done.
+    if not as_json and output is not None:
+        msg = "--output is only supported with --json"
+        raise click.ClickException(msg)
+
+    backend_choice = GraphBackend(backend)
+    fallback_choice = RustFallbackMode(fallback)
+    config = CodebaseConfig(graph_backend=backend_choice, rust_fallback=fallback_choice)
+    project = _project_for_parse(path, _parse_language(language), subdirectories)
+
+    # Three readings bracket the two phases: building the codebase, then computing stats.
+    samples = [_memory_sample("start")]
+    started = time.perf_counter()
+    # NOTE(review): presumably the level handed to _suppress_parse_logs is the highest
+    # level that gets silenced, so JSON mode mutes everything — confirm against the helper.
+    log_cutoff = sys.maxsize if as_json else logging.INFO
+    try:
+        with _suppress_parse_logs(log_cutoff):
+            codebase = Codebase(projects=[project], config=config)
+    except RuntimeError as error:
+        # Surface the failure as a normal CLI error message rather than a traceback.
+        raise click.ClickException(str(error)) from error
+    parse_seconds = time.perf_counter() - started
+    samples.append(_memory_sample("after_parse"))
+
+    payload = _base_payload(codebase, path=path, backend=backend, elapsed_seconds=parse_seconds)
+    samples.append(_memory_sample("after_stats"))
+
+    diagnostics = {
+        "schema_version": DIAGNOSTICS_JSON_SCHEMA_VERSION,
+        "command": "diagnose",
+        "parse_seconds": round(parse_seconds, 6),
+        "memory": _memory_payload(samples),
+    }
+    payload.update(diagnostics)
+
+    if not as_json:
+        _print_summary(payload)
+        return
+    _write_json_payload(payload, output)
diff --git a/tests/unit/cli/commands/diagnose/test_diagnose.py b/tests/unit/cli/commands/diagnose/test_diagnose.py
new file mode 100644
index 000000000..ee4c4de00
--- /dev/null
+++ b/tests/unit/cli/commands/diagnose/test_diagnose.py
@@ -0,0 +1,119 @@
+import json
+import subprocess
+from pathlib import Path
+
+from click.testing import CliRunner
+
+from graph_sitter.cli.cli import main
+
+
+def _init_repo(path: Path) -> None:
+    """Create a git repository at *path* and give it a throwaway user identity."""
+    repo = str(path)
+    subprocess.run(["git", "init", repo], check=True, capture_output=True)
+    identity = (("user.email", "test@example.com"), ("user.name", "Test User"))
+    for key, value in identity:
+        subprocess.run(["git", "-C", repo, "config", key, value], check=True)
+
+
+def test_diagnose_command_reports_parse_time_memory_and_file_count_as_json(tmp_path):
+    """`diagnose --json` prints a payload with identity fields, counts, timing and memory samples."""
+    _init_repo(tmp_path)
+    source_file = tmp_path / "app.py"
+    source_file.write_text("import os\n\ndef run():\n return os.getcwd()\n")
+
+    cli_args = ["diagnose", str(tmp_path), "--language", "python", "--backend", "python", "--json"]
+    result = CliRunner().invoke(main, cli_args)
+
+    assert result.exit_code == 0, result.output
+    payload = json.loads(result.output)
+
+    expected_fields = {
+        "schema_version": 1,
+        "command": "diagnose",
+        "backend_requested": "python",
+        "backend": "python",
+        "language": "python",
+        "files": 1,
+        "functions": 1,
+    }
+    for field, expected in expected_fields.items():
+        assert payload[field] == expected, field
+
+    assert payload["parse_seconds"] >= 0
+    assert payload["elapsed_seconds"] == payload["parse_seconds"]
+
+    memory = payload["memory"]
+    assert memory["rss_start_mb"] > 0
+    assert memory["rss_after_parse_mb"] > 0
+    assert memory["peak_rss_mb"] >= memory["rss_after_parse_mb"]
+    labels = [sample["label"] for sample in memory["samples"]]
+    assert labels == ["start", "after_parse", "after_stats"]
+
+
+def test_diagnose_command_prints_human_summary(tmp_path):
+    """Without `--json`, `diagnose` prints the Rich summary with the expected rows and file count."""
+    import re
+
+    _init_repo(tmp_path)
+    (tmp_path / "app.py").write_text("def run():\n return 1\n")
+
+    result = CliRunner().invoke(
+        main,
+        [
+            "diagnose",
+            str(tmp_path),
+            "--language",
+            "python",
+            "--backend",
+            "python",
+        ],
+    )
+
+    assert result.exit_code == 0, result.output
+    assert "Graph-sitter diagnostics" in result.output
+    assert "Parse time" in result.output
+    assert "Memory after parse" in result.output
+    assert "Peak memory" in result.output
+    # A bare `"1" in output` check passes on any timing or memory figure containing
+    # the digit 1, so anchor the count to the Files row of the table instead.
+    assert re.search(r"^\s*Files\s+1\s*$", result.output, re.MULTILINE), result.output
+
+
+def test_diagnose_command_writes_json_output_file(tmp_path):
+    """With `--json --output FILE`, the payload goes to the file and nothing is printed."""
+    _init_repo(tmp_path)
+    (tmp_path / "app.py").write_text("def run():\n return 1\n")
+    output_path = tmp_path / "diagnostics.json"
+
+    cli_args = ["diagnose", str(tmp_path), "--language", "python", "--backend", "python", "--json"]
+    cli_args += ["--output", str(output_path)]
+    result = CliRunner().invoke(main, cli_args)
+
+    assert result.exit_code == 0, result.output
+    assert result.output == ""
+
+    written = json.loads(output_path.read_text())
+    assert written["command"] == "diagnose"
+    assert written["files"] == 1
+    assert written["memory"]["rss_after_parse_mb"] > 0
+
+
+def test_diagnose_command_rejects_output_without_json(tmp_path):
+    """Passing `--output` without `--json` fails and the error names both options."""
+    _init_repo(tmp_path)
+    (tmp_path / "app.py").write_text("def run():\n return 1\n")
+
+    unwanted_target = tmp_path / "diagnostics.json"
+    cli_args = ["diagnose", str(tmp_path), "--output", str(unwanted_target)]
+    result = CliRunner().invoke(main, cli_args)
+
+    assert result.exit_code != 0
+    for option in ("--output", "--json"):
+        assert option in result.output