diff --git a/docs/cli/about.mdx b/docs/cli/about.mdx index 719e75f46..397ff5107 100644 --- a/docs/cli/about.mdx +++ b/docs/cli/about.mdx @@ -8,6 +8,7 @@ iconType: "solid" The Graph-sitter CLI helps you: - Parse a local repository into graph summary data +- Diagnose parse time, memory use, and graph size for a local repository - Initialize Graph-sitter in your repository - Create and run codemods - Run one-shot transformations by import path @@ -35,13 +36,19 @@ uvx --python 3.13 graph-sitter parse . --format json See [uvx workflows](/cli/uvx) for branch-built wheel validation and release gate details. -2. **Initialize Graph-sitter** in your repository: +2. **Diagnose a repository** with timing and memory stats: + +```bash +uvx --python 3.13 graph-sitter diagnose . +``` + +3. **Initialize Graph-sitter** in your repository: ```bash graph-sitter init ``` -3. **Create your first codemod**: +4. **Create your first codemod**: ```bash graph-sitter create my-codemod --description "What you want to accomplish" @@ -64,6 +71,9 @@ The `--description` flag enables AI assistance to help generate your codemod. Be Parse a repository and print graph summary counts. + + Report parse time, memory use, and graph size for a repository. + Create new codemods with optional AI assistance. diff --git a/docs/cli/diagnose.mdx b/docs/cli/diagnose.mdx new file mode 100644 index 000000000..37287b55b --- /dev/null +++ b/docs/cli/diagnose.mdx @@ -0,0 +1,70 @@ +--- +title: "Diagnose Command" +sidebarTitle: "diagnose" +icon: "gauge" +iconType: "solid" +--- + +The `diagnose` command parses a local repository and reports performance, +memory, and graph-size diagnostics. + +```bash +graph-sitter diagnose . +``` + +## Usage + +```bash +graph-sitter diagnose [PATH] [OPTIONS] +``` + +`PATH` defaults to the current directory. The command does not require +`.codegen` initialization or an active session. + +## Options + +- `--backend python|rust|auto`: Choose the graph backend. Defaults to `auto`. 
+- `--fallback python|error`: Choose fallback behavior when the Rust backend is + unavailable. Defaults to `python`. +- `--language auto|python|typescript`: Choose the repository language. Defaults + to `auto`. +- `--json`: Print machine-readable diagnostics. +- `--output FILE`: Write JSON diagnostics to a file. Requires `--json`. +- `--subdir PATH`: Limit parsing to a repository-relative subdirectory or file. + Pass this option more than once to include multiple paths. + +## Output + +Human-readable output includes: + +- Parse time +- File count +- Memory after parse (resident set size sampled once parsing finishes) +- Peak memory (highest resident set size the process has reached) +- Memory delta (RSS after graph stats are collected minus RSS at start) +- Core graph counts such as symbols, imports, exports, and dependencies + +Use JSON output in CI or agent workflows: + +```bash +graph-sitter diagnose . --language python --backend rust --fallback error --json +``` + +The JSON payload includes `schema_version`, requested and selected backend, +language, parse time, selected subdirectories, graph count fields, and a +structured `memory` object with RSS samples. + +## With uvx + +Published package form: + +```bash +uvx --python 3.13 graph-sitter diagnose . +uvx --python 3.13 graph-sitter diagnose . --json --output graph-sitter-diagnostics.json +``` + +Strict Rust validation form: + +```bash +uvx --python 3.13 graph-sitter diagnose . 
--backend rust --fallback error --json +``` diff --git a/src/graph_sitter/cli/cli.py b/src/graph_sitter/cli/cli.py index 9a77fc6c7..2ee211482 100644 --- a/src/graph_sitter/cli/cli.py +++ b/src/graph_sitter/cli/cli.py @@ -4,6 +4,7 @@ # Removed reference to non-existent agent module from graph_sitter.cli.commands.config.main import config_command from graph_sitter.cli.commands.create.main import create_command +from graph_sitter.cli.commands.diagnose.main import diagnose_command from graph_sitter.cli.commands.doctor.main import doctor_command from graph_sitter.cli.commands.init.main import init_command from graph_sitter.cli.commands.list.main import list_command @@ -31,6 +32,7 @@ def main(): # Removed reference to non-existent agent_command main.add_command(init_command) main.add_command(doctor_command) +main.add_command(diagnose_command) main.add_command(parse_command) main.add_command(run_command) main.add_command(transform_command) diff --git a/src/graph_sitter/cli/commands/diagnose/main.py b/src/graph_sitter/cli/commands/diagnose/main.py new file mode 100644 index 000000000..b529bcb29 --- /dev/null +++ b/src/graph_sitter/cli/commands/diagnose/main.py @@ -0,0 +1,153 @@ +from __future__ import annotations + +import json +import logging +import os +import resource +import sys +import time +from pathlib import Path +from typing import Any + +import psutil +import rich +import rich_click as click +from rich.table import Table + +from graph_sitter.cli.commands.parse.main import _base_payload, _parse_language, _project_for_parse, _suppress_parse_logs +from graph_sitter.configs.models.codebase import CodebaseConfig, GraphBackend, RustFallbackMode +from graph_sitter.core.codebase import Codebase + +DIAGNOSTICS_JSON_SCHEMA_VERSION = 1 + + +def _bytes_to_mb(value: int) -> float: + return value / (1024 * 1024) + + +def _current_rss_bytes() -> int: + return int(psutil.Process(os.getpid()).memory_info().rss) + + +def _max_rss_bytes() -> int: + rss = 
resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if sys.platform == "darwin": + return int(rss) + return int(rss * 1024) + + +def _memory_sample(label: str) -> dict[str, float | str]: + return { + "label": label, + "rss_mb": round(_bytes_to_mb(_current_rss_bytes()), 3), + "max_rss_mb": round(_bytes_to_mb(_max_rss_bytes()), 3), + } + + +def _memory_payload(samples: list[dict[str, float | str]]) -> dict[str, float | list[dict[str, float | str]]]: + start_rss = float(samples[0]["rss_mb"]) + after_parse_rss = float(samples[1]["rss_mb"]) + after_stats_rss = float(samples[-1]["rss_mb"]) + peak_rss = max(float(sample["max_rss_mb"]) for sample in samples) + return { + "rss_start_mb": round(start_rss, 3), + "rss_after_parse_mb": round(after_parse_rss, 3), + "rss_after_stats_mb": round(after_stats_rss, 3), + "rss_delta_mb": round(after_stats_rss - start_rss, 3), + "peak_rss_mb": round(peak_rss, 3), + "samples": samples, + } + + +def _write_json_payload(payload: dict[str, Any], output: Path | None) -> None: + contents = json.dumps(payload, sort_keys=True) + "\n" + if output is None: + click.echo(contents, nl=False) + return + + try: + output.write_text(contents) + except OSError as error: + msg = f"Could not write diagnostics JSON output to {output}: {error}" + raise click.ClickException(msg) from error + + +def _print_summary(payload: dict[str, Any]) -> None: + memory = payload["memory"] + rich.print(f"[bold]Graph-sitter diagnostics[/bold] ({payload['backend']}, {payload['language']})") + rich.print(f"Path: {payload['path']}") + rich.print(f"Subdirectories: {payload['subdirectories'] or 'ALL'}") + + table = Table(show_header=True, header_style="bold", box=None) + table.add_column("Metric") + table.add_column("Value", justify="right") + table.add_row("Parse time", f"{payload['parse_seconds']:.3f}s") + table.add_row("Files", str(payload["files"])) + table.add_row("Memory after parse", f"{memory['rss_after_parse_mb']:.1f} MB") + table.add_row("Peak memory", 
f"{memory['peak_rss_mb']:.1f} MB") + table.add_row("Memory delta", f"{memory['rss_delta_mb']:+.1f} MB") + table.add_row("Symbols", str(payload["symbols"])) + table.add_row("Imports", str(payload["imports"])) + table.add_row("Exports", str(payload["exports"])) + table.add_row("Dependencies", str(payload["dependencies"])) + rich.print(table) + + if payload.get("rust_backend_error"): + rich.print(f"[yellow]Rust backend fallback:[/yellow] {payload['rust_backend_error']}") + + +@click.command(name="diagnose") +@click.argument("path", type=click.Path(path_type=Path, exists=True, file_okay=False), default=Path("."), required=False) +@click.option("--backend", type=click.Choice(["python", "rust", "auto"]), default="auto", show_default=True, help="Graph backend to use.") +@click.option("--fallback", type=click.Choice(["python", "error"]), default="python", show_default=True, help="Fallback behavior when the Rust backend is unavailable.") +@click.option("--language", type=click.Choice(["auto", "python", "typescript"]), default="auto", show_default=True, help="Project language.") +@click.option("--json", "as_json", is_flag=True, help="Print machine-readable diagnostics.") +@click.option("--output", type=click.Path(path_type=Path, dir_okay=False), help="Write JSON diagnostics to this file. Requires --json.") +@click.option("--subdir", "subdirectories", multiple=True, help="Limit parsing to a repository-relative subdirectory or file. 
Can be passed more than once.") +def diagnose_command( + path: Path, + backend: str, + fallback: str, + language: str, + as_json: bool, + output: Path | None, + subdirectories: tuple[str, ...], +) -> None: + """Parse a codebase and report timing, memory, and graph diagnostics.""" + if output is not None and not as_json: + msg = "--output is only supported with --json" + raise click.ClickException(msg) + + config = CodebaseConfig( + graph_backend=GraphBackend(backend), + rust_fallback=RustFallbackMode(fallback), + ) + parsed_language = _parse_language(language) + project = _project_for_parse(path, parsed_language, subdirectories) + + memory_samples = [_memory_sample("start")] + parse_start = time.perf_counter() + try: + disabled_level = sys.maxsize if as_json else logging.INFO + with _suppress_parse_logs(disabled_level): + codebase = Codebase(projects=[project], config=config) + except RuntimeError as error: + raise click.ClickException(str(error)) from error + parse_seconds = time.perf_counter() - parse_start + memory_samples.append(_memory_sample("after_parse")) + + payload = _base_payload(codebase, path=path, backend=backend, elapsed_seconds=parse_seconds) + memory_samples.append(_memory_sample("after_stats")) + payload.update( + { + "schema_version": DIAGNOSTICS_JSON_SCHEMA_VERSION, + "command": "diagnose", + "parse_seconds": round(parse_seconds, 6), + "memory": _memory_payload(memory_samples), + } + ) + + if as_json: + _write_json_payload(payload, output) + else: + _print_summary(payload) diff --git a/tests/unit/cli/commands/diagnose/test_diagnose.py b/tests/unit/cli/commands/diagnose/test_diagnose.py new file mode 100644 index 000000000..ee4c4de00 --- /dev/null +++ b/tests/unit/cli/commands/diagnose/test_diagnose.py @@ -0,0 +1,119 @@ +import json +import subprocess +from pathlib import Path + +from click.testing import CliRunner + +from graph_sitter.cli.cli import main + + +def _init_repo(path: Path) -> None: + subprocess.run(["git", "init", str(path)], 
check=True, capture_output=True) + subprocess.run(["git", "-C", str(path), "config", "user.email", "test@example.com"], check=True) + subprocess.run(["git", "-C", str(path), "config", "user.name", "Test User"], check=True) + + +def test_diagnose_command_reports_parse_time_memory_and_file_count_as_json(tmp_path): + _init_repo(tmp_path) + (tmp_path / "app.py").write_text("import os\n\ndef run():\n return os.getcwd()\n") + + result = CliRunner().invoke( + main, + [ + "diagnose", + str(tmp_path), + "--language", + "python", + "--backend", + "python", + "--json", + ], + ) + + assert result.exit_code == 0, result.output + payload = json.loads(result.output) + assert payload["schema_version"] == 1 + assert payload["command"] == "diagnose" + assert payload["backend_requested"] == "python" + assert payload["backend"] == "python" + assert payload["language"] == "python" + assert payload["files"] == 1 + assert payload["functions"] == 1 + assert payload["parse_seconds"] >= 0 + assert payload["elapsed_seconds"] == payload["parse_seconds"] + assert payload["memory"]["rss_start_mb"] > 0 + assert payload["memory"]["rss_after_parse_mb"] > 0 + assert payload["memory"]["peak_rss_mb"] >= payload["memory"]["rss_after_parse_mb"] + assert [sample["label"] for sample in payload["memory"]["samples"]] == ["start", "after_parse", "after_stats"] + + +def test_diagnose_command_prints_human_summary(tmp_path): + _init_repo(tmp_path) + (tmp_path / "app.py").write_text("def run():\n return 1\n") + + result = CliRunner().invoke( + main, + [ + "diagnose", + str(tmp_path), + "--language", + "python", + "--backend", + "python", + ], + ) + + assert result.exit_code == 0, result.output + assert "Graph-sitter diagnostics" in result.output + assert "Parse time" in result.output + assert "Memory after parse" in result.output + assert "Peak memory" in result.output + assert "Files" in result.output + assert "1" in result.output + + +def test_diagnose_command_writes_json_output_file(tmp_path): + 
_init_repo(tmp_path) + (tmp_path / "app.py").write_text("def run():\n return 1\n") + output_path = tmp_path / "diagnostics.json" + + result = CliRunner().invoke( + main, + [ + "diagnose", + str(tmp_path), + "--language", + "python", + "--backend", + "python", + "--json", + "--output", + str(output_path), + ], + ) + + assert result.exit_code == 0, result.output + assert result.output == "" + payload = json.loads(output_path.read_text()) + assert payload["command"] == "diagnose" + assert payload["files"] == 1 + assert payload["memory"]["rss_after_parse_mb"] > 0 + + +def test_diagnose_command_rejects_output_without_json(tmp_path): + _init_repo(tmp_path) + (tmp_path / "app.py").write_text("def run():\n return 1\n") + + result = CliRunner().invoke( + main, + [ + "diagnose", + str(tmp_path), + "--output", + str(tmp_path / "diagnostics.json"), + ], + ) + + assert result.exit_code != 0 + assert "--output" in result.output + assert "--json" in result.output