diff --git a/docs/building-with-graph-sitter/at-a-glance.mdx b/docs/building-with-graph-sitter/at-a-glance.mdx index b52c30a4c..ebe0f051c 100644 --- a/docs/building-with-graph-sitter/at-a-glance.mdx +++ b/docs/building-with-graph-sitter/at-a-glance.mdx @@ -126,13 +126,6 @@ Learn how to use Codegen's core APIs to analyze and transform code. > Analyze and manipulate local variable usage and scope. - - Integrate AI assistance into your code transformations. - - Graph-sitter does not automatically provide any context to the LLM by default. It - does not "understand" your codebase, only the context you provide. - - -The context parameter can include: - -- A single symbol (its source code will be provided) -- A list of related symbols -- A dictionary mapping descriptions to symbols/values -- Nested combinations of the above - -### How Context Works - -The AI doesn't automatically know about your codebase. Instead, you can provide relevant context by: - -1. Using GraphSitter's static analysis to gather information: - -```python -function = codebase.get_function("process_data") -context = { - "call_sites": function.call_sites, # Where the function is called - "dependencies": function.dependencies, # What the function depends on - "parent": function.parent, # Class/module containing the function - "docstring": function.docstring, # Existing documentation -} -``` - -2. Passing this information to the AI: - -```python -result = codebase.ai( - "Improve this function's implementation", - target=function, - context=context # AI will see the gathered information -) -``` - -## Common Use Cases - -### Code Generation - -Generate new code or refactor existing code: - -```python -# Break up a large function -function = codebase.get_function("large_function") -new_code = codebase.ai( - "Break this function into smaller, more focused functions", - target=function -) -function.edit(new_code) - -# Generate a test -my_function = codebase.get_function("my_function") -test_code = codebase.ai( - f"Write a test for the function {my_function.name}", - target=my_function -) -my_function.insert_after(test_code) -``` - -### Documentation - -Generate and format documentation: - -```python -# Generate docstrings for a class -class_def = codebase.get_class("MyClass") -for method in class_def.methods: - docstring = codebase.ai( - "Generate a docstring describing this method", - target=method, - context={ - "class": class_def, - "style": "Google docstring format" - } - ) - method.set_docstring(docstring) -``` - -### Code Analysis and Improvement - -Use AI to analyze and improve code: - -```python -# Improve function names -for function in codebase.functions: - if codebase.ai( - "Does this function name clearly describe its purpose? Answer yes/no", - target=function - ).lower() == "no": - new_name = codebase.ai( - "Suggest a better name for this function", - target=function, - context={"call_sites": function.call_sites} - ) - function.rename(new_name) -``` - -### Contextual Modifications - -Make changes with full context awareness: - -```python -# Refactor a class method -method = codebase.get_class("MyClass").get_method("target_method") -new_impl = codebase.ai( - "Refactor this method to be more efficient", - target=method, - context={ - "parent_class": method.parent, - "call_sites": method.call_sites, - "dependencies": method.dependencies - } -) -method.edit(new_impl) -``` - -## Best Practices - -1. **Provide Relevant Context** - - ```python - # Good: Providing specific, relevant context - summary = codebase.ai( - "Generate a summary of this method's purpose", - target=method, - context={ - "class": method.parent, # Class containing the method - "usages": list(method.usages), # How the method is used - "dependencies": method.dependencies, # What the method depends on - "style": "concise" - } - ) - - # Bad: Missing context that could help the AI - summary = codebase.ai( - "Generate a summary", - target=method # AI only sees the method's code - ) - ``` - -2. **Gather Comprehensive Context** - - ```python - # Gather relevant information before AI call - def get_method_context(method): - return { - "class": method.parent, - "call_sites": list(method.call_sites), - "dependencies": list(method.dependencies), - "related_methods": [m for m in method.parent.methods - if m.name != method.name] - } - - # Use gathered context in AI call - new_impl = codebase.ai( - "Refactor this method to be more efficient", - target=method, - context=get_method_context(method) - ) - ``` - -3. **Handle AI Limits** - - ```python - # Set custom AI request limits for large operations - codebase.set_session_options(max_ai_requests=200) - ``` - -4. **Review Generated Code** - ```python - # Generate and review before applying - new_code = codebase.ai( - "Optimize this function", - target=function - ) - print("Review generated code:") - print(new_code) - if input("Apply changes? (y/n): ").lower() == 'y': - function.edit(new_code) - ``` - -## Limitations and Safety - -- The AI doesn't automatically know about your codebase - you must provide relevant context -- AI-generated code should always be reviewed -- Default limit of 150 AI requests per codemod execution - - Use [set_session_options(...)](../api-reference/core/Codebase#set-session-options) to adjust limits: - ```python - codebase.set_session_options(max_ai_requests=200) - ``` - - You can also use `codebase.set_session_options` to increase the execution time and the number of operations allowed in a session. This is useful for handling larger tasks or more complex operations that require additional resources. Adjust the `max_seconds` and `max_transactions` parameters to suit your needs: - ```python - codebase.set_session_options(max_seconds=300, max_transactions=500) - ``` - \ No newline at end of file diff --git a/docs/building-with-graph-sitter/codegen-with-wsl.mdx b/docs/building-with-graph-sitter/codegen-with-wsl.mdx index d2261fd24..da82c74a6 100644 --- a/docs/building-with-graph-sitter/codegen-with-wsl.mdx +++ b/docs/building-with-graph-sitter/codegen-with-wsl.mdx @@ -71,5 +71,5 @@ If you plan on using Codegen's MCP (Model Context Protocol) or LSP (Language Ser For any additional issues, see the [troubleshooting guide](/introduction/installation#troubleshooting). -For more help, join our [community Slack](/introduction/community) or check the [FAQ](/introduction/faq). +For more help, check the [FAQ](/introduction/faq). diff --git a/docs/building-with-graph-sitter/comments-and-docstrings.mdx b/docs/building-with-graph-sitter/comments-and-docstrings.mdx index 7c30553b6..759afd2c4 100644 --- a/docs/building-with-graph-sitter/comments-and-docstrings.mdx +++ b/docs/building-with-graph-sitter/comments-and-docstrings.mdx @@ -185,12 +185,6 @@ new_docstring = codebase.ai( function.set_docstring(new_docstring) ``` - - Learn more about AI documentation capabilities in our [Documentation - Guide](/tutorials/creating-documentation) and [LLM Integration - Guide](/building-with-graph-sitter/calling-out-to-llms). - - ### Documentation Coverage You can analyze and improve documentation coverage across your codebase: @@ -206,9 +200,4 @@ for function in codebase.functions: coverage = (documented / total * 100) if total > 0 else 0 print(f"Documentation coverage: {coverage:.1f}%") -``` - - - Check out the [Documentation Guide](/tutorials/creating-documentation) for - more advanced coverage analysis and bulk documentation generation. - +``` \ No newline at end of file diff --git a/docs/building-with-graph-sitter/reusable-codemods.mdx b/docs/building-with-graph-sitter/reusable-codemods.mdx index c5cf6344f..fa3211f91 100644 --- a/docs/building-with-graph-sitter/reusable-codemods.mdx +++ b/docs/building-with-graph-sitter/reusable-codemods.mdx @@ -42,7 +42,7 @@ gs create rename-function . -d "Rename the getUserData function to fetchUserProf This will: 1. Generate an implementation based on your description -2. Create a custom system prompt that you can provide to an IDE chat assistant (learn more about [working with AI](/introduction/work-with-ai)) +2. Create a custom system prompt that you can provide to an IDE chat assistant 3. Place both files in the codemod directory ## Running Codemods diff --git a/docs/introduction/about.mdx b/docs/introduction/about.mdx index c65f9cdeb..383d98152 100644 --- a/docs/introduction/about.mdx +++ b/docs/introduction/about.mdx @@ -40,7 +40,7 @@ We believe in the power of open source software. Our core library, [codegen](htt We're hiring! Join us in building the future of code transformation. - + Connect with other developers and share your Graph-sitter experiences. diff --git a/docs/introduction/advanced-settings.mdx b/docs/introduction/advanced-settings.mdx deleted file mode 100644 index 4aa48cc9c..000000000 --- a/docs/introduction/advanced-settings.mdx +++ /dev/null @@ -1,404 +0,0 @@ ---- -title: "Advanced Settings" -sidebarTitle: "Advanced Settings" -icon: "memory" -iconType: "solid" ---- - -Codegen's [Codebase](/api-reference/core/Codebase) constructor accepts a `CodebaseConfig` object which is used to configure more advanced behaviors of the graph construction process. - -These flags are helpful for debugging problematic repos, optimizing Codegen's performance, or testing unreleased or experimental (potentially backwards-breaking) features. - - -**These are considered experimental features and may change in the future!** - -As such, they may have little to no testing or documentation. Many of these flags may also be unsupported in the future! - -If you need help, please visit our [community](/introduction/community). - - - -These configuration options are defined in [src/codegen/configs/models/codebase.py](https://github.com/codegen-sh/graph-sitter/blob/develop/src/codegen/configs/models/codebase.py). - - -# Usage - -You can customize the behavior of the graph construction process when initializing a [Codebase](/api-reference/core/Codebase) by passing a `CodebaseConfig` object with the desired configuration flags. - -```python -from graph_sitter import Codebase -from graph_sitter.configs import CodebaseConfig - -# Initialize a Codebase with custom configuration -codebase = Codebase( - "", - config=CodebaseConfig( - flag1=..., - flag2=..., - ... - ) -) -``` - -# Table of Contents - -- [debug](#flag-debug) -- [verify-graph](#flag-verify-graph) -- [track-graph](#flag-track-graph) -- [method-usages](#flag-method-usages) -- [sync-enabled](#flag-sync-enabled) -- [full-range-index](#flag-full-range-index) -- [ignore-process-errors](#flag-ignore-process-errors) -- [disable-graph](#flag-disable-graph) -- [disable-file-parse](#flag-disable-file-parse) -- [exp-lazy-graph](#flag-exp-lazy-graph) -- [generics](#flag-generics) -- [import-resolution-paths](#flag-import-resolution-paths) -- [import-resolution-overrides](#flag-import-resolution-overrides) -- [py-resolve-syspath](#flag-py-resolve-syspath) -- [ts-dependency-manager](#flag-ts-dependency-manager) -- [ts-language-engine](#flag-ts-language-engine) -- [v8-ts-engine](#flag-v8-ts-engine) -- [unpacking-assignment-partial-removal](#flag-unpacking-assignment-partial-removal) - -# Configuration Flags - -## Flag: `debug` -> **Default: `False`** - -Enables verbose logging for debugging purposes. In its current form, it enables: -- Verbose logging when adding nodes to the graph -- Verbose logging during initial file parsing -- Additional assertions on graph creation -- Additional (costly) debug metrics on codebase construction -- etc. - - -This flag may be very noisy and significantly impact performance. It is generally not recommended to use. - - -## Flag: `verify_graph` -> **Default: `False`** - -Adds assertions for graph state during reset resync. Used to test and debug graph desyncs after a codebase reset. - -Runs `post_reset_validation` after a reset resync. - - -This is an internal debug flag. - - -## Flag: `track_graph` -> **Default: `False`** - -Keeps a copy of the original graph before a resync. Used in conjunction with `verify_graph` to test and debug graph desyncs. - -Original graph is saved as `ctx.old_graph`. - - -This is an internal debug flag. - - -## Flag: `method_usages` -> **Default: `True`** - -Enables and disables resolving method usages. - -**Example Codebase:** -```python -class Foo: - def bar(): - ... - -obj = Foo() -obj.bar() # Method Usage -``` - -**Codemod with `method_usages` on:** -```python -bar_func = codebase.get_class("Foo").get_method("bar") -len(bar_func.usages) # 1 -bar_func.usages # [obj.bar()] -``` - -**Codemod with `method_usages` off:** -```python -bar_func = codebase.get_class("Foo").get_method("bar") -len(bar_func.usages) # 0 -bar_func.usages # [] -``` - -Method usage resolution could be disabled for a marginal performance boost. However, it is generally recommended to leave it enabled. - -## Flag: `sync_enabled` -> **Default: `False`** - -Enables or disables graph sync during `codebase.commit`. - - -Implementation-specific details on sync graph can be found [here](https://github.com/codegen-sh/graph-sitter/blob/develop/architecture/6.%20incremental-computation/C.%20Graph%20Recomputation.md). - - -This section won't go into the specific details of sync graph, but the general idea is that enabling sync graph will update the Codebase object to whatever new changes were made. - -**Example with `sync_enabled` on:** -```python -file = codebase.get_file(...) -file.insert_after("foobar = 1") -codebase.commit() - -foobar = codebase.get_symbol("foobar") -assert foobar # foobar is available after commit / graph sync -``` - -**Example with `sync_enabled` disabled:** -```python -file = codebase.get_file(...) -file.insert_after("foobar = 1") - -foobar = codebase.get_symbol("foobar", optional=True) -assert not foobar # foobar is not available after commit -``` - - -Enabling sync graph will have a performance impact on codebase commit, but will also unlock a bunch of operations that were previously not possible. - - -## Flag: `full_range_index` -> **Default: `False`** - -By default, Codebase maintains an internal range-to-node index for fast lookups. (i.e. `bytes 120 to 130 maps to node X`). -For optimization purposes, this only applies to nodes defined and handled by `parser.py`. - -Enabling `full_range_index` will create an additional index that maps **all** tree-sitter ranges to nodes. -This can be useful for debugging or when you need to build any applications that require a full range-to-node index (i.e. a codebase tree lookup). - - -This flag **significantly** increases memory usage! - - -## Flag: `ignore_process_errors` -> **Default: `True`** - -Controls whether to ignore errors that occur during external process execution (such as dependency manager or language engine). - -Disabling `ignore_process_errors` would make Graph-sitter fail on errors that would otherwise be logged then ignored. - -## Flag: `disable_graph` -> **Default: `False`** - -Disables the graph construction process. Any operations that require the graph will no longer work. (In other words, this turns off import resolution and usage/dependency resolution) - -Functions that operate purely on AST such as getting and editing parameters or modifying function and class definitions will still work. - - -For codemods that do not require the graph (aka only AST/Syntax-level changes), **disabling graph parse could yield a 30%-40% decrease in parse time and memory usage**! - - -## Flag: `disable_file_parse` -> **Default: `False`** - -Disables **ALL** parsing, including file and graph parsing. This essentially treats all codebases as the "UNSUPPORTED" language mode. - -Nearly all functions except for editing primitives like `codebase.get_file` and `file.edit` will no longer work. - - -This flag is useful for any usages of Graph-sitter that do **NOT** require any AST/CST/Graph parsing. (i.e. using Graph-sitter purely as a file editing harness) - -If this is your use case, this **could decrease parse and memory usage by 95%.** - - -## Flag: `exp_lazy_graph` -> **Default: `False`** - -This experimental flag pushes the graph creation back until the graph is needed. This is an experimental feature and may have some unintended consequences. - -**Example Codemod:** -```python -from graph_sitter import Codebase -from graph_sitter.configs import CodebaseConfig - -# Enable lazy graph parsing -codebase = Codebase("", config=CodebaseConfig(exp_lazy_graph=True)) - -# The codebase object will be created immediately with no parsing done -# These all do not require graph parsing -codebase.files -codebase.directories -codebase.get_file("...") - -# These do require graph parsing, and will create the graph only if called -codebase.get_function("...") -codebase.get_class("...") -codebase.imports -``` - - -This may have a very slight performance boost. Use at your own risk! - - -## Flag: `generics` -> **Default: `True`** - -Enables and disables generic type resolution. - -**Example Codebase:** -```python -class Point: - def scale(cls, n: int): - pass - -class List[T](): - def pop(self) -> T: - ... - -l: List[Point] = [] -l.pop().scale(1) # Generic Usage -``` - -**Codemod with `generics` on:** -```python -bar_func = codebase.get_class("Point").get_method("scale") -len(bar_func.usages) # 1 -bar_func.usages # [l.pop().scale(1)] -``` - -**Codemod with `generics` off:** -```python -bar_func = codebase.get_class("Point").get_method("scale") -len(bar_func.usages) # 0 -bar_func.usages # [] -``` - - -Generic resolution is still largely WIP and experimental, and may not work in all cases. In some rare circumstances, disabling generics may result in a significant performance boost. - - -## Flag: `import_resolution_paths` -> **Default: `[]`** - -Controls alternative paths to resolve imports from. - -**Example Codebase:** -```python -# a/b/c/src.py -def update(): - pass - -# consumer.py -from c import src as operations - -operations.update() -``` - -**Codemod:** -```python -codebase.ctx.config.import_resolution_paths = ["a/b"] -``` - -## Flag: `import_resolution_overrides` -> **Default: `{}`** - -Controls import path overrides during import resolution. - -**Example** -`from a.b.c import d` with the override `a/b` -> `foo/bar` will internally resolve the import as `from foo.bar.c import d`. - -## Flag: `py_resolve_syspath` -> **Default: `False`** - -Enables and disables resolution of imports from `sys.path`. - - -For this to properly work, you must also set `allow_external` to `True`. - - -## Flag: `allow_external` -> **Default: `False`** - -Enables resolving imports, files, modules, and directories from outside of the repo path. - - -Turning this flag off may allow for bad actors to access files outside of the repo path! Use with caution! - - -## Flag: `ts_dependency_manager` -> **Default: `False`** - - -**This is an internal flag used for Codegen Cloud and should not be used externally!** - -This flag **WILL** nuke any existing `node_modules` folder! - - - -This flag also assumes many constants for Codegen Cloud. Very likely this will not work if run locally. - -Instead, just install `node_modules` as normal (either through `npm`, `pnpm`, or `yarn`) and skip this setting! - - -Enables Codegen's internal dependency installer for TypeScript. This will modify `package.json` and install the bare minimum set of installable dependencies. - - -More documentation on TypeScript dependency manager can be found [here](https://github.com/codegen-sh/graph-sitter/blob/develop/architecture/external/dependency-manager.md) - - -## Flag: `ts_language_engine` -> **Default: `False`** - - -This feature was built primarily with Codegen Cloud in mind. As such, this assumes a valid NodeJS and TypeScript environment. - - -Enables using the TypeScript compiler to extract information from the codebase. Enables commands such as `inferred_return_type`. - - -This will increase memory usage and parsing time. Larger repos may even hit resource constraints with the bundled TypeScript compiler integration. - - -## Flag: `v8_ts_engine` -> **Default: `False`** - - -This feature flag requires `ts_language_engine` to be enabled as well. - - -Enables using the **V8-based TypeScript compiler** to extract information from the codebase. Enables commands such as `inferred_return_type`. - -The V8 implementation (as opposed to the default external-process based implementation) is less stable, but provides the entire TypeScript API to be used from within Codegen. - - -This will increase memory usage and parsing time. Larger repos may even hit resource constraints with the V8-based TypeScript compiler integration. - - -## Flag: `unpacking_assignment_partial_removal` -> **Default: `False`** - -Enables smarter removal of unpacking assignments. - -**Example Codebase:** -```python -a, b, c = (1, 2, 3) -``` - -**Codemod with `unpacking_assignment_partial_removal` on:** -```python -file = codebase.get_file(...) -b = file.get_symbol("b") -b.remove() -codebase.commit() - -file.symbols # [a, c] -file.source # "a, c = (1, 3)" -``` - -**Codemod with `unpacking_assignment_partial_removal` off:** -```python -file = codebase.get_file(...) -b = file.get_symbol("b") -b.remove() -codebase.commit() - -file.symbols # [] -file.source # "" -``` diff --git a/docs/introduction/community.mdx b/docs/introduction/community.mdx deleted file mode 100644 index 9e083aea2..000000000 --- a/docs/introduction/community.mdx +++ /dev/null @@ -1,48 +0,0 @@ ---- -title: "Community & Contributing" -sidebarTitle: "Community" -icon: "people-group" -iconType: "solid" ---- - -import { - COMMUNITY_SLACK_URL, - CODEGEN_SDK_GITHUB_URL, -} from "/snippets/links.mdx"; - -Join the growing Graph-sitter community! We're excited to have you be part of our journey to make codebase manipulation and transformation more accessible. - - - - Connect with the community, get help, and share your Graph-sitter projects in our - active Slack workspace. - - - Star us on GitHub, report issues, submit PRs, and contribute to the project. - - - Follow us for updates, tips, and community highlights. - - - Learn how to use Graph-sitter effectively with our comprehensive guides. - - - - - Please help us improve this library and documentation by submitting a PR! - - -## Contributing - -We welcome contributions of all kinds! Whether you're fixing a typo in documentation, reporting a bug, or implementing a new feature, we appreciate your help in making Graph-sitter better. - -Check out our [Contributing Guide](https://github.com/codegen-sh/graph-sitter/blob/develop/CONTRIBUTING.md) on GitHub to learn how to: - -- Set up your development environment -- Submit pull requests -- Report issues -- Contribute to documentation diff --git a/docs/introduction/faq.mdx b/docs/introduction/faq.mdx index 7f2f6eaa5..28c210948 100644 --- a/docs/introduction/faq.mdx +++ b/docs/introduction/faq.mdx @@ -17,7 +17,7 @@ iconType: "solid" Support](/building-with-graph-sitter/language-support) guide. - Interested in adding support for your language? [Let us know](https://x.com/codegen) or [contribute](/introduction/community)! + Interested in adding support for your language? [Let us know](https://x.com/codegen) or [contribute](https://github.com/codegen-sh/graph-sitter)! diff --git a/docs/introduction/getting-started.mdx b/docs/introduction/getting-started.mdx index 7c6dfce85..a35a5cc7a 100644 --- a/docs/introduction/getting-started.mdx +++ b/docs/introduction/getting-started.mdx @@ -30,10 +30,6 @@ gs notebook --demo exploring right away! - - Prefer working in your IDE? See [IDE Usage](/introduction/ide-usage) - - ## Initializing a Codebase Instantiating a [Codebase](/api-reference/core/Codebase) will automatically parse a codebase and make it available for manipulation. @@ -303,8 +299,6 @@ codebase = Codebase( ) ``` -To learn more about available settings, see the [Advanced Settings](/introduction/advanced-settings) page. - These are considered experimental and unstable features that may be removed or changed in the future. @@ -328,15 +322,4 @@ These are considered experimental and unstable features that may be removed or c Understand key concepts like working with files, functions, imports, and the call graph to effectively manipulate code. - - Iterate locally with your favorite IDE, work with a debugger and build sophisticated codemods - - - Learn how to use Graph-sitter with Cursor, Devin, Windsurf, and more. - - diff --git a/docs/introduction/how-it-works.mdx b/docs/introduction/how-it-works.mdx index 05c118f2a..33d2e40de 100644 --- a/docs/introduction/how-it-works.mdx +++ b/docs/introduction/how-it-works.mdx @@ -86,4 +86,4 @@ Graph-sitter is just getting started, and we're excited about the possibilities - Adding new transformations - Improving documentation -Check out our [community guide](/introduction/community) to get involved! +Check out the [GitHub repository](https://github.com/codegen-sh/graph-sitter) to get involved! diff --git a/docs/introduction/ide-usage.mdx b/docs/introduction/ide-usage.mdx deleted file mode 100644 index ebbb1cba0..000000000 --- a/docs/introduction/ide-usage.mdx +++ /dev/null @@ -1,178 +0,0 @@ ---- -title: "Using Graph-sitter in Your IDE" -sidebarTitle: "IDE Usage" -icon: "window" -iconType: "solid" ---- - -Get up and running with Graph-sitter programs in IDEs like VSCode, Cursor and PyCharm. - -Make sure to [install and initialize](/introduction/installation) Graph-sitter with `gs init` - -## Configuring your IDE Interpreter - -gs creates a custom Python environment in `.codegen/.venv`. Configure your IDE to use this environment for the best development experience. - - - - 1. Install the VSCode Python Extensions for LSP and debugging support. We recommend Python, Pylance and Python Debugger for the best experience. - - 2. Open the Command Palette (Cmd/Ctrl + Shift + P) - 3. Type "Python: Select Interpreter" - - 4. Choose "Enter interpreter path" - 5. Navigate to and select: - ```bash - .codegen/.venv/bin/python - ``` - - Alternatively, create a `.vscode/settings.json`: - ```json - { - "python.defaultInterpreterPath": "${workspaceFolder}/.codegen/.venv/bin/python", - "python.analysis.extraPaths": [ - "${workspaceFolder}/.codegen/.venv/lib/python3.12/site-packages" - ] - } - ``` - - - - 1. Open PyCharm Settings/Preferences - 2. Navigate to "Project > Python Interpreter" - 3. Click the gear icon ⚙️ and select "Add" - 4. Choose "Existing Environment" - 5. Set interpreter path to: - ```bash - .codegen/.venv/bin/python - ``` - - - - - -## MCP Server Setup -This is an optional step but highly recommended if your IDE supports MCP support and you use AI Agents. -The MCP server is a local server that allows your AI Agent to interact with the Graph-sitter specific tools, -it will allow an agent to: -- ask an expert to create a codemod -- improve a codemod -- get setup instructions - -### IDE Configuration -#### Cline -Add this to your cline_mcp_settings.json: -```json -{ - "mcpServers": { - "graph_sitter.cli": { - "command": "uv", - "args": [ - "--directory", - "/codegen-sdk/src/graph_sitter.cli/mcp", - "run", - "server.py" - ] - } - } -} -``` - - -#### Cursor: -Under the `Settings` > `Feature` > `MCP Servers` section, click "Add New MCP Server" and add the following: - -``` -Name: codegen-mcp -Type: Command -Command: uv --directory /codegen-sdk/src/graph_sitter.cli/mcp run server.py -``` - - -## Index Graph-sitter Docs -#### Cursor: -If you use Cursor you'll be able to configure the IDE to index the Graph-sitter docs. To do so go to `Settings` > `Features` > `Docs` -and then click on `Add new docs`. We recommend using this url to index the API reference: -``` -https://graph-sitter.com/api-reference/index -``` - - -## Create a New Codemod - -Generate the boilerplate for a new code manipulation program using [gs create](/cli/create): - -```bash -gs create organize-types \ - -d "Move all TypeScript types to \ - into a centralized types.ts file" -``` - - - Passing in `-d --description` will get an LLM expert to compose an initial version for you. This requires a Github account registered on [codegen.sh](https://codegen.sh) - - -This will: -1. Create a new codemod in `.codegen/codemods/organize_types/` -2. Generate a custom `system-prompt.txt` based on your task -3. Set up the basic structure for your program - - -The generated codemod includes type hints and docstrings, making it easy to get IDE autocompletion and documentation. - - -## Iterating with Chat Assistants - -When you do `gs init`, you will receive a [system prompt optimized for AI consumption](/introduction/work-with-ai) at `.codegen/codegen-system-prompt.txt`. - -If you reference this file in "chat" sessions with Copilot, Cursor, Cody, etc., the assistant will become fluent in Codegen. - - - - Collaborating with Cursor's assistant and the Graph-sitter system prompt - - -In addition, when you [create](/cli/create) a codemod with "-d", Graph-sitter generates an optimized system prompt in `.codegen/codemods/{name}/{name}-system-prompt.txt`. This prompt contains: -- Relevant Codegen API documentation -- Examples of relevant transformations -- Context about your specific task - - -You can also drag and drop the system prompt ([available here](/introduction/work-with-ai))file directly into chat windows like ChatGPT or Claude for standalone help. - - -## Running and Testing Codemods - -```bash -# Run => write changes to disk -gs run organize-types - -# Reset changes on disk -gs reset -``` - -You can also run the program directly via `.codegen/.venv/bin/python path/to/codemod.py` or via your editor's debugger - -## Viewing Changes - -We recommend viewing changes in your IDE's native diff editor. - - -## What's Next - - - - See real-world examples of codemods in action. - - - Learn about Codegen's core concepts and features - - diff --git a/docs/introduction/installation.mdx b/docs/introduction/installation.mdx index 04284eca8..1fc807c67 100644 --- a/docs/introduction/installation.mdx +++ b/docs/introduction/installation.mdx @@ -5,186 +5,72 @@ icon: "download" iconType: "solid" --- -Install and set up Graph-sitter in your development environment. +Graph-sitter runs as a single command with [`uvx`](https://docs.astral.sh/uv/) — no global install or project setup required. #### We currently support: -- Running Graph-sitter in Python 3.12 - 3.13 (recommended: Python 3.13+) -- macOS and Linux - - macOS is supported - - Linux is supported on x86_64 and aarch64 with glibc 2.34+ - - Windows is supported via WSL. See [here](https://graph-sitter.com/building-with-graph-sitter/codegen-with-wsl) for more details. -- Python, Typescript, Javascript and React codebases +- Python 3.12 - 3.13 (recommended: Python 3.13+) +- macOS and Linux (x86_64 and aarch64 with glibc 2.34+); Windows via WSL +- Python, TypeScript, JavaScript, and React codebases ## Prerequisites -We recommend using [uv](https://github.com/astral-sh/uv) for installation. If you haven't installed `uv` yet: -```bash -curl -LsSf https://astral.sh/uv/install.sh | sh -``` - -## Installing Graph-sitter +Install [uv](https://github.com/astral-sh/uv) if you don't already have it: ```bash -uv tool install graph-sitter --python 3.13 -``` - - - -This makes the `graph-sitter` command available globally in your terminal, while keeping its dependencies isolated. - - -Verify the installation before parsing a large repository: - -```bash -graph-sitter doctor --json -graph-sitter parse . --language python --backend python --format summary +curl -LsSf https://astral.sh/uv/install.sh | sh ``` -Use `--language typescript` for TypeScript, JavaScript, and React repositories. - -## One-Shot uvx Usage +## Run with uvx -Use `uvx` when you want to run Graph-sitter without installing a global tool: +`uvx` fetches and runs `graph-sitter` in an isolated environment, so there's nothing to install or keep up to date: ```bash +# Verify your environment uvx --python 3.13 graph-sitter doctor --json -uvx --python 3.13 graph-sitter parse . --language auto --backend auto --fallback python --format json -``` - -For branch-built Rust wheel validation before a public release, point `uvx` at -the wheel artifact: -```bash -uvx --python 3.13 --from dist/.whl graph-sitter doctor --backend rust --language python --json -uvx --python 3.13 --from dist/.whl graph-sitter parse . --language python --backend rust --fallback error --format json +# Parse a repository into the codebase graph +uvx --python 3.13 graph-sitter parse . --language auto --backend auto --fallback python --format summary ``` -See [uvx workflows](/cli/uvx) for parse, run, transform, `--subdir`, and -release-gate examples. - -## Quick Start - -Let's walk through a minimal example of using Graph-sitter in a project: - -1. Navigate to your repository: - ```bash - cd path/to/your/project - ``` +Use `--language typescript` for TypeScript, JavaScript, and React repositories, or `--language auto` to let graph-sitter detect it. -2. Parse the repository without initialization: - ```bash - graph-sitter parse . --language python --backend python --format summary - ``` +## Transform - For TypeScript, JavaScript, and React repositories: - ```bash - graph-sitter parse . --language typescript --backend auto --fallback python --format summary - ``` +Run a codemod in check mode, then write once the diff looks right: -3. Initialize Graph-sitter in your project with [graph-sitter init](/cli/init): - ```bash - graph-sitter init - ``` - - This creates a `.codegen/` directory with: - ```bash - .codegen/ - ├── .venv/ # Python virtual environment (gitignored) - ├── config.toml # Project configuration - ├── codemods/ # Your codemod implementations - ├── jupyter/ # Jupyter notebooks for exploration - └── codegen-system-prompt.txt # AI system prompt - ``` - -4. Create your first codemod with [graph-sitter create](/cli/create): - ```bash - graph-sitter create organize-imports \ - -d "Sort and organize imports according to PEP8" - ``` - - The `-d` flag in `graph-sitter create` generates an AI-powered implementation. This requires a Github account registered on [codegen.sh](https://codegen.sh) - - - - -5. Preview your codemod with [graph-sitter run](/cli/run): - ```bash - graph-sitter run organize-imports . --check - ``` - -6. Apply the codemod after reviewing the diff: - ```bash - graph-sitter run organize-imports . --write - ``` - -7. Reset any filesystem changes (excluding `.codegen/*`) with [graph-sitter reset](/cli/reset): - ```bash - graph-sitter reset - ``` - -## Rust Backend +```bash +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --check +uvx --python 3.13 graph-sitter transform ./codemods/rename.py:rename . --write +``` -Python remains the authoring shell. The Rust backend is an opt-in compact -parse/index backend for supported graph and codemod surfaces. +## Rust backend (optional) -Use strict Rust mode when unsupported behavior should fail loudly: +Python remains the authoring shell. The Rust backend is an opt-in compact parse/index backend for supported graph and codemod surfaces. Use strict Rust mode when unsupported behavior should fail loudly: ```bash -graph-sitter parse . --language python --backend rust --fallback error --format json +uvx --python 3.13 graph-sitter parse . --language python --backend rust --fallback error --format json ``` -Use automatic mode with Python fallback when a working result is more important -than proving the Rust path: +To validate a branch-built Rust wheel before a release, point `uvx` at the wheel artifact: ```bash -graph-sitter parse . --language auto --backend auto --fallback python --format json -``` - -Python API users can select the same backend behavior through -`CodebaseConfig`: - -```python -from graph_sitter.configs.models.codebase import ( - CodebaseConfig, - GraphBackend, - RustFallbackMode, -) -from graph_sitter.core.codebase import Codebase - -codebase = Codebase( - "./", - config=CodebaseConfig( - graph_backend=GraphBackend.RUST, - rust_fallback=RustFallbackMode.ERROR, - ), -) +uvx --python 3.13 --from dist/.whl graph-sitter parse . --language python --backend rust --fallback error --format json ``` -Run [`doctor`](/cli/doctor) before relying on strict Rust mode in CI, -benchmarks, or release validation. +See [uvx workflows](/cli/uvx) for parse, run, transform, `--subdir`, and release-gate examples. ## Troubleshooting -Having issues? Here are some common problems and their solutions: - -- **I'm hitting an UV error related to `[[ packages ]]`**: This means you're likely using an outdated version of UV. Try updating to the latest version with: `uv self update`. -- **I'm hitting an error about `No module named 'graph_sitter.sdk.extensions.utils'`**: The compiled cython extensions are out of sync. Update them with `uv sync --reinstall-package graph-sitter`. -- **I'm hitting a `RecursionError: maximum recursion depth exceeded` error while parsing my codebase**: If you are using python 3.12, try upgrading to 3.13. If you are already on 3.13, try upping the recursion limit with `sys.setrecursionlimit(10000)`. +- **UV error related to `[[ packages ]]`**: you're likely on an outdated UV. Update with `uv self update`. +- **`RecursionError: maximum recursion depth exceeded` while parsing**: upgrade to Python 3.13, or raise the limit with `sys.setrecursionlimit(10000)`. -For more help, join our [community Slack](/introduction/community) or check the [FAQ](/introduction/faq). +For more help, check the [FAQ](/introduction/faq). ## Next Steps - - Learn how to use Graph-sitter effectively in VSCode, Cursor, and other IDEs. - Follow step-by-step tutorials for common code transformation tasks. - - Leverage AI assistants like Copilot, Cursor and Devin - - Learn more about building with Graph-sitter - - + Learn more about building with Graph-sitter. + diff --git a/docs/introduction/overview.mdx b/docs/introduction/overview.mdx index 90a5dbd32..d2ba9d0fc 100644 --- a/docs/introduction/overview.mdx +++ b/docs/introduction/overview.mdx @@ -1,6 +1,7 @@ --- title: "Graph-sitter" sidebarTitle: "Overview" +sidebarOrder: 0 icon: "robot" iconType: "solid" --- diff --git a/docs/introduction/work-with-ai.mdx b/docs/introduction/work-with-ai.mdx deleted file mode 100644 index bda42c322..000000000 --- a/docs/introduction/work-with-ai.mdx +++ /dev/null @@ -1,86 +0,0 @@ ---- -title: "Working with AI" -sidebarTitle: "AI Integration" -icon: "microchip" -iconType: "solid" ---- - -Graph-sitter is designed to be used with AI assistants. This document describes how to use Graph-sitter with common AI tools, including Copilot, Cursor, Devin and more. - -## System Prompt - -Graph-sitter provides a `.txt` file that you can drag-and-drop into any chat assistant. This is roughly 60k tokens and will enable chat assistants like, ChatGPT, Claude 3.5 etc. to build effectively with Graph-sitter. - -import { - CODEGEN_SYSTEM_PROMPT -} from "/snippets/links.mdx"; - - - Download System Prompt - - -Learn about leveraging this in IDE chat assistants like Cursor [here](/introduction/ide-usage#iterating-with-chat-assistants) - -## Generating System Prompts - -The [graph_sitter.cli](/cli/about) provides commands to generate `.md` files that can be fed to any AI assistant for more accurate and contextual help. - -When you create a new codemod via [gs create](/cli/create): - -```bash -gs create delete-dead-imports . --description "Delete unused imports" -``` - -Graph-sitter automatically generates an optimized ["system prompt"](https://news.ycombinator.com/item?id=37880023) that includes: - -- An introduction to Graph-sitter -- Graph-sitter API documentation -- Examples of relevant transformations - -You can find this generated prompt in the `.codegen/prompts/-system-prompt.md` file. - - - All contents of the `.codegen/prompts` directory are by default ignored the - `.gitignore` file. after running [gs init](/cli/init) - - -This `.md` file can be used with any AI assistant (Claude, GPT-4, etc.) to get more accurate and contextual help. - -## Example Workflow - - - - Use the [create command](/cli/create) with a detailed description of what you want to accomplish: - ```bash - gs create modernize-components . --description "Convert class components to functional components with hooks" - ``` - - - Check the AI context that Graph-sitter generated for your transformation: ```bash - cat codegen-sh/codemods/modernize-components/prompt.md ``` - - - - Reference your codemod when asking questions to get contextual help: ``` - @codegen-sh/codemods/modernize-components How should I handle - componentDidMount? ``` - - - - The AI will understand you're working on React modernization and provide relevant suggestions about using useEffect hooks and other modern React patterns. - - - -## Copilot, Cursor and Windsurf (IDEs) - -When using IDE chat assistants, you can leverage Graph-sitter's context by mentioning your codemod in composer mode: - -```bash -@.codegen/codemods/upgrade-react18 @.codegen/prompts/system-prompt.md -``` - -This will ensure that the IDE's native chat model is aware of the APIs and common patterns for Graph-sitter. - -## Devin, OpenHands and Semi-autonomous Code Agents - -Coming soon! diff --git a/docs/organizations/get-organizations.mdx b/docs/organizations/get-organizations.mdx deleted file mode 100644 index fb20cdd62..000000000 --- a/docs/organizations/get-organizations.mdx +++ /dev/null @@ -1,3 +0,0 @@ ---- -openapi: get /v1/organizations ---- \ No newline at end of file diff --git a/docs/tutorials/at-a-glance.mdx b/docs/tutorials/at-a-glance.mdx index 89bc03c52..02df7e7d0 100644 --- a/docs/tutorials/at-a-glance.mdx +++ b/docs/tutorials/at-a-glance.mdx @@ -118,16 +118,9 @@ Explore our tutorials to learn how to use Graph-sitter for various code transfor -## Documentation & AI +## AI - - Generate JSDoc comments, README files, and API documentation. - View the full code in our [examples repository](https://github.com/codegen-sh/graph-sitter/tree/develop/src/graph_sitter.extensions/mcp) - - -## Setup: -Install the MCP python library -``` -uv pip install mcp -``` - -## Step 1: Setting Up Your MCP Server - -First, let's create a basic MCP server using Codegen's MCP tools: - -server.py -```python -from graph_sitter import Codebase -from mcp.server.fastmcp import FastMCP -from typing import Annotated -# Initialize the codebase -codebase = Codebase.from_repo(".") - -# create the MCP server using FastMCP -mcp = FastMCP(name="demo-mcp", instructions="Use this server for semantic search of codebases") - - -if __name__ == "__main__": - # Initialize and run the server - print("Starting demo mpc server...") - mcp.run(transport="stdio") - -``` - -## Step 2: Create the search tool - -Let's implement the semantic search tool. - -server.py -```python -from graph_sitter.extensions.tools.semantic_search import semantic_search - -.... - -@mcp.tool('codebase_semantic_search', "search codebase with the provided query") -def search(query: Annotated[str, "search query to run against codebase"]): - codebase = Codebase("provide location to codebase", language="provide codebase Language") - # use the semantic search tool from graph_sitter.extensions.tools OR write your own - results = semantic_search(codebase=codebase, query=query) - return results - -.... -``` - -## Run Your MCP Server - -You can run and inspect your MCP server with: - -``` -mcp dev server.py -``` - -If you'd like to integrate this into an IDE checkout out this [setup guide](/introduction/ide-usage#mcp-server-setup) - -And that's a wrap, chime in at our [community - Slack](https://community.codegen.com) if you have questions or ideas for additional MCP tools/capabilities diff --git a/docs/tutorials/creating-documentation.mdx b/docs/tutorials/creating-documentation.mdx deleted file mode 100644 index c7fc01a51..000000000 --- a/docs/tutorials/creating-documentation.mdx +++ /dev/null @@ -1,213 +0,0 @@ ---- -title: "Creating Documentation" -sidebarTitle: "Documentation" -icon: "book" -iconType: "solid" ---- - -This guide demonstrates how to determine docs coverage and create documentation for your codebase. - -This primarily leverages two APIs: -- [codebase.ai(...)](/api-reference/core/Codebase#ai) for generating docstrings -- [function.set_docstring(...)](/api-reference/core/HasBlock#set-docstring) for modifying them - -## Determining Documentation Coverage - -In order to determine the extent of your documentation coverage, you can iterate through all symbols of interest and count the number of docstrings: - -To see your current documentation coverage, you can iterate through all symbols of interest and count the number of docstrings: - -```python python -# Initialize counters -total_functions = 0 -functions_with_docs = 0 -total_classes = 0 -classes_with_docs = 0 - -# Check functions -for function in codebase.functions: - total_functions += 1 - if function.docstring: - functions_with_docs += 1 - -# Check classes -for cls in codebase.classes: - total_classes += 1 - if cls.docstring: - classes_with_docs += 1 - -# Calculate percentages -func_coverage = (functions_with_docs / total_functions * 100) if total_functions > 0 else 0 -class_coverage = (classes_with_docs / total_classes * 100) if total_classes > 0 else 0 - -# Print results with emojis -print("\n📊 Documentation Coverage Report:") -print(f"\n📝 Functions:") -print(f" • Total: {total_functions}") -print(f" • Documented: {functions_with_docs}") -print(f" • Coverage: {func_coverage:.1f}%") - -print(f"\n📚 Classes:") -print(f" • Total: {total_classes}") -print(f" • Documented: {classes_with_docs}") -print(f" • Coverage: {class_coverage:.1f}%") - -print(f"\n🎯 Overall Coverage: {((functions_with_docs + classes_with_docs) / (total_functions + total_classes) * 100):.1f}%") -``` - -Which provides the following output: -``` -📊 Documentation Coverage Report: -📝 Functions: - • Total: 1384 - • Documented: 331 - • Coverage: 23.9% -📚 Classes: - • Total: 453 - • Documented: 91 - • Coverage: 20.1% -🎯 Overall Coverage: 23.0% -``` - -## Identifying Areas of Low Documentation Coverage - - -To identify areas of low documentation coverage, you can iterate through all directories and count the number of functions with docstrings. - -Learn more about [Directories here](/building-with-graph-sitter/files-and-directories). - -```python python -# Track directory stats -dir_stats = {} - -# Analyze each directory -for directory in codebase.directories: - # Skip test, sql and alembic directories - if any(x in directory.path.lower() for x in ['test', 'sql', 'alembic']): - continue - - # Get undecorated functions - funcs = [f for f in directory.functions if not f.is_decorated] - total = len(funcs) - - # Only analyze dirs with >10 functions - if total > 10: - documented = sum(1 for f in funcs if f.docstring) - coverage = (documented / total * 100) - dir_stats[directory.path] = { - 'total': total, - 'documented': documented, - 'coverage': coverage - } - -# Find lowest coverage directory -if dir_stats: - lowest_dir = min(dir_stats.items(), key=lambda x: x[1]['coverage']) - path, stats = lowest_dir - - print(f"📉 Lowest coverage directory: '{path}'") - print(f" • Total functions: {stats['total']}") - print(f" • Documented: {stats['documented']}") - print(f" • Coverage: {stats['coverage']:.1f}%") - - # Print all directory stats for comparison - print("\n📊 All directory coverage rates:") - for path, stats in sorted(dir_stats.items(), key=lambda x: x[1]['coverage']): - print(f" '{path}': {stats['coverage']:.1f}% ({stats['documented']}/{stats['total']} functions)") -``` - -Which provides the following output: -```python -📉 Lowest coverage directory: 'codegen-backend/app/utils/github_utils/branch' - • Total functions: 12 - • Documented: 0 - • Coverage: 0.0% -📊 All directory coverage rates: - 'codegen-backend/app/utils/github_utils/branch': 0.0% (0/12 functions) - 'codegen-backend/app/utils/slack': 14.3% (2/14 functions) - 'codegen-backend/app/modal_app/github': 18.2% (2/11 functions) - 'codegen-backend/app/modal_app/slack': 18.2% (2/11 functions) - 'codegen-backend/app/utils/github_utils/webhook': 21.4% (6/28 functions) - 'codegen-backend/app/modal_app/cron': 23.1% (3/13 functions) - 'codegen-backend/app/utils/github_utils': 23.5% (39/166 functions) - 'codegen-backend/app/codemod': 25.0% (7/28 functions) -``` - -## Leveraging AI for Generating Documentation - -For non-trivial codebases, it can be challenging to achieve full documentation coverage. - -The most efficient way to edit informative docstrings is to use [codebase.ai](/api-reference/core/Codebase#ai) to generate docstrings, then use the [set_docstring](/api-reference/core/HasBlock#set-docstring) method to update the docstring. - -Learn more about using AI in our [guides](/building-with-graph-sitter/calling-out-to-llms). - -```python python -# Import datetime for timestamp -from datetime import datetime - -# Get current timestamp -timestamp = datetime.now().strftime("%B %d, %Y") - -print("📚 Generating and Updating Function Documentation") - -# Process all functions in the codebase -for function in codebase.functions: - current_docstring = function.docstring() - - if current_docstring: - # Update existing docstring to be more descriptive - new_docstring = codebase.ai( - f"Update the docstring for {function.name} to be more descriptive and comprehensive.", - target=function - ) - new_docstring += f"\n\nUpdated on: {timestamp}" - else: - # Generate new docstring for function - new_docstring = codebase.ai( - f"Generate a comprehensive docstring for {function.name} including parameters, return type, and description.", - target=function - ) - new_docstring += f"\n\nCreated on: {timestamp}" - - # Set the new or updated docstring - function.set_docstring(new_docstring) -``` - - - -## Adding Explicit Parameter Names and Types - -Alternatively, you can also rely on deterministic string formatting to edit docstrings. - -To add "Google-style" parameter names and types to a function docstring, you can use the following code snippet: - -```python python -# Iterate through all functions in the codebase -for function in codebase.functions: - # Skip if function already has a docstring - if function.docstring: - continue - - # Build parameter documentation - param_docs = [] - for param in function.parameters: - param_type = param.type.source if param.is_typed else "Any" - param_docs.append(f" {param.name} ({param_type}): Description of {param.name}") - - # Get return type if present - return_type = function.return_type.source if function.return_type else "None" - - # Create Google-style docstring - docstring = f'''""" - Description of {function.name}. - - Args: -{chr(10).join(param_docs)} - - Returns: - {return_type}: Description of return value - """''' - - # Set the new docstring - function.set_docstring(docstring) -``` diff --git a/docs/tutorials/deep-code-research.mdx b/docs/tutorials/deep-code-research.mdx deleted file mode 100644 index 2e8d1bcae..000000000 --- a/docs/tutorials/deep-code-research.mdx +++ /dev/null @@ -1,215 +0,0 @@ ---- -title: "Deep Code Research with AI" -sidebarTitle: "Code Research Agent" -icon: "magnifying-glass" -iconType: "solid" ---- - -This guide demonstrates how to build an intelligent code research tool that can analyze and explain codebases using Codegen's and LangChain. The tool combines semantic code search, dependency analysis, and natural language understanding to help developers quickly understand new codebases. - -View the full code on [GitHub](https://github.com/codegen-sh/graph-sitter/tree/develop/examples/examples/deep_code_research) - -This example works with any public GitHub repository - just provide the repo name in the format owner/repo - -## Overview - -The process involves three main components: - -1. A CLI interface for interacting with the research agent -2. A set of code analysis tools powered by Codegen -3. An LLM-powered agent that combines the tools to answer questions - -Let's walk through building each component. - -## Step 1: Setting Up the Research Tools - -First, let's import the necessary components and set up our research tools: - -```python -from graph_sitter import Codebase -from graph_sitter.extensions.langchain.agent import create_agent_with_tools -from graph_sitter.extensions.langchain.tools import ( - ListDirectoryTool, - RevealSymbolTool, - SearchTool, - SemanticSearchTool, - ViewFileTool, -) -from langchain_core.messages import SystemMessage -``` - -We'll create a function to initialize our codebase with a nice progress indicator: - -```python -def initialize_codebase(repo_name: str) -> Optional[Codebase]: - """Initialize a codebase with a spinner showing progress.""" - with console.status("") as status: - try: - status.update(f"[bold blue]Cloning {repo_name}...[/bold blue]") - codebase = Codebase.from_repo(repo_name) - status.update("[bold green]✓ Repository cloned successfully![/bold green]") - return codebase - except Exception as e: - console.print(f"[bold red]Error initializing codebase:[/bold red] {e}") - return None -``` - -Then we'll set up our research tools: - -```python -# Create research tools -tools = [ - ViewFileTool(codebase), # View file contents - ListDirectoryTool(codebase), # Explore directory structure - SearchTool(codebase), # Text-based search - SemanticSearchTool(codebase), # Natural language search - RevealSymbolTool(codebase), # Analyze symbol relationships -] -``` - -Each tool provides specific capabilities: -- `ViewFileTool`: Read and understand file contents -- `ListDirectoryTool`: Explore the codebase structure -- `SearchTool`: Find specific code patterns -- `SemanticSearchTool`: Search using natural language -- `RevealSymbolTool`: Analyze dependencies and usages - -## Step 2: Creating the Research Agent - -Next, we'll create an agent that can use these tools intelligently. We'll give it a detailed prompt about its role: - -```python -RESEARCH_AGENT_PROMPT = """You are a code research expert. Your goal is to help users understand codebases by: -1. Finding relevant code through semantic and text search -2. Analyzing symbol relationships and dependencies -3. Exploring directory structures -4. Reading and explaining code - -Always explain your findings in detail and provide context about how different parts of the code relate to each other. -When analyzing code, consider: -- The purpose and functionality of each component -- How different parts interact -- Key patterns and design decisions -- Potential areas for improvement - -Break down complex concepts into understandable pieces and use examples when helpful.""" - -# Initialize the agent -agent = create_agent_with_tools( - codebase=codebase, - tools=tools, - chat_history=[SystemMessage(content=RESEARCH_AGENT_PROMPT)], - verbose=True -) -``` - -## Step 3: Building the CLI Interface - -Finally, we'll create a user-friendly CLI interface using rich-click: - -```python -import rich_click as click -from rich.console import Console -from rich.markdown import Markdown - -@click.group() -def cli(): - """🔍 Graph-sitter Code Research CLI""" - pass - -@cli.command() -@click.argument("repo_name", required=False) -@click.option("--query", "-q", default=None, help="Initial research query.") -def research(repo_name: Optional[str] = None, query: Optional[str] = None): - """Start a code research session.""" - # Initialize codebase - codebase = initialize_codebase(repo_name) - - # Create and run the agent - agent = create_research_agent(codebase) - - # Main research loop - while True: - if not query: - query = Prompt.ask("[bold cyan]Research query[/bold cyan]") - - result = agent.invoke( - {"input": query}, - config={"configurable": {"thread_id": 1}} - ) - console.print(Markdown(result["messages"][-1].content)) - - query = None # Clear for next iteration -``` - -## Using the Research Tool - -You can use the tool in several ways: - -1. Interactive mode (will prompt for repo): -```bash -python run.py research -``` - -2. Specify a repository: -```bash -python run.py research "fastapi/fastapi" -``` - -3. Start with an initial query: -```bash -python run.py research "fastapi/fastapi" -q "Explain the main components" -``` - -Example research queries: -- "Explain the main components and their relationships" -- "Find all usages of the FastAPI class" -- "Show me the dependency graph for the routing module" -- "What design patterns are used in this codebase?" - - - The agent maintains conversation history, so you can ask follow-up questions - and build on previous findings. - - -## Advanced Usage - -### Custom Research Tools - -You can extend the agent with custom tools for specific analysis needs: - -```python -from langchain.tools import BaseTool -from pydantic import BaseModel, Field - -class CustomAnalysisTool(BaseTool): - """Custom tool for specialized code analysis.""" - name = "custom_analysis" - description = "Performs specialized code analysis" - - def _run(self, query: str) -> str: - # Custom analysis logic - return results - -# Add to tools list -tools.append(CustomAnalysisTool()) -``` - -### Customizing the Agent - -You can modify the agent's behavior by adjusting its prompt: - -```python -CUSTOM_PROMPT = """You are a specialized code reviewer focused on: -1. Security best practices -2. Performance optimization -3. Code maintainability -... -""" - -agent = create_agent_with_tools( - codebase=codebase, - tools=tools, - chat_history=[SystemMessage(content=CUSTOM_PROMPT)], -) -``` diff --git a/docs/tutorials/neo4j-graph.mdx b/docs/tutorials/neo4j-graph.mdx deleted file mode 100644 index 724a1ea9a..000000000 --- a/docs/tutorials/neo4j-graph.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: "Neo4j Graph" -sidebarTitle: "Neo4j Graph" -icon: "database" -iconType: "solid" ---- - - - - - -# Neo4j Graph - -Graph-sitter can export codebase graphs to Neo4j for visualization and analysis. - -## Installation -In order to use Neo4j you will need to install it and run it locally using Docker. - -### Neo4j -First, install Neo4j using the official [installation guide](https://neo4j.com/docs/desktop-manual/current/installation/download-installation/). - -### Docker -To run Neo4j locally using Docker, follow the instructions [here](https://neo4j.com/docs/apoc/current/installation/#docker). - -## Launch Neo4j Locally - -```bash -docker run \ - -p 7474:7474 -p 7687:7687 \ - -v $PWD/data:/data -v $PWD/plugins:/plugins \ - --name neo4j-apoc \ - -e NEO4J_apoc_export_file_enabled=true \ - -e NEO4J_apoc_import_file_enabled=true \ - -e NEO4J_apoc_import_file_use__neo4j__config=true \ - -e NEO4J_PLUGINS=\[\"apoc\"\] \ - neo4j:latest -``` -## Usage - -```python -from graph_sitter import Codebase -from graph_sitter.extensions.graph.main import visualize_codebase - -# parse codebase -codebase = Codebase("path/to/codebase") - -# export to Neo4j -visualize_codebase(codebase, "bolt://localhost:7687", "neo4j", "password") -``` - -## Visualization - -Once exported, you can open the Neo4j browser at `http://localhost:7474`, sign in with the username `neo4j` and the password `password`, and use the following Cypher queries to visualize the codebase: - -### Class Hierarchy - -```cypher -Match (s: Class )-[r: INHERITS_FROM*]-> (e:Class) RETURN s, e LIMIT 10 -``` - - - - -### Methods Defined by Each Class - -```cypher -Match (s: Class )-[r: DEFINES]-> (e:Method) RETURN s, e LIMIT 10 -``` - - - - -### Function Calls - -```cypher -Match (s: Func )-[r: CALLS]-> (e:Func) RETURN s, e LIMIT 10 -``` - - - - - -### Call Graph - -```cypher -Match path = (:(Method|Func)) -[:CALLS*5..10]-> (:(Method|Func)) -Return path -LIMIT 20 -``` - - - - \ No newline at end of file diff --git a/docs/tutorials/python2-to-python3.mdx b/docs/tutorials/python2-to-python3.mdx index b119a73ce..70826a60e 100644 --- a/docs/tutorials/python2-to-python3.mdx +++ b/docs/tutorials/python2-to-python3.mdx @@ -246,7 +246,6 @@ Check out these related tutorials: - [Increase Type Coverage](/tutorials/increase-type-coverage) - [Organizing Your Codebase](/tutorials/organize-your-codebase) -- [Creating Documentation](/tutorials/creating-documentation) ## Learn More diff --git a/docs/use-cases/I. Upgrading APIs.md b/docs/use-cases/I. Upgrading APIs.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/use-cases/II. Improving Codebase Modularity.md b/docs/use-cases/II. Improving Codebase Modularity.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/use-cases/III. Improving Type Coverage.md b/docs/use-cases/III. Improving Type Coverage.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/use-cases/IV. Analyzing Critical Code Paths.md b/docs/use-cases/IV. Analyzing Critical Code Paths.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/users/get-user.mdx b/docs/users/get-user.mdx deleted file mode 100644 index 8fde95c28..000000000 --- a/docs/users/get-user.mdx +++ /dev/null @@ -1,3 +0,0 @@ ---- -openapi: get /v1/organizations/{org_id}/users/{user_id} ---- \ No newline at end of file diff --git a/docs/users/get-users.mdx b/docs/users/get-users.mdx deleted file mode 100644 index 8448ef000..000000000 --- a/docs/users/get-users.mdx +++ /dev/null @@ -1,3 +0,0 @@ ---- -openapi: get /v1/organizations/{org_id}/users ---- \ No newline at end of file diff --git a/site/app/page.tsx b/site/app/page.tsx index 690129979..0ab99db4a 100644 --- a/site/app/page.tsx +++ b/site/app/page.tsx @@ -1,99 +1,72 @@ import { ArrowRight, BookOpen, - Code2, + Bot, + Database, ExternalLink, - FileCode2, - GitBranch, - Network, - ShieldCheck, + FolderTree, + Share2, TerminalSquare, + Trash2, + TreePine, } from "lucide-react"; import Link from "next/link"; import { Logo, Wordmark } from "@/components/logo"; import { ThemeToggle } from "@/components/theme-toggle"; -import { Badge } from "@/components/ui/badge"; import { Button } from "@/components/ui/button"; const docsUrl = "/docs"; const githubUrl = "https://github.com/codegen-sh/graph-sitter"; -const capabilities = [ +const pillars = [ { - icon: FileCode2, + icon: Database, tone: "text-aura-purple", - title: "Parse real codebases", - text: "Load Python, TypeScript, JavaScript, and React repositories into files, directories, and language-aware symbols.", + title: "Fast, in-memory index", + text: "Parse a whole repository into an in-memory index that captures more than the AST — imports, exports, usages, references, and call relationships.", }, { - icon: GitBranch, + icon: TreePine, tone: "text-aura-green", - title: "Build the graph", - text: "Index imports, exports, function calls, references, usages, and dependencies before touching any source text.", + title: "Backed by tree-sitter", + text: "Built on tree-sitter parsing for Python, TypeScript, JavaScript, and React, with the heaviest indexes moving into Rust for scale.", }, { - icon: ShieldCheck, + icon: Bot, tone: "text-aura-blue", - title: "Run guarded codemods", - text: "Write transformations that move, rename, delete, and rewrite code while keeping the related graph edges in sync.", + title: "Optimized for coding agents", + text: "A clean, scriptable API designed for AI coding agents to explore code and make safe, graph-aware edits without breaking imports.", }, ]; -const architecture = [ +const useCases = [ { - icon: Code2, + icon: FolderTree, tone: "text-aura-purple", - title: "Python stays the shell", - text: "The authoring experience remains Python: notebooks, scripts, reusable codemods, and the high-level editable API.", + title: "Large-scale reorganization", + text: "Restructure directories and move symbols across a codebase while every import and reference is repaired automatically.", + href: `${docsUrl}/tutorials/organize-your-codebase`, + cta: "Reorganize a codebase", }, { - icon: Network, - tone: "text-aura-green", - title: "Rust handles scale", - text: "The rewrite path moves the massive parse and index data structure into a compact Rust backend for large repositories.", + icon: Trash2, + tone: "text-aura-red", + title: "Dead code monitoring & deletion", + text: "Find functions, classes, and imports with no usages across the index, then remove them with confidence in a single pass.", + href: `${docsUrl}/tutorials/deleting-dead-code`, + cta: "Delete dead code", }, { - icon: TerminalSquare, - tone: "text-aura-blue", - title: "uvx is the entrypoint", - text: "The target command is uvx graph-sitter for repository parsing, graph inspection, and guarded transformations.", + icon: Share2, + tone: "text-aura-green", + title: "Codebase visualization", + text: "Turn the dependency and call graph into interactive visualizations to understand structure and blast radius before you change it.", + href: `${docsUrl}/tutorials/codebase-visualization`, + cta: "Visualize a codebase", }, ]; -const useCases = [ - "Delete dead code with usage checks", - "Move symbols while repairing imports", - "Trace API impact across a repo", - "Inspect import and reference graphs", - "Build custom codebase analytics", - "Run checked codemods before writes", -]; - -const graphNodes = [ - { label: "user.py", dot: "bg-aura-purple", x: 17, y: 24 }, - { label: "UserService", dot: "bg-aura-blue", x: 63, y: 17 }, - { label: "create_user()", dot: "bg-aura-orange", x: 72, y: 56 }, - { label: "usages", dot: "bg-aura-green", x: 23, y: 72 }, -]; - -function CheckIcon({ className }: { className?: string }) { - return ( - - ); -} - export default function Home() { return (
@@ -150,15 +123,22 @@ export default function Home() {
-

- A codebase graph and codemod library. +
+ + Backed by tree-sitter · built for coding agents +
+

+ A fast, in-memory codebase index — beyond the AST.

- Graph-sitter lets Python programs parse whole repositories, - build reference and import graphs, query code relationships, and - make targeted source edits — with the largest indexes moving - into Rust for scale. + Graph-sitter parses whole repositories into an in-memory index + of imports, usages, and references — backed by tree-sitter and + optimized for coding agents that need to analyze and safely + rewrite code.

+ + +
-
- - $ uvx - graph-sitter parse . - - parse any repo -
- +
- {/* Capabilities */} -
-

- A graph-shaped API for codebase automation. -

-

- Everything resolves through one editable model of your repository — - so analysis and rewrites stay consistent. -

-
- {capabilities.map((item) => ( + {/* Pillars */} +
+
+ {pillars.map((item) => (
@@ -217,91 +183,60 @@ export default function Home() { {/* Use cases */}
-
-
-

- Programmatic refactors, analysis, and repo maintenance. -

-

- Reach for graph-sitter when a change is too mechanical for hands - and too structural for find-and-replace. -

-
-
    +
    +

    + What people build with graph-sitter. +

    +

    + Reach for graph-sitter when a change is too mechanical for hands + and too structural for find-and-replace. +

    +
    {useCases.map((item) => ( -
  • - - {item} -
  • - ))} -
-
-
- - {/* Architecture */} -
-

- Same Python workflow, smaller graph engine. -

-

- The resurrection keeps the Python shell intact while moving the - heavy lifting into Rust. -

-
- {architecture.map((item) => ( -
-
- -

+ +

{item.title}

-

-

- {item.text} -

-
- ))} +

+ {item.text} +

+ + {item.cta} + + + + ))} +
{/* CLI */} -
-
+
+

- One command surface for parse, inspect, and transform. + One command surface:{" "} + uvx graph-sitter.

- The release target is{" "} - - uvx graph-sitter - - : start with fast parse summaries and graph inspection, then run - codemods in explicit check and write modes. + Run it with no install. Start with fast parse summaries, then + run codemods in explicit check and write modes — with the + heaviest indexes moving into Rust for scale.

-
- - Branch wheels: Rust parsing - - - Parity & release: in progress - -
+
- +
@@ -336,7 +271,42 @@ export default function Home() { ); } -function ProductVisual() { +function HeroTerminal() { + return ( +
+
+
+ + + uvx graph-sitter + +
+
+					
+						
+							${" "}
+							uvx graph-sitter parse .
+						
+						
+							${" "}
+							uvx graph-sitter
+							transform delete_dead_code.py:run .{" "}
+							--check
+						
+						
+							${" "}
+							uvx graph-sitter
+							transform delete_dead_code.py:run .{" "}
+							--write
+						
+					
+				
+
+
+ ); +} + +function DeadCodeExample() { return (
@@ -347,101 +317,64 @@ function ProductVisual() { - codebase.py + delete_dead_code.py - graph indexed + index ready
- -
-
-						
-							
-								from graph_sitter{" "}
-								import{" "}
-								Codebase
-							
-							
-								codebase ={" "}
-								Codebase(
-								"./")
-							
-							{" "}
-							
-								for fn{" "}
-								in codebase.
-								functions:
-							
-							
-								if{" "}
-								not fn.
-								usages:
-							
-							
-								fn.remove()
-							
-							{" "}
-							
-								# python stays the control plane
-							
-							
-								codebase.commit()
-							
-						
-					
- -
- - {graphNodes.map((node) => ( - - - {node.label} - - ))} -
-
+
+					
+						
+							from graph_sitter{" "}
+							import{" "}
+							Codebase
+						
+						 
+						
+							# Parse the repo into a fast, in-memory index
+						
+						
+							codebase ={" "}
+							Codebase(
+							"./")
+						
+						 
+						
+							# Delete functions with no usages anywhere
+						
+						
+							for function{" "}
+							in codebase.
+							functions:
+						
+						
+							if{" "}
+							not function.
+							usages:
+						
+						
+							function.remove()
+						
+						 
+						
+							codebase.commit()
+						
+					
+				
); } -function Terminal() { +function CommandSurface() { return (
- future command surface + parse · transform
@@ -453,18 +386,19 @@ function Terminal() {
 						
 							--language auto --backend rust --format summary
 						
-						{" "}
+						 
+						
+							# run a codemod in check mode, then write
+						
 						
 							uvx graph-sitter
-							transform ./codemods/rename.py{" "}
+							transform delete_dead_code.py:run .{" "}
 							--check
 						
-						{" "}
-						
-							# branch wheel proof:
-						
-						
-							# uvx --from dist/*.whl graph-sitter parse . --backend rust
+						
+							uvx graph-sitter
+							transform delete_dead_code.py:run .{" "}
+							--write
 						
 					
 				
diff --git a/site/lib/docs.ts b/site/lib/docs.ts index 42fd59e4c..5086c15a1 100644 --- a/site/lib/docs.ts +++ b/site/lib/docs.ts @@ -263,9 +263,6 @@ const docsSectionOrder = [ ["blog", "Blog"], ["api-reference", "API Reference"], ["graph-sitter", "Graph-sitter"], - ["use-cases", "Use Cases"], - ["organizations", "Organizations"], - ["users", "Users"], ] as const; const sectionTitleBySlug = new Map(docsSectionOrder); @@ -320,7 +317,14 @@ function buildDocsNavItems( } return [...bySegment.entries()] - .sort(([left], [right]) => sortNavSegments(left, right)) + .sort(([left], [right]) => { + const leftOrder = navOrder(parentSlug, left); + const rightOrder = navOrder(parentSlug, right); + if (leftOrder !== rightOrder) { + return leftOrder - rightOrder; + } + return sortNavSegments(left, right); + }) .map(([segment, segmentSlugs]) => { if (!segment) { return navLeaf(parentSlug, section); @@ -356,6 +360,22 @@ function navLeaf(slug: string, section: string): DocsNavItem { }; } +function navOrder(parentSlug: string, segment: string) { + // The section's own index page always leads its group. + if (!segment) { + return Number.NEGATIVE_INFINITY; + } + + const filePath = findDocFile(`${parentSlug}/${segment}`); + if (!filePath) { + return Number.POSITIVE_INFINITY; + } + + const parsed = matter(fs.readFileSync(filePath, "utf8")); + const order = parsed.data.sidebarOrder; + return typeof order === "number" ? order : Number.POSITIVE_INFINITY; +} + function sortNavSegments(left: string, right: string) { if (!left) { return -1;