longcipher
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 4 additions & 2 deletions
diff --git a/README.md b/README.md
Lines changed: 50 additions & 132 deletions
diff --git a/bin/ast-doc/src/main.rs b/bin/ast-doc/src/main.rs
Lines changed: 1 addition & 1 deletion
diff --git a/crates/ast-doc-core/Cargo.toml b/crates/ast-doc-core/Cargo.toml
Lines changed: 4 additions & 1 deletion
diff --git a/crates/ast-doc-core/proptest-regressions/parser/lang/generic_parser.txt b/crates/ast-doc-core/proptest-regressions/parser/lang/generic_parser.txt
Lines changed: 7 additions & 0 deletions
diff --git a/crates/ast-doc-core/src/ingestion/mod.rs b/crates/ast-doc-core/src/ingestion/mod.rs
Lines changed: 3 additions & 3 deletions
diff --git a/crates/ast-doc-core/src/lib.rs b/crates/ast-doc-core/src/lib.rs
Lines changed: 2 additions & 2 deletions
@@ -1,5 +1,5 @@
 [workspace.package]
-version = "0.1.0"
+version = "0.1.1"
 edition = "2024"
 license = "Apache-2.0"
 repository = "https://github.com/longcipher/ast-doc"
@@ -15,7 +15,8 @@ resolver = "3"
 
 [workspace.dependencies]
 # local crates
-ast-doc-core = { path = "crates/ast-doc-core", version = "0.1.0" }
+ast-doc = { path = "bin/ast-doc", version = "0.1.1" }
+ast-doc-core = { path = "crates/ast-doc-core", version = "0.1.1" }
 
 # external crates
 clap = "4.6.0"
@@ -40,6 +41,7 @@ tracing-subscriber = "0.3.23"
 tree-sitter = "0.26.7"
 tree-sitter-c = "0.24.1"
 tree-sitter-go = "0.25.0"
+tree-sitter-language-pack = { version = "1.4.0", default-features = false }
 tree-sitter-python = "0.25.0"
 tree-sitter-rust = "0.24.2"
 tree-sitter-typescript = "0.23.2"
 
@@ -1,17 +1,14 @@
 # ast-doc
 
 [![DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/longcipher/ast-doc)
-[![Context7](https://img.shields.io/badge/Website-context7.com-blue)](https://context7.com/longcipher/ast-doc)
 [![crates.io](https://img.shields.io/crates/v/ast-doc.svg)](https://crates.io/crates/ast-doc)
 [![docs.rs](https://docs.rs/ast-doc/badge.svg)](https://docs.rs/ast-doc)
 
-![ast-doc](https://socialify.git.ci/longcipher/ast-doc/image?font=Source+Code+Pro&language=1&name=1&owner=1&pattern=Circuit+Board&theme=Auto)
-
 AST-powered code documentation tool for generating optimized `llms.txt` files from codebases.
 
 ## Overview
 
-`ast-doc` is a Rust CLI tool that combines broad file traversal with deep AST-based semantic parsing to create optimized documentation. It uses a four-stage pipeline:
+`ast-doc` is a Rust CLI that combines broad file traversal with deep AST-based semantic parsing to create optimized documentation. It uses a four-stage pipeline:
 
 1. **Ingestion** — File discovery, git metadata capture, directory tree generation
 2. **Parser** — tree-sitter AST extraction with pre-computed strategy variants
@@ -20,11 +17,19 @@ AST-powered code documentation tool for generating optimized `llms.txt` files fr
 
 ## Supported Languages
 
-- Rust (`.rs`)
-- Python (`.py`)
-- TypeScript/JavaScript (`.ts`, `.tsx`, `.js`, `.jsx`)
-- Go (`.go`)
-- C (`.c`, `.h`)
+### Core (deep analysis)
+
+| Language | Extensions |
+|----------|------------|
+| Rust | `.rs` |
+| Python | `.py` |
+| TypeScript/JavaScript | `.ts`, `.tsx`, `.js`, `.jsx` |
+| Go | `.go` |
+| C | `.c`, `.h` |
+
+### Extended
+
+With the `lang-pack` feature (enabled by default), 50+ additional languages are supported via `tree-sitter-language-pack` (Java, Ruby, Kotlin, Swift, etc.).
 
 ## Installation
 
@@ -36,36 +41,20 @@ Install this skill for use with AI coding agents:
 npx skills add longcipher/ast-doc
 ```
 
-### From Source
-
-```bash
-cargo install --path bin/ast-doc
-```
-
 ### From crates.io
 
 ```bash
 cargo install ast-doc
 ```
 
-## Features
+### From source
 
-- **Four-stage pipeline**: Ingestion → AST Parser → Token Scheduler → Renderer
-- **Output strategies**: Full, NoTests (strip tests), Summary (signatures only)
-- **Token budget management**: Configurable `--max-tokens` with automatic degradation
-- **Core file protection**: Mark files with `--core` patterns that never get degraded
-- **Git context**: Automatic branch, commit, and diff inclusion (disable with `--no-git`)
-- **Directory tree**: Visual project structure with language annotations (disable with `--no-tree`)
-- **Glob filtering**: Include/exclude patterns for fine-grained file selection
-- **Anti-bloat rules**: Compress blank lines, trim trailing whitespace
-- **BDD acceptance tests**: Gherkin scenarios with `cucumber-rs`
-- **TDD inner loop**: Unit tests with `cargo test`
-- **Property tests**: `proptest` in the standard test flow
+```bash
+cargo install --path bin/ast-doc
+```
 
 ## Usage
 
-### Basic Usage
-
 ```bash
 # Generate llms.txt to stdout
 ast-doc .
@@ -75,137 +64,66 @@ ast-doc . --output llms.txt
 
 # Set token budget (default: 128,000)
 ast-doc . --max-tokens 64000
-```
 
-### Output Strategies
-
-```bash
-# Full source code (default)
-ast-doc . --strategy full
-
-# Strip test modules and functions
-ast-doc . --strategy no-tests
-
-# Signatures only, no implementations
+# Use summary mode (signatures only)
 ast-doc . --strategy summary
-```
 
-### Core Files Protection
+# Strip tests
+ast-doc . --strategy no-tests
 
-```bash
-# Core files always use Full strategy, never degraded
+# Protect core files from degradation
 ast-doc . --core "src/main.rs" --core "src/lib.rs" --strategy summary
-```
-
-### File Filtering
-
-```bash
-# Include only Rust files
-ast-doc . --include "*.rs"
-
-# Exclude test files
-ast-doc . --exclude "*test*"
 
-# Combine include/exclude
+# Filter files
 ast-doc . --include "*.rs" --exclude "target/**"
-```
-
-### Git and Tree Options
-
-```bash
-# Skip git context
-ast-doc . --no-git
-
-# Skip directory tree
-ast-doc . --no-tree
-
-# Copy to clipboard (not yet implemented)
-ast-doc . --copy
-```
 
-### Verbose Logging
-
-```bash
-ast-doc . --verbose
+# Skip git context and directory tree
+ast-doc . --no-git --no-tree
 ```
 
-## Quick Start (Development)
+## Development
 
 ```bash
+# Install tools
 just setup
-just check
-just test
-just bdd
-just test-all
-
-# Run the CLI
-cargo run -p ast-doc -- --help
-cargo run -p ast-doc -- .
-```
-
-## Testing Matrix
 
-- BDD via `features/*.feature` plus `just bdd` remains the acceptance contract.
-- Example-based crate-local unit tests remain the default inner loop for named business cases and edge cases.
-- `proptest` lives in the ordinary `cargo test` path when the rule is an invariant across many valid inputs.
-- Advanced modes are opt-in: use `cargo-fuzz` only for hostile-input or `unsafe`-heavy crates, and add Criterion only when the work has a real performance target.
+# Run full CI
+just ci
 
-## BDD + TDD Workflow
-
-1. Write a failing Gherkin scenario in `features/*.feature`.
-2. Write a failing crate-local unit or property test in the affected crate to drive the inner loop.
-3. Implement the smallest shared Rust API needed to satisfy the test.
-4. Run `just test` to exercise deterministic unit tests and any `proptest` properties together.
-5. Re-run `just bdd` to confirm the acceptance scenario passes.
-
-Use example-based unit tests for named business cases and edge cases that should stay readable. Use `proptest` when the rule is an invariant, such as totals matching line-item arithmetic or checkout always emptying the cart.
-
-## Project Convention
-
-- Put executable crates under `bin/*`
-- Put reusable library crates under `crates/*`
-- Keep shared dependencies in root `[workspace.dependencies]`
-
-## Common Commands
-
-```bash
-just format
+# Individual steps
 just lint
 just test
 just bdd
-just test-all
 just build
 ```
 
-`just test` runs the usual `cargo test --all-features` flow, so colocated `proptest` coverage in crate test modules stays in the standard inner loop rather than a separate test layer.
+## Feature Flags
 
-## Conditional Benchmark Guidance
+| Feature | Description | Default |
+|---------|-------------|---------|
+| `lang-rust` | Rust parser | ✓ |
+| `lang-pack` | 50+ languages via tree-sitter-language-pack | ✓ |
+| `lang-python` | Python parser | ✗ |
+| `lang-typescript` | TypeScript/JavaScript parser | ✗ |
+| `lang-go` | Go parser | ✗ |
+| `lang-c` | C parser | ✗ |
+| `all-languages` | Enable all language parsers | ✗ |
+| `hotpath` | Profiling instrumentation | ✗ |
 
-Do not add Criterion or a benchmark scaffold to every new workspace by default. Most business logic and CRUD-style crate work should stay on the ordinary `just test` plus `just bdd` path unless the planned feature has an explicit latency SLA, throughput target, or known hot path worth measuring.
+## Testing
 
-When performance-sensitive code appears, add Criterion only in the affected crate and benchmark the hot path that carries the requirement. That keeps the default template lean while still using the standard Rust benchmark tool when the work genuinely needs measurement.
+- **BDD**: Gherkin scenarios in `features/*.feature`, run with `just bdd`
+- **Unit tests**: Colocated `#[cfg(test)]` modules, run with `just test`
+- **Property tests**: `proptest` cases, run as part of the standard `cargo test` flow
 
-## Conditional Fuzzing Guidance
+## Project Structure
 
-Do not add `cargo-fuzz` targets to every new workspace by default. The standard Rust template is enough for ordinary business logic, CRUD-style services, and shared domain crates that only handle trusted or well-formed inputs.
-
-Reach for fuzzing when a specific crate starts handling hostile input or high-risk memory behavior, especially when it:
-
-- parses free-form text or file formats,
-- implements protocol framing or message decoding,
-- decodes binary formats or other untrusted payloads,
-- or relies on substantial `unsafe` code.
-
-When one of those conditions applies, enable fuzzing only in the affected crate and use the normal Cargo workflow rather than baking a `fuzz/` directory into every starter:
-
-```bash
-cd crates/<crate-name>
-cargo fuzz init
-cargo fuzz run <target-name>
+```text
+bin/          CLI binary crates
+crates/       Reusable library crates
+features/     BDD Gherkin scenarios
 ```
 
-That keeps the default template lean while still pointing parser-like, protocol, binary-decoding, or `unsafe`-heavy crates to the standard `cargo-fuzz` layout when they actually need it.
-
 ## License
 
 Apache-2.0
@@ -107,7 +107,7 @@ pub fn build_config(args: &Args) -> ast_doc_core::AstDocConfig {
 
 fn main() -> Result<()> {
     #[cfg(feature = "hotpath")]
-    let _guard = hotpath::GuardBuilder::new("main").build();
+    let _guard = hotpath::HotpathGuardBuilder::new("main").build();
 
     let args = Args::parse();
 
 
@@ -18,11 +18,13 @@ all-languages = [
     "lang-typescript",
     "lang-go",
     "lang-c",
+    "lang-pack",
 ]
-default = ["lang-rust"]
+default = ["lang-rust", "lang-pack"]
 hotpath = ["dep:hotpath"]
 lang-c = ["tree-sitter-c"]
 lang-go = ["tree-sitter-go"]
+lang-pack = ["tree-sitter-language-pack"]
 lang-python = ["tree-sitter-python"]
 lang-rust = ["tree-sitter-rust"]
 lang-typescript = ["tree-sitter-typescript"]
@@ -42,6 +44,7 @@ tracing = { workspace = true }
 tree-sitter = { workspace = true }
 tree-sitter-c = { workspace = true, optional = true }
 tree-sitter-go = { workspace = true, optional = true }
+tree-sitter-language-pack = { workspace = true, optional = true }
 tree-sitter-python = { workspace = true, optional = true }
 tree-sitter-rust = { workspace = true, optional = true }
 tree-sitter-typescript = { workspace = true, optional = true }
 
@@ -0,0 +1,7 @@
+# Seeds for failure cases proptest has generated in the past. It is
+# automatically read and these particular cases re-run before any
+# novel cases are generated.
+#
+# It is recommended to check this file in to source control so that
+# everyone who runs the test benefits from these saved cases.
+cc d0fbd11ee1ec6c1952200f7bd57a632e17bb8de5cd741240aeace2c99899eb80 # shrinks to source = ""
@@ -76,18 +76,18 @@ pub fn run_ingestion(config: &AstDocConfig) -> Result<IngestionResult, AstDocErr
         let abs_path = root.join(rel_path);
         match std::fs::read_to_string(&abs_path) {
             Ok(content) => {
-                let language = crate::parser::detect_language(rel_path);
+                let lang = crate::parser::detect_language(rel_path);
                 let token_count = count_tokens(&content);
                 debug!(
                     path = %rel_path.display(),
-                    language = ?language,
+                    language = ?lang,
                     tokens = token_count,
                     "discovered file"
                 );
                 files.push(DiscoveredFile {
                     path: rel_path.clone(),
                     content,
-                    language,
+                    language: lang,
                     raw_token_count: token_count,
                 });
             }
 
@@ -76,7 +76,7 @@ pub fn run_pipeline(config: &AstDocConfig) -> eyre::Result<PipelineResult> {
     let parsed: Vec<ParsedFile> = ingestion
         .files
         .par_iter()
-        .filter_map(|f| f.language.map(|lang| (f, lang)))
+        .filter_map(|f| f.language.as_ref().map(|lang| (f, lang)))
         .map(|(f, lang)| parser::parse_file(f, lang).map_err(eyre::Report::from))
         .collect::<eyre::Result<Vec<_>>>()?;
 
@@ -134,6 +134,6 @@ fn count_tokens(text: &str) -> usize {
 #[cfg(all(test, feature = "hotpath"))]
 #[ctor::ctor]
 fn init_hotpath_for_tests() {
-    let _guard = hotpath::GuardBuilder::new("test").build();
+    let _guard = hotpath::HotpathGuardBuilder::new("test").build();
     std::mem::forget(_guard);
 }