diff --git a/.cursor/rules/log-analysis.mdc b/.cursor/rules/log-analysis.mdc new file mode 100644 index 0000000..cdfa1d0 --- /dev/null +++ b/.cursor/rules/log-analysis.mdc @@ -0,0 +1,21 @@ +--- +description: Use ctrlb-decompose to analyze log files before reading them raw +globs: **/*.log +alwaysApply: false +--- + +# Log File Analysis + +This project includes ctrlb-decompose, a log pattern extraction tool. When you encounter a `.log` file or are asked to analyze logs, run ctrlb-decompose first to get a structural summary: + +```bash +./target/release/ctrlb-decompose --llm --context 2 --quiet <file> 2>/dev/null +``` + +This compresses raw log lines into patterns with typed variables, quantile stats (p50/p99), and anomaly detection. Errors and warnings are prioritized first. + +For JSON output: `./target/release/ctrlb-decompose --json --quiet <file> 2>/dev/null` + +For piped input: `kubectl logs pod | ./target/release/ctrlb-decompose --llm --source-label "pod-name"` + +Workflow: decompose first, identify patterns of interest, then grep or use `--context N` with a higher N to see more examples. diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 0000000..d76a2e0 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,35 @@ +This repository contains ctrlb-decompose, a CLI tool that compresses raw log lines into structural patterns with typed variables, quantile statistics, anomaly detection, and temporal correlations. + +## Log Analysis + +When working with log files (>100 lines), run ctrlb-decompose first to get a structural summary before reading raw logs: + +```bash +./target/release/ctrlb-decompose --llm --context 2 --quiet <file> 2>/dev/null +``` + +This compresses thousands of log lines into a handful of actionable patterns with: +- Error/warning patterns prioritized first +- Typed variables (IPv4, UUID, Duration, HexID, Integer, Enum, etc.) 
+- Quantile statistics (p50, p99, min, max) for numeric variables +- Example raw lines for each pattern + +Key flags: +- `--llm` — Token-efficient markdown output (default context=2, banner suppressed) +- `--json` — Structured JSON output for programmatic use +- `--source-label <label>` — Tag output with a source name (e.g., pod name) +- `--sim-threshold <0.0-1.0>` — Tune clustering granularity (default 0.5, lower = fewer patterns) +- `--top <N>` — Limit to top N patterns (default 20) +- `--context <N>` — Example lines per pattern + +Works with stdin: `kubectl logs pod | ./target/release/ctrlb-decompose --llm` + +Workflow: decompose first, identify patterns of interest, then drill into the raw file with grep or a higher `--context`. + +## Build + +```bash +cargo build --release +cargo test --locked +cargo clippy +``` diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..a6356bf --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,25 @@ +This repository contains ctrlb-decompose, a CLI tool that compresses raw log lines into structural patterns with typed variables, quantile statistics, and anomaly detection. 
+ +## Log Analysis + +When working with log files (>100 lines), run ctrlb-decompose first: + +```bash +./target/release/ctrlb-decompose --llm --context 2 --quiet <file> 2>/dev/null +``` + +Key flags: +- `--llm` — Token-efficient markdown (errors first, quantile stats inline) +- `--json` — Structured JSON for programmatic use +- `--source-label <label>` — Tag output with source name +- `--sim-threshold <0.0-1.0>` — Tune clustering granularity (default 0.5) + +Works with stdin: `kubectl logs pod | ./target/release/ctrlb-decompose --llm` + +## Build & Test + +```bash +cargo build --release +cargo test --locked +cargo clippy +``` diff --git a/.gitignore b/.gitignore index 7d5320d..a275b9b 100644 --- a/.gitignore +++ b/.gitignore @@ -27,4 +27,8 @@ target /logs # WASM build output (regenerated by wasm-pack) -/web/pkg \ No newline at end of file +/web/pkg + +# Perf profiling data +perf.data +perf.data.old \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..6bf141a --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,53 @@ +# AGENTS.md + +This file provides guidance to Codex and other AI agents working with code in this repository. + +## Project + +ctrlb-decompose compresses raw log lines into structural patterns with typed variables, quantile statistics, anomaly detection, and temporal correlations. It runs as a CLI tool, a Rust library, or a WASM module. 
+ +## Build & Test + +```bash +cargo build --release +cargo test --locked +cargo clippy +``` + +## Log Analysis + +When working with log files (>100 lines), run ctrlb-decompose first to get a structural summary before reading raw logs: + +```bash +./target/release/ctrlb-decompose --llm --context 2 --quiet <file> 2>/dev/null +``` + +Key flags: +- `--llm` — Token-efficient markdown output (banner auto-suppressed, default context=2) +- `--json` — Structured JSON for programmatic use +- `--source-label <label>` — Tag output with source name +- `--sim-threshold <0.0-1.0>` — Tune clustering (default 0.5, lower = fewer patterns) +- `--top <N>` — Top N patterns (default 20) +- `--context <N>` — Example lines per pattern + +Works with stdin: `kubectl logs pod | ./target/release/ctrlb-decompose --llm` + +Workflow: decompose first, identify patterns, then drill into raw logs with grep or higher `--context`. + +## Architecture + +Two-stage normalization + clustering pipeline (single-pass, streaming): + +1. Timestamp extraction (`src/timestamp.rs`) +2. CLP encoding (`src/extraction/clp/`) — normalizes variables into typed placeholders +3. Drain3 clustering (`src/extraction/drain3.rs`) — tree-based prefix clustering with LRU eviction +4. Variable classification — semantic types: IPv4, UUID, Duration, HexID, Integer, Float, Enum, String +5. Statistics (`src/stats.rs`) — DDSketch quantiles, HyperLogLog cardinality, top-k, reservoir sampling +6. Anomaly detection (`src/anomaly.rs`) — frequency spikes, error cascades, bimodal distributions +7. Scoring & correlation (`src/scoring.rs`, `src/correlation.rs`) +8. 
Output formatting (`src/format/`) — human, llm, json + +Entry points: +- CLI: `main.rs` -> `lib.rs::run(args)` +- Library: `lib.rs::process_log_text(input, opts)` +- WASM: `wasm.rs::analyze_logs(input, format, top_n, context_lines)` diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..0ab7ea8 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,93 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project + +ctrlb-decompose compresses raw log lines into structural patterns with typed variables, quantile statistics, anomaly detection, and temporal correlations. It runs as a CLI tool, a Rust library, or a WASM module in the browser. + +## Build & Test Commands + +```bash +# Build +cargo build +cargo build --release + +# Test +cargo test --locked +cargo test <test_name> # Run a single test + +# Lint +cargo clippy + +# Build without default features (library-only, no CLI) +cargo build --no-default-features + +# WASM build +wasm-pack build --target web --out-dir web/pkg -- --no-default-features --features wasm +``` + +## Architecture + +**Two-stage normalization + clustering pipeline** (single-pass, streaming): + +1. **Timestamp extraction** (`src/timestamp.rs`) — regex-based, stripped before further processing +2. **CLP encoding** (`src/extraction/clp/`) — normalizes variables (ints, floats, IPs, hex) into typed placeholders +3. **Drain3 clustering** (`src/extraction/drain3.rs`) — tree-based prefix clustering on logtypes with LRU eviction +4. **Variable classification** (`src/extraction/drain3.rs`) — merges CLP-decoded values with Drain3 wildcards, classifies into semantic types (IPv4, UUID, Duration, HexID, Integer, Float, Enum, String, etc.) +5. **Statistics** (`src/stats.rs`) — DDSketch quantiles (~200 bytes/slot), HyperLogLog++ cardinality, top-k, temporal bucketing, reservoir-sampled examples +6. 
**Anomaly detection** (`src/anomaly.rs`) — frequency spikes, error cascades, bimodal distributions, low cardinality +7. **Scoring & correlation** (`src/scoring.rs`, `src/correlation.rs`) — keyword severity, Pearson temporal co-occurrence, shared variables +8. **Output formatting** (`src/format/`) — human (ANSI terminal), llm (compact markdown), json (structured) + +**Entry points:** +- CLI: `main.rs` → `lib.rs::run(args)` +- Library: `lib.rs::process_log_text(input, opts) -> AnalysisOutput` +- WASM: `wasm.rs::analyze_logs(input, format, top_n, context_lines) -> String` + +## Feature Gates + +- `cli` (default) — includes `clap` and `colored` for terminal use +- `wasm` — includes `wasm-bindgen` and `serde-wasm-bindgen` for browser use +- The core library is WASM-safe (no stdin/filesystem deps) +- Crate type is `["cdylib", "rlib"]` for dual WASM + library output + +## Key Design Decisions + +- **Single-pass streaming**: no second pass over data; all stats accumulated incrementally +- **Memory-bounded**: Drain3 LRU (default 10k clusters), DDSketch fixed-size quantiles, HyperLogLog++ fixed-size cardinality, reservoir sampling for examples +- **Lazy regex compilation**: `once_cell::sync::Lazy` for all regex patterns +- **Minimum Rust version**: 1.94.0 + +## Testing + +Tests are in `tests/integration.rs` using the fixture at `tests/fixtures/sample.log`. Snapshot testing uses `insta`. Benchmarks use `criterion` in `benches/pipeline.rs`. + +## CI + +- `ci.yml` — runs on push to main and PRs: `cargo test`, `clippy`, feature matrix (no-default-features, wasm target) +- `release.yml` — triggered by `v*.*.*` tags: cargo-dist multi-platform binaries + homebrew +- `wasm-deploy.yml` — deploys WASM build to GitHub Pages on push to main + +## Using ctrlb-decompose for log analysis + +When working with large log files (>100 lines), use ctrlb-decompose to get a structured summary before reading raw logs. This saves context window and surfaces errors/anomalies immediately. 
+ +```bash +# Analyze a log file (LLM-optimized output, 2 example lines per pattern) +./target/release/ctrlb-decompose --llm --context 2 --quiet <file> 2>/dev/null + +# Pipe from any command +kubectl logs <pod> | ./target/release/ctrlb-decompose --llm --context 2 --quiet --source-label "pod-name" 2>/dev/null +journalctl -n 5000 | ./target/release/ctrlb-decompose --llm --context 2 --quiet 2>/dev/null + +# JSON output for programmatic use +./target/release/ctrlb-decompose --json --quiet <file> 2>/dev/null + +# Tune clustering granularity (lower = more aggressive merging, higher = more patterns) +./target/release/ctrlb-decompose --llm --sim-threshold 0.6 <file> +``` + +In LLM mode, the banner is suppressed automatically. The `--quiet` flag suppresses the progress line on stderr. + +**Workflow**: Run `--llm` first to identify patterns of interest, then use `--context N` with higher N or grep for specific patterns in the raw file. diff --git a/Cargo.lock b/Cargo.lock index aa80da9..2bfe6e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -94,12 +94,36 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + [[package]] name = "bitflags" version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bumpalo" version = "3.20.2" @@ -242,6 +266,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "cpufeatures" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" +dependencies = [ + "libc", +] + [[package]] name = "criterion" version = "0.5.1" @@ -309,6 +342,16 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" +[[package]] +name = "crypto-common" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "ctrlb-decompose" version = "0.1.0" @@ -319,11 +362,12 @@ dependencies = [ "colored", "criterion", "fastrand", - "getrandom", + "getrandom 0.4.2", "hyperloglogplus", "insta", "lru", "once_cell", + "proptest", "regex", "serde", "serde-wasm-bindgen", @@ -332,6 +376,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "either" version = "1.15.0" @@ -372,12 +426,40 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + [[package]] name = "getrandom" version = "0.4.2" @@ -387,7 +469,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 6.0.0", "wasip2", "wasip3", "wasm-bindgen", @@ -492,6 +574,8 @@ checksum = "7b4a6248eb93a4401ed2f37dfe8ea592d3cf05b7cf4f8efa867b6895af7e094e" dependencies = [ "console", "once_cell", + "pest", + "pest_derive", "serde", "similar", "tempfile", @@ -611,6 +695,49 @@ version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" +[[package]] +name = "pest" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" +dependencies = [ + "memchr", + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" +dependencies = [ + "pest", + "sha2", +] + [[package]] name = "plotters" version = "0.3.7" @@ -639,6 +766,15 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + [[package]] name = "prettyplease" version = "0.2.37" @@ -658,6 +794,31 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proptest" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b45fcc2344c680f5025fe57779faef368840d0bd1f42f216291f0dc4ace4744" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.45" @@ -667,12 +828,56 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "r-efi" version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rand" +version = "0.9.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_xorshift" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.11.0" @@ -741,6 +946,18 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" +[[package]] +name = "rusty-fork" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc6bf79ff24e648f6da1f8d1f011e9cac26491b619e6b9280f2b47f1774e6ee2" +dependencies = [ + "fnv", + "quick-error", + "tempfile", + "wait-timeout", +] + [[package]] name = "same-file" version = "1.0.6" @@ -810,6 +1027,17 @@ dependencies = [ "zmij", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" @@ -852,7 +1080,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" 
dependencies = [ "fastrand", - "getrandom", + "getrandom 0.4.2", "once_cell", "rustix", "windows-sys 0.61.2", @@ -868,6 +1096,24 @@ dependencies = [ "serde_json", ] +[[package]] +name = "typenum" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.24" @@ -886,6 +1132,21 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wait-timeout" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ac3b126d3914f9849036f826e054cbabdc8519970b8998ddaf3b5bd3c65f11" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/Cargo.toml b/Cargo.toml index 47880aa..86dd480 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,8 +41,9 @@ wasm-bindgen = { version = "0.2", optional = true } serde-wasm-bindgen = { version = "0.6", optional = true } [dev-dependencies] -insta = { version = "1", features = ["json"] } +insta = { version = "1", features = ["json", "redactions"] } criterion = { version = "0.5", features = ["html_reports"] } +proptest = "1" [[bench]] name = "pipeline" diff --git a/README.md b/README.md index 8b7d4a0..4491cf6 
100644 --- a/README.md +++ b/README.md @@ -206,6 +206,38 @@ Options: --- +## Use Cases + +ctrlb-decompose is useful for: + +- **Incident triage** — Surface error patterns, p99 spikes, and host concentration in thousands of log lines. See [docs/use-cases.md](docs/use-cases.md#incident-triage). +- **Postmortem summary** — Generate LLM-friendly summaries for automated timeline reconstruction. See [docs/use-cases.md](docs/use-cases.md#postmortem-summary). +- **CI/CD log triage** — Find failure patterns buried in verbose build output. See [docs/use-cases.md](docs/use-cases.md#cicd-log-triage). +- **Monitoring** — Use `--json` for programmatic alerting thresholds. See [docs/use-cases.md](docs/use-cases.md#monitoring--alerting). +- **Log comparison** — Diff patterns before and after a deployment. See [docs/use-cases.md](docs/use-cases.md#log-comparison). + +## Integration + +ctrlb-decompose pipes into any workflow: + +```bash +# Kubernetes pods +kubectl logs -l app=api --since=1h | ctrlb-decompose --llm --source-label "api" + +# systemd services +journalctl -u nginx --since "1 hour ago" --no-pager | ctrlb-decompose --llm + +# Docker Compose +docker compose logs api db --since 30m | ctrlb-decompose --llm + +# CI failure summary +cargo test 2>&1 | ctrlb-decompose --llm --top 5 --context 2 +``` + +See [docs/integration.md](docs/integration.md) for detailed recipes including Claude Code setup, GitHub Actions, SSH remote analysis, and JSON programmatic use. + +--- + ## License [MIT](LICENSE) diff --git a/benches/compare_drain3.sh b/benches/compare_drain3.sh new file mode 100755 index 0000000..753aa5e --- /dev/null +++ b/benches/compare_drain3.sh @@ -0,0 +1,246 @@ +#!/usr/bin/env bash +# +# compare_drain3.sh - Benchmark ctrlb-decompose (Rust) against Drain3 (Python) +# +# Generates log files at several scales, runs both tools, and prints a +# markdown comparison table with wall-clock time, peak RSS, and throughput. 
#
# Requirements: cargo, python3, drain3 (pip), /usr/bin/time (GNU), bc
#
set -euo pipefail

# ── Configuration ────────────────────────────────────────────────────────────

SCALES=(1000 10000 100000 1000000)
SEED=42
BENCH_DIR="/tmp/ctrlb-bench"
TIME_BIN="/usr/bin/time"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"

CTRLB_BIN="$PROJECT_DIR/target/release/ctrlb-decompose"
GENLOG_BIN="$PROJECT_DIR/target/release/examples/generate_logs"
DRAIN3_SCRIPT="$PROJECT_DIR/benches/drain3_baseline.py"

# ── Dependency checks ────────────────────────────────────────────────────────

#######################################
# Verify that every external tool the benchmark needs is available.
# Collects all missing dependencies first so the user sees the full list
# in one run instead of fixing them one at a time.
# Globals:   TIME_BIN (read)
# Outputs:   list of missing dependencies on stderr
# Returns:   0 when everything is present; exits 1 otherwise
#######################################
check_deps() {
  local missing=()
  local tool

  for tool in cargo python3 bc; do
    if ! command -v "$tool" &>/dev/null; then
      missing+=("$tool")
    fi
  done

  # The extract_* helpers parse the verbose (-v) report, which only GNU time
  # produces; an executable that rejects -v is as good as missing.
  if [[ ! -x "$TIME_BIN" ]] || ! "$TIME_BIN" -v true &>/dev/null; then
    missing+=("/usr/bin/time (GNU time)")
  fi

  if ! python3 -c "import drain3" 2>/dev/null; then
    missing+=("drain3 (pip install drain3)")
  fi

  if (( ${#missing[@]} > 0 )); then
    echo "ERROR: missing dependencies:" >&2
    local dep
    for dep in "${missing[@]}"; do
      echo "  - $dep" >&2
    done
    exit 1
  fi
}

# ── Build ─────────────────────────────────────────────────────────────────────

#######################################
# Build the release binary and the log-generator example, then confirm both
# executables exist.
# Globals:   PROJECT_DIR, CTRLB_BIN, GENLOG_BIN (read)
# Outputs:   cargo output (stderr merged into stdout); errors on stderr
# Returns:   0 on success; exits 1 if an expected binary is absent
#######################################
build() {
  echo "==> Building ctrlb-decompose and log generator..."
  (cd "$PROJECT_DIR" && cargo build --release --example generate_logs 2>&1)
  (cd "$PROJECT_DIR" && cargo build --release 2>&1)

  local bin
  for bin in "$CTRLB_BIN" "$GENLOG_BIN"; do
    if [[ ! -x "$bin" ]]; then
      echo "ERROR: $bin not found after build" >&2
      exit 1
    fi
  done
}

# ── Log generation ────────────────────────────────────────────────────────────

#######################################
# Create one benchmark log per entry in SCALES, reusing logs from earlier runs.
# Output is written to a sibling temp file and renamed only after the
# generator succeeds, so a failed or interrupted run can never leave a
# truncated log that later runs would silently reuse. Empty files are
# treated as stale and regenerated.
# Globals:   BENCH_DIR, SCALES, SEED, GENLOG_BIN (read)
# Outputs:   progress on stdout; errors on stderr
# Returns:   0 on success, 1 if the generator fails
#######################################
generate_logs() {
  mkdir -p "$BENCH_DIR"

  local n logfile partial
  for n in "${SCALES[@]}"; do
    logfile="$BENCH_DIR/bench-${n}.log"

    if [[ -s "$logfile" ]]; then
      echo "==> Reusing existing $logfile"
      continue
    fi

    echo "==> Generating $logfile ($n lines)..."
    # Same directory as the target so the final mv is an atomic rename.
    partial="${logfile}.partial.$$"
    if ! "$GENLOG_BIN" --lines "$n" --seed "$SEED" > "$partial"; then
      rm -f -- "$partial"
      echo "ERROR: failed to generate $logfile" >&2
      return 1
    fi
    mv -f -- "$partial" "$logfile"
  done
}

# ── Time parsing helpers ──────────────────────────────────────────────────────

#######################################
# Parse GNU time "Elapsed (wall clock)" value to seconds.
# Formats: "M:SS.ss" or "H:MM:SS.ss" or "H:MM:SS"
# Arguments: $1 - raw elapsed string (surrounding whitespace is ignored)
# Outputs:   seconds as printed by bc; "0" for any other shape
# Returns:   0 on success, bc's status if the arithmetic fails
#######################################
parse_wall_clock() {
  local raw="$1"
  # Strip leading/trailing whitespace without spawning a subprocess.
  raw="${raw#"${raw%%[![:space:]]*}"}"
  raw="${raw%"${raw##*[![:space:]]}"}"

  local -a parts
  IFS=':' read -ra parts <<< "$raw"

  case "${#parts[@]}" in
    2)
      # M:SS.ss
      bc -l <<< "${parts[0]} * 60 + ${parts[1]}"
      ;;
    3)
      # H:MM:SS.ss
      bc -l <<< "${parts[0]} * 3600 + ${parts[1]} * 60 + ${parts[2]}"
      ;;
    *)
      echo "0"
      ;;
  esac
}

#######################################
# Extract wall clock time string from GNU time -v output file.
# Arguments: $1 - path to the captured `time -v` report
# Outputs:   the text after the final "): " on the matching line
# Returns:   non-zero when the report has no such line
#######################################
extract_wall_clock() {
  local timefile="$1"
  grep -F -- "Elapsed (wall clock)" "$timefile" | sed 's/.*): //'
}

#######################################
# Extract max RSS in KB from GNU time -v output file.
# Arguments: $1 - path to the captured `time -v` report
# Outputs:   last field of the "Maximum resident set size" line
# Returns:   non-zero when the report has no such line
#######################################
extract_rss_kb() {
  local timefile="$1"
  awk '/Maximum resident set size/ { print $NF; found = 1 }
       END { exit !found }' "$timefile"
}

# ── Benchmark runner ──────────────────────────────────────────────────────────

# Run a single benchmark, capture time metrics.
# Usage: run_bench