diff --git a/.continue/prompts/new-prompt.md b/.continue/prompts/new-prompt.md new file mode 100644 index 00000000..9fd5bf20 --- /dev/null +++ b/.continue/prompts/new-prompt.md @@ -0,0 +1,7 @@ +--- +name: New prompt +description: New prompt +invokable: true +--- + +Please write a thorough suite of unit tests for this code, making sure to cover all relevant edge cases \ No newline at end of file diff --git a/ENHANCEMENT_PLAN.md b/ENHANCEMENT_PLAN.md new file mode 100644 index 00000000..2b1f1950 --- /dev/null +++ b/ENHANCEMENT_PLAN.md @@ -0,0 +1,104 @@ +# Documentation Enhancement Plan + +## Current State Analysis + +Based on CONTENT_GAPS_ANALYSIS.md: +- 201 tutorials total +- 198 with exactly 8 chapters +- 3 with >8 chapters (n8n-mcp, langchain, ag2) +- 0 with 0 chapters +- 0 with partial chapter coverage + +## Enhancement Strategy + +### Phase 1: High-Traffic Tutorial Regeneration +**Priority**: Top 10 tutorials by stars from `discoverability/tutorial-source-verification.json` + +| Tutorial | Stars | Repo | Status | +|----------|-------|------|--------| +| openclaw/openclaw | 341,130 | openclaw/openclaw | Need regeneration | +| facebook/react | 244,271 | facebook/react | Need regeneration | +| n8n-io/n8n | 181,679 | n8n-io/n8n | Need regeneration | +| ollama/ollama | 166,451 | ollama/ollama | Need regeneration | +| huggingface/transformers | 158,545 | huggingface/transformers | Need regeneration | +| langflow-ai/langflow | 146,399 | langflow-ai/langflow | Need regeneration | +| langgenius/dify | 134,981 | langgenius/dify | Need regeneration | +| anomalyco/opencode | 132,650 | anomalyco/opencode | Need regeneration | +| langchain-ai/langchain | 131,599 | langchain-ai/langchain | Need regeneration | +| open-webui/open-webui | 129,246 | open-webui/open-webui | Need regeneration | + +### Phase 2: Missing High-Impact Tutorials +**Priority**: Add tutorials for trending OSS projects not yet covered + +**Candidates** (check GitHub for stars > 10K): +- Vercel AI SDK (22K+ 
stars) - Already covered +- Browser Use (85K+ stars) - Already covered +- Claude Code (84K+ stars) - Already covered +- Model Context Protocol servers (82K+ stars) - Already covered +- Infiniflow RAGFlow (76K+ stars) - Already covered +- vLLM (74K+ stars) - Already covered + +**New additions needed**: +- Check GitHub for trending repos in AI/agents space +- Focus on repos with recent activity (pushed_at in last 30 days) +- Target repos with documentation gaps + +### Phase 3: Content Gap Resolution +**Priority**: Fill missing code examples and depth + +**Issues to fix**: +1. Tutorials with <100 lines in chapters (already addressed in commit 5bda1be) +2. Missing Mermaid diagrams in architecture chapters +3. Inconsistent code example quality across tutorials +4. Missing production deployment examples + +### Phase 4: Source Code Extraction Improvements +**Priority**: Enhance the regeneration script + +**Improvements needed**: +1. Better file prioritization (focus on core modules) +2. Handle more file types (`.md`, `.json`, `.yaml`, `.toml`) +3. Better abstraction detection for different languages +4. Add test file extraction for usage examples +5. Better Mermaid diagram generation from code structure + +## Execution Plan + +### Step 1: Regenerate High-Traffic Tutorials +```bash +# Run regeneration on top 10 tutorials +python scripts/regenerate_tutorial_chapters.py \ + --slugs openclaw,facebook-react,n8n,ollama,huggingface-transformers,langflow,dify,opencode,langchain,open-webui +``` + +### Step 2: Add New Tutorials +1. Identify 5-10 missing high-impact repos +2. Create tutorial directories with proper structure +3. Add to `llms.txt` and `llms-full.txt` +4. Update `discoverability/tutorial-source-verification.json` + +### Step 3: Fix Content Gaps +1. Review tutorials with low chapter counts +2. Add missing code examples from source repos +3. Add Mermaid diagrams where missing +4. Ensure consistent production examples + +### Step 4: Improve Source Extraction +1. 
Update `regenerate_tutorial_chapters.py` +2. Add better file filtering logic +3. Enhance abstraction detection +4. Add diagram generation from code structure + +### Step 5: Quality Verification +```bash +# Run health checks +python scripts/docs_health.py +``` + +## Success Metrics + +- [ ] All top 10 tutorials have real code examples from source repos +- [ ] 5-10 new high-impact tutorials added +- [ ] 0 tutorials with placeholder content +- [ ] All tutorials pass docs_health.py checks +- [ ] Source extraction script handles 95%+ of file types diff --git a/README.md b/README.md index 67117f8f..21071499 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,16 @@ +
block */
- heightClass?: string;
-
- /**
- * When true, vertically center the content and copy button – useful for
- * single-line shell commands shown inside a short container (e.g. FAQ).
- */
- centerVertically?: boolean;
-}
+import CodeExample from "@/components/CodeExample";
-export const HERO_AGENTS_MD = `# AGENTS.md
-
-## Setup commands
-- Install deps: \`pnpm install\`
-- Start dev server: \`pnpm dev\`
-- Run tests: \`pnpm test\`
-
-## Code style
-- TypeScript strict mode
-- Single quotes, no semicolons
-- Use functional patterns where possible`;
+interface FAQItem {
+ question: string;
+ answer: React.ReactNode;
+}
-const EXAMPLE_AGENTS_MD = `# Sample AGENTS.md file
+export default function FAQ() {
+ const faqItems: FAQItem[] = [
+ {
+ question: "Are there required fields?",
+ answer:
+ "No. AGENTS.md is just standard Markdown. Use any headings you like; the agent simply parses the text you provide.",
+ },
+ {
+ question: "What if instructions conflict?",
+ answer:
+ "The closest AGENTS.md to the edited file wins; explicit user chat prompts override everything.",
+ },
+ {
+ question: "Will the agent run testing commands found in AGENTS.md automatically?",
+ answer:
+ "Yes—if you list them. The agent will attempt to execute relevant programmatic checks and fix failures before finishing the task.",
+ },
+ {
+ question: "Can I update it later?",
+ answer: "Absolutely. Treat AGENTS.md as living documentation.",
+ },
+ {
+ question: "How do I migrate existing docs to AGENTS.md?",
+ answer: (
+ <>
```
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
+This interface is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[parseMarkdown]
- B[renderLineWithInlineCode]
- C[CodeExample]
+ A[Hero]
+ B[FAQ]
+ C[FAQItem]
A --> B
B --> C
```
diff --git a/tutorials/agents-md-tutorial/04-repository-structure-and-scope-strategy.md b/tutorials/agents-md-tutorial/04-repository-structure-and-scope-strategy.md
index 909b1ba3..2dc30038 100644
--- a/tutorials/agents-md-tutorial/04-repository-structure-and-scope-strategy.md
+++ b/tutorials/agents-md-tutorial/04-repository-structure-and-scope-strategy.md
@@ -37,141 +37,10 @@ You now can scale AGENTS.md patterns from small repos to monorepos.
Next: [Chapter 5: Testing, Linting, and CI Alignment](05-testing-linting-and-ci-alignment.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `components/CodeExample.tsx`
-
-The `CodeExampleProps` interface in [`components/CodeExample.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/CodeExample.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import CopyIcon from "./icons/CopyIcon";
-
-interface CodeExampleProps {
- /** Markdown content to display; falls back to default example if not provided */
- code?: string;
- /** Optional URL for "View on GitHub" link */
- href?: string;
- /** If true, render only the code block without the section wrapper */
- compact?: boolean;
- /** Override Tailwind height classes for the block */
- heightClass?: string;
-
- /**
- * When true, vertically center the content and copy button – useful for
- * single-line shell commands shown inside a short container (e.g. FAQ).
- */
- centerVertically?: boolean;
-}
-
-export const HERO_AGENTS_MD = `# AGENTS.md
-
-## Setup commands
-- Install deps: \`pnpm install\`
-- Start dev server: \`pnpm dev\`
-- Run tests: \`pnpm test\`
-
-## Code style
-- TypeScript strict mode
-- Single quotes, no semicolons
-- Use functional patterns where possible`;
-
-const EXAMPLE_AGENTS_MD = `# Sample AGENTS.md file
-```
-
-This interface is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/CompatibilitySection.tsx`
-
-The `LogoItem` function in [`components/CompatibilitySection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/CompatibilitySection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-};
-
-type LogoItemProps = AgentEntry & {
- variant?: "marquee" | "grid";
-};
-
-function LogoItem({
- name,
- url,
- from,
- imageSrc,
- imageSrcLight,
- imageSrcDark,
- variant = "marquee",
-}: LogoItemProps) {
- const baseClasses =
- variant === "grid"
- ? "flex h-full w-full min-w-0 items-center gap-4"
- : "flex h-20 min-w-[280px] items-center gap-4 pr-10";
-
- return (
-
-
- {imageSrcLight && imageSrcDark ? (
- <>
- [...agents, ...agents], [agents]);
-
- if (doubledAgents.length === 0) {
- return null;
- }
-
- const trackStyle = {
- animationPlayState: isActive ? "running" : "paused",
- animationDelay: offset ? `${offset}s` : undefined,
- "--marquee-duration": `${duration}s`,
- } as React.CSSProperties;
-
- return (
-
-
- {doubledAgents.map((agent, index) => (
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[CodeExampleProps]
- B[LogoItem]
- C[LogoMarqueeRow]
- A --> B
- B --> C
-```
+### `AGENTS.md`
+
+Repository structure and scope decisions are visible in the [`AGENTS.md`](https://github.com/agentsmd/agents.md/blob/HEAD/AGENTS.md) specification itself. The file lives at the repository root, which is the standard location agents look for first. The specification notes that sub-directory AGENTS.md files override or extend the root file for narrower scopes — observe how the root file deliberately keeps scope broad enough to serve the whole project.
+
+Cross-reference the upstream repo’s directory layout with the guidance in the root `AGENTS.md` to see how structure and scope choices interact in a real project.
diff --git a/tutorials/agents-md-tutorial/05-testing-linting-and-ci-alignment.md b/tutorials/agents-md-tutorial/05-testing-linting-and-ci-alignment.md
index 3d045eb0..88b8ed0d 100644
--- a/tutorials/agents-md-tutorial/05-testing-linting-and-ci-alignment.md
+++ b/tutorials/agents-md-tutorial/05-testing-linting-and-ci-alignment.md
@@ -37,141 +37,10 @@ You now can align AGENTS.md behavior with enforceable CI outcomes.
Next: [Chapter 6: Team Rollout and Adoption Playbook](06-team-rollout-and-adoption-playbook.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `components/CompatibilitySection.tsx`
-
-The `CompatibilitySection` function in [`components/CompatibilitySection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/CompatibilitySection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-}
-
-export default function CompatibilitySection() {
- const containerRef = useRef(null);
- const [isInView, setIsInView] = useState(false);
- const [shuffledAgents, setShuffledAgents] = useState(agents);
- const [showGrid, setShowGrid] = useState(false);
-
- useEffect(() => {
- setShuffledAgents(shuffleAgents(agents));
- }, []);
-
- useEffect(() => {
- if (showGrid) {
- setIsInView(false);
- return;
- }
-
- const node = containerRef.current;
- if (!node) {
- return;
- }
-
- const observer = new IntersectionObserver(
- ([entry]) => {
- setIsInView(entry.isIntersecting && entry.intersectionRatio > 0);
- },
- {
- threshold: 0,
- }
- );
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/FAQSection.tsx`
-
-The `FAQ` function in [`components/FAQSection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/FAQSection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import CodeExample from "@/components/CodeExample";
-
-interface FAQItem {
- question: string;
- answer: React.ReactNode;
-}
-
-export default function FAQ() {
- const faqItems: FAQItem[] = [
- {
- question: "Are there required fields?",
- answer:
- "No. AGENTS.md is just standard Markdown. Use any headings you like; the agent simply parses the text you provide.",
- },
- {
- question: "What if instructions conflict?",
- answer:
- "The closest AGENTS.md to the edited file wins; explicit user chat prompts override everything.",
- },
- {
- question: "Will the agent run testing commands found in AGENTS.md automatically?",
- answer:
- "Yes—if you list them. The agent will attempt to execute relevant programmatic checks and fix failures before finishing the task.",
- },
- {
- question: "Can I update it later?",
- answer: "Absolutely. Treat AGENTS.md as living documentation.",
- },
- {
- question: "How do I migrate existing docs to AGENTS.md?",
- answer: (
- <>
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/FAQSection.tsx`
-
-The `FAQItem` interface in [`components/FAQSection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/FAQSection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import CodeExample from "@/components/CodeExample";
-
-interface FAQItem {
- question: string;
- answer: React.ReactNode;
-}
-
-export default function FAQ() {
- const faqItems: FAQItem[] = [
- {
- question: "Are there required fields?",
- answer:
- "No. AGENTS.md is just standard Markdown. Use any headings you like; the agent simply parses the text you provide.",
- },
- {
- question: "What if instructions conflict?",
- answer:
- "The closest AGENTS.md to the edited file wins; explicit user chat prompts override everything.",
- },
- {
- question: "Will the agent run testing commands found in AGENTS.md automatically?",
- answer:
- "Yes—if you list them. The agent will attempt to execute relevant programmatic checks and fix failures before finishing the task.",
- },
- {
- question: "Can I update it later?",
- answer: "Absolutely. Treat AGENTS.md as living documentation.",
- },
- {
- question: "How do I migrate existing docs to AGENTS.md?",
- answer: (
- <>
-```
-
-This interface is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[CompatibilitySection]
- B[FAQ]
- C[FAQItem]
- A --> B
- B --> C
-```
+### `AGENTS.md`
+
+The testing and CI alignment patterns described in this chapter are reflected in the [`AGENTS.md`](https://github.com/agentsmd/agents.md/blob/HEAD/AGENTS.md) file itself, which documents the project's own test and lint commands. Agents reading this file know exactly which commands to run before submitting changes — the same principle this chapter teaches you to apply in your own repositories.
+
+The upstream repo's [`package.json`](https://github.com/agentsmd/agents.md/blob/HEAD/package.json) shows how the commands listed in `AGENTS.md` map to actual scripts, demonstrating the link between the specification and the CI configuration.
diff --git a/tutorials/agents-md-tutorial/06-team-rollout-and-adoption-playbook.md b/tutorials/agents-md-tutorial/06-team-rollout-and-adoption-playbook.md
index a7deed9f..e7a221ac 100644
--- a/tutorials/agents-md-tutorial/06-team-rollout-and-adoption-playbook.md
+++ b/tutorials/agents-md-tutorial/06-team-rollout-and-adoption-playbook.md
@@ -38,141 +38,10 @@ You now have a practical rollout path for organization-wide AGENTS.md adoption.
Next: [Chapter 7: Governance, Versioning, and Drift Control](07-governance-versioning-and-drift-control.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `components/Hero.tsx`
-
-The `Hero` function in [`components/Hero.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/Hero.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import GitHubIcon from "@/components/icons/GitHubIcon";
-
-export default function Hero() {
- return (
-
-
- {/*
- On large screens we want the primary CTA buttons to align with the
- bottom edge of the code block rendered in the right column. Making
- the left column a full-height flex container and pushing the CTA row
- to the bottom (via `lg:justify-between`) achieves this without
- disturbing the natural flow on small screens where the layout stacks
- vertically.
- */}
-
- AGENTS.md
-
-
- A simple, open format for guiding coding agents,{" "}
-
- used by over{" "}
-
- 60k open-source projects
-
- .
-
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/WhySection.tsx`
-
-The `WhySection` function in [`components/WhySection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/WhySection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import LinkIcon from "@/components/icons/LinkIcon";
-
-export default function WhySection() {
- return (
-
-
-
- README.md files are for humans: quick starts, project descriptions,
- and contribution guidelines.
-
-
- AGENTS.md complements this by containing the extra, sometimes detailed
- context coding agents need: build steps, tests, and conventions that
- might clutter a README or aren’t relevant to human contributors.
-
- We intentionally kept it separate to:
-
-
-
-
-
- Give agents a clear, predictable place for instructions.
-
-
-
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/Section.tsx`
-
-The `Section` function in [`components/Section.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/Section.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import React from "react";
-
-export type SectionProps = React.PropsWithChildren<{
- id?: string;
- className?: string;
- title: string;
- /**
- * Center the heading and inner content horizontally (text-center).
- */
- center?: boolean;
- /**
- * Tailwind max-width utility to override the default container width.
- * e.g. "max-w-4xl". Defaults to "max-w-6xl".
- */
- maxWidthClass?: string;
-}>;
-
-export default function Section({
- className = "",
- id,
- title,
- children,
- center = false,
- maxWidthClass = "max-w-6xl",
-}: SectionProps) {
- const containerClasses = `${maxWidthClass} mx-auto flex flex-col gap-6`;
-
- return (
-
-
- B
- B --> C
-```
+### `AGENTS.md`
+
+Rollout success depends on the AGENTS.md file being discoverable and immediately useful. The [`AGENTS.md`](https://github.com/agentsmd/agents.md/blob/HEAD/AGENTS.md) in the upstream repo models the kind of concise, team-oriented guidance that generates early buy-in — short sections, plain language, and commands that are copy-pasteable without modification.
+
+Use the file's structure as a template when drafting the initial version you will socialize with your team. The [`README.md`](https://github.com/agentsmd/agents.md/blob/HEAD/README.md) also shows the talking points that have proven effective for explaining the standard to skeptical contributors.
diff --git a/tutorials/agents-md-tutorial/07-governance-versioning-and-drift-control.md b/tutorials/agents-md-tutorial/07-governance-versioning-and-drift-control.md
index bef72ea8..7d6f8294 100644
--- a/tutorials/agents-md-tutorial/07-governance-versioning-and-drift-control.md
+++ b/tutorials/agents-md-tutorial/07-governance-versioning-and-drift-control.md
@@ -37,110 +37,10 @@ You now have governance patterns to keep agent guidance accurate over time.
Next: [Chapter 8: Ecosystem Contribution and Standard Evolution](08-ecosystem-contribution-and-standard-evolution.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `components/ExamplesSection.tsx`
-
-The `ExamplesSection` function in [`components/ExamplesSection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/ExamplesSection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import ExampleListSection from "@/components/ExampleListSection";
-
-interface ExamplesSectionProps {
- contributorsByRepo: Record;
-}
-
-export default function ExamplesSection({ contributorsByRepo }: ExamplesSectionProps) {
- return (
-
- {/* Wide code example */}
-
-
-
-
- {/* Repo cards */}
-
-
- );
-}
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/ExamplesSection.tsx`
-
-The `ExamplesSectionProps` interface in [`components/ExamplesSection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/ExamplesSection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import ExampleListSection from "@/components/ExampleListSection";
-
-interface ExamplesSectionProps {
- contributorsByRepo: Record;
-}
-
-export default function ExamplesSection({ contributorsByRepo }: ExamplesSectionProps) {
- return (
-
- {/* Wide code example */}
-
-
-
-
- {/* Repo cards */}
-
-
- );
-}
-
-```
-
-This interface is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `pages/_app.tsx`
-
-The `App` function in [`pages/_app.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/pages/_app.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import "@/styles/globals.css";
-import type { AppProps } from "next/app";
-import Head from "next/head";
-import { Analytics } from "@vercel/analytics/next";
-export default function App({ Component, pageProps }: AppProps) {
- return <>
-
- AGENTS.md
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- >;
-}
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[ExamplesSection]
- B[ExamplesSectionProps]
- C[App]
- A --> B
- B --> C
-```
+### `AGENTS.md`
+
+Governance and drift control depend on treating `AGENTS.md` as a version-controlled artifact with the same discipline as code. The upstream [`AGENTS.md`](https://github.com/agentsmd/agents.md/blob/HEAD/AGENTS.md) itself is managed through standard pull requests and reviewed like any other file — the commit history for this file in the upstream repo illustrates how the specification evolves incrementally without breaking existing consumers.
+
+Review the git log for `AGENTS.md` in the upstream repository to see what kinds of changes are considered breaking versus additive, which directly informs the versioning strategy described in this chapter.
diff --git a/tutorials/agents-md-tutorial/08-ecosystem-contribution-and-standard-evolution.md b/tutorials/agents-md-tutorial/08-ecosystem-contribution-and-standard-evolution.md
index d463378e..b6e31c9a 100644
--- a/tutorials/agents-md-tutorial/08-ecosystem-contribution-and-standard-evolution.md
+++ b/tutorials/agents-md-tutorial/08-ecosystem-contribution-and-standard-evolution.md
@@ -38,127 +38,10 @@ You now have a full AGENTS.md playbook from local adoption to ecosystem contribu
Next tutorial: [OpenCode AI Legacy Tutorial](../opencode-ai-legacy-tutorial/)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `components/HowToUseSection.tsx`
-
-The `HowToUseSection` function in [`components/HowToUseSection.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/HowToUseSection.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import React from "react";
-
-export default function HowToUseSection() {
- const steps = [
- {
- title: "Add AGENTS.md",
- body: (
- <>
- Create an AGENTS.md file at the root of the repository. Most
- coding agents can even scaffold one for you if you ask nicely.
- >
- ),
- },
- {
- title: "Cover what matters",
- body: (
- <>
- Add sections that help an agent work effectively with your project. Popular choices:
-
- - Project overview
- - Build and test commands
- - Code style guidelines
- - Testing instructions
- - Security considerations
-
- >
- ),
- },
- {
- title: "Add extra instructions",
- body: "Commit messages or pull request guidelines, security gotchas, large datasets, deployment steps: anything you’d tell a new teammate belongs here too.",
- },
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/icons/GitHubIcon.tsx`
-
-The `GitHubIcon` function in [`components/icons/GitHubIcon.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/icons/GitHubIcon.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import React from "react";
-
-interface GitHubIconProps {
- className?: string;
-}
-
-// The path data is the official GitHub mark (see https://github.com/logos).
-export default function GitHubIcon({ className = "w-4 h-4" }: GitHubIconProps) {
- return (
-
- );
-}
-
-```
-
-This function is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-### `components/icons/GitHubIcon.tsx`
-
-The `GitHubIconProps` interface in [`components/icons/GitHubIcon.tsx`](https://github.com/agentsmd/agents.md/blob/HEAD/components/icons/GitHubIcon.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-import React from "react";
-
-interface GitHubIconProps {
- className?: string;
-}
-
-// The path data is the official GitHub mark (see https://github.com/logos).
-export default function GitHubIcon({ className = "w-4 h-4" }: GitHubIconProps) {
- return (
-
- );
-}
-
-```
-
-This interface is important because it defines how AGENTS.md Tutorial: Open Standard for Coding-Agent Guidance in Repositories implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[HowToUseSection]
- B[GitHubIcon]
- C[GitHubIconProps]
- A --> B
- B --> C
-```
+### `AGENTS.md` and `README.md`
+
+Contributing to the standard starts with the [`AGENTS.md`](https://github.com/agentsmd/agents.md/blob/HEAD/AGENTS.md) file and the [`README.md`](https://github.com/agentsmd/agents.md/blob/HEAD/README.md) in the upstream repository. The README describes the contribution process — opening issues to propose new conventions, submitting PRs against the spec file, and the review criteria used by maintainers.
+
+Before proposing a change to the standard, read the commit history and open issues in the upstream repository to understand which proposals have been accepted, rejected, or deferred, and what reasoning shaped those decisions.
diff --git a/tutorials/agno-tutorial/01-getting-started.md b/tutorials/agno-tutorial/01-getting-started.md
index 2904652a..22c9f576 100644
--- a/tutorials/agno-tutorial/01-getting-started.md
+++ b/tutorials/agno-tutorial/01-getting-started.md
@@ -52,186 +52,8 @@ You now have an Agno baseline with persistent memory and learning enabled.
Next: [Chapter 2: Framework Architecture](02-framework-architecture.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/scripts/cookbook_runner.py`
-
-The `resolve_python_bin` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def resolve_python_bin(python_bin: str | None) -> str:
- if python_bin:
- return python_bin
- demo_python = Path(".venvs/demo/bin/python")
- if demo_python.exists():
- return demo_python.as_posix()
- return sys.executable
-
-
-def select_directory(base_directory: Path) -> Path | None:
- if inquirer is None:
- raise click.ClickException(
- "Interactive mode requires `inquirer`. Install it or use `--batch`."
- )
-
- current_dir = base_directory
- while True:
- items = [
- item.name
- for item in current_dir.iterdir()
- if item.is_dir() and item.name not in SKIP_DIR_NAMES
- ]
- items.sort()
- items.insert(0, "[Select this directory]")
- if current_dir != current_dir.parent:
- items.insert(1, "[Go back]")
-
- questions = [
- inquirer.List(
- "selected_item",
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `select_directory` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def select_directory(base_directory: Path) -> Path | None:
- if inquirer is None:
- raise click.ClickException(
- "Interactive mode requires `inquirer`. Install it or use `--batch`."
- )
-
- current_dir = base_directory
- while True:
- items = [
- item.name
- for item in current_dir.iterdir()
- if item.is_dir() and item.name not in SKIP_DIR_NAMES
- ]
- items.sort()
- items.insert(0, "[Select this directory]")
- if current_dir != current_dir.parent:
- items.insert(1, "[Go back]")
-
- questions = [
- inquirer.List(
- "selected_item",
- message=f"Current directory: {current_dir.as_posix()}",
- choices=items,
- )
- ]
- answers = inquirer.prompt(questions)
- if not answers or "selected_item" not in answers:
- click.echo("No selection made. Exiting.")
- return None
+### `libs/agno/agno/agent/agent.py`
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `list_python_files` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def list_python_files(base_directory: Path, recursive: bool) -> list[Path]:
- pattern = "**/*.py" if recursive else "*.py"
- files = []
- for path in sorted(base_directory.glob(pattern)):
- if not path.is_file():
- continue
- if path.name in SKIP_FILE_NAMES:
- continue
- if any(part in SKIP_DIR_NAMES for part in path.parts):
- continue
- files.append(path)
- return files
-
-
-def run_python_script(
- script_path: Path, python_bin: str, timeout_seconds: int
-) -> dict[str, object]:
- click.echo(f"Running {script_path.as_posix()} with {python_bin}")
- start = time.perf_counter()
- timed_out = False
- return_code = 1
- error_message = None
- try:
- completed = subprocess.run(
- [python_bin, script_path.as_posix()],
- check=False,
- timeout=timeout_seconds if timeout_seconds > 0 else None,
- text=True,
- )
- return_code = completed.returncode
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `run_python_script` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def run_python_script(
- script_path: Path, python_bin: str, timeout_seconds: int
-) -> dict[str, object]:
- click.echo(f"Running {script_path.as_posix()} with {python_bin}")
- start = time.perf_counter()
- timed_out = False
- return_code = 1
- error_message = None
- try:
- completed = subprocess.run(
- [python_bin, script_path.as_posix()],
- check=False,
- timeout=timeout_seconds if timeout_seconds > 0 else None,
- text=True,
- )
- return_code = completed.returncode
- except subprocess.TimeoutExpired:
- timed_out = True
- error_message = f"Timed out after {timeout_seconds}s"
- return_code = 124
- click.echo(f"Timeout: {script_path.as_posix()} exceeded {timeout_seconds}s")
- except OSError as exc:
- error_message = str(exc)
- click.echo(f"Error running {script_path.as_posix()}: {exc}")
-
- duration = time.perf_counter() - start
- passed = return_code == 0 and not timed_out
- return {
- "script": script_path.as_posix(),
- "status": "PASS" if passed else "FAIL",
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[resolve_python_bin]
- B[select_directory]
- C[list_python_files]
- D[run_python_script]
- E[run_with_retries]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+The core `Agent` class in [`libs/agno/agno/agent/agent.py`](https://github.com/agno-agi/agno/blob/HEAD/libs/agno/agno/agent/agent.py) is the primary entry point for Chapter 1. Creating your first agent means instantiating this class with a model and optional tools. The constructor parameters map directly to the concepts introduced in the getting started chapter: `model`, `tools`, `instructions`, `markdown`, and `debug_mode`.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/02-framework-architecture.md b/tutorials/agno-tutorial/02-framework-architecture.md
index 50509c71..775ded7d 100644
--- a/tutorials/agno-tutorial/02-framework-architecture.md
+++ b/tutorials/agno-tutorial/02-framework-architecture.md
@@ -42,186 +42,8 @@ You now understand how Agno separates application logic from runtime and operati
Next: [Chapter 3: Learning, Memory, and State](03-learning-memory-and-state.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/scripts/cookbook_runner.py`
-
-The `summarize_results` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def summarize_results(results: list[dict[str, object]]) -> dict[str, int]:
- passed = sum(1 for r in results if r["status"] == "PASS")
- failed = len(results) - passed
- timed_out = sum(1 for r in results if r["timed_out"])
- return {
- "total_scripts": len(results),
- "passed": passed,
- "failed": failed,
- "timed_out": timed_out,
- }
-
-
-def write_json_report(
- output_path: str,
- base_directory: Path,
- selected_directory: Path,
- mode: str,
- recursive: bool,
- python_bin: str,
- timeout_seconds: int,
- retries: int,
- results: list[dict[str, object]],
-) -> None:
- payload = {
- "generated_at": datetime.now(timezone.utc).isoformat(),
- "base_directory": base_directory.resolve().as_posix(),
- "selected_directory": selected_directory.resolve().as_posix(),
- "mode": mode,
- "recursive": recursive,
- "python_bin": python_bin,
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `write_json_report` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def write_json_report(
- output_path: str,
- base_directory: Path,
- selected_directory: Path,
- mode: str,
- recursive: bool,
- python_bin: str,
- timeout_seconds: int,
- retries: int,
- results: list[dict[str, object]],
-) -> None:
- payload = {
- "generated_at": datetime.now(timezone.utc).isoformat(),
- "base_directory": base_directory.resolve().as_posix(),
- "selected_directory": selected_directory.resolve().as_posix(),
- "mode": mode,
- "recursive": recursive,
- "python_bin": python_bin,
- "timeout_seconds": timeout_seconds,
- "retries": retries,
- "summary": summarize_results(results),
- "results": results,
- }
- path = Path(output_path)
- path.parent.mkdir(parents=True, exist_ok=True)
- path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
- click.echo(f"Wrote JSON report to {path.as_posix()}")
-
-
-def select_interactive_action() -> str | None:
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `select_interactive_action` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def select_interactive_action() -> str | None:
- if inquirer is None:
- return None
- questions = [
- inquirer.List(
- "action",
- message="Some cookbooks failed. What would you like to do?",
- choices=["Retry failed scripts", "Exit with error log"],
- )
- ]
- answers = inquirer.prompt(questions)
- return answers.get("action") if answers else None
-
-
-@click.command()
-@click.argument(
- "base_directory",
- type=click.Path(exists=True, file_okay=False, dir_okay=True),
- default="cookbook",
-)
-@click.option(
- "--batch",
- is_flag=True,
- default=False,
- help="Non-interactive mode: run all scripts in the selected directory.",
-)
-@click.option(
- "--recursive/--no-recursive",
- default=False,
- help="Include Python scripts recursively under selected directory.",
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/cookbook_runner.py`
-
-The `drill_and_run_scripts` function in [`cookbook/scripts/cookbook_runner.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/cookbook_runner.py) handles a key part of this chapter's functionality:
-
-```py
- help="Optional path to write machine-readable JSON results.",
-)
-def drill_and_run_scripts(
- base_directory: str,
- batch: bool,
- recursive: bool,
- python_bin: str | None,
- timeout_seconds: int,
- retries: int,
- fail_fast: bool,
- json_report: str | None,
-) -> None:
- """Run cookbook scripts in interactive or batch mode."""
- if timeout_seconds < 0:
- raise click.ClickException("--timeout-seconds must be >= 0")
- if retries < 0:
- raise click.ClickException("--retries must be >= 0")
-
- base_dir_path = Path(base_directory)
- selected_directory = (
- base_dir_path if batch else select_directory(base_directory=base_dir_path)
- )
- if selected_directory is None:
- raise SystemExit(1)
-
- resolved_python_bin = resolve_python_bin(python_bin=python_bin)
- click.echo(f"Selected directory: {selected_directory.as_posix()}")
- click.echo(f"Python executable: {resolved_python_bin}")
- click.echo(f"Recursive: {recursive}")
- click.echo(f"Timeout (seconds): {timeout_seconds}")
- click.echo(f"Retries: {retries}")
+### `libs/agno/agno/agent/agent.py` and `libs/agno/agno/models/`
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[summarize_results]
- B[write_json_report]
- C[select_interactive_action]
- D[drill_and_run_scripts]
- E[create_regional_agent]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+The framework architecture is best understood by examining the `Agent` class alongside the model abstraction layer in [`libs/agno/agno/models/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/models). The separation between the `Agent` orchestration logic and the interchangeable model backends demonstrates the provider-agnostic design described in this chapter. The `run` and `arun` methods show the core request/response lifecycle.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/03-learning-memory-and-state.md b/tutorials/agno-tutorial/03-learning-memory-and-state.md
index c57ee97c..a1159ecb 100644
--- a/tutorials/agno-tutorial/03-learning-memory-and-state.md
+++ b/tutorials/agno-tutorial/03-learning-memory-and-state.md
@@ -38,186 +38,8 @@ You now know how to structure Agno memory for sustainable long-term improvement.
Next: [Chapter 4: Multi-Agent Orchestration](04-multi-agent-orchestration.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/00_quickstart/human_in_the_loop.py`
-
-The `save_learning` function in [`cookbook/00_quickstart/human_in_the_loop.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/00_quickstart/human_in_the_loop.py) handles a key part of this chapter's functionality:
-
-```py
-# ---------------------------------------------------------------------------
-@tool(requires_confirmation=True)
-def save_learning(title: str, learning: str) -> str:
- """
- Save a reusable insight to the knowledge base for future reference.
- This action requires user confirmation before executing.
-
- Args:
- title: Short descriptive title (e.g., "Tech stock P/E benchmarks")
- learning: The insight to save — be specific and actionable
-
- Returns:
- Confirmation message
- """
- if not title or not title.strip():
- return "Cannot save: title is required"
- if not learning or not learning.strip():
- return "Cannot save: learning content is required"
-
- payload = {
- "title": title.strip(),
- "learning": learning.strip(),
- "saved_at": datetime.now(timezone.utc).isoformat(),
- }
-
- learnings_kb.insert(
- name=payload["title"],
- text_content=json.dumps(payload, ensure_ascii=False),
- reader=TextReader(),
- skip_if_exists=True,
- )
-
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `basic_text_extraction` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def basic_text_extraction():
- """
- Basic text extraction from a single URL.
- Perfect for simple content extraction tasks.
- """
- print("=== Example 1: Basic Text Extraction ===")
-
- agent = Agent(
- tools=[TrafilaturaTools()], # Default configuration
- markdown=True,
- )
-
- agent.print_response(
- "Please extract and summarize the main content from https://github.com/agno-agi/agno"
- )
-
-
-# =============================================================================
-# Example 2: JSON Output with Metadata
-# =============================================================================
-
-
-def json_with_metadata():
- """
- Extract content in JSON format with metadata.
- Useful when you need structured data including titles, authors, dates, etc.
- """
- print("\n=== Example 2: JSON Output with Metadata ===")
-
- # Configure tool for JSON output with metadata
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `json_with_metadata` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def json_with_metadata():
- """
- Extract content in JSON format with metadata.
- Useful when you need structured data including titles, authors, dates, etc.
- """
- print("\n=== Example 2: JSON Output with Metadata ===")
-
- # Configure tool for JSON output with metadata
- agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="json",
- with_metadata=True,
- include_comments=True,
- include_tables=True,
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Extract the article content from https://en.wikipedia.org/wiki/Web_scraping in JSON format with metadata"
- )
-
-
-# =============================================================================
-# Example 3: Markdown Output with Formatting
-# =============================================================================
-
-
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `markdown_with_formatting` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def markdown_with_formatting():
- """
- Extract content in Markdown format preserving structure.
- Great for maintaining document structure and readability.
- """
- print("\n=== Example 3: Markdown with Formatting ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="markdown",
- include_formatting=True,
- include_links=True,
- with_metadata=True,
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Convert https://docs.python.org/3/tutorial/introduction.html to markdown format while preserving the structure and links"
- )
-
-
-# =============================================================================
-# Example 4: Metadata-Only Extraction
-# =============================================================================
-
-
-def metadata_only_extraction():
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
+### `libs/agno/agno/memory/` and storage backends
-```mermaid
-flowchart TD
- A[save_learning]
- B[basic_text_extraction]
- C[json_with_metadata]
- D[markdown_with_formatting]
- E[metadata_only_extraction]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+Memory and state management are implemented in [`libs/agno/agno/memory/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/memory). This module contains the memory manager, session storage, and user memory classes that Chapter 3 covers. The storage backends (SQLite, PostgreSQL, Redis) show how Agno persists state across runs — review the base storage interface to understand the abstraction layer before examining specific implementations.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/04-multi-agent-orchestration.md b/tutorials/agno-tutorial/04-multi-agent-orchestration.md
index 6c3612b0..165ea5db 100644
--- a/tutorials/agno-tutorial/04-multi-agent-orchestration.md
+++ b/tutorials/agno-tutorial/04-multi-agent-orchestration.md
@@ -38,186 +38,8 @@ You now have a practical pattern for building coherent Agno multi-agent teams.
Next: [Chapter 5: Knowledge, RAG, and Tools](05-knowledge-rag-and-tools.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `high_precision_extraction` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def high_precision_extraction():
- """
- Extract with high precision settings.
- Use when you need clean, accurate content and don't mind missing some text.
- """
- print("\n=== Example 5: High Precision Extraction ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- favor_precision=True,
- include_comments=False, # Skip comments for cleaner output
- include_tables=True,
- output_format="txt",
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Extract the main article content from https://www.bbc.com/news with high precision, excluding comments and ads"
- )
-
-
-# =============================================================================
-# Example 6: High Recall Extraction
-# =============================================================================
-
-
-def high_recall_extraction():
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `high_recall_extraction` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def high_recall_extraction():
- """
- Extract with high recall settings.
- Use when you want to capture as much content as possible.
- """
- print("\n=== Example 6: High Recall Extraction ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- favor_recall=True,
- include_comments=True,
- include_tables=True,
- include_formatting=True,
- output_format="markdown",
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Extract comprehensive content from https://stackoverflow.com/questions/1732348/regex-match-open-tags-except-xhtml-self-contained-tags including all comments and discussions"
- )
-
-
-# =============================================================================
-# Example 7: Language-Specific Extraction
-# =============================================================================
-
-
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `language_specific_extraction` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def language_specific_extraction():
- """
- Extract content with language filtering.
- Useful for multilingual websites or language-specific content.
- """
- print("\n=== Example 7: Language-Specific Extraction ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- target_language="en", # Filter for English content
- output_format="json",
- with_metadata=True,
- deduplicate=True,
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Extract English content from https://www.reddit.com/r/MachineLearning/ and provide a summary"
- )
-
-
-# =============================================================================
-# Example 8: Website Crawling (if spider available)
-# =============================================================================
-
-
-def website_crawling():
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `website_crawling` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def website_crawling():
- """
- Crawl a website to discover and extract content from multiple pages.
- Note: Requires trafilatura spider module to be available.
- """
- print("\n=== Example 8: Website Crawling ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- enable_crawl_website=True,
- max_crawl_urls=5, # Limit for demo
- output_format="json",
- with_metadata=True,
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Crawl https://example.com and extract content from up to 5 internal pages"
- )
-
-
-# =============================================================================
-# Example 9: HTML to Text Conversion
-# =============================================================================
-
-
-def html_to_text_conversion():
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
+### `libs/agno/agno/team/team.py`
-```mermaid
-flowchart TD
- A[high_precision_extraction]
- B[high_recall_extraction]
- C[language_specific_extraction]
- D[website_crawling]
- E[html_to_text_conversion]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+Multi-agent orchestration in Agno is implemented in [`libs/agno/agno/team/team.py`](https://github.com/agno-agi/agno/blob/HEAD/libs/agno/agno/team/team.py). The `Team` class coordinates multiple agents, handling routing, delegation, and response aggregation. The `mode` parameter (route, coordinate, collaborate) maps to the orchestration patterns described in this chapter.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/05-knowledge-rag-and-tools.md b/tutorials/agno-tutorial/05-knowledge-rag-and-tools.md
index 4edf8de0..180c7878 100644
--- a/tutorials/agno-tutorial/05-knowledge-rag-and-tools.md
+++ b/tutorials/agno-tutorial/05-knowledge-rag-and-tools.md
@@ -38,186 +38,8 @@ You now understand how to combine knowledge and tool layers in Agno without sacr
Next: [Chapter 6: AgentOS Runtime and Control Plane](06-agentos-runtime-and-control-plane.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `research_assistant_agent` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def research_assistant_agent():
- """
- Create a specialized research assistant using TrafilaturaTools.
- This agent is optimized for extracting and analyzing research content.
- """
- research_agent = Agent(
- name="Research Assistant",
- model=OpenAIChat(id="gpt-4"),
- tools=[
- TrafilaturaTools(
- output_format="json",
- with_metadata=True,
- include_tables=True,
- include_links=True,
- favor_recall=True,
- target_language="en",
- )
- ],
- instructions="""
- You are a research assistant specialized in gathering and analyzing information from web sources.
-
- When extracting content:
- 1. Always include source metadata (title, author, date, URL)
- 2. Preserve important structural elements like tables and lists
- 3. Maintain links for citation purposes
- 4. Focus on comprehensive content extraction
- 5. Provide structured analysis of the extracted content
-
- Format your responses with:
- - Executive Summary
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `multiple_urls_different_configs` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def multiple_urls_different_configs():
- """
- Process multiple URLs with different extraction strategies.
- Demonstrates flexibility in handling various content types.
- """
- print("\n=== Example 10: Multiple URLs with Different Configurations ===")
-
- # Different agents for different content types
- news_agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="json",
- with_metadata=True,
- include_comments=False,
- favor_precision=True,
- )
- ],
- markdown=True,
- )
-
- documentation_agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="markdown",
- include_formatting=True,
- include_links=True,
- include_tables=True,
- favor_recall=True,
- )
- ],
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `advanced_customization` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def advanced_customization():
- """
- Advanced configuration with all customization options.
- Shows how to fine-tune extraction for specific needs.
- """
- print("\n=== Example 11: Advanced Customization ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="xml",
- include_comments=False,
- include_tables=True,
- include_images=True,
- include_formatting=True,
- include_links=True,
- with_metadata=True,
- favor_precision=True,
- target_language="en",
- deduplicate=True,
- max_tree_size=10000,
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Extract comprehensive structured content from https://en.wikipedia.org/wiki/Artificial_intelligence in XML format with all metadata and structural elements"
- )
-
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `comparative_analysis` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def comparative_analysis():
- """
- Compare content from multiple sources using different extraction strategies.
- Useful for research and content analysis tasks.
- """
- print("\n=== Example 12: Comparative Analysis ===")
-
- agent = Agent(
- model=OpenAIChat(id="gpt-4"),
- tools=[
- TrafilaturaTools(
- output_format="json",
- with_metadata=True,
- include_tables=True,
- favor_precision=True,
- )
- ],
- markdown=True,
- )
-
- agent.print_response("""
- Compare and analyze the content about artificial intelligence from these sources:
- 1. https://en.wikipedia.org/wiki/Artificial_intelligence
- 2. https://www.ibm.com/cloud/learn/what-is-artificial-intelligence
-
- Provide a comparative analysis highlighting the key differences in how they present AI concepts.
- """)
-
-
-# =============================================================================
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[research_assistant_agent]
- B[multiple_urls_different_configs]
- C[advanced_customization]
- D[comparative_analysis]
- E[content_research_pipeline]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+### `libs/agno/agno/knowledge/` and `libs/agno/agno/tools/`
+
+Knowledge and RAG capabilities live in [`libs/agno/agno/knowledge/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/knowledge), while tool integrations are in [`libs/agno/agno/tools/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/tools). The base knowledge class in this module shows how documents are chunked, embedded, and searched. Browsing the tools directory reveals how Agno wraps external APIs and services as callable tools for agents.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/06-agentos-runtime-and-control-plane.md b/tutorials/agno-tutorial/06-agentos-runtime-and-control-plane.md
index 6de1d507..280d46a5 100644
--- a/tutorials/agno-tutorial/06-agentos-runtime-and-control-plane.md
+++ b/tutorials/agno-tutorial/06-agentos-runtime-and-control-plane.md
@@ -38,186 +38,8 @@ You now have an operational model for running Agno via AgentOS infrastructure.
Next: [Chapter 7: Guardrails, Evals, and Observability](07-guardrails-evals-and-observability.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/91_tools/trafilatura_tools.py`
-
-The `performance_optimized` function in [`cookbook/91_tools/trafilatura_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/trafilatura_tools.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def performance_optimized():
- """
- Optimized configuration for fast, efficient extraction.
- Best for high-volume processing or when speed is critical.
- """
- print("\n=== Example 14: Performance Optimized Extraction ===")
-
- agent = Agent(
- tools=[
- TrafilaturaTools(
- output_format="txt",
- include_comments=False,
- include_tables=False,
- include_images=False,
- include_formatting=False,
- include_links=False,
- with_metadata=False,
- favor_precision=True, # Faster processing
- deduplicate=False, # Skip deduplication for speed
- )
- ],
- markdown=True,
- )
-
- agent.print_response(
- "Quickly extract just the main text content from https://news.ycombinator.com optimized for speed"
- )
-
-
-# =============================================================================
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/91_tools/github_tools.py`
-
-The `definitions` class in [`cookbook/91_tools/github_tools.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/91_tools/github_tools.py) handles a key part of this chapter's functionality:
+### `libs/agno/agno/app/` and runtime entrypoints
-```py
- # Example: Search code in repository
- # agent.print_response(
- # "Search for 'Agent' class definitions in the agno-agi/agno repository",
- # markdown=True,
- # )
-
- # Example: Search issues and pull requests
- # agent.print_response(
- # "Find all issues and PRs mentioning 'bug' in the agno-agi/agno repository",
- # markdown=True,
- # )
-
- # Example: Creating a pull request (commented out by default)
- # agent.print_response("Create a pull request from 'feature-branch' to 'main' in agno-agi/agno titled 'New Feature' with description 'Implements the new feature'", markdown=True)
-
- # Example: Creating a branch (commented out by default)
- # agent.print_response("Create a new branch called 'feature-branch' from the main branch in the agno-agi/agno repository", markdown=True)
-
- # Example: Setting default branch (commented out by default)
- # agent.print_response("Set the default branch to 'develop' in the agno-agi/agno repository", markdown=True)
-
- # Example: File creation (commented out by default)
- # agent.print_response("Create a file called 'test.md' with content 'This is a test' in the agno-agi/agno repository", markdown=True)
-
- # Example: Update file (commented out by default)
- # agent.print_response("Update the README.md file in the agno-agi/agno repository to add a new section about installation", markdown=True)
-
- # Example: Delete file (commented out by default)
- # agent.print_response("Delete the file test.md from the agno-agi/agno repository", markdown=True)
-
- # Example: Requesting a review for a pull request (commented out by default)
- # agent.print_response("Request a review from user 'username' for pull request #100 in the agno-agi/agno repository", markdown=True)
-```
-
-This class is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/gemini_3/20_workflow.py`
-
-The `quality_gate` function in [`cookbook/gemini_3/20_workflow.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/gemini_3/20_workflow.py) handles a key part of this chapter's functionality:
-
-```py
-# Custom step functions
-# ---------------------------------------------------------------------------
-def quality_gate(step_input: StepInput) -> StepOutput:
- """Check that the analysis has enough substance to proceed."""
- content = str(step_input.previous_step_content or "")
- if len(content) < 200:
- return StepOutput(
- content="Quality gate failed: analysis too short. Stopping pipeline.",
- stop=True,
- success=False,
- )
- return StepOutput(
- content=content,
- success=True,
- )
-
-
-def needs_fact_check(step_input: StepInput) -> bool:
- """Decide whether the report needs fact-checking."""
- content = str(step_input.previous_step_content or "").lower()
- indicators = [
- "study",
- "research",
- "percent",
- "%",
- "million",
- "billion",
- "according",
- ]
- return any(indicator in content for indicator in indicators)
-
-
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/gemini_3/20_workflow.py`
-
-The `needs_fact_check` function in [`cookbook/gemini_3/20_workflow.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/gemini_3/20_workflow.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def needs_fact_check(step_input: StepInput) -> bool:
- """Decide whether the report needs fact-checking."""
- content = str(step_input.previous_step_content or "").lower()
- indicators = [
- "study",
- "research",
- "percent",
- "%",
- "million",
- "billion",
- "according",
- ]
- return any(indicator in content for indicator in indicators)
-
-
-# ---------------------------------------------------------------------------
-# Build Workflow
-# ---------------------------------------------------------------------------
-research_pipeline = Workflow(
- id="gemini-research-pipeline",
- name="Research Pipeline",
- description="Research-to-publication pipeline: parallel research, analysis, quality gate, writing, and conditional fact-checking.",
- db=gemini_agents_db,
- steps=[
- # Step 1: Research in parallel (two agents search simultaneously)
- Parallel(
- "Research",
- Step(name="web_research", agent=web_researcher),
- Step(name="deep_research", agent=deep_researcher),
- ),
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[performance_optimized]
- B[definitions]
- C[quality_gate]
- D[needs_fact_check]
- E[AnalysisRequest]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+The AgentOS runtime and API serving layer are in [`libs/agno/agno/app/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/app). This module provides the FastAPI-based serving infrastructure that wraps agents as HTTP endpoints. The app factory and middleware show the control plane features — authentication, session management, and streaming — that Chapter 6 describes.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/07-guardrails-evals-and-observability.md b/tutorials/agno-tutorial/07-guardrails-evals-and-observability.md
index d8f42978..804f4c29 100644
--- a/tutorials/agno-tutorial/07-guardrails-evals-and-observability.md
+++ b/tutorials/agno-tutorial/07-guardrails-evals-and-observability.md
@@ -39,177 +39,8 @@ You now have a repeatable quality and safety loop for Agno systems.
Next: [Chapter 8: Production Deployment](08-production-deployment.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/00_quickstart/agent_with_typed_input_output.py`
-
-The `StockAnalysis` class in [`cookbook/00_quickstart/agent_with_typed_input_output.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/00_quickstart/agent_with_typed_input_output.py) handles a key part of this chapter's functionality:
-
-```py
-# Output Schema — what the agent returns
-# ---------------------------------------------------------------------------
-class StockAnalysis(BaseModel):
- """Structured output for stock analysis."""
-
- ticker: str = Field(..., description="Stock ticker symbol")
- company_name: str = Field(..., description="Full company name")
- current_price: float = Field(..., description="Current stock price in USD")
- summary: str = Field(..., description="One-line summary of the stock")
- key_drivers: Optional[List[str]] = Field(
- None, description="Key growth drivers (if deep analysis)"
- )
- key_risks: Optional[List[str]] = Field(
- None, description="Key risks (if include_risks=True)"
- )
- recommendation: str = Field(
- ..., description="One of: Strong Buy, Buy, Hold, Sell, Strong Sell"
- )
-
-
-# ---------------------------------------------------------------------------
-# Agent Instructions
-# ---------------------------------------------------------------------------
-instructions = """\
-You are a Finance Agent that produces structured stock analyses.
-
-## Input Parameters
-
-You receive structured requests with:
-- ticker: The stock to analyze
-- analysis_type: "quick" (summary only) or "deep" (full analysis)
-- include_risks: Whether to include risk analysis
-```
-
-This class is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/00_quickstart/custom_tool_for_self_learning.py`
-
-The `save_learning` function in [`cookbook/00_quickstart/custom_tool_for_self_learning.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/00_quickstart/custom_tool_for_self_learning.py) handles a key part of this chapter's functionality:
-
-```py
-# Custom Tool: Save Learning
-# ---------------------------------------------------------------------------
-def save_learning(title: str, learning: str) -> str:
- """
- Save a reusable insight to the knowledge base for future reference.
-
- Args:
- title: Short descriptive title (e.g., "Tech stock P/E benchmarks")
- learning: The insight to save — be specific and actionable
-
- Returns:
- Confirmation message
- """
- # Validate inputs
- if not title or not title.strip():
- return "Cannot save: title is required"
- if not learning or not learning.strip():
- return "Cannot save: learning content is required"
-
- # Build the payload
- payload = {
- "title": title.strip(),
- "learning": learning.strip(),
- "saved_at": datetime.now(timezone.utc).isoformat(),
- }
-
- # Save to knowledge base
- learnings_kb.insert(
- name=payload["title"],
- text_content=json.dumps(payload, ensure_ascii=False),
- reader=TextReader(),
- skip_if_exists=True,
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/00_quickstart/agent_with_guardrails.py`
-
-The `SpamDetectionGuardrail` class in [`cookbook/00_quickstart/agent_with_guardrails.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/00_quickstart/agent_with_guardrails.py) handles a key part of this chapter's functionality:
-
-```py
-# Custom Guardrail: Spam Detection
-# ---------------------------------------------------------------------------
-class SpamDetectionGuardrail(BaseGuardrail):
- """
- A custom guardrail that detects spammy or low-quality input.
-
- This demonstrates how to write your own guardrail:
- 1. Inherit from BaseGuardrail
- 2. Implement check() method
- 3. Raise InputCheckError to block the request
- """
-
- def __init__(self, max_caps_ratio: float = 0.7, max_exclamations: int = 3):
- self.max_caps_ratio = max_caps_ratio
- self.max_exclamations = max_exclamations
-
- def check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
- """Check for spam patterns in the input."""
- content = run_input.input_content_string()
-
- # Check for excessive caps
- if len(content) > 10:
- caps_ratio = sum(1 for c in content if c.isupper()) / len(content)
- if caps_ratio > self.max_caps_ratio:
- raise InputCheckError(
- "Input appears to be spam (excessive capitals)",
- )
-
- # Check for excessive exclamation marks
- if content.count("!") > self.max_exclamations:
- raise InputCheckError(
- "Input appears to be spam (excessive exclamation marks)",
-```
-
-This class is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/00_quickstart/agent_with_guardrails.py`
-
-The `MyGuardrail` class in [`cookbook/00_quickstart/agent_with_guardrails.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/00_quickstart/agent_with_guardrails.py) handles a key part of this chapter's functionality:
-
-```py
-Writing custom guardrails:
-
-class MyGuardrail(BaseGuardrail):
- def check(self, run_input: Union[RunInput, TeamRunInput]) -> None:
- content = run_input.input_content_string()
- if some_condition(content):
- raise InputCheckError(
- "Reason for blocking",
- check_trigger=CheckTrigger.CUSTOM,
- )
-
- async def async_check(self, run_input):
- self.check(run_input)
-
-Guardrail patterns:
-- Profanity filtering
-- Topic restrictions
-- Rate limiting
-- Input length limits
-- Language detection
-- Sentiment analysis
-"""
-
-```
-
-This class is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[StockAnalysis]
- B[save_learning]
- C[SpamDetectionGuardrail]
- D[MyGuardrail]
- E[from]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+### `libs/agno/agno/eval/` and monitoring integrations
+
+Evaluation utilities are in [`libs/agno/agno/eval/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/eval). This module provides accuracy and performance eval classes for testing agent outputs. For observability, Agno's integration modules (Langfuse, Arize, etc.) in `libs/agno/agno/monitoring/` show how traces and metrics are emitted — the foundation for the guardrails and observability patterns in Chapter 7.
\ No newline at end of file
diff --git a/tutorials/agno-tutorial/08-production-deployment.md b/tutorials/agno-tutorial/08-production-deployment.md
index af6f2417..3667831b 100644
--- a/tutorials/agno-tutorial/08-production-deployment.md
+++ b/tutorials/agno-tutorial/08-production-deployment.md
@@ -38,186 +38,8 @@ This chapter establishes the baseline for scaling Agno systems safely in product
You now have a production runbook baseline for operating Agno multi-agent systems.
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cookbook/scripts/check_cookbook_pattern.py`
-
-The `class` class in [`cookbook/scripts/check_cookbook_pattern.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/check_cookbook_pattern.py) handles a key part of this chapter's functionality:
-
-```py
-import json
-import re
-from dataclasses import asdict, dataclass
-from pathlib import Path
-
-EMOJI_RE = re.compile(r"[\U0001F300-\U0001FAFF]")
-MAIN_GATE_RE = re.compile(r'if __name__ == ["\']__main__["\']:')
-SECTION_RE = re.compile(r"^# [-=]+\n# (?P.+?)\n# [-=]+$", re.MULTILINE)
-SKIP_FILE_NAMES = {"__init__.py"}
-SKIP_DIR_NAMES = {"__pycache__", ".git", ".context"}
-
-
-@dataclass
-class Violation:
- path: str
- line: int
- code: str
- message: str
-
-
-def iter_python_files(base_dir: Path, recursive: bool) -> list[Path]:
- pattern = "**/*.py" if recursive else "*.py"
- files: list[Path] = []
- for path in sorted(base_dir.glob(pattern)):
- if not path.is_file():
- continue
- if path.name in SKIP_FILE_NAMES:
- continue
- if any(part in SKIP_DIR_NAMES for part in path.parts):
- continue
- files.append(path)
- return files
-```
-
-This class is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/check_cookbook_pattern.py`
-
-The `iter_python_files` function in [`cookbook/scripts/check_cookbook_pattern.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/check_cookbook_pattern.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def iter_python_files(base_dir: Path, recursive: bool) -> list[Path]:
- pattern = "**/*.py" if recursive else "*.py"
- files: list[Path] = []
- for path in sorted(base_dir.glob(pattern)):
- if not path.is_file():
- continue
- if path.name in SKIP_FILE_NAMES:
- continue
- if any(part in SKIP_DIR_NAMES for part in path.parts):
- continue
- files.append(path)
- return files
-
-
-def find_sections(text: str) -> list[tuple[str, int]]:
- sections: list[tuple[str, int]] = []
- for match in SECTION_RE.finditer(text):
- title = match.group("title").strip()
- # 1-based line number of the section title line
- line = text[: match.start()].count("\n") + 2
- sections.append((title, line))
- return sections
-
-
-def find_first_section_line(
- sections: list[tuple[str, int]], keyword: str
-) -> int | None:
- needle = re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
- for title, line in sections:
- if needle.search(title):
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/check_cookbook_pattern.py`
-
-The `find_sections` function in [`cookbook/scripts/check_cookbook_pattern.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/check_cookbook_pattern.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def find_sections(text: str) -> list[tuple[str, int]]:
- sections: list[tuple[str, int]] = []
- for match in SECTION_RE.finditer(text):
- title = match.group("title").strip()
- # 1-based line number of the section title line
- line = text[: match.start()].count("\n") + 2
- sections.append((title, line))
- return sections
-
-
-def find_first_section_line(
- sections: list[tuple[str, int]], keyword: str
-) -> int | None:
- needle = re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
- for title, line in sections:
- if needle.search(title):
- return line
- return None
-
-
-def validate_file(path: Path) -> list[Violation]:
- violations: list[Violation] = []
- text = path.read_text(encoding="utf-8")
-
- try:
- tree = ast.parse(text)
- except SyntaxError as exc:
- violations.append(
- Violation(
- path=path.as_posix(),
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-### `cookbook/scripts/check_cookbook_pattern.py`
-
-The `find_first_section_line` function in [`cookbook/scripts/check_cookbook_pattern.py`](https://github.com/agno-agi/agno/blob/HEAD/cookbook/scripts/check_cookbook_pattern.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def find_first_section_line(
- sections: list[tuple[str, int]], keyword: str
-) -> int | None:
- needle = re.compile(rf"\b{re.escape(keyword)}\b", re.IGNORECASE)
- for title, line in sections:
- if needle.search(title):
- return line
- return None
+### `libs/agno/agno/app/` and deployment examples
-
-def validate_file(path: Path) -> list[Violation]:
- violations: list[Violation] = []
- text = path.read_text(encoding="utf-8")
-
- try:
- tree = ast.parse(text)
- except SyntaxError as exc:
- violations.append(
- Violation(
- path=path.as_posix(),
- line=exc.lineno or 1,
- code="syntax_error",
- message=exc.msg,
- )
- )
- return violations
-
- if not ast.get_docstring(tree, clean=False):
- violations.append(
- Violation(
-```
-
-This function is important because it defines how Agno Tutorial: Multi-Agent Systems That Learn Over Time implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[class]
- B[iter_python_files]
- C[find_sections]
- D[find_first_section_line]
- E[validate_file]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+Production deployment patterns are demonstrated in the `apps/` and `deployments/` subdirectories of `cookbook/`. The [`libs/agno/agno/app/`](https://github.com/agno-agi/agno/tree/HEAD/libs/agno/agno/app) module's app configuration options and Dockerfile references show the recommended containerization approach. For scaling and configuration management in production, the app module's environment variable handling is the authoritative reference.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/01-getting-started.md b/tutorials/aider-tutorial/01-getting-started.md
index 0c0bd9b0..cab18f63 100644
--- a/tutorials/aider-tutorial/01-getting-started.md
+++ b/tutorials/aider-tutorial/01-getting-started.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 1: Getting Started with Aider
Welcome to **Chapter 1: Getting Started with Aider**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -563,148 +564,8 @@ Now that you can run Aider and make basic code changes, let's explore **basic ed
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 1: Getting Started with Aider**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 1: Getting Started with Aider`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 1: Getting Started with Aider`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `aider`, `model`, `version` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 1: Getting Started with Aider` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `hello`, `claude`, `install` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 1: Getting Started with Aider` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `aider`.
-2. **Input normalization**: shape incoming data so `model` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `version`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `aider` and `model` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+## Source Code Walkthrough
-## Chapter Connections
+### `aider/main.py`
-- [Tutorial Index](README.md)
-- [Next Chapter: Chapter 2: Basic Editing Operations](02-basic-editing.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+The [`aider/main.py`](https://github.com/Aider-AI/aider/blob/HEAD/aider/main.py) file is the entry point for the `aider` command. The `main()` function parses CLI arguments, sets up the coder, and starts the interactive loop — which is exactly the flow a new user experiences in Chapter 1. Tracing `main()` shows which arguments are required, how the git repo is detected, and how the first edit session begins.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/03-multi-file.md b/tutorials/aider-tutorial/03-multi-file.md
index 624124a0..aa117918 100644
--- a/tutorials/aider-tutorial/03-multi-file.md
+++ b/tutorials/aider-tutorial/03-multi-file.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 3: Multi-File Projects
Welcome to **Chapter 3: Multi-File Projects**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -578,149 +579,8 @@ Now that you can work across multiple files, let's explore **Git integration** a
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 3: Multi-File Projects**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 3: Multi-File Projects`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 3: Multi-File Projects`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `Aider`, `user`, `Request` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 3: Multi-File Projects` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `models`, `username`, `self` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 3: Multi-File Projects` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `Aider`.
-2. **Input normalization**: shape incoming data so `user` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `Request`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `Aider` and `user` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+## Source Code Walkthrough
-## Chapter Connections
+### `aider/coders/base_coder.py`
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 2: Basic Editing Operations](02-basic-editing.md)
-- [Next Chapter: Chapter 4: Git Integration](04-git.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+Multi-file context management is handled in [`aider/coders/base_coder.py`](https://github.com/Aider-AI/aider/blob/HEAD/aider/coders/base_coder.py). The `add_rel_fname`, `drop_rel_fname`, and `get_files_content` methods show how Aider tracks which files are in the active context and assembles their content into the LLM prompt. This is the core of the multi-file workflow described in Chapter 3.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/04-git.md b/tutorials/aider-tutorial/04-git.md
index 08fefbfb..4d568a3c 100644
--- a/tutorials/aider-tutorial/04-git.md
+++ b/tutorials/aider-tutorial/04-git.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 4: Git Integration
Welcome to **Chapter 4: Git Integration**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -471,173 +472,8 @@ Now that you understand Git integration, let's explore **advanced prompting tech
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 4: Git Integration**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 4: Git Integration`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 4: Git Integration`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 4: Git Integration
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 4: Git Integration
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `auto`, `commits`, `Aider` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 4: Git Integration` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `Commit`, `aider`, `feat` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 4: Git Integration` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `auto`.
-2. **Input normalization**: shape incoming data so `commits` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `Aider`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `auto` and `commits` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+## Source Code Walkthrough
-## Chapter Connections
+### `aider/repo.py`
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 3: Multi-File Projects](03-multi-file.md)
-- [Next Chapter: Chapter 5: Advanced Prompting](05-prompting.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+Git integration lives in [`aider/repo.py`](https://github.com/Aider-AI/aider/blob/HEAD/aider/repo.py). The `GitRepo` class wraps GitPython and handles auto-commits, dirty file detection, and commit message generation. The `commit` method shows exactly how Aider creates commits after applying edits — including how it formats commit messages and which files are staged — which is the core of this chapter's git workflow coverage.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/05-prompting.md b/tutorials/aider-tutorial/05-prompting.md
index 55f92671..5383a851 100644
--- a/tutorials/aider-tutorial/05-prompting.md
+++ b/tutorials/aider-tutorial/05-prompting.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 5: Advanced Prompting
Welcome to **Chapter 5: Advanced Prompting**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -425,209 +426,8 @@ Now that you can prompt effectively, let's explore **model configuration** and h
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 5: Advanced Prompting**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 5: Advanced Prompting`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 5: Advanced Prompting`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 5: Advanced Prompting
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 5: Advanced Prompting
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 5: Advanced Prompting
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 5: Advanced Prompting
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 5: Advanced Prompting
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `Create`, `user`, `error` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 5: Advanced Prompting` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `email`, `model`, `using` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 5: Advanced Prompting` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `Create`.
-2. **Input normalization**: shape incoming data so `user` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `error`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `Create` and `user` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 4: Git Integration](04-git.md)
-- [Next Chapter: Chapter 6: Model Configuration](06-models.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+## Source Code Walkthrough
+
+### `aider/prompts.py`
+
+Prompting strategy is defined in [`aider/prompts.py`](https://github.com/Aider-AI/aider/blob/HEAD/aider/prompts.py). This file contains the system prompt templates that Aider sends to the LLM, including the instructions for how the model should format code edits. Understanding these prompts is essential for this chapter — they define what kinds of user instructions produce reliable edits versus ambiguous ones.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/06-models.md b/tutorials/aider-tutorial/06-models.md
index 0c4e697f..c4367ba5 100644
--- a/tutorials/aider-tutorial/06-models.md
+++ b/tutorials/aider-tutorial/06-models.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 6: Model Configuration
Welcome to **Chapter 6: Model Configuration**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -474,161 +475,8 @@ Now that you can configure models effectively, let's explore **voice workflows**
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 6: Model Configuration**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 6: Model Configuration`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 6: Model Configuration`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 6: Model Configuration
-
-- tutorial context: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `model`, `aider`, `claude` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 6: Model Configuration` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `models`, `mini`, `Claude` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 6: Model Configuration` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `model`.
-2. **Input normalization**: shape incoming data so `aider` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `claude`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `model` and `aider` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+## Source Code Walkthrough
-## Chapter Connections
+### `aider/models.py`
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 5: Advanced Prompting](05-prompting.md)
-- [Next Chapter: Chapter 7: Voice & Workflows](07-workflows.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+Model configuration and capability detection are in [`aider/models.py`](https://github.com/Aider-AI/aider/blob/HEAD/aider/models.py). The `Model` class stores per-model settings (context window, edit format support, cost estimates) and is the canonical reference for this chapter's coverage of model selection. The model-resolution logic in this file also shows how Aider maps model names to default configurations.
\ No newline at end of file
diff --git a/tutorials/aider-tutorial/08-best-practices.md b/tutorials/aider-tutorial/08-best-practices.md
index d2cd578e..07ba87ab 100644
--- a/tutorials/aider-tutorial/08-best-practices.md
+++ b/tutorials/aider-tutorial/08-best-practices.md
@@ -6,6 +6,7 @@ has_children: false
parent: Aider Tutorial
---
+
# Chapter 8: Best Practices
Welcome to **Chapter 8: Best Practices**. In this part of **Aider Tutorial: AI Pair Programming in Your Terminal**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -519,148 +520,8 @@ With these principles, Aider becomes an invaluable partner in your development j
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Aider Tutorial: AI Pair Programming in Your Terminal**
-- tutorial slug: **aider-tutorial**
-- chapter focus: **Chapter 8: Best Practices**
-- system context: **Aider Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 8: Best Practices`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
-- [Aider Docs](https://aider.chat/)
-
-### Cross-Tutorial Connection Map
-
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Continue Tutorial](../continue-tutorial/)
-- [Codex Analysis Platform](../codex-analysis-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 8: Best Practices`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `model`, `user`, `code` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 8: Best Practices` as an operating subsystem inside **Aider Tutorial: AI Pair Programming in Your Terminal**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `Create`, `error`, `Implement` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 8: Best Practices` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `model`.
-2. **Input normalization**: shape incoming data so `user` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `code`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [Aider Repository](https://github.com/Aider-AI/aider)
- Why it matters: authoritative reference on `Aider Repository` (github.com).
-- [Aider Releases](https://github.com/Aider-AI/aider/releases)
- Why it matters: authoritative reference on `Aider Releases` (github.com).
-- [Aider Docs](https://aider.chat/)
- Why it matters: authoritative reference on `Aider Docs` (aider.chat).
-
-Suggested trace strategy:
-- search upstream code for `model` and `user` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+## Source Code Walkthrough
-## Chapter Connections
+### `aider/coders/base_coder.py` and `aider/repo.py`
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 7: Voice & Workflows](07-workflows.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+Best practices emerge from understanding the interaction between the coder and git integration. The `base_coder.py` file shows how context size affects edit quality (context window limits), while `repo.py` shows how auto-commit behavior can be tuned. Together these two files contain the operational levers most relevant to the production best practices described in Chapter 8.
\ No newline at end of file
diff --git a/tutorials/anthropic-skills-tutorial/01-getting-started.md b/tutorials/anthropic-skills-tutorial/01-getting-started.md
index df8f814d..78ef54af 100644
--- a/tutorials/anthropic-skills-tutorial/01-getting-started.md
+++ b/tutorials/anthropic-skills-tutorial/01-getting-started.md
@@ -2,328 +2,194 @@
layout: default
title: "Chapter 1: Getting Started"
nav_order: 1
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
# Chapter 1: Getting Started
-Welcome to **Chapter 1: Getting Started**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
+## What Problem Does This Solve?
-This chapter gets you from zero to a functioning skill you can iterate on.
+Learning from the Claude API documentation alone leaves a large gap between "I can call `messages.create`" and "I have a working agent that uses tools, manages conversation history, and handles errors gracefully." The `anthropic-quickstarts` repository closes that gap with six runnable reference implementations covering the most important use cases: a minimal reference agent loop, desktop computer control, autonomous multi-session coding, customer support with knowledge retrieval, conversational financial analysis, and DOM-aware browser automation.
-## Skill Anatomy
+This chapter gives you a mental model for the entire repository and gets you running at least one quickstart in under 15 minutes.
-A minimal skill is one folder plus one file:
+## Repository Structure
```text
-my-first-skill/
- SKILL.md
+anthropic-quickstarts/
+├── CLAUDE.md # Development standards for contributors
+├── pyproject.toml # Python tooling config (ruff, pyright, pytest)
+├── agents/ # Reference agent loop — <300 lines, educational
+│ ├── agent.py
+│ ├── tools/
+│ └── utils/
+├── autonomous-coding/ # Two-agent pattern: initializer + coding agent
+│ ├── autonomous_agent_demo.py
+│ ├── prompts/
+│ └── requirements.txt
+├── browser-use-demo/ # Playwright browser automation + Streamlit UI
+│ ├── browser.py
+│ ├── loop.py
+│ └── streamlit.py
+├── computer-use-demo/ # Full desktop control via screenshot + xdotool
+│ ├── Dockerfile
+│ ├── computer_use_demo/
+│ │ ├── loop.py # Sampling loop (the core agentic logic)
+│ │ ├── streamlit.py # Web UI
+│ │ └── tools/
+│ │ ├── base.py # ToolResult, BaseAnthropicTool
+│ │ ├── bash.py # BashTool with sentinel pattern
+│ │ ├── computer.py # ComputerTool with coordinate scaling
+│ │ └── edit.py # EditTool (str_replace, insert, view)
+│ └── setup.sh
+├── customer-support-agent/ # Next.js + Amazon Bedrock RAG
+│ ├── app/
+│ └── package.json
+└── financial-data-analyst/ # Next.js + file upload + Recharts
+ ├── app/
+ └── package.json
```
-`SKILL.md` has two important parts:
-
-1. **Frontmatter** for identity and routing metadata
-2. **Instruction body** that defines behavior, constraints, and output expectations
-
-## Minimal Valid `SKILL.md`
-
-```markdown
----
-name: incident-summary
-description: Summarize incident notes into a concise operations report
----
-
-When given incident notes:
-1. Produce a timeline of events.
-2. List likely contributing factors.
-3. Propose prioritized action items with owners.
+## Which Quickstart to Run First
+
+| Goal | Start Here |
+|:-----|:-----------|
+| See Claude control a real computer | `computer-use-demo` |
+| Understand the core agentic loop pattern | `agents` |
+| Build a chat app with document retrieval | `customer-support-agent` |
+| Build a data analysis chat app | `financial-data-analyst` |
+| Automate web tasks without pixel coordinates | `browser-use-demo` |
+| See how a complex multi-session agent works | `autonomous-coding` |
+
+## Running the Computer Use Demo (Fastest Path)
+
+The computer use demo is the flagship quickstart. It runs entirely in Docker so you do not need to install display server dependencies.
+
+```bash
+# 1. Clone the repository
+git clone https://github.com/anthropics/anthropic-quickstarts.git
+cd anthropic-quickstarts
+
+# 2. Set your API key
+export ANTHROPIC_API_KEY=sk-ant-...
+
+# 3. Pull and run the prebuilt image
+docker run \
+ -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
+ -v $HOME/.anthropic:/home/user/.anthropic \
+ -p 8080:8080 \
+ -p 8501:8501 \
+ -p 6080:6080 \
+ -p 5900:5900 \
+ ghcr.io/anthropics/anthropic-quickstarts:computer-use-demo-latest
```
-## First Upgrade: Add Determinism
+Open `http://localhost:8080` in your browser. You will see a Streamlit chat interface on the left and a live VNC view of the Docker desktop on the right.
-Most teams should move immediately from free-form instructions to explicit output contracts.
+For local development with live code changes:
-```markdown
-## Output Contract
-- Return markdown only.
-- Include sections: `Timeline`, `Contributing Factors`, `Actions`.
-- Each action must include `owner`, `due_date`, and `risk_if_missed`.
+```bash
+cd computer-use-demo
+./setup.sh # installs display dependencies
+docker build . -t computer-use-demo:local
+docker run -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
+ -v $(pwd)/computer_use_demo:/home/user/computer_use_demo \
+ -p 8080:8080 -p 8501:8501 -p 6080:6080 -p 5900:5900 \
+ computer-use-demo:local
```
-This single addition usually reduces variance more than model-level tuning.
-
-## Add Supporting Files
+## Running the Agents Reference Implementation
-As tasks become operational, move from one-file skills to structured packages:
+The `agents/` quickstart requires no Docker. It demonstrates the fundamental tool-use loop in under 300 lines of Python.
-```text
-incident-skill/
- SKILL.md
- templates/
- postmortem.md
- scripts/
- normalize_incident_json.py
- references/
- severity-matrix.md
+```bash
+cd agents
+pip install anthropic mcp
+export ANTHROPIC_API_KEY=sk-ant-...
+python agent.py
```
-Use this rule:
-
-- Put **policy and behavior** in `SKILL.md`
-- Put **deterministic transforms** in `scripts/`
-- Put **stable source context** in `references/`
-
-## Local Iteration Loop
-
-1. Run the skill against 5 to 10 representative prompts.
-2. Save outputs as golden snapshots.
-3. Tighten instructions where variance or ambiguity appears.
-4. Re-run snapshots after every instruction change.
-
-This gives you fast regression detection without heavyweight tooling.
-
-## Common Early Mistakes
-
-| Mistake | Symptom | Fix |
-|:--------|:--------|:----|
-| Broad description | Skill triggers for unrelated requests | Narrow the `description` to explicit use cases |
-| No output schema | Inconsistent format between runs | Add required sections and field-level constraints |
-| Hidden dependencies | Skill fails on missing files/scripts | Document all dependencies in `SKILL.md` |
-| Conflicting instructions | Internal contradiction in outputs | Remove overlap and define precedence |
-
-## Summary
-
-You now have a valid, testable skill package and a repeatable iteration loop.
-
-Next: [Chapter 2: Skill Categories](02-skill-categories.md)
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `incident`, `skill`, `SKILL` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 1: Getting Started` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `notes`, `action`, `first` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 1: Getting Started` usually follows a repeatable control path:
+The agent accepts a query, calls Claude, executes any tool use blocks it receives, feeds results back, and repeats until Claude returns a response with no tool calls.
-1. **Context bootstrap**: initialize runtime config and prerequisites for `incident`.
-2. **Input normalization**: shape incoming data so `skill` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `SKILL`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
+## Running the Customer Support Agent
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
-
-Suggested trace strategy:
-- search upstream code for `incident` and `skill` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Next Chapter: Chapter 2: Skill Categories](02-skill-categories.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/skill-creator/eval-viewer/generate_review.py`
-
-The `ReviewHandler` class in [`skills/skill-creator/eval-viewer/generate_review.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/eval-viewer/generate_review.py) handles a key part of this chapter's functionality:
-
-```py
- print("Note: lsof not found, cannot check if port is in use", file=sys.stderr)
-
-class ReviewHandler(BaseHTTPRequestHandler):
- """Serves the review HTML and handles feedback saves.
-
- Regenerates the HTML on each page load so that refreshing the browser
- picks up new eval outputs without restarting the server.
- """
-
- def __init__(
- self,
- workspace: Path,
- skill_name: str,
- feedback_path: Path,
- previous: dict[str, dict],
- benchmark_path: Path | None,
- *args,
- **kwargs,
- ):
- self.workspace = workspace
- self.skill_name = skill_name
- self.feedback_path = feedback_path
- self.previous = previous
- self.benchmark_path = benchmark_path
- super().__init__(*args, **kwargs)
-
- def do_GET(self) -> None:
- if self.path == "/" or self.path == "/index.html":
- # Regenerate HTML on each request (re-scans workspace for new outputs)
- runs = find_runs(self.workspace)
- benchmark = None
- if self.benchmark_path and self.benchmark_path.exists():
+```bash
+cd customer-support-agent
+npm install
+cp .env.example .env.local
+# Edit .env.local: add ANTHROPIC_API_KEY and optionally AWS credentials
+npm run dev
```
-This class is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/skill-creator/eval-viewer/generate_review.py`
-
-The `get_mime_type` function in [`skills/skill-creator/eval-viewer/generate_review.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/eval-viewer/generate_review.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def get_mime_type(path: Path) -> str:
- ext = path.suffix.lower()
- if ext in MIME_OVERRIDES:
- return MIME_OVERRIDES[ext]
- mime, _ = mimetypes.guess_type(str(path))
- return mime or "application/octet-stream"
-
-
-def find_runs(workspace: Path) -> list[dict]:
- """Recursively find directories that contain an outputs/ subdirectory."""
- runs: list[dict] = []
- _find_runs_recursive(workspace, workspace, runs)
- runs.sort(key=lambda r: (r.get("eval_id", float("inf")), r["id"]))
- return runs
+Open `http://localhost:3000`. For AWS Bedrock RAG, create a knowledge base in the AWS console, upload documents to S3, and add the knowledge base ID to `ChatArea.tsx`.
+## Running the Financial Data Analyst
-def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None:
- if not current.is_dir():
- return
-
- outputs_dir = current / "outputs"
- if outputs_dir.is_dir():
- run = build_run(root, current)
- if run:
- runs.append(run)
- return
-
- skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
- for child in sorted(current.iterdir()):
- if child.is_dir() and child.name not in skip:
+```bash
+cd financial-data-analyst
+npm install
+echo "ANTHROPIC_API_KEY=sk-ant-..." > .env.local
+npm run dev
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/skill-creator/eval-viewer/generate_review.py`
-
-The `find_runs` function in [`skills/skill-creator/eval-viewer/generate_review.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/eval-viewer/generate_review.py) handles a key part of this chapter's functionality:
+Open `http://localhost:3000`. Upload a CSV, PDF, or image file and ask analytical questions. The app uses Claude to interpret the data and generates Recharts visualizations automatically.
-```py
+## Development Standards
+All Python code in the repository follows these standards (enforced by `pyproject.toml`):
-def find_runs(workspace: Path) -> list[dict]:
- """Recursively find directories that contain an outputs/ subdirectory."""
- runs: list[dict] = []
- _find_runs_recursive(workspace, workspace, runs)
- runs.sort(key=lambda r: (r.get("eval_id", float("inf")), r["id"]))
- return runs
+```bash
+ruff check . # lint
+ruff format . # format
+pyright # type-check
+pytest # test
+```
+Python conventions: `snake_case` for functions and variables, `PascalCase` for classes, `isort` for import ordering, full type annotations, `dataclass` with abstract base classes for tool implementations.
-def _find_runs_recursive(root: Path, current: Path, runs: list[dict]) -> None:
- if not current.is_dir():
- return
+TypeScript conventions: strict mode, functional React components, `shadcn/ui` components, `ESLint` Next.js rules.
- outputs_dir = current / "outputs"
- if outputs_dir.is_dir():
- run = build_run(root, current)
- if run:
- runs.append(run)
- return
+## Architecture Decision: Why Six Separate Projects?
- skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
- for child in sorted(current.iterdir()):
- if child.is_dir() and child.name not in skip:
- _find_runs_recursive(root, child, runs)
+The quickstarts are deliberately isolated rather than a monorepo of shared libraries. This is an intentional design choice: each project is self-contained so you can copy just the piece you need without pulling in unrelated dependencies. The tradeoff is some code duplication — the `loop.py` pattern appears in both `computer-use-demo` and `browser-use-demo` with slight variations — but the benefit is that each quickstart is a complete, immediately understandable reference.
+```mermaid
+flowchart LR
+ subgraph "Shared Pattern (not a shared library)"
+ LP["sampling_loop()"]
+ TH["Tool Handlers"]
+ MH["Message History"]
+ end
+
+ CU["computer-use-demo"] -->|adapts| LP
+ BD["browser-use-demo"] -->|adapts| LP
+ AG["agents/"] -->|reimplements| LP
+ CU --> TH
+ BD --> TH
+ AG --> TH
+ LP --> MH
+```
-def build_run(root: Path, run_dir: Path) -> dict | None:
- """Build a run dict with prompt, outputs, and grading data."""
- prompt = ""
- eval_id = None
+## Common First-Run Issues
-```
+| Issue | Cause | Fix |
+|:------|:------|:----|
+| `docker: Cannot connect to Docker daemon` | Docker Desktop not running | Start Docker Desktop |
+| `anthropic.AuthenticationError` | Missing or invalid API key | Check `ANTHROPIC_API_KEY` is set in the current shell |
+| Port 8080 already in use | Another service on that port | Change `-p 8080:8080` to `-p 9080:8080` and open `:9080` |
+| Computer use agent acts slowly | Default model is large | Switch to `claude-haiku-4-20250514` in the Streamlit sidebar |
+| `npm: command not found` | Node.js not installed | Install Node.js 18+ via `nvm` or `https://nodejs.org` |
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/skill-creator/eval-viewer/generate_review.py`
-
-The `build_run` function in [`skills/skill-creator/eval-viewer/generate_review.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/eval-viewer/generate_review.py) handles a key part of this chapter's functionality:
-
-```py
- outputs_dir = current / "outputs"
- if outputs_dir.is_dir():
- run = build_run(root, current)
- if run:
- runs.append(run)
- return
-
- skip = {"node_modules", ".git", "__pycache__", "skill", "inputs"}
- for child in sorted(current.iterdir()):
- if child.is_dir() and child.name not in skip:
- _find_runs_recursive(root, child, runs)
-
-
-def build_run(root: Path, run_dir: Path) -> dict | None:
- """Build a run dict with prompt, outputs, and grading data."""
- prompt = ""
- eval_id = None
-
- # Try eval_metadata.json
- for candidate in [run_dir / "eval_metadata.json", run_dir.parent / "eval_metadata.json"]:
- if candidate.exists():
- try:
- metadata = json.loads(candidate.read_text())
- prompt = metadata.get("prompt", "")
- eval_id = metadata.get("eval_id")
- except (json.JSONDecodeError, OSError):
- pass
- if prompt:
- break
-
- # Fall back to transcript.md
- if not prompt:
-```
+## Summary
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+You now have the repository structure, a clear map of which quickstart serves which purpose, and the commands to run the four most important ones. The next chapter examines the shared architectural patterns that all six quickstarts rely on.
+Next: [Chapter 2: Quickstart Architecture](02-skill-categories.md)
-## How These Components Connect
+---
-```mermaid
-flowchart TD
- A[ReviewHandler]
- B[get_mime_type]
- C[find_runs]
- D[build_run]
- E[embed_file]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+- [Tutorial Index](README.md)
+- [Next Chapter: Chapter 2: Quickstart Architecture](02-skill-categories.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/02-skill-categories.md b/tutorials/anthropic-skills-tutorial/02-skill-categories.md
index 129a20dd..1fc6b3c0 100644
--- a/tutorials/anthropic-skills-tutorial/02-skill-categories.md
+++ b/tutorials/anthropic-skills-tutorial/02-skill-categories.md
@@ -1,296 +1,230 @@
---
layout: default
-title: "Chapter 2: Skill Categories"
+title: "Chapter 2: Quickstart Architecture"
nav_order: 2
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 2: Skill Categories
-
-Welcome to **Chapter 2: Skill Categories**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-Category design controls maintainability. If categories are too broad, skills become brittle and hard to trust.
-
-## Four Practical Categories
-
-| Category | Typical Inputs | Typical Outputs | Typical Risk |
-|:---------|:---------------|:----------------|:-------------|
-| Document Workflows | Notes, policy docs, datasets | Structured docs/slides/sheets | Formatting drift |
-| Creative and Brand | Briefs, tone rules, examples | On-brand copy or concepts | Brand inconsistency |
-| Engineering and Ops | Codebase context, tickets, logs | Patches, runbooks, plans | Incorrect assumptions |
-| Enterprise Process | Internal standards and controls | Audit artifacts, compliance actions | Governance gaps |
-
-## How to Choose Category Boundaries
-
-Use one outcome per skill. If two outcomes have different acceptance criteria, split the skill.
-
-**Good split:**
-- `incident-triage`
-- `postmortem-draft`
-- `stakeholder-update`
-
-**Bad split:**
-- `incident-everything`
-
-A single giant skill creates unclear prompts, conflicting priorities, and harder testing.
-
-## Decision Matrix
-
-| Question | If "Yes" | If "No" |
-|:---------|:----------|:----------|
-| Is the output contract identical across requests? | Keep in same skill | Split into separate skills |
-| Do tasks share the same references and policies? | Keep shared references | Isolate by domain |
-| Can one test suite verify quality for all use cases? | Keep grouped | Split for clearer quality gates |
-| Are escalation paths identical? | Keep grouped | Split by risk/approval path |
-
-## Category-Specific Design Tips
-
-- **Document skills:** prioritize template fidelity and deterministic section ordering.
-- **Creative skills:** define what variation is allowed and what must stay fixed.
-- **Technical skills:** enforce constraints on tools, files, and unsafe operations.
-- **Enterprise skills:** include explicit policy references and audit fields.
-
-## Anti-Patterns
-
-- Category names that describe team structure instead of behavior
-- Mixing high-stakes and low-stakes actions in one skill
-- Using skills as a substitute for missing source documentation
-- Requiring hidden tribal knowledge to run the skill
-
-## Summary
-
-You can now define category boundaries that keep skills focused, testable, and easier to operate.
-
-Next: [Chapter 3: Advanced Skill Design](03-advanced-skill-design.md)
+# Chapter 2: Quickstart Architecture
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for core abstractions in this chapter so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
+Before you can extend or adapt any of these quickstarts, you need to understand the patterns they all share. Five projects look different on the surface — Python vs TypeScript, Docker vs bare Node.js, Streamlit vs Next.js — but they share a common architectural skeleton. Recognizing that skeleton lets you find the right file to edit when something breaks, and lets you transfer patterns from one project to another.
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
+## The Universal Agent Loop
-After working through this chapter, you should be able to reason about `Chapter 2: Skill Categories` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
+Every project that calls Claude in a loop follows the same core pattern: send a message, check the response for tool use blocks, execute the tools, append the results to the conversation, and repeat until Claude sends a response with no tool use blocks.
-Use the implementation notes around execution and reliability details as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
+```mermaid
+sequenceDiagram
+ participant User
+ participant Loop as sampling_loop / _agent_loop
+ participant Claude as Claude API
+ participant Tools as Tool Handlers
+
+ User->>Loop: initial message
+ loop Until no tool_use in response
+ Loop->>Claude: messages + tools + system prompt
+ Claude-->>Loop: response (may contain tool_use blocks)
+ alt response contains tool_use
+ Loop->>Tools: execute each tool_use block
+ Tools-->>Loop: ToolResult (output | base64_image | error)
+ Loop->>Loop: append tool_result to messages
+ end
+ end
+ Loop-->>User: final text response
+```
-Under the hood, `Chapter 2: Skill Categories` usually follows a repeatable control path:
+The computer-use-demo implements this in `computer_use_demo/loop.py` as `sampling_loop()`. The agents quickstart implements it in `agents/agent.py` as `Agent._agent_loop()`. The browser-use-demo has its own `loop.py` following the same structure.
-1. **Context bootstrap**: initialize runtime config and prerequisites for `core component`.
-2. **Input normalization**: shape incoming data so `execution layer` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `state model`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
+## Project Anatomy Comparison
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
+### computer-use-demo
-## Source Walkthrough
+The most architecturally complete quickstart. Key files:
-Use the following upstream sources to verify implementation details while reading this chapter:
+```text
+computer_use_demo/
+├── loop.py # Core: async sampling_loop(), prompt caching, image truncation
+├── streamlit.py # UI: sidebar config, chat display, callback wiring
+└── tools/
+ ├── base.py # ToolResult dataclass, BaseAnthropicTool ABC, ToolCollection
+ ├── bash.py # BashTool20250124: persistent subprocess with sentinel pattern
+ ├── computer.py # ComputerTool: screenshot, keyboard, mouse with coord scaling
+ └── edit.py # EditTool20250728: view/create/str_replace/insert
+```
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
+The `ToolCollection` in `base.py` is the glue: it holds all three tools, provides `to_params()` for the API call, and dispatches `run(tool_name, tool_input)` to the correct tool instance.
-Suggested trace strategy:
-- search upstream code for `Skill` and `Categories` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+### agents/
-## Chapter Connections
+A deliberately minimal reference. The goal is clarity, not features: fewer than 300 lines total.
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 1: Getting Started](01-getting-started.md)
-- [Next Chapter: Chapter 3: Advanced Skill Design](03-advanced-skill-design.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+```text
+agents/
+├── agent.py # Agent class: _agent_loop, execute_tools, run/run_async
+├── tools/ # ThinkTool and MCP tool wrappers
+└── utils/ # Message history management, MCP connection setup
+```
-## Depth Expansion Playbook
+Key principle stated in the README: this is "NOT an SDK, but a reference implementation of key concepts." Do not try to use it as a production library — read it to understand the pattern, then implement your own.
-## Source Code Walkthrough
+### autonomous-coding/
-### `skills/skill-creator/scripts/run_eval.py`
+Unique two-agent architecture. Uses Claude Code CLI (`@anthropic-ai/claude-code`) for the actual coding work, with a Python orchestrator that manages state across sessions.
-The `main` function in [`skills/skill-creator/scripts/run_eval.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/scripts/run_eval.py) handles a key part of this chapter's functionality:
+```text
+autonomous-coding/
+├── autonomous_agent_demo.py # Orchestrator: launches initializer, then iterates coding agents
+├── prompts/ # System prompts for initializer and coding agents
+└── feature_list.json # State file: source of truth for completed features
+```
-```py
- while time.time() - start_time < timeout:
- if process.poll() is not None:
- remaining = process.stdout.read()
- if remaining:
- buffer += remaining.decode("utf-8", errors="replace")
- break
+The initializer agent reads a specification and writes a comprehensive test suite plus `feature_list.json`. Subsequent coding-agent sessions each implement a batch of features, commit to git, and update `feature_list.json`. Sessions can be interrupted and resumed without data loss because all state is in files.
- ready, _, _ = select.select([process.stdout], [], [], 1.0)
- if not ready:
- continue
+### customer-support-agent/
- chunk = os.read(process.stdout.fileno(), 8192)
- if not chunk:
- break
- buffer += chunk.decode("utf-8", errors="replace")
+A Next.js 14 app demonstrating real-time streaming, extended thinking display, and Bedrock knowledge base integration.
- while "\n" in buffer:
- line, buffer = buffer.split("\n", 1)
- line = line.strip()
- if not line:
- continue
+```text
+customer-support-agent/
+├── app/
+│ ├── api/chat/route.ts # Edge Runtime: streams Claude responses to the frontend
+│ └── components/
+│ └── ChatArea.tsx # Main chat component: knowledge base config, mood detection
+└── package.json
+```
- try:
- event = json.loads(line)
- except json.JSONDecodeError:
- continue
+### financial-data-analyst/
- # Early detection via stream events
- if event.get("type") == "stream_event":
- se = event.get("event", {})
- se_type = se.get("type", "")
+Next.js 14 app demonstrating file upload, multi-format parsing, and dynamic chart generation.
+```text
+financial-data-analyst/
+├── app/
+│ ├── api/analyze/route.ts # Parses uploaded files, sends to Claude, streams JSON
+│ └── components/ # Chat, FileUpload, ChartRenderer (Recharts)
+└── package.json
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+## Shared Patterns
+
+### Pattern 1: Provider Abstraction
-### `skills/skill-creator/scripts/aggregate_benchmark.py`
+Both `computer-use-demo` and `browser-use-demo` support three API providers through environment-variable-driven client selection:
-The `calculate_stats` function in [`skills/skill-creator/scripts/aggregate_benchmark.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/scripts/aggregate_benchmark.py) handles a key part of this chapter's functionality:
+```python
+# From computer_use_demo/loop.py (simplified)
+if provider == APIProvider.ANTHROPIC:
+ client = Anthropic(api_key=api_key)
+elif provider == APIProvider.BEDROCK:
+ client = AnthropicBedrock()
+elif provider == APIProvider.VERTEX:
+ client = AnthropicVertex()
+```
-```py
+This pattern lets you switch from Anthropic's direct API to enterprise-managed AWS Bedrock or Google Vertex deployments without changing any other code.
+### Pattern 2: Tool Result → API Message Translation
-def calculate_stats(values: list[float]) -> dict:
- """Calculate mean, stddev, min, max for a list of values."""
- if not values:
- return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}
+Tool execution results must be translated into the exact message format the API expects before being appended to the conversation. In `computer_use_demo/loop.py`:
- n = len(values)
- mean = sum(values) / n
+```python
+def _make_api_tool_result(
+ result: ToolResult, tool_use_id: str
+) -> BetaToolResultBlockParam:
+ tool_result_content: list[BetaTextBlockParam | BetaImageBlockParam] | str = []
- if n > 1:
- variance = sum((x - mean) ** 2 for x in values) / (n - 1)
- stddev = math.sqrt(variance)
+ if result.error:
+ tool_result_content = _maybe_prepend_system_tool_result(result, result.error)
else:
- stddev = 0.0
+ if result.output:
+ tool_result_content.append({
+ "type": "text",
+ "text": _maybe_prepend_system_tool_result(result, result.output),
+ })
+ if result.base64_image:
+ tool_result_content.append({
+ "type": "image",
+ "source": {
+ "type": "base64",
+ "media_type": "image/png",
+ "data": result.base64_image,
+ },
+ })
return {
- "mean": round(mean, 4),
- "stddev": round(stddev, 4),
- "min": round(min(values), 4),
- "max": round(max(values), 4)
+ "type": "tool_result",
+ "content": tool_result_content,
+ "tool_use_id": tool_use_id,
+ "is_error": bool(result.error),
}
-
-
-def load_run_results(benchmark_dir: Path) -> dict:
- """
- Load all run results from a benchmark directory.
-
- Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
- or "new_skill"/"old_skill"), each containing a list of run results.
- """
- # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+### Pattern 3: Model and Tool Version Pairing
-### `skills/skill-creator/scripts/aggregate_benchmark.py`
+The computer use tools have versioned API types that must match a compatible model version. The pairing is explicit in the code:
-The `load_run_results` function in [`skills/skill-creator/scripts/aggregate_benchmark.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/scripts/aggregate_benchmark.py) handles a key part of this chapter's functionality:
+| Tool Class | `api_type` | Compatible Models |
+|:-----------|:-----------|:------------------|
+| `ComputerTool20241022` | `computer_20241022` | claude-3-5-sonnet-20241022 |
+| `ComputerTool20250124` | `computer_20250124` | claude-3-5-sonnet-20250124+ |
+| `ComputerTool20251124` | `computer_20251124` | claude-opus-4-20250514+ |
+| `EditTool20250728` | `text_editor_20250728` | claude-3-5-sonnet-20250514+ |
-```py
+Mixing an old tool version with a new model (or vice versa) will produce API validation errors. The Streamlit sidebar in `computer-use-demo` exposes a "Tool version" selector precisely to manage this.
+## How These Patterns Connect
-def load_run_results(benchmark_dir: Path) -> dict:
- """
- Load all run results from a benchmark directory.
-
- Returns dict keyed by config name (e.g. "with_skill"/"without_skill",
- or "new_skill"/"old_skill"), each containing a list of run results.
- """
- # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
- runs_dir = benchmark_dir / "runs"
- if runs_dir.exists():
- search_dir = runs_dir
- elif list(benchmark_dir.glob("eval-*")):
- search_dir = benchmark_dir
- else:
- print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
- return {}
-
- results: dict[str, list] = {}
-
- for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))):
- metadata_path = eval_dir / "eval_metadata.json"
- if metadata_path.exists():
- try:
- with open(metadata_path) as mf:
- eval_id = json.load(mf).get("eval_id", eval_idx)
- except (json.JSONDecodeError, OSError):
- eval_id = eval_idx
- else:
- try:
- eval_id = int(eval_dir.name.split("-")[1])
+```mermaid
+flowchart TD
+ subgraph "API Layer"
+ PROV["Provider Abstraction
+Anthropic / Bedrock / Vertex"]
+ end
+
+ subgraph "Loop Layer"
+ SL["sampling_loop()"]
+ PC["Prompt Caching
+(inject_prompt_caching)"]
+ IT["Image Truncation
+(filter_to_n_most_recent)"]
+ end
+
+ subgraph "Tool Layer"
+ TC["ToolCollection"]
+ BT["BashTool"]
+ CT["ComputerTool"]
+ ET["EditTool"]
+ end
+
+ subgraph "Result Layer"
+ TR["ToolResult"]
+ MAPI["_make_api_tool_result()"]
+ end
+
+ PROV --> SL
+ SL --> PC
+ SL --> IT
+ SL --> TC
+ TC --> BT
+ TC --> CT
+ TC --> ET
+ BT --> TR
+ CT --> TR
+ ET --> TR
+ TR --> MAPI
+ MAPI --> SL
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/skill-creator/scripts/aggregate_benchmark.py`
-
-The `aggregate_results` function in [`skills/skill-creator/scripts/aggregate_benchmark.py`](https://github.com/anthropics/skills/blob/HEAD/skills/skill-creator/scripts/aggregate_benchmark.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def aggregate_results(results: dict) -> dict:
- """
- Aggregate run results into summary statistics.
-
- Returns run_summary with stats for each configuration and delta.
- """
- run_summary = {}
- configs = list(results.keys())
-
- for config in configs:
- runs = results.get(config, [])
-
- if not runs:
- run_summary[config] = {
- "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
- "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
- "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0}
- }
- continue
-
- pass_rates = [r["pass_rate"] for r in runs]
- times = [r["time_seconds"] for r in runs]
- tokens = [r.get("tokens", 0) for r in runs]
-
- run_summary[config] = {
- "pass_rate": calculate_stats(pass_rates),
- "time_seconds": calculate_stats(times),
- "tokens": calculate_stats(tokens)
- }
-
-```
+## Summary
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+All five quickstarts share an agentic loop, a tool-result-to-message translation pattern, and a tool collection dispatch mechanism. The Python projects add provider abstraction and tool version management. Understanding these shared patterns means you only need to learn the details once — the rest is project-specific configuration.
+Next: [Chapter 3: Computer Use Deep-Dive](03-advanced-skill-design.md)
-## How These Components Connect
+---
-```mermaid
-flowchart TD
- A[main]
- B[calculate_stats]
- C[load_run_results]
- D[aggregate_results]
- E[generate_benchmark]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 1: Getting Started](01-getting-started.md)
+- [Next Chapter: Chapter 3: Computer Use Deep-Dive](03-advanced-skill-design.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/03-advanced-skill-design.md b/tutorials/anthropic-skills-tutorial/03-advanced-skill-design.md
index 97817d26..3f472895 100644
--- a/tutorials/anthropic-skills-tutorial/03-advanced-skill-design.md
+++ b/tutorials/anthropic-skills-tutorial/03-advanced-skill-design.md
@@ -1,305 +1,256 @@
---
layout: default
-title: "Chapter 3: Advanced Skill Design"
+title: "Chapter 3: Computer Use Deep-Dive"
nav_order: 3
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 3: Advanced Skill Design
-
-Welcome to **Chapter 3: Advanced Skill Design**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-Advanced skills are small systems. Treat them like mini-products with explicit interfaces.
-
-## Multi-File Skill Layout
-
-```text
-customer-support-skill/
- SKILL.md
- scripts/
- classify_ticket.py
- enrich_account_context.ts
- references/
- escalation-policy.md
- sla-tiers.md
- assets/
- issue-taxonomy.csv
- templates/
- escalation-email.md
-```
-
-## Progressive Disclosure Pattern
-
-Good skills avoid dumping all context at once. Instead:
-
-1. Start with task intent and output contract.
-2. Pull references only when relevant.
-3. Call scripts only when deterministic transformation is required.
-
-This pattern reduces token waste and improves instruction adherence.
-
-## Frontmatter and Metadata Strategy
-
-At minimum, keep `name` and `description` precise.
-
-For larger catalogs, add optional metadata fields (when your runtime supports them) to improve discoverability and policy checks, such as:
-
-- compatibility constraints
-- license information
-- ownership metadata
-- tool allowlists
-
-## Script Design Rules
-
-Scripts should be boring and reliable.
-
-- Use strict argument parsing.
-- Return stable JSON structures.
-- Fail loudly with actionable error messages.
-- Avoid hidden network side effects unless clearly documented.
-
-Example output contract:
-
-```json
-{
- "status": "ok",
- "severity": "high",
- "routing_queue": "support-l2",
- "confidence": 0.91
-}
-```
-
-## References and Assets
-
-- Put durable, high-signal guidance in `references/`.
-- Keep `assets/` for files that are required but not convenient to inline.
-- Version both in Git so skill behavior is auditable over time.
-
-## Maintainability Checklist
-
-- Single responsibility per script
-- Explicit file paths in instructions
-- Backward-compatible schema evolution
-- Changelog entries for instruction changes
-
-## Summary
-
-You can now design skills that remain understandable as they grow beyond a single markdown file.
-
-Next: [Chapter 4: Integration Platforms](04-integration-platforms.md)
+# Chapter 3: Computer Use Deep-Dive
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `support`, `escalation`, `customer` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 3: Advanced Skill Design` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `skill`, `SKILL`, `scripts` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 3: Advanced Skill Design` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `support`.
-2. **Input normalization**: shape incoming data so `escalation` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `customer`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
+Computer use is the most complex Claude capability to implement correctly. The challenge is not just calling an API — it is building a feedback loop where Claude sees the screen, takes an action, observes the result, and continues until a goal is achieved. This chapter explains exactly how `computer-use-demo` implements that loop: the three tools Claude uses, how screenshots are captured and sent, how coordinates are scaled to match API resolution expectations, and how the sampling loop terminates.
-Use the following upstream sources to verify implementation details while reading this chapter:
+## How It Works Under the Hood
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
+Claude does not control the computer directly. Instead, it issues structured action requests that the local Python code executes on its behalf. The cycle is:
-Suggested trace strategy:
-- search upstream code for `support` and `escalation` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 2: Skill Categories](02-skill-categories.md)
-- [Next Chapter: Chapter 4: Integration Platforms](04-integration-platforms.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/pdf/scripts/extract_form_field_info.py`
-
-The `get_field_info` function in [`skills/pdf/scripts/extract_form_field_info.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pdf/scripts/extract_form_field_info.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def get_field_info(reader: PdfReader):
- fields = reader.get_fields()
-
- field_info_by_id = {}
- possible_radio_names = set()
-
- for field_id, field in fields.items():
- if field.get("/Kids"):
- if field.get("/FT") == "/Btn":
- possible_radio_names.add(field_id)
- continue
- field_info_by_id[field_id] = make_field_dict(field, field_id)
-
-
- radio_fields_by_id = {}
-
- for page_index, page in enumerate(reader.pages):
- annotations = page.get('/Annots', [])
- for ann in annotations:
- field_id = get_full_annotation_field_id(ann)
- if field_id in field_info_by_id:
- field_info_by_id[field_id]["page"] = page_index + 1
- field_info_by_id[field_id]["rect"] = ann.get('/Rect')
- elif field_id in possible_radio_names:
- try:
- on_values = [v for v in ann["/AP"]["/N"] if v != "/Off"]
- except KeyError:
- continue
- if len(on_values) == 1:
- rect = ann.get("/Rect")
+```mermaid
+sequenceDiagram
+ participant Claude
+ participant Loop as sampling_loop()
+ participant Computer as ComputerTool
+ participant Bash as BashTool
+ participant Edit as EditTool
+ participant Display as Xdotool + gnome-screenshot
+
+ Claude->>Loop: tool_use: computer(screenshot)
+ Loop->>Computer: __call__(action="screenshot")
+ Computer->>Display: gnome-screenshot -f /tmp/screenshot.png
+ Display-->>Computer: PNG file
+ Computer-->>Loop: ToolResult(base64_image=...)
+ Loop->>Claude: tool_result with base64 PNG
+
+ Claude->>Loop: tool_use: computer(left_click, coordinate=[512, 300])
+ Loop->>Computer: __call__(action="left_click", coordinate=[512,300])
+ Computer->>Display: xdotool mousemove --sync 384 225 click 1
+ Display-->>Computer: exit code 0
+ Computer-->>Loop: ToolResult(output="")
+ Loop->>Claude: tool_result
+
+ Claude->>Loop: tool_use: bash(command="ls /tmp")
+ Loop->>Bash: __call__(command="ls /tmp")
+ Bash-->>Loop: ToolResult(output="screenshot.png\n")
+ Loop->>Claude: tool_result
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pdf/scripts/extract_form_field_info.py`
-
-The `write_field_info` function in [`skills/pdf/scripts/extract_form_field_info.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pdf/scripts/extract_form_field_info.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def write_field_info(pdf_path: str, json_output_path: str):
- reader = PdfReader(pdf_path)
- field_info = get_field_info(reader)
- with open(json_output_path, "w") as f:
- json.dump(field_info, f, indent=2)
- print(f"Wrote {len(field_info)} fields to {json_output_path}")
-
-
-if __name__ == "__main__":
- if len(sys.argv) != 3:
- print("Usage: extract_form_field_info.py [input pdf] [output json]")
- sys.exit(1)
- write_field_info(sys.argv[1], sys.argv[2])
-
+## The Three Computer Use Tools
+
+### ComputerTool
+
+Defined in `computer_use_demo/tools/computer.py`. There are three versioned classes:
+
+- `ComputerTool20241022` — original set of actions
+- `ComputerTool20250124` — adds scroll, hold_key, wait, triple_click, left_mouse_down/up
+- `ComputerTool20251124` — adds zoom capability
+
+The Streamlit sidebar exposes a "Tool version" selector to choose between them.
+
+**Action types (ComputerTool20250124):**
+
+| Category | Actions |
+|:---------|:--------|
+| Mouse | `left_click`, `right_click`, `middle_click`, `double_click`, `mouse_move`, `left_click_drag`, `left_mouse_down`, `left_mouse_up`, `triple_click` |
+| Keyboard | `key`, `type`, `hold_key` |
+| Scroll | `scroll` (with `coordinate`, `direction`, `amount`) |
+| Screen | `screenshot`, `cursor_position` |
+| Timing | `wait` |
+
+**Coordinate scaling** is the most subtle part. The API expects coordinates relative to a fixed target resolution (1024×768 for XGA, 1280×800 for WXGA, 1366×768 for FWXGA), but the actual display may be a different size. The tool scales every coordinate before calling xdotool:
+
+```python
+# From computer_use_demo/tools/computer.py (simplified)
+def scale_coordinates(self, source: ScalingSource, x: int, y: int):
+ """Convert coordinates between API space and screen space."""
+ if not self._scaling_enabled:
+ return x, y
+ ratio = self.width / self.height
+ # Select target resolution that matches display aspect ratio
+ target_dimension = None
+ for dimension in MAX_SCALING_TARGETS.values():
+ if abs(dimension["width"] / dimension["height"] - ratio) < 0.02:
+ if dimension["width"] < self.width:
+ target_dimension = dimension
+ if target_dimension is None:
+ return x, y
+ x_scale = self.width / target_dimension["width"]
+ y_scale = self.height / target_dimension["height"]
+ if source == ScalingSource.API:
+ # Claude gave us API coords → convert to screen coords
+ return round(x * x_scale), round(y * y_scale)
+ else:
+ # We have screen coords → convert to API coords for display
+ return round(x / x_scale), round(y / y_scale)
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/slack-gif-creator/core/frame_composer.py`
-
-The `create_blank_frame` function in [`skills/slack-gif-creator/core/frame_composer.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
-
-```py
+The recommendation in the README to use XGA resolution (1024×768) in your Docker container is directly related to this: it eliminates the need for scaling by making screen coordinates and API coordinates identical.
+### BashTool
-def create_blank_frame(
- width: int, height: int, color: tuple[int, int, int] = (255, 255, 255)
-) -> Image.Image:
- """
- Create a blank frame with solid color background.
+Defined in `computer_use_demo/tools/bash.py` as `BashTool20250124`. Maintains a **persistent subprocess** across all tool calls in a session, so environment variables and working directory state persist between commands.
- Args:
- width: Frame width
- height: Frame height
- color: RGB color tuple (default: white)
+The core challenge: how do you know when a command has finished in a persistent shell? You cannot wait for EOF because the process keeps running. The solution is a **sentinel pattern**:
- Returns:
- PIL Image
- """
- return Image.new("RGB", (width, height), color)
+```python
+# From computer_use_demo/tools/bash.py (simplified)
+SENTINEL = "<<exit>>"
+async def run(self, command: str) -> tuple[str, str]:
+ """Run a command and return (stdout, stderr)."""
+ # Append sentinel echo so we know when output ends
+ self._process.stdin.write(
+ command.encode() + f"; echo '{SENTINEL}'\n".encode()
+ )
+ await self._process.stdin.drain()
-def draw_circle(
- frame: Image.Image,
- center: tuple[int, int],
- radius: int,
- fill_color: Optional[tuple[int, int, int]] = None,
- outline_color: Optional[tuple[int, int, int]] = None,
- outline_width: int = 1,
-) -> Image.Image:
- """
- Draw a circle on a frame.
+ # Read until we see the sentinel
+ output = ""
+ async for line in self._process.stdout:
+ line_str = line.decode("utf-8", errors="replace")
+ if SENTINEL in line_str:
+ break
+ output += line_str
- Args:
- frame: PIL Image to draw on
+ return output, ""
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/slack-gif-creator/core/frame_composer.py`
+The tool also has a `restart()` method for recovery from timeouts or crashes, and enforces a 120-second timeout per command.
+
+### EditTool
+
+Defined as `EditTool20250728` in `computer_use_demo/tools/edit.py`. API type: `text_editor_20250728`. Supports four commands:
+
+| Command | Description |
+|:--------|:------------|
+| `view` | Display file contents (with optional line range) or list directory (2 levels deep) |
+| `create` | Create a new file with given content |
+| `str_replace` | Replace exactly one occurrence of `old_str` with `new_str` |
+| `insert` | Insert `new_str` after a specified `insert_line` number |
+
+The `str_replace` command enforces uniqueness: if `old_str` appears zero or more than one time, the tool returns an error. This prevents accidental partial edits.
+
+Output snippets show 4 lines of context around every edit, so Claude can verify its change landed in the right place without taking a full screenshot.
+
+## The Sampling Loop in Detail
+
+`sampling_loop()` in `computer_use_demo/loop.py` is the engine of the entire demo. Simplified structure:
+
+```python
+async def sampling_loop(
+ *,
+ model: str,
+ provider: APIProvider,
+ system_prompt_suffix: str,
+ messages: list[BetaMessageParam],
+ output_callback: Callable,
+ tool_output_callback: Callable,
+ api_response_callback: Callable,
+ api_key: str,
+ only_n_most_recent_images: int | None = None,
+ max_tokens: int = 4096,
+ thinking: BetaThinkingConfigParam | None = None,
+ tool_version: ToolVersion,
+) -> list[BetaMessageParam]:
+
+ tool_collection = ToolCollection(
+ ComputerTool(display_width_px, display_height_px, DISPLAY_NUM),
+ BashTool(),
+ EditTool(),
+ )
+
+ system = BetaTextBlockParam(
+ type="text",
+ text=f"{SYSTEM_PROMPT}{system_prompt_suffix}",
+ )
+
+ while True:
+ # Optionally trim old screenshots to manage context window
+ if only_n_most_recent_images:
+ _maybe_filter_to_n_most_recent_images(messages, only_n_most_recent_images)
+
+ # Optionally inject prompt cache breakpoints
+ if betas:
+ _inject_prompt_caching(messages)
+
+ # Call Claude
+ response = client.beta.messages.create(
+ max_tokens=max_tokens,
+ messages=messages,
+ model=model,
+ system=[system],
+ tools=tool_collection.to_params(),
+ betas=betas,
+ )
+
+ # Notify UI callback
+ await api_response_callback(response)
+
+ # Convert response to message and append
+ response_params = _response_to_params(response)
+ messages.append({"role": "assistant", "content": response_params})
+
+ # Find tool use blocks
+ tool_use_blocks = [b for b in response_params if b["type"] == "tool_use"]
+ if not tool_use_blocks:
+ return messages # ← Loop termination: no more tool calls
+
+ # Execute each tool
+ tool_result_content = []
+ for block in tool_use_blocks:
+ result = await tool_collection.run(
+ name=block["name"],
+ tool_input=block["input"],
+ )
+ tool_result_content.append(
+ _make_api_tool_result(result, block["id"])
+ )
+ await tool_output_callback(result, block["id"])
+
+ # Append tool results and loop
+ messages.append({"role": "user", "content": tool_result_content})
+```
-The `draw_circle` function in [`skills/slack-gif-creator/core/frame_composer.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
+## Security Considerations
-```py
+The README is explicit about risks: computer use is a beta feature with distinct attack surfaces.
+**Key precautions the quickstart documents:**
-def draw_circle(
- frame: Image.Image,
- center: tuple[int, int],
- radius: int,
- fill_color: Optional[tuple[int, int, int]] = None,
- outline_color: Optional[tuple[int, int, int]] = None,
- outline_width: int = 1,
-) -> Image.Image:
- """
- Draw a circle on a frame.
+1. Run Claude in an isolated VM with minimal permissions — the Docker container enforces this
+2. Avoid exposing sensitive credentials or accounts within the VM
+3. Restrict internet access to an approved domain allowlist when possible
+4. Require human confirmation for irreversible actions
+5. Be alert to prompt injection through webpage content (an adversarial page could instruct Claude to take unintended actions)
- Args:
- frame: PIL Image to draw on
- center: (x, y) center position
- radius: Circle radius
- fill_color: RGB fill color (None for no fill)
- outline_color: RGB outline color (None for no outline)
- outline_width: Outline width in pixels
+The `SYSTEM_PROMPT` in `loop.py` explicitly warns Claude about these risks and instructs it to prefer conservative actions when uncertain.
- Returns:
- Modified frame
- """
- draw = ImageDraw.Draw(frame)
- x, y = center
- bbox = [x - radius, y - radius, x + radius, y + radius]
- draw.ellipse(bbox, fill=fill_color, outline=outline_color, width=outline_width)
- return frame
+## Resolution and Performance Tips
+- **Use XGA (1024×768)**: Recommended in the README. Eliminates coordinate scaling entirely, which reduces errors from rounding.
+- **Image truncation**: The `only_n_most_recent_images` parameter (configurable in the sidebar) drops older screenshots from the context window. Computer use generates many screenshots; without truncation, context costs grow rapidly.
+- **Model selection**: The flagship demos use `claude-opus-4-20250514`. For exploratory or budget use, switch to `claude-haiku-4-20250514` in the sidebar — it is significantly faster and cheaper.
-def draw_text(
-```
+## Summary
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+The computer use demo implements a tight feedback loop: Claude takes a screenshot, issues an action, sees the result, and continues. Three tools — ComputerTool (screenshot + input), BashTool (persistent shell with sentinel detection), and EditTool (file editing) — cover all the capabilities a desktop agent needs. Coordinate scaling handles resolution mismatches between the API and actual display. The sampling loop terminates cleanly when Claude returns a message with no tool use blocks.
+Next: [Chapter 4: Tool Use Patterns](04-integration-platforms.md)
-## How These Components Connect
+---
-```mermaid
-flowchart TD
- A[get_field_info]
- B[write_field_info]
- C[create_blank_frame]
- D[draw_circle]
- E[draw_text]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 2: Quickstart Architecture](02-skill-categories.md)
+- [Next Chapter: Chapter 4: Tool Use Patterns](04-integration-platforms.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/04-integration-platforms.md b/tutorials/anthropic-skills-tutorial/04-integration-platforms.md
index cb9d1067..bb6bd10f 100644
--- a/tutorials/anthropic-skills-tutorial/04-integration-platforms.md
+++ b/tutorials/anthropic-skills-tutorial/04-integration-platforms.md
@@ -1,311 +1,334 @@
---
layout: default
-title: "Chapter 4: Integration Platforms"
+title: "Chapter 4: Tool Use Patterns"
nav_order: 4
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 4: Integration Platforms
-
-Welcome to **Chapter 4: Integration Platforms**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-The same skill package can be used across multiple surfaces, but deployment and governance expectations differ.
-
-## Claude Code
-
-Claude Code is strong for engineering and file-centric workflows.
-
-From the official skills repository, a common setup is:
-
-```bash
-/plugin marketplace add anthropics/skills
-/plugin install example-skills@anthropic-agent-skills
-```
-
-Operational guidance:
-
-- Keep skill repos versioned and pinned.
-- Prefer local scripts for deterministic steps.
-- Enforce repository-level review on `SKILL.md` changes.
-
-## Claude.ai
-
-Claude.ai is ideal for interactive drafting and team collaboration.
-
-Use it when:
-
-- humans need to iterate on outputs quickly
-- file upload context is part of the workflow
-- you want lower-friction skill adoption for non-engineers
-
-Guardrail recommendation: keep a canonical output template in the skill so generated artifacts remain comparable.
-
-## Claude API
-
-API integration gives maximal control for enterprise systems.
-
-Typical pattern:
-
-1. Load skill instructions as controlled context.
-2. Inject request-specific payload.
-3. Validate output against schema.
-4. Store run metadata for auditing.
-
-Pseudo-flow:
-
-```text
-request -> select skill -> build prompt context -> generate -> validate -> persist
-```
-
-## Cross-Platform Compatibility Strategy
-
-| Concern | Claude Code | Claude.ai | Claude API |
-|:--------|:------------|:----------|:-----------|
-| Local file/scripts | Strong | Limited | App-controlled |
-| Governance controls | Git + review | Workspace policies | Full policy engine |
-| Structured validation | Medium | Medium | Strong |
-| Automation depth | High | Medium | Highest |
-
-## Integration Pitfalls
-
-- Reusing one skill unchanged across radically different environments
-- Assuming runtime-specific tools exist everywhere
-- Failing to log skill version with each generated artifact
-
-## Summary
-
-You can now choose the right runtime surface and adjust operating controls per platform.
-
-Next: [Chapter 5: Production Skills](05-production-skills.md)
+# Chapter 4: Tool Use Patterns
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `skills`, `plugin`, `marketplace` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 4: Integration Platforms` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
+The Claude API's tool use mechanism is powerful but has several non-obvious design requirements: tools must have stable JSON schemas, results must be formatted as specific message block types, tool definitions must be versioned alongside model versions, and multi-tool responses require careful iteration. This chapter explains exactly how the quickstarts define, register, execute, and compose tools — patterns you can copy directly into your own projects.
-Use the implementation notes around `anthropics`, `install`, `example` as your checklist when adapting these patterns to your own repository.
+## How Tool Use Works Under the Hood
-## How it Works Under the Hood
+When you include a `tools` array in a `messages.create` call, Claude may return a response with `stop_reason: "tool_use"` and one or more `tool_use` content blocks. Each block contains:
-Under the hood, `Chapter 4: Integration Platforms` usually follows a repeatable control path:
+- `id` — a unique identifier for this specific tool invocation
+- `name` — the tool name (must match a name in your `tools` array)
+- `input` — a JSON object matching the tool's `input_schema`
-1. **Context bootstrap**: initialize runtime config and prerequisites for `skills`.
-2. **Input normalization**: shape incoming data so `plugin` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `marketplace`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
-
-Suggested trace strategy:
-- search upstream code for `skills` and `plugin` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 3: Advanced Skill Design](03-advanced-skill-design.md)
-- [Next Chapter: Chapter 5: Production Skills](05-production-skills.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/slack-gif-creator/core/easing.py`
-
-The `ease_in_bounce` function in [`skills/slack-gif-creator/core/easing.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
-
-```py
+You must then execute the tool and return a `tool_result` message that references the same `id`. Only after that can Claude continue reasoning.
+```mermaid
+sequenceDiagram
+ participant App
+ participant Claude
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
+ App->>Claude: messages=[user_msg], tools=[tool_defs]
+ Claude-->>App: stop_reason="tool_use", content=[tool_use{id,name,input}]
+ App->>App: execute tool(name, input)
+ App->>Claude: messages=[..., assistant_tool_use, user_tool_result{tool_use_id}]
+ Claude-->>App: stop_reason="end_turn", content=[text_response]
+```
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
- t -= 2.25 / 2.75
- return 7.5625 * t * t + 0.9375
- else:
- t -= 2.625 / 2.75
- return 7.5625 * t * t + 0.984375
+## BaseAnthropicTool: The Tool Contract
+All tools in `computer-use-demo` inherit from `BaseAnthropicTool` in `base.py`:
-def ease_in_out_bounce(t: float) -> float:
- """Bounce ease-in-out."""
- if t < 0.5:
- return ease_in_bounce(t * 2) * 0.5
- return ease_out_bounce(t * 2 - 1) * 0.5 + 0.5
+```python
+class BaseAnthropicTool(ABC):
+ """Abstract base class for Anthropic tool implementations."""
+ @abstractmethod
+ def __call__(self, **kwargs) -> Awaitable[ToolResult]:
+ """Execute the tool. Must return a ToolResult."""
+ ...
-def ease_in_elastic(t: float) -> float:
- """Elastic ease-in (spring effect)."""
- if t == 0 or t == 1:
+ @abstractmethod
+ def to_params(self) -> BetaToolUnionParam:
+ """Return the tool definition for the API call."""
+ ...
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/slack-gif-creator/core/easing.py`
-
-The `ease_out_bounce` function in [`skills/slack-gif-creator/core/easing.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `to_params()` method must return the exact dict structure the API expects. For the computer tool:
+
+```python
+# From computer_use_demo/tools/computer.py (simplified)
+def to_params(self) -> BetaToolUnionParam:
+ return {
+ "type": self.api_type, # e.g. "computer_20250124"
+ "name": "computer",
+ "display_width_px": self.width,
+ "display_height_px": self.height,
+ "display_number": self._display_num,
+ }
+```
-```py
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
+For a generic custom tool using the standard `function` type, the schema looks like:
+
+```python
+{
+ "name": "get_weather",
+ "description": "Retrieve current weather for a city",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "city": {"type": "string", "description": "City name"},
+ "units": {"type": "string", "enum": ["celsius", "fahrenheit"]}
+ },
+ "required": ["city"]
+ }
+}
+```
+## ToolResult: The Result Contract
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
- t -= 2.25 / 2.75
- return 7.5625 * t * t + 0.9375
- else:
- t -= 2.625 / 2.75
- return 7.5625 * t * t + 0.984375
+`ToolResult` in `base.py` is a frozen dataclass that represents any possible tool outcome:
+```python
+@dataclass(frozen=True)
+class ToolResult:
+ output: str | None = None # Text output from the tool
+ error: str | None = None # Error message (sets is_error=True in API)
+ base64_image: str | None = None # PNG screenshot as base64 string
+ system: str | None = None # System-level context prepended to output
-def ease_in_out_bounce(t: float) -> float:
- """Bounce ease-in-out."""
- if t < 0.5:
- return ease_in_bounce(t * 2) * 0.5
- return ease_out_bounce(t * 2 - 1) * 0.5 + 0.5
+ def __bool__(self):
+ return any([self.output, self.error, self.base64_image, self.system])
+ def __add__(self, other: "ToolResult") -> "ToolResult":
+ """Combine two results by concatenating string fields."""
+ ...
-def ease_in_elastic(t: float) -> float:
- """Elastic ease-in (spring effect)."""
- if t == 0 or t == 1:
- return t
- return -math.pow(2, 10 * (t - 1)) * math.sin((t - 1.1) * 5 * math.pi)
+ def replace(self, **kwargs) -> "ToolResult":
+ """Return a copy with specified fields replaced."""
+ return dataclasses.replace(self, **kwargs)
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/slack-gif-creator/core/easing.py`
+Subclasses:
+- `CLIResult` — for command-line tools that only return text
+- `ToolFailure` — explicitly marks a failed execution (produces `is_error=True`)
+- `ToolError` — exception raised inside `__call__`, caught by `ToolCollection.run()`
+
+## ToolCollection: Dispatch and Registration
+
+`ToolCollection` holds a tuple of tool instances and handles:
+
+1. **Registration**: maps tool names to instances
+2. **API parameters**: calls `to_params()` on each tool and returns the list
+3. **Dispatch**: routes incoming tool names to the right `__call__`
+4. **Error wrapping**: catches `ToolError` exceptions and returns `ToolFailure`
+
+```python
+class ToolCollection:
+ def __init__(self, *tools: BaseAnthropicTool):
+ self.tools = tools
+ self.tool_map = {tool.to_params()["name"]: tool for tool in tools}
+
+ def to_params(self) -> list[BetaToolUnionParam]:
+ return [tool.to_params() for tool in self.tools]
+
+ async def run(self, *, name: str, tool_input: dict) -> ToolResult:
+ tool = self.tool_map.get(name)
+ if not tool:
+ return ToolFailure(error=f"Tool {name!r} is invalid")
+ try:
+ return await tool(**tool_input)
+ except ToolError as e:
+ return ToolFailure(error=e.message)
+```
-The `ease_in_out_bounce` function in [`skills/slack-gif-creator/core/easing.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+## BashTool: Persistent Session with Sentinel Detection
-```py
+A naive bash tool implementation spawns a new subprocess per command. This loses environment variables, current directory, and shell state between calls. `BashTool20250124` uses a persistent subprocess instead, maintained across the lifetime of the sampling loop session.
+The challenge: detecting when a command is complete without waiting for EOF. The sentinel pattern appends `; echo '<<exit>>'` to every command and reads until that marker appears:
-def ease_in_out_bounce(t: float) -> float:
- """Bounce ease-in-out."""
- if t < 0.5:
- return ease_in_bounce(t * 2) * 0.5
- return ease_out_bounce(t * 2 - 1) * 0.5 + 0.5
+```python
+class _BashSession:
+ """Persistent bash subprocess."""
+ _SENTINEL = "<<exit>>"
-def ease_in_elastic(t: float) -> float:
- """Elastic ease-in (spring effect)."""
- if t == 0 or t == 1:
- return t
- return -math.pow(2, 10 * (t - 1)) * math.sin((t - 1.1) * 5 * math.pi)
+ async def run(self, command: str) -> tuple[str, str]:
+ if not self._started:
+ await self.start()
+ # Clear any leftover output in the buffer
+ await self._clear_output()
-def ease_out_elastic(t: float) -> float:
- """Elastic ease-out (spring effect)."""
- if t == 0 or t == 1:
- return t
- return math.pow(2, -10 * t) * math.sin((t - 0.1) * 5 * math.pi) + 1
+ # Send command + sentinel
+ assert self._process.stdin
+ self._process.stdin.write(
+ command.encode() + f"; echo '{self._SENTINEL}'\n".encode()
+ )
+ await self._process.stdin.drain()
+ # Collect output until sentinel
+ output_parts = []
+ async with asyncio.timeout(self._timeout):
+ async for line in self._process.stdout:
+ decoded = line.decode("utf-8", errors="replace")
+ if self._SENTINEL in decoded:
+ break
+ output_parts.append(decoded)
-def ease_in_out_elastic(t: float) -> float:
- """Elastic ease-in-out."""
- if t == 0 or t == 1:
- return t
- t = t * 2 - 1
- if t < 0:
- return -0.5 * math.pow(2, 10 * t) * math.sin((t - 0.1) * 5 * math.pi)
- return math.pow(2, -10 * t) * math.sin((t - 0.1) * 5 * math.pi) * 0.5 + 1
+ return "".join(output_parts), ""
+```
+If a command times out (default 120 seconds), the session raises `TimeoutError`. The `BashTool20250124.__call__` method catches this and returns a `ToolFailure` with instructions for Claude to restart the session.
+
+## ComputerTool: Action Dispatch and Coordinate Scaling
+
+The `__call__` method in `ComputerTool` is a large dispatch pattern. After validating the action type and required parameters, it routes to the appropriate handler:
+
+```python
+async def __call__(self, *, action: Action, **kwargs) -> ToolResult:
+ if action == "screenshot":
+ return await self.screenshot()
+ elif action == "key":
+ return await self.key(kwargs["text"])
+ elif action == "type":
+ return await self.type(kwargs["text"])
+ elif action in ("left_click", "right_click", "middle_click",
+ "double_click", "triple_click", "mouse_move"):
+ x, y = self.scale_coordinates(
+ ScalingSource.API, *kwargs["coordinate"]
+ )
+ # execute xdotool command for the action
+ ...
+ elif action == "scroll":
+ x, y = self.scale_coordinates(
+ ScalingSource.API, *kwargs["coordinate"]
+ )
+ # execute xdotool scroll command
+ ...
+ elif action == "zoom":
+ # zoom around a coordinate
+ ...
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+The `screenshot()` method captures the display with `gnome-screenshot` (preferred) or falls back to `scrot`, reads the PNG file, base64-encodes it, and returns it in a `ToolResult`:
+
+```python
+async def screenshot(self) -> ToolResult:
+ output_dir = Path(OUTPUT_DIR)
+ output_dir.mkdir(parents=True, exist_ok=True)
+ path = output_dir / "screenshot.png"
+
+ # Try gnome-screenshot first, fall back to scrot
+ screenshot_cmd = f"gnome-screenshot -f {path} -d 0"
+ result = await self.shell(screenshot_cmd, take_screenshot=False)
+ if result.error or not path.exists():
+ result = await self.shell(f"scrot -p {path}", take_screenshot=False)
+
+ if path.exists():
+ return ToolResult(
+ base64_image=base64.standard_b64encode(path.read_bytes()).decode()
+ )
+ return ToolResult(error=f"Failed to take screenshot: {result.error}")
+```
-### `skills/slack-gif-creator/core/easing.py`
+## EditTool: Safe File Manipulation
-The `ease_in_elastic` function in [`skills/slack-gif-creator/core/easing.py`](https://github.com/anthropics/skills/blob/HEAD/skills/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+`EditTool20250728` enforces important constraints:
-```py
+- **Absolute paths only**: relative paths return an error immediately
+- **Unique string replacement**: `str_replace` fails if the target string appears 0 or 2+ times
+- **Context snippets**: every edit shows 4 lines before/after the changed region
+- **File history**: tracks changes for potential undo (stored in `_file_history`)
+```python
+async def __call__(self, *, command: Command, path: str, **kwargs) -> ToolResult:
+ _path = Path(path)
+ self.validate_path(command, _path)
-def ease_in_elastic(t: float) -> float:
- """Elastic ease-in (spring effect)."""
- if t == 0 or t == 1:
- return t
- return -math.pow(2, 10 * (t - 1)) * math.sin((t - 1.1) * 5 * math.pi)
+ if command == "view":
+ return self.view(_path, kwargs.get("view_range"))
+ elif command == "create":
+ return self.write_file(_path, kwargs["file_text"])
+ elif command == "str_replace":
+ return self.str_replace(_path, kwargs["old_str"], kwargs["new_str"])
+ elif command == "insert":
+ return self.insert(_path, kwargs["insert_line"], kwargs["new_str"])
+```
+## Building a Custom Tool
+
+To add a custom tool to the agents quickstart pattern:
+
+```python
+from dataclasses import dataclass
+from computer_use_demo.tools.base import BaseAnthropicTool, ToolResult
+
+@dataclass
+class DatabaseQueryTool(BaseAnthropicTool):
+ """Tool for querying a read-only database."""
+
+ connection_string: str
+
+ def to_params(self):
+ return {
+ "name": "database_query",
+ "description": "Execute a read-only SQL query",
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "SQL SELECT statement"
+ }
+ },
+ "required": ["query"]
+ }
+ }
+
+ async def __call__(self, *, query: str, **kwargs) -> ToolResult:
+ if not query.strip().upper().startswith("SELECT"):
+ return ToolResult(error="Only SELECT queries are permitted")
+ try:
+ # execute query against self.connection_string
+ results = await execute_query(self.connection_string, query)
+ return ToolResult(output=json.dumps(results, indent=2))
+ except Exception as e:
+ return ToolResult(error=str(e))
+```
-def ease_out_elastic(t: float) -> float:
- """Elastic ease-out (spring effect)."""
- if t == 0 or t == 1:
- return t
- return math.pow(2, -10 * t) * math.sin((t - 0.1) * 5 * math.pi) + 1
+Register it alongside the built-in tools:
+```python
+tool_collection = ToolCollection(
+ ComputerTool(width, height, display_num),
+ BashTool(),
+ EditTool(),
+ DatabaseQueryTool(connection_string=os.environ["DB_URL"]),
+)
+```
-def ease_in_out_elastic(t: float) -> float:
- """Elastic ease-in-out."""
- if t == 0 or t == 1:
- return t
- t = t * 2 - 1
- if t < 0:
- return -0.5 * math.pow(2, 10 * t) * math.sin((t - 0.1) * 5 * math.pi)
- return math.pow(2, -10 * t) * math.sin((t - 0.1) * 5 * math.pi) * 0.5 + 1
+## Tool Design Checklist
+| Rule | Reason |
+|:-----|:-------|
+| Return `ToolResult(error=...)` rather than raising exceptions | `ToolCollection.run()` only catches `ToolError`; uncaught exceptions kill the loop |
+| Keep `to_params()` schemas as narrow as possible | Overly broad schemas cause Claude to pass invalid inputs |
+| Make tools idempotent where feasible | The loop may retry on timeout; side effects should not compound |
+| Never block the event loop in `__call__` | All tools are called with `await`; use `asyncio.to_thread` for sync I/O |
+| Validate all inputs before executing | Return a clear error message so Claude can correct itself |
-# Convenience mapping
-EASING_FUNCTIONS = {
- "linear": linear,
- "ease_in": ease_in_quad,
- "ease_out": ease_out_quad,
- "ease_in_out": ease_in_out_quad,
-```
+## Summary
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+Tools in the quickstarts follow a strict three-part contract: a `to_params()` schema for the API, a `__call__` implementation returning `ToolResult`, and registration in a `ToolCollection`. The BashTool's sentinel pattern solves persistent-session output detection. The ComputerTool's coordinate scaling bridges API coordinates to real display coordinates. The EditTool's uniqueness enforcement prevents accidental multi-site edits.
+Next: [Chapter 5: Multi-Turn Conversation Patterns](05-production-skills.md)
-## How These Components Connect
+---
-```mermaid
-flowchart TD
- A[ease_in_bounce]
- B[ease_out_bounce]
- C[ease_in_out_bounce]
- D[ease_in_elastic]
- E[ease_out_elastic]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 3: Computer Use Deep-Dive](03-advanced-skill-design.md)
+- [Next Chapter: Chapter 5: Multi-Turn Conversation Patterns](05-production-skills.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/05-production-skills.md b/tutorials/anthropic-skills-tutorial/05-production-skills.md
index c56f5495..f8126d63 100644
--- a/tutorials/anthropic-skills-tutorial/05-production-skills.md
+++ b/tutorials/anthropic-skills-tutorial/05-production-skills.md
@@ -1,311 +1,303 @@
---
layout: default
-title: "Chapter 5: Production Skills"
+title: "Chapter 5: Multi-Turn Conversation Patterns"
nav_order: 5
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 5: Production Skills
-
-Welcome to **Chapter 5: Production Skills**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-Production skill systems prioritize predictability over novelty.
-
-## Define Output Contracts First
-
-Every production skill should define:
-
-- required sections
-- required fields
-- accepted enum values
-- maximum lengths
-- failure behavior
-
-Example contract fragment:
-
-```yaml
-output:
- format: markdown
- required_sections:
- - executive_summary
- - risk_register
- - action_items
- action_item_fields:
- - owner
- - due_date
- - severity
-```
-
-## Deterministic Transformation Layer
-
-Push high-risk transformations into scripts:
-
-- numeric calculations
-- date normalization
-- schema mapping
-- cross-system ID handling
-
-Keep natural language synthesis for summarization and explanation, not critical arithmetic or routing logic.
-
-## Document Generation Workflows
-
-The official skills repo includes document-focused references. A stable pattern is:
-
-1. Generate intermediate structured JSON.
-2. Validate schema.
-3. Render final artifacts (DOCX/PDF/PPTX/XLSX) via script.
-4. Return validation report with artifact metadata.
-
-## Reliability Checklist
-
-- Idempotent run identifiers
-- Retry-safe script steps
-- Explicit timeout budgets
-- Structured error taxonomy
-- Artifact checksums for integrity
-
-## Security Checklist
-
-- Never embed secrets in skill instructions
-- Restrict script execution environment
-- Validate all external inputs
-- Redact sensitive logs
-- Track skill ownership and on-call routing
-
-## Summary
-
-You now have the backbone for operating skills in business-critical workflows.
-
-Next: [Chapter 6: Best Practices](06-best-practices.md)
+# Chapter 5: Multi-Turn Conversation Patterns
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `output`, `format`, `markdown` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 5: Production Skills` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `required_sections`, `executive_summary`, `risk_register` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 5: Production Skills` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `output`.
-2. **Input normalization**: shape incoming data so `format` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `markdown`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
-
-Suggested trace strategy:
-- search upstream code for `output` and `format` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 4: Integration Platforms](04-integration-platforms.md)
-- [Next Chapter: Chapter 6: Best Practices](06-best-practices.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/pptx/scripts/thumbnail.py`
-
-The `get_slide_info` function in [`skills/pptx/scripts/thumbnail.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/thumbnail.py) handles a key part of this chapter's functionality:
+Stateless single-turn calls to Claude are simple. Multi-turn conversations that include tool use, screenshots, and long reasoning traces are not. The problems compound quickly: context windows fill up with screenshots, costs rise with every token, and conversation history must be maintained in the right format or Claude loses coherence. This chapter covers how the quickstarts manage multi-turn state, how prompt caching slashes costs, how image truncation prevents context overflow, and how the `autonomous-coding` quickstart maintains state across completely separate sessions.
-```py
+## How Multi-Turn State Works
- try:
- slide_info = get_slide_info(input_path)
+The Claude API is stateless. Every request must include the full conversation history in the `messages` array. In the sampling loop, this array grows with every turn:
- with tempfile.TemporaryDirectory() as temp_dir:
- temp_path = Path(temp_dir)
- visible_images = convert_to_images(input_path, temp_path)
-
- if not visible_images and not any(s["hidden"] for s in slide_info):
- print("Error: No slides found", file=sys.stderr)
- sys.exit(1)
-
- slides = build_slide_list(slide_info, visible_images, temp_path)
-
- grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)
-
- print(f"Created {len(grid_files)} grid(s):")
- for grid_file in grid_files:
- print(f" {grid_file}")
-
- except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
- sys.exit(1)
-
-
-def get_slide_info(pptx_path: Path) -> list[dict]:
- with zipfile.ZipFile(pptx_path, "r") as zf:
- rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
- rels_dom = defusedxml.minidom.parseString(rels_content)
-
- rid_to_slide = {}
- for rel in rels_dom.getElementsByTagName("Relationship"):
-```
-
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pptx/scripts/thumbnail.py`
-
-The `build_slide_list` function in [`skills/pptx/scripts/thumbnail.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/thumbnail.py) handles a key part of this chapter's functionality:
-
-```py
- sys.exit(1)
-
- slides = build_slide_list(slide_info, visible_images, temp_path)
-
- grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)
-
- print(f"Created {len(grid_files)} grid(s):")
- for grid_file in grid_files:
- print(f" {grid_file}")
-
- except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
- sys.exit(1)
-
-
-def get_slide_info(pptx_path: Path) -> list[dict]:
- with zipfile.ZipFile(pptx_path, "r") as zf:
- rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
- rels_dom = defusedxml.minidom.parseString(rels_content)
-
- rid_to_slide = {}
- for rel in rels_dom.getElementsByTagName("Relationship"):
- rid = rel.getAttribute("Id")
- target = rel.getAttribute("Target")
- rel_type = rel.getAttribute("Type")
- if "slide" in rel_type and target.startswith("slides/"):
- rid_to_slide[rid] = target.replace("slides/", "")
-
- pres_content = zf.read("ppt/presentation.xml").decode("utf-8")
- pres_dom = defusedxml.minidom.parseString(pres_content)
-
- slides = []
+```text
+messages = [
+ {"role": "user", "content": "Open Firefox"}, # turn 1
+ {"role": "assistant", "content": [tool_use{screenshot}]}, # turn 1 response
+ {"role": "user", "content": [tool_result{image}]}, # turn 2
+ {"role": "assistant", "content": [tool_use{left_click}]}, # turn 2 response
+ {"role": "user", "content": [tool_result{}]}, # turn 3
+ ...
+]
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+Without management, a computer-use session that takes 50 screenshots will accumulate ~50 large base64 image blocks in memory and in every subsequent API request. Because the full history is resent each turn, per-request cost and latency grow linearly with session length — and total session cost grows quadratically.
-### `skills/pptx/scripts/thumbnail.py`
+## Image Truncation: `_maybe_filter_to_n_most_recent_images`
-The `create_hidden_placeholder` function in [`skills/pptx/scripts/thumbnail.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/thumbnail.py) handles a key part of this chapter's functionality:
+The function `_maybe_filter_to_n_most_recent_images` in `computer_use_demo/loop.py` addresses this by removing older screenshots from the messages list while preserving all text content:
-```py
- if info["hidden"]:
- placeholder_path = temp_dir / f"hidden-{info['name']}.jpg"
- placeholder_img = create_hidden_placeholder(placeholder_size)
- placeholder_img.save(placeholder_path, "JPEG")
- slides.append((placeholder_path, f"{info['name']} (hidden)"))
- else:
- if visible_idx < len(visible_images):
- slides.append((visible_images[visible_idx], info["name"]))
- visible_idx += 1
+```python
+def _maybe_filter_to_n_most_recent_images(
+ messages: list[BetaMessageParam],
+    images_to_keep: int | None,
+ min_removal_threshold: int = 10,
+) -> None:
+ """
+ Modify messages in place to keep only the N most recent screenshots.
+ Preserves all text blocks and tool results that have no image content.
+ """
+ if images_to_keep is None:
+ return
- return slides
-
-
-def create_hidden_placeholder(size: tuple[int, int]) -> Image.Image:
- img = Image.new("RGB", size, color="#F0F0F0")
- draw = ImageDraw.Draw(img)
- line_width = max(5, min(size) // 100)
- draw.line([(0, 0), size], fill="#CCCCCC", width=line_width)
- draw.line([(size[0], 0), (0, size[1])], fill="#CCCCCC", width=line_width)
- return img
-
-
-def convert_to_images(pptx_path: Path, temp_dir: Path) -> list[Path]:
- pdf_path = temp_dir / f"{pptx_path.stem}.pdf"
-
- result = subprocess.run(
+ tool_result_blocks = cast(
+ list[ToolResultBlockParam],
[
- "soffice",
- "--headless",
- "--convert-to",
- "pdf",
- "--outdir",
+ item
+ for message in messages
+ for item in (
+ message["content"] if isinstance(message["content"], list) else []
+ )
+ if isinstance(item, dict) and item.get("type") == "tool_result"
+ ],
+ )
+
+ total_images = sum(
+ 1
+ for tool_result in tool_result_blocks
+ for content in (
+ tool_result.get("content") or []
+ )
+ if isinstance(content, dict) and content.get("type") == "image"
+ )
+
+ images_to_remove = total_images - images_to_keep
+ if images_to_remove < min_removal_threshold:
+ return # Not enough images to bother removing
+
+ # Walk through tool_result_blocks oldest-first, removing image blocks
+ for tool_result in tool_result_blocks:
+ if images_to_remove <= 0:
+ break
+ new_content = []
+ for content in tool_result.get("content") or []:
+ if (
+ isinstance(content, dict)
+ and content.get("type") == "image"
+ and images_to_remove > 0
+ ):
+ images_to_remove -= 1
+ else:
+ new_content.append(content)
+ tool_result["content"] = new_content
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+The Streamlit sidebar exposes "Only send N most recent screenshots" as a user-configurable option. Setting it to 3–5 is a good default for most sessions: Claude retains enough visual context for the current task but does not accumulate megabytes of older screenshots.
+
+## Prompt Caching: `_inject_prompt_caching`
+
+Prompt caching allows the API to cache the computation for stable message prefixes and charge only 10% of the normal input token rate for cache hits. The quickstart's `_inject_prompt_caching` function adds `cache_control: {"type": "ephemeral"}` markers to the three most recent conversation turns:
+
+```python
+def _inject_prompt_caching(
+ messages: list[BetaMessageParam],
+) -> None:
+ """
+    Set cache breakpoints on the 3 most recent user turns.
+ Older turns are left without cache_control, so they are not candidates
+ for fresh caching but may still benefit from existing cache entries.
+ """
+ breakpoints_remaining = 3
+ for message in reversed(messages):
+ if message["role"] == "user" and isinstance(
+ message["content"], list
+ ):
+ if breakpoints_remaining == 0:
+ # Remove cache_control from older messages so they
+ # don't generate unnecessary new cache entries
+ message["content"][-1].pop("cache_control", None)
+ else:
+ message["content"][-1]["cache_control"] = {"type": "ephemeral"}
+ breakpoints_remaining -= 1
+```
-### `skills/pptx/scripts/thumbnail.py`
+**Why 3 breakpoints?** The Claude API supports up to 4 cache breakpoints per request. Using 3 for conversation turns leaves room for the system prompt to be cached separately.
-The `convert_to_images` function in [`skills/pptx/scripts/thumbnail.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/thumbnail.py) handles a key part of this chapter's functionality:
+**When caching helps most**: in long computer-use sessions where the first 10+ turns remain stable in the context while the agent works on a specific task. In practice, a 50-turn session with caching enabled can reduce input costs by 60–80%.
-```py
- with tempfile.TemporaryDirectory() as temp_dir:
- temp_path = Path(temp_dir)
- visible_images = convert_to_images(input_path, temp_path)
+```mermaid
+flowchart LR
+ subgraph "Without Caching"
+ R1[Request 1\n1000 tokens] --> |full price| API1[API]
+ R2[Request 2\n1200 tokens] --> |full price| API2[API]
+ R3[Request 3\n1400 tokens] --> |full price| API3[API]
+ end
+
+ subgraph "With Caching"
+ RC1[Request 1\n1000 tokens\ncache_control] --> |full price| APIC1[API\ncaches prefix]
+ RC2[Request 2\n1200 tokens\ncache_control] --> |200 new + 1000 cached 10%| APIC2[API]
+ RC3[Request 3\n1400 tokens\ncache_control] --> |200 new + 1200 cached 10%| APIC3[API]
+ end
+```
- if not visible_images and not any(s["hidden"] for s in slide_info):
- print("Error: No slides found", file=sys.stderr)
- sys.exit(1)
+## Extended Thinking Budget
- slides = build_slide_list(slide_info, visible_images, temp_path)
+The Streamlit sidebar includes a "Thinking budget" setting. When set above 0, the API request includes:
- grid_files = create_grids(slides, cols, THUMBNAIL_WIDTH, output_path)
+```python
+thinking: {"type": "enabled", "budget_tokens": thinking_budget}
+```
- print(f"Created {len(grid_files)} grid(s):")
- for grid_file in grid_files:
- print(f" {grid_file}")
+Extended thinking allows Claude to reason through complex multi-step tasks before committing to actions. For computer use, this is particularly useful for tasks that require navigating unfamiliar UIs or reasoning about dependencies between steps. The tradeoff is additional latency and token cost for the thinking blocks.
+
+## Message History Truncation in the Agents Quickstart
+
+The `agents/` quickstart implements a simpler form of context management. From `agent.py`:
+
+```python
+class Agent:
+ def _prepare_messages(
+ self,
+ messages: list[dict],
+ max_context: int | None = None,
+ ) -> list[dict]:
+ """Truncate message history if it exceeds the context window."""
+ if max_context is None:
+ max_context = self.config.context_window # default: 180,000 tokens
+
+ # Rough token estimate: 4 chars ≈ 1 token
+ total_chars = sum(
+ len(str(m.get("content", ""))) for m in messages
+ )
+ estimated_tokens = total_chars // 4
+
+ if estimated_tokens <= max_context:
+ return messages
+
+ # Keep the system message and the most recent messages
+ # Never remove the first message (usually the task description)
+ truncated = [messages[0]] # always keep first
+ for msg in reversed(messages[1:]):
+ truncated_chars = sum(len(str(m.get("content", ""))) for m in truncated)
+ if truncated_chars // 4 + len(str(msg.get("content", ""))) // 4 < max_context:
+ truncated.insert(1, msg)
+ else:
+ break
+ return truncated
+```
- except Exception as e:
- print(f"Error: {e}", file=sys.stderr)
- sys.exit(1)
+## Cross-Session State: autonomous-coding
+
+The `autonomous-coding` quickstart solves a harder problem: how do you maintain agent state across completely separate process invocations, potentially days apart?
+
+The answer is file-based state in `feature_list.json`:
+
+```json
+{
+ "features": [
+ {
+ "id": "feat-001",
+ "description": "User authentication with JWT",
+ "status": "completed",
+ "completed_at": "2025-03-15T14:23:00Z",
+ "git_commit": "abc1234"
+ },
+ {
+ "id": "feat-002",
+ "description": "Product listing page with pagination",
+ "status": "in_progress"
+ },
+ {
+ "id": "feat-003",
+ "description": "Shopping cart with local storage",
+ "status": "pending"
+ }
+ ]
+}
+```
+Each coding-agent session reads this file, picks up where the previous session left off, implements the next batch of `pending` features, commits to git, and updates the file. The git history provides an additional audit trail.
-def get_slide_info(pptx_path: Path) -> list[dict]:
- with zipfile.ZipFile(pptx_path, "r") as zf:
- rels_content = zf.read("ppt/_rels/presentation.xml.rels").decode("utf-8")
- rels_dom = defusedxml.minidom.parseString(rels_content)
+```mermaid
+flowchart TD
+ SPEC["spec.md\n(project requirements)"]
+ INIT["Initializer Agent\n(one-time)"]
+ FL["feature_list.json\n+ test suite"]
+
+ INIT -->|reads| SPEC
+ INIT -->|writes| FL
+
+ CA1["Coding Agent Session 1"]
+ CA2["Coding Agent Session 2"]
+ CA3["Coding Agent Session N"]
+
+ FL -->|reads pending features| CA1
+ CA1 -->|marks completed, git commit| FL
+ FL -->|reads pending features| CA2
+ CA2 -->|marks completed, git commit| FL
+ FL -->|reads pending features| CA3
+```
- rid_to_slide = {}
- for rel in rels_dom.getElementsByTagName("Relationship"):
- rid = rel.getAttribute("Id")
- target = rel.getAttribute("Target")
- rel_type = rel.getAttribute("Type")
- if "slide" in rel_type and target.startswith("slides/"):
+Security model: each Claude Code session runs with OS-level sandboxing that restricts bash commands to an allowlist (npm, git, specific file operations). Network access is controlled. The orchestrator Python script can set `--max-iterations` to cap how many features a single session implements, providing a natural checkpoint for human review.
+
+## Streaming vs. Non-Streaming
+
+The quickstarts take different approaches to streaming:
+
+| Project | Approach | Reason |
+|:--------|:---------|:-------|
+| `computer-use-demo` | Non-streaming (full response) | Tool results require complete responses before execution |
+| `customer-support-agent` | Streaming via `stream()` | Real-time character-by-character display improves perceived UX |
+| `financial-data-analyst` | Streaming | Same as above |
+| `agents/` | Non-streaming | Simplicity; educational reference implementation |
+
+The customer support and financial analyst quickstarts use Next.js Edge Runtime with the Anthropic SDK's streaming support:
+
+```typescript
+// From customer-support-agent/app/api/chat/route.ts (simplified)
+import Anthropic from "@anthropic-ai/sdk";
+
+const client = new Anthropic();
+
+export async function POST(req: Request) {
+ const { messages } = await req.json();
+
+ const stream = await client.messages.stream({
+ model: "claude-opus-4-20250514",
+ max_tokens: 8096,
+ system: SYSTEM_PROMPT,
+ messages,
+ });
+
+ // Return as Server-Sent Events
+ return new Response(
+ new ReadableStream({
+ async start(controller) {
+ for await (const chunk of stream) {
+ controller.enqueue(
+ new TextEncoder().encode(`data: ${JSON.stringify(chunk)}\n\n`)
+ );
+ }
+ controller.close();
+ },
+ }),
+ {
+ headers: {
+ "Content-Type": "text/event-stream",
+ "Cache-Control": "no-cache",
+ Connection: "keep-alive",
+ },
+ }
+ );
+}
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+## Summary
+Multi-turn conversation management in the quickstarts involves three distinct concerns: image truncation to prevent context overflow, prompt caching to reduce costs on stable prefixes, and message history management to stay within the context window. The autonomous-coding quickstart adds a fourth concern — cross-session persistence — which it solves with file-based state and git commits rather than an external database.
-## How These Components Connect
+Next: [Chapter 6: MCP Integration](06-best-practices.md)
-```mermaid
-flowchart TD
- A[get_slide_info]
- B[build_slide_list]
- C[create_hidden_placeholder]
- D[convert_to_images]
- E[create_grids]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+---
+
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 4: Tool Use Patterns](04-integration-platforms.md)
+- [Next Chapter: Chapter 6: MCP Integration](06-best-practices.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/06-best-practices.md b/tutorials/anthropic-skills-tutorial/06-best-practices.md
index 0e62ea29..aa0f6207 100644
--- a/tutorials/anthropic-skills-tutorial/06-best-practices.md
+++ b/tutorials/anthropic-skills-tutorial/06-best-practices.md
@@ -1,296 +1,303 @@
---
layout: default
-title: "Chapter 6: Best Practices"
+title: "Chapter 6: MCP Integration"
nav_order: 6
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 6: Best Practices
-
-Welcome to **Chapter 6: Best Practices**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-Strong skills are explicit, testable, and easy to review.
-
-## Authoring Principles
-
-- Prefer concrete verbs over broad goals.
-- Define what to do when inputs are missing.
-- State prohibited actions directly.
-- Include examples for tricky edge cases.
-
-## Testing Strategy
-
-Use three test layers:
-
-1. **Golden tests**: stable prompts with expected output shape
-2. **Adversarial tests**: malformed or ambiguous inputs
-3. **Regression tests**: replay historical failures
-
-Keep test fixtures in version control with the skill.
-
-## Versioning and Changelogs
-
-Treat prompt changes as code changes.
-
-- Use semantic versioning for skills distributed broadly.
-- Keep a changelog with behavioral deltas.
-- Call out breaking output changes explicitly.
-
-## Review Checklist
-
-| Check | Why |
-|:------|:----|
-| Output contract unchanged or migrated | Prevent downstream breakage |
-| References updated and valid | Avoid stale policy behavior |
-| Script interfaces still compatible | Prevent runtime failures |
-| Security notes updated | Keep operators informed |
-
-## Observability
-
-Capture at least:
-
-- skill name + version
-- request category
-- validation pass/fail
-- major error class
-- latency/cost envelope
-
-This data is essential for continuous improvement.
-
-## Summary
-
-You now have a concrete quality system for maintaining skills over time.
-
-Next: [Chapter 7: Publishing and Sharing](07-publishing-sharing.md)
+# Chapter 6: MCP Integration
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for core abstractions in this chapter so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 6: Best Practices` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around execution and reliability details as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 6: Best Practices` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `core component`.
-2. **Input normalization**: shape incoming data so `execution layer` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `state model`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
-
-Suggested trace strategy:
-- search upstream code for `Best` and `Practices` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 5: Production Skills](05-production-skills.md)
-- [Next Chapter: Chapter 7: Publishing and Sharing](07-publishing-sharing.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/pptx/scripts/clean.py`
-
-The `remove_orphaned_rels_files` function in [`skills/pptx/scripts/clean.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/clean.py) handles a key part of this chapter's functionality:
-
-```py
+The `agents/` quickstart demonstrates a critical architectural decision point: should a tool be implemented as a native Python function in your agent codebase, or should it be exposed via the Model Context Protocol (MCP)? MCP tools can be shared across agents, deployed as standalone servers, updated without redeploying the agent, and consumed by any MCP-compatible client — not just your specific agent. This chapter explains MCP as implemented in the quickstart, how connections are established, how tools are discovered and called, and when to prefer MCP over native tools.
+## What MCP Is
-def remove_orphaned_rels_files(unpacked_dir: Path) -> list[str]:
- resource_dirs = ["charts", "diagrams", "drawings"]
- removed = []
- slide_referenced = get_slide_referenced_files(unpacked_dir)
+MCP (Model Context Protocol) is a standard for exposing tools, resources, and prompts to language models over a local or remote transport. An MCP server is a process that speaks the MCP protocol. An MCP client connects to that server and can list its tools, call them, and read its resources.
- for dir_name in resource_dirs:
- rels_dir = unpacked_dir / "ppt" / dir_name / "_rels"
- if not rels_dir.exists():
- continue
+In the context of the agents quickstart:
+- **Native tools** are Python callables defined directly in `tools/`
+- **MCP tools** are functions exposed by external MCP servers that the agent connects to at startup
- for rels_file in rels_dir.glob("*.rels"):
- resource_file = rels_dir.parent / rels_file.name.replace(".rels", "")
- try:
- resource_rel_path = resource_file.resolve().relative_to(unpacked_dir.resolve())
- except ValueError:
- continue
+The agent treats both identically when calling Claude: both appear in the `tools` array sent to the API.
- if not resource_file.exists() or resource_rel_path not in slide_referenced:
- rels_file.unlink()
- rel_path = rels_file.relative_to(unpacked_dir)
- removed.append(str(rel_path))
+## Architecture
- return removed
-
-
-def get_referenced_files(unpacked_dir: Path) -> set:
- referenced = set()
-
- for rels_file in unpacked_dir.rglob("*.rels"):
- dom = defusedxml.minidom.parse(str(rels_file))
+```mermaid
+flowchart TD
+ subgraph "Agent Process"
+ AG["Agent._agent_loop()"]
+ TC["Tool registry\n(native + MCP)"]
+ MCC["MCP Clients\n(one per server)"]
+ end
+
+ subgraph "Native Tools"
+ TH["ThinkTool"]
+ CT["Custom Tool A"]
+ end
+
+ subgraph "MCP Servers (separate processes)"
+ FS["filesystem MCP server"]
+ DB["database MCP server"]
+ WS["web-search MCP server"]
+ end
+
+ AG --> TC
+ TC --> TH
+ TC --> CT
+ TC --> MCC
+ MCC -->|stdio or SSE| FS
+ MCC -->|stdio or SSE| DB
+ MCC -->|stdio or SSE| WS
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pptx/scripts/clean.py`
-
-The `get_referenced_files` function in [`skills/pptx/scripts/clean.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/clean.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def get_referenced_files(unpacked_dir: Path) -> set:
- referenced = set()
-
- for rels_file in unpacked_dir.rglob("*.rels"):
- dom = defusedxml.minidom.parse(str(rels_file))
- for rel in dom.getElementsByTagName("Relationship"):
- target = rel.getAttribute("Target")
- if not target:
- continue
- target_path = (rels_file.parent.parent / target).resolve()
- try:
- referenced.add(target_path.relative_to(unpacked_dir.resolve()))
- except ValueError:
- pass
-
- return referenced
-
-
-def remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[str]:
- resource_dirs = ["media", "embeddings", "charts", "diagrams", "tags", "drawings", "ink"]
- removed = []
-
- for dir_name in resource_dirs:
- dir_path = unpacked_dir / "ppt" / dir_name
- if not dir_path.exists():
- continue
-
- for file_path in dir_path.glob("*"):
- if not file_path.is_file():
- continue
+The agent connects to MCP servers at startup, discovers their tools, and adds them to the tool registry alongside native tools. When Claude calls an MCP tool, the agent routes the call through the appropriate MCP client.
+
+## Setting Up MCP Connections
+
+From `agents/utils/mcp.py` (simplified):
+
+```python
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+from contextlib import AsyncExitStack
+
+async def setup_mcp_connections(
+ server_configs: list[dict],
+) -> tuple[list[dict], AsyncExitStack]:
+ """
+ Connect to MCP servers and return their combined tool list.
+
+ server_configs format:
+ [
+ {"command": "uvx", "args": ["mcp-server-filesystem", "/tmp"]},
+ {"command": "node", "args": ["path/to/mcp-server.js"]}
+ ]
+ """
+ exit_stack = AsyncExitStack()
+ all_tools = []
+
+ for config in server_configs:
+ server_params = StdioServerParameters(
+ command=config["command"],
+ args=config.get("args", []),
+ env=config.get("env"),
+ )
+ stdio_transport = await exit_stack.enter_async_context(
+ stdio_client(server_params)
+ )
+ session = await exit_stack.enter_async_context(
+ ClientSession(*stdio_transport)
+ )
+ await session.initialize()
+
+ # Discover available tools from this server
+ tools_response = await session.list_tools()
+ for tool in tools_response.tools:
+ all_tools.append({
+ "session": session,
+ "name": tool.name,
+ "description": tool.description,
+ "input_schema": tool.inputSchema,
+ })
+
+ return all_tools, exit_stack
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pptx/scripts/clean.py`
-
-The `remove_orphaned_files` function in [`skills/pptx/scripts/clean.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/clean.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def remove_orphaned_files(unpacked_dir: Path, referenced: set) -> list[str]:
- resource_dirs = ["media", "embeddings", "charts", "diagrams", "tags", "drawings", "ink"]
- removed = []
-
- for dir_name in resource_dirs:
- dir_path = unpacked_dir / "ppt" / dir_name
- if not dir_path.exists():
- continue
+The `exit_stack` pattern ensures all server connections are cleaned up when the agent shuts down, even if an exception occurs.
- for file_path in dir_path.glob("*"):
- if not file_path.is_file():
- continue
- rel_path = file_path.relative_to(unpacked_dir)
- if rel_path not in referenced:
- file_path.unlink()
- removed.append(str(rel_path))
+## Tool Discovery and Registration
- theme_dir = unpacked_dir / "ppt" / "theme"
- if theme_dir.exists():
- for file_path in theme_dir.glob("theme*.xml"):
- rel_path = file_path.relative_to(unpacked_dir)
- if rel_path not in referenced:
- file_path.unlink()
- removed.append(str(rel_path))
- theme_rels = theme_dir / "_rels" / f"{file_path.name}.rels"
- if theme_rels.exists():
- theme_rels.unlink()
- removed.append(str(theme_rels.relative_to(unpacked_dir)))
+After connecting, the agent converts MCP tool definitions into the format Claude expects:
- notes_dir = unpacked_dir / "ppt" / "notesSlides"
+```python
+def mcp_tool_to_claude_format(mcp_tool: dict) -> dict:
+ """Convert MCP tool definition to Anthropic API tool format."""
+ return {
+ "name": mcp_tool["name"],
+ "description": mcp_tool["description"],
+ "input_schema": mcp_tool["input_schema"],
+ }
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pptx/scripts/clean.py`
-
-The `update_content_types` function in [`skills/pptx/scripts/clean.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pptx/scripts/clean.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def update_content_types(unpacked_dir: Path, removed_files: list[str]) -> None:
- ct_path = unpacked_dir / "[Content_Types].xml"
- if not ct_path.exists():
- return
-
- dom = defusedxml.minidom.parse(str(ct_path))
- changed = False
-
- for override in list(dom.getElementsByTagName("Override")):
- part_name = override.getAttribute("PartName").lstrip("/")
- if part_name in removed_files:
- if override.parentNode:
- override.parentNode.removeChild(override)
- changed = True
-
- if changed:
- with open(ct_path, "wb") as f:
- f.write(dom.toxml(encoding="utf-8"))
-
+MCP's `inputSchema` field is already in JSON Schema format, so it maps directly to the `input_schema` field in Claude's tool definition. No conversion is needed beyond the key rename.
+
+## Calling MCP Tools
+
+When Claude's response includes a `tool_use` block for an MCP-backed tool, the agent routes the call to the correct session:
+
+```python
+async def execute_tools(
+ self,
+ tool_calls: list[dict],
+ mcp_sessions: dict[str, ClientSession],
+) -> list[dict]:
+ """Execute tool calls, routing MCP tools to the right session."""
+ results = []
+ for call in tool_calls:
+ tool_name = call["name"]
+ tool_input = call["input"]
+
+ if tool_name in mcp_sessions:
+ # MCP tool
+ session = mcp_sessions[tool_name]
+ result = await session.call_tool(tool_name, tool_input)
+ output = result.content[0].text if result.content else ""
+ results.append({
+ "type": "tool_result",
+ "tool_use_id": call["id"],
+ "content": output,
+ })
+ else:
+ # Native tool
+ native_result = await self._native_tools[tool_name](**tool_input)
+ results.append({
+ "type": "tool_result",
+ "tool_use_id": call["id"],
+ "content": native_result.output or native_result.error or "",
+ "is_error": bool(native_result.error),
+ })
+
+ return results
+```
-def clean_unused_files(unpacked_dir: Path) -> list[str]:
- all_removed = []
+## The ThinkTool Pattern
+
+The `agents/` quickstart includes a `ThinkTool` as the primary example of a native tool. It is deliberately trivial — it just echoes back the input — but it demonstrates an important pattern: giving Claude a "scratchpad" tool for explicit reasoning before taking an action.
+
+```python
+class ThinkTool:
+ """A tool that lets Claude think through a problem explicitly."""
+
+ def to_dict(self) -> dict:
+ return {
+ "name": "think",
+ "description": (
+ "Use this tool to think through a problem step by step "
+ "before taking action. The output is not shown to the user."
+ ),
+ "input_schema": {
+ "type": "object",
+ "properties": {
+ "thought": {
+ "type": "string",
+ "description": "Your step-by-step reasoning"
+ }
+ },
+ "required": ["thought"]
+ }
+ }
+
+ async def __call__(self, thought: str) -> str:
+ # The tool does nothing — the value is the act of Claude
+ # structuring its reasoning as a tool call
+ return f"Acknowledged: {thought}"
+```
- slides_removed = remove_orphaned_slides(unpacked_dir)
- all_removed.extend(slides_removed)
+This pattern forces Claude to make its reasoning observable (the `thought` parameter appears in the API response), which aids debugging. It also reduces "acting too fast" errors where Claude takes irreversible actions without adequate reasoning.
+
+## When to Use MCP vs. Native Tools
+
+| Situation | Recommendation |
+|:----------|:---------------|
+| Tool is specific to one agent | Native tool |
+| Tool needs access to agent's in-process state | Native tool |
+| Tool will be shared across multiple agents | MCP server |
+| Tool can be maintained by a separate team | MCP server |
+| Tool needs to be hot-reloadable | MCP server |
+| Tool is available as a community MCP server | MCP server |
+| Tool requires tight latency (in-process) | Native tool |
+| Tool needs a persistent subprocess (like BashTool) | Native tool |
+
+## Configuring MCP Servers
+
+In the agents quickstart, MCP server configuration follows the same format as Claude Code's MCP configuration. An example `agent_config.json`:
+
+```json
+{
+ "mcpServers": {
+ "filesystem": {
+ "command": "uvx",
+ "args": ["mcp-server-filesystem", "/Users/me/projects"]
+ },
+ "github": {
+ "command": "uvx",
+ "args": ["mcp-server-github"],
+ "env": {
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
+ }
+ },
+ "sqlite": {
+ "command": "uvx",
+ "args": ["mcp-server-sqlite", "--db-path", "/tmp/mydb.sqlite"]
+ }
+ }
+}
+```
- trash_removed = remove_trash_directory(unpacked_dir)
- all_removed.extend(trash_removed)
+The agent reads this file at startup, connects to each server, and merges their tools into the tool registry.
+
+## Error Handling for MCP Tools
+
+MCP connections can fail at startup or mid-session. The quickstart handles this gracefully:
+
+```python
+async def call_mcp_tool_safely(
+ session: ClientSession,
+ tool_name: str,
+ tool_input: dict,
+) -> str:
+ """Call an MCP tool with error handling."""
+ try:
+ result = await session.call_tool(tool_name, tool_input)
+ if result.isError:
+ return f"Error from {tool_name}: {result.content}"
+ return result.content[0].text if result.content else ""
+ except Exception as e:
+ # Return error as string so Claude can react and try alternatives
+ return f"MCP tool {tool_name!r} failed: {str(e)}"
+```
- while True:
+Always return errors as strings rather than raising exceptions — this keeps the sampling loop running so Claude can adapt its approach.
+
+## Testing MCP Integrations
+
+Because MCP servers are separate processes, you can test the integration layer independently:
+
+```python
+# test_mcp_integration.py
+import pytest
+import asyncio
+from mcp import ClientSession, StdioServerParameters
+from mcp.client.stdio import stdio_client
+
+@pytest.mark.asyncio
+async def test_filesystem_mcp_server():
+ """Verify the filesystem MCP server lists tools correctly."""
+ params = StdioServerParameters(
+ command="uvx",
+ args=["mcp-server-filesystem", "/tmp"],
+ )
+ async with stdio_client(params) as (read, write):
+ async with ClientSession(read, write) as session:
+ await session.initialize()
+ tools = await session.list_tools()
+ tool_names = {t.name for t in tools.tools}
+ assert "read_file" in tool_names
+ assert "list_directory" in tool_names
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+## Summary
+The `agents/` quickstart demonstrates how to connect to MCP servers at startup, discover their tools, merge them with native tools, and route tool calls correctly through the sampling loop. The ThinkTool pattern provides Claude with a scratchpad for explicit reasoning. MCP is most valuable for shared, team-maintained tools; native tools are better for tight coupling to agent state or in-process performance.
-## How These Components Connect
+Next: [Chapter 7: Production Hardening](07-publishing-sharing.md)
-```mermaid
-flowchart TD
- A[remove_orphaned_rels_files]
- B[get_referenced_files]
- C[remove_orphaned_files]
- D[update_content_types]
- E[clean_unused_files]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+---
+
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 5: Multi-Turn Conversation Patterns](05-production-skills.md)
+- [Next Chapter: Chapter 7: Production Hardening](07-publishing-sharing.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/07-publishing-sharing.md b/tutorials/anthropic-skills-tutorial/07-publishing-sharing.md
index 0610eae8..69fe19bf 100644
--- a/tutorials/anthropic-skills-tutorial/07-publishing-sharing.md
+++ b/tutorials/anthropic-skills-tutorial/07-publishing-sharing.md
@@ -1,296 +1,331 @@
---
layout: default
-title: "Chapter 7: Publishing and Sharing"
+title: "Chapter 7: Production Hardening"
nav_order: 7
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
-
-# Chapter 7: Publishing and Sharing
-
-Welcome to **Chapter 7: Publishing and Sharing**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-Publishing is where many teams lose quality. The fix is strong packaging and governance.
-
-## Distribution Models
-
-| Model | Best For | Tradeoff |
-|:------|:---------|:---------|
-| Public GitHub repo | Community adoption | Requires stronger support burden |
-| Internal monorepo | Enterprise governance | Lower external discoverability |
-| Curated plugin catalog | Controlled deployment | More release process overhead |
-
-## Release Process
-
-1. Update skill version and changelog.
-2. Run regression suite.
-3. Verify references/assets integrity.
-4. Tag release and publish notes.
-5. Announce migration steps for breaking changes.
-
-## Ownership and Governance
-
-Every published skill should have:
-
-- a technical owner
-- a backup owner
-- an issue escalation path
-- a deprecation policy
-
-Without clear ownership, popular skills decay quickly.
-
-## Security and Compliance Gates
-
-Before publishing:
-
-- scan for secrets in instructions/scripts
-- verify license metadata for bundled assets
-- validate third-party dependency policy
-- confirm personally identifiable information handling
-
-## Consumer-Facing Documentation
-
-At minimum include:
-
-- when to use the skill
-- known limitations
-- input expectations
-- output contract
-- examples for successful and failed cases
-
-## Summary
-
-You can now publish skills with predictable quality and clear operational ownership.
-
-Next: [Chapter 8: Real-World Examples](08-real-world-examples.md)
+# Chapter 7: Production Hardening
## What Problem Does This Solve?
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for core abstractions in this chapter so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 7: Publishing and Sharing` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around execution and reliability details as your checklist when adapting these patterns to your own repository.
+The quickstarts are reference implementations, not production systems. The README explicitly warns that the customer support agent "is provided in a pre-release, beta, or trial form" and should not be deployed in mission-critical environments without thorough testing. This chapter identifies every pattern in the quickstarts that needs strengthening before production deployment: security isolation, authentication, retry logic, observability, provider fallback, and responsible use of computer use in multi-user environments.
-## How it Works Under the Hood
+## Security Model: Computer Use
-Under the hood, `Chapter 7: Publishing and Sharing` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `core component`.
-2. **Input normalization**: shape incoming data so `execution layer` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `state model`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
-
-Suggested trace strategy:
-- search upstream code for `Publishing` and `and` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-
-## Chapter Connections
-
-- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 6: Best Practices](06-best-practices.md)
-- [Next Chapter: Chapter 8: Real-World Examples](08-real-world-examples.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/algorithmic-art/templates/generator_template.js`
-
-The `colorFromPalette` function in [`skills/algorithmic-art/templates/generator_template.js`](https://github.com/anthropics/skills/blob/HEAD/skills/algorithmic-art/templates/generator_template.js) handles a key part of this chapter's functionality:
-
-```js
-}
-
-function colorFromPalette(index) {
- return params.colorPalette[index % params.colorPalette.length];
-}
-
-// Mapping and easing
-function mapRange(value, inMin, inMax, outMin, outMax) {
- return outMin + (outMax - outMin) * ((value - inMin) / (inMax - inMin));
-}
-
-function easeInOutCubic(t) {
- return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
-}
-
-// Constrain to bounds
-function wrapAround(value, max) {
- if (value < 0) return max;
- if (value > max) return 0;
- return value;
-}
-
-// ============================================================================
-// 7. PARAMETER UPDATES (Connect to UI)
-// ============================================================================
-
-function updateParameter(paramName, value) {
- params[paramName] = value;
- // Decide if you need to regenerate or just update
- // Some params can update in real-time, others need full regeneration
-}
+Computer use is the highest-risk quickstart. The Docker container already enforces the most important isolation boundaries, but production deployments need additional controls.
+```mermaid
+flowchart TD
+ subgraph "Production Security Model"
+ subgraph "User-Facing Layer"
+ UI["Web UI / API Gateway\n(auth, rate limiting)"]
+ end
+
+ subgraph "Agent Layer (per-session container)"
+ SL["sampling_loop()"]
+ TL["Tool Layer"]
+ end
+
+ subgraph "Isolation"
+ VM["VM / Container\n(network restricted)\n(minimal filesystem)"]
+ end
+
+ subgraph "External"
+ ALLOW["Allowlisted domains only"]
+ DENY["x All other network"]
+ end
+ end
+
+ UI -->|authenticated session| SL
+ SL --> TL
+ TL --> VM
+ VM --> ALLOW
+ VM -.->|blocked| DENY
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+**Isolation requirements for production computer use:**
-### `skills/algorithmic-art/templates/generator_template.js`
+1. **One container per session**: never share a container between users. The Docker image is already designed for this — run a new instance per user session.
-The `mapRange` function in [`skills/algorithmic-art/templates/generator_template.js`](https://github.com/anthropics/skills/blob/HEAD/skills/algorithmic-art/templates/generator_template.js) handles a key part of this chapter's functionality:
+2. **Network allowlisting**: the container runs a full browser by default. Without network controls, Claude (or injected prompts) could make arbitrary web requests. Use Docker's `--network` options or a proxy to restrict outbound access to a domain allowlist.
-```js
+3. **Credential isolation**: never mount credentials, AWS profiles, or SSH keys into the container. If Claude needs to call an API, inject a scoped, short-lived token via environment variable with narrow permissions.
-// Mapping and easing
-function mapRange(value, inMin, inMax, outMin, outMax) {
- return outMin + (outMax - outMin) * ((value - inMin) / (inMax - inMin));
-}
+4. **Prompt injection awareness**: web pages Claude visits can contain adversarial instructions. The system prompt in `loop.py` warns Claude about this, but that is not a technical control. For sensitive workflows, avoid general browsing and restrict the task scope.
-function easeInOutCubic(t) {
- return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
-}
+5. **Human confirmation gates**: for irreversible actions (file deletion, form submission, API calls with side effects), implement a confirmation step in the Streamlit callback before executing the tool result.
-// Constrain to bounds
-function wrapAround(value, max) {
- if (value < 0) return max;
- if (value > max) return 0;
- return value;
-}
+## Authentication and API Key Management
-// ============================================================================
-// 7. PARAMETER UPDATES (Connect to UI)
-// ============================================================================
+None of the quickstarts include production authentication. They assume a single trusted user passing their own API key. For multi-user deployments:
-function updateParameter(paramName, value) {
- params[paramName] = value;
- // Decide if you need to regenerate or just update
- // Some params can update in real-time, others need full regeneration
-}
+```python
+# Pattern: per-request API key validation with usage limits
+from anthropic import Anthropic
+import os
-function regenerate() {
- // Reinitialize your generative system
- // Useful when parameters change significantly
- initializeSeed(params.seed);
- // Then regenerate your system
+def get_client_for_request(request_api_key: str | None) -> Anthropic:
+ """
+ In production, you would validate the request_api_key against
+ your own user database, check usage limits, and potentially use
+ a server-side API key rather than the user's own key.
+ """
+ if request_api_key:
+ # User-provided key — validate format
+ if not request_api_key.startswith("sk-ant-"):
+ raise ValueError("Invalid API key format")
+ return Anthropic(api_key=request_api_key)
+ else:
+ # Server-side key — check that the request is authenticated
+ server_key = os.environ.get("ANTHROPIC_API_KEY")
+ if not server_key:
+ raise RuntimeError("No API key configured")
+ return Anthropic(api_key=server_key)
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/algorithmic-art/templates/generator_template.js`
+For the Next.js quickstarts, the API key should never be sent to the browser. Route all Claude calls through Next.js API routes (as both quickstarts already do) and store the key only in server-side environment variables.
+
+## Retry Logic and Rate Limit Handling
+
+The quickstart sampling loops do not implement retry logic. For production:
+
+```python
+import anthropic
+import asyncio
+import random
+
+async def api_call_with_retry(
+ client: anthropic.Anthropic,
+ *,
+ max_retries: int = 3,
+ base_delay: float = 1.0,
+ **kwargs,
+):
+ """Call the API with exponential backoff on rate limit errors."""
+ for attempt in range(max_retries + 1):
+ try:
+ return await asyncio.to_thread(
+ client.beta.messages.create, **kwargs
+ )
+ except anthropic.RateLimitError as e:
+ if attempt == max_retries:
+ raise
+ # Exponential backoff with jitter
+ delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
+ await asyncio.sleep(delay)
+ except anthropic.APIConnectionError as e:
+ if attempt == max_retries:
+ raise
+ await asyncio.sleep(base_delay)
+ except anthropic.APIStatusError as e:
+ # 529 = overloaded, retry; other 4xx = don't retry
+ if e.status_code == 529 and attempt < max_retries:
+ await asyncio.sleep(base_delay * (2 ** attempt))
+ else:
+ raise
+```
-The `easeInOutCubic` function in [`skills/algorithmic-art/templates/generator_template.js`](https://github.com/anthropics/skills/blob/HEAD/skills/algorithmic-art/templates/generator_template.js) handles a key part of this chapter's functionality:
+## Provider Fallback
+
+The computer-use and browser-use quickstarts already abstract the API provider. In production, you can use this abstraction for automatic fallback:
+
+```python
+from enum import Enum
+
+class APIProvider(str, Enum):
+ ANTHROPIC = "anthropic"
+ BEDROCK = "bedrock"
+ VERTEX = "vertex"
+
+async def get_client_with_fallback(
+ primary: APIProvider = APIProvider.ANTHROPIC,
+ fallback: APIProvider = APIProvider.BEDROCK,
+):
+ """Try primary provider; fall back to secondary on failure."""
+ try:
+ client = create_client(primary)
+ # Quick health check
+ await asyncio.to_thread(client.models.list)
+ return client, primary
+ except Exception:
+ client = create_client(fallback)
+ return client, fallback
+```
-```js
-}
+AWS Bedrock and Google Vertex provide enterprise SLAs that may exceed Anthropic's direct API availability. For mission-critical deployments, configure Bedrock as a fallback.
+
+## Observability
+
+The quickstarts include minimal observability. The computer-use Streamlit app has an "HTTP Exchange Logs" tab that shows raw API request/response JSON — useful for debugging but not for production monitoring.
+
+For production, emit structured logs from the sampling loop:
+
+```python
+import structlog
+import time
+
+logger = structlog.get_logger()
+
+async def sampling_loop_with_telemetry(
+ *,
+ session_id: str,
+ user_id: str,
+ **kwargs,
+):
+ start_time = time.monotonic()
+ total_input_tokens = 0
+ total_output_tokens = 0
+ tool_call_counts: dict[str, int] = {}
+ turn_count = 0
+
+ async def instrumented_api_callback(response):
+ nonlocal total_input_tokens, total_output_tokens, turn_count
+ turn_count += 1
+ total_input_tokens += response.usage.input_tokens
+ total_output_tokens += response.usage.output_tokens
+ logger.info(
+ "sampling_loop.turn",
+ session_id=session_id,
+ turn=turn_count,
+ input_tokens=response.usage.input_tokens,
+ output_tokens=response.usage.output_tokens,
+ stop_reason=response.stop_reason,
+ )
+
+ async def instrumented_tool_callback(result, tool_use_id):
+ nonlocal tool_call_counts
+ # Count tool calls by type
+ ...
+
+ try:
+ messages = await sampling_loop(
+ api_response_callback=instrumented_api_callback,
+ tool_output_callback=instrumented_tool_callback,
+ **kwargs,
+ )
+ duration = time.monotonic() - start_time
+ logger.info(
+ "sampling_loop.complete",
+ session_id=session_id,
+ user_id=user_id,
+ duration_seconds=round(duration, 2),
+ total_turns=turn_count,
+ total_input_tokens=total_input_tokens,
+ total_output_tokens=total_output_tokens,
+ tool_calls=tool_call_counts,
+ )
+ return messages
+ except Exception as e:
+ logger.error(
+ "sampling_loop.error",
+ session_id=session_id,
+ error=str(e),
+ error_type=type(e).__name__,
+ )
+ raise
+```
-function easeInOutCubic(t) {
- return t < 0.5 ? 4 * t * t * t : 1 - Math.pow(-2 * t + 2, 3) / 2;
-}
+## Cost Controls
+
+Computer use sessions can become expensive quickly. For production:
+
+| Control | Implementation |
+|:--------|:---------------|
+| Maximum turns per session | Add a `max_turns` counter to the sampling loop |
+| Maximum tokens per session | Track `usage.input_tokens + usage.output_tokens` and abort if exceeded |
+| Image truncation | Set `only_n_most_recent_images=5` (already supported in the loop) |
+| Prompt caching | Enable `inject_prompt_caching=True` (already supported) |
+| Model downgrade for simple tasks | Use `claude-haiku-4-20250514` unless the task requires full reasoning |
+| Session timeout | Kill containers after a wall-clock limit (e.g., 10 minutes) |
+
+```python
+# Adding max_turns to sampling_loop
+MAX_TURNS = 50
+
+turn_count = 0
+while True:
+ turn_count += 1
+ if turn_count > MAX_TURNS:
+ messages.append({
+ "role": "user",
+ "content": "Session turn limit reached. Please summarize your progress."
+ })
+ # One final call to get a summary, then exit
+ final_response = client.beta.messages.create(...)
+ return messages
+
+ # ... normal loop body
+```
-// Constrain to bounds
-function wrapAround(value, max) {
- if (value < 0) return max;
- if (value > max) return 0;
- return value;
-}
+## Code Quality Requirements
-// ============================================================================
-// 7. PARAMETER UPDATES (Connect to UI)
-// ============================================================================
+The repository's `pyproject.toml` enforces code quality for all contributions. For production forks:
-function updateParameter(paramName, value) {
- params[paramName] = value;
- // Decide if you need to regenerate or just update
- // Some params can update in real-time, others need full regeneration
-}
+```toml
+[tool.ruff]
+line-length = 100
+select = ["E", "F", "W", "I", "UP", "S", "B", "A", "C4", "T20"]
-function regenerate() {
- // Reinitialize your generative system
- // Useful when parameters change significantly
- initializeSeed(params.seed);
- // Then regenerate your system
-}
+[tool.pyright]
+pythonVersion = "3.11"
+strict = true
+reportMissingImports = true
-// ============================================================================
-// 8. COMMON P5.JS PATTERNS
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/algorithmic-art/templates/generator_template.js`
+Run the full quality gate before deploying any changes:
-The `wrapAround` function in [`skills/algorithmic-art/templates/generator_template.js`](https://github.com/anthropics/skills/blob/HEAD/skills/algorithmic-art/templates/generator_template.js) handles a key part of this chapter's functionality:
-
-```js
-
-// Constrain to bounds
-function wrapAround(value, max) {
- if (value < 0) return max;
- if (value > max) return 0;
- return value;
-}
+```bash
+ruff check .
+ruff format --check .
+pyright
+pytest --timeout=30
+```
-// ============================================================================
-// 7. PARAMETER UPDATES (Connect to UI)
-// ============================================================================
+## Docker Security Hardening
-function updateParameter(paramName, value) {
- params[paramName] = value;
- // Decide if you need to regenerate or just update
- // Some params can update in real-time, others need full regeneration
-}
+The computer-use Dockerfile runs as root by default. For production:
-function regenerate() {
- // Reinitialize your generative system
- // Useful when parameters change significantly
- initializeSeed(params.seed);
- // Then regenerate your system
-}
+```dockerfile
+# Add to Dockerfile after existing content
+# Create non-root user
+RUN useradd -m -u 1000 -s /bin/bash agent
+USER agent
+WORKDIR /home/agent
-// ============================================================================
-// 8. COMMON P5.JS PATTERNS
-// ============================================================================
+# Read-only root filesystem where possible
+# Mount only required volumes
+# Drop unnecessary capabilities
+```
-// Drawing with transparency for trails/fading
-function fadeBackground(opacity) {
- fill(250, 249, 245, opacity); // Anthropic light with alpha
+And in your `docker run` command:
+
+```bash
+docker run \
+ --security-opt=no-new-privileges \
+ --cap-drop=ALL \
+ --cap-add=SYS_PTRACE \
+ --read-only \
+ --tmpfs /tmp:rw,noexec,nosuid \
+ --network=restricted \
+ -e ANTHROPIC_API_KEY=$ANTHROPIC_API_KEY \
+ computer-use-demo:production
```
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
+## Summary
+Hardening the quickstarts for production requires addressing five concerns: security isolation (one container per session, network allowlists, no credentials in containers), authentication (route API keys server-side), reliability (retry logic with exponential backoff, provider fallback), observability (structured logging with per-session token tracking), and cost controls (turn limits, image truncation, prompt caching). The code quality infrastructure in `pyproject.toml` already handles linting, type checking, and testing — use it.
-## How These Components Connect
+Next: [Chapter 8: End-to-End Walkthroughs](08-real-world-examples.md)
-```mermaid
-flowchart TD
- A[colorFromPalette]
- B[mapRange]
- C[easeInOutCubic]
- D[wrapAround]
- E[updateParameter]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+---
+
+- [Tutorial Index](README.md)
+- [Previous Chapter: Chapter 6: MCP Integration](06-best-practices.md)
+- [Next Chapter: Chapter 8: End-to-End Walkthroughs](08-real-world-examples.md)
+- [Main Catalog](../../README.md#-tutorial-catalog)
diff --git a/tutorials/anthropic-skills-tutorial/08-real-world-examples.md b/tutorials/anthropic-skills-tutorial/08-real-world-examples.md
index 5ece2f45..c9e48de9 100644
--- a/tutorials/anthropic-skills-tutorial/08-real-world-examples.md
+++ b/tutorials/anthropic-skills-tutorial/08-real-world-examples.md
@@ -1,318 +1,405 @@
---
layout: default
-title: "Chapter 8: Real-World Examples"
+title: "Chapter 8: End-to-End Walkthroughs"
nav_order: 8
-parent: Anthropic Skills Tutorial
+parent: Anthropic Quickstarts Tutorial
+format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
---
+# Chapter 8: End-to-End Walkthroughs
-# Chapter 8: Real-World Examples
+## What Problem Does This Solve?
-Welcome to **Chapter 8: Real-World Examples**. In this part of **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+Reading architecture descriptions and code snippets in isolation does not tell you what it feels like to use these quickstarts, what their actual request/response flows look like, or where the real integration pain points are. This chapter walks through three complete scenarios — a customer support chat with knowledge retrieval, a financial data analysis session, and a computer use task — showing the exact API calls, message structures, and decision points at each step.
+## Walkthrough 1: Customer Support Agent with RAG
-This chapter maps the design and operations patterns into deployable workflows.
+### Scenario
-## Example 1: Brand Governance Skill
+A user asks about their subscription plan. The app must retrieve relevant policy documents from Amazon Bedrock and generate a helpful response while detecting if the user is frustrated and should be redirected to a human agent.
-**Goal:** enforce consistent messaging across marketing outputs.
+### System Architecture
-**Inputs:** draft copy, audience, campaign goal
+```mermaid
+sequenceDiagram
+ participant User as User (browser)
+ participant UI as ChatArea.tsx
+ participant API as /api/chat (Edge Runtime)
+ participant Claude as Claude API
+ participant Bedrock as Amazon Bedrock RAG
+
+ User->>UI: "What's included in my Pro plan?"
+ UI->>API: POST {messages, knowledgeBaseId}
+ API->>Bedrock: retrieve({query, knowledgeBaseId, n=5})
+ Bedrock-->>API: [{text, source, score}×5]
+ API->>Claude: messages + retrieved context in system prompt
+ Claude-->>API: stream: thinking + response text
+ API-->>UI: SSE stream
+ UI-->>User: typed response with sources
+```
-**References:** brand voice guide, prohibited claims list, legal disclaimer policy
+### Step 1: Knowledge Base Retrieval
+
+The Next.js API route calls Bedrock before sending anything to Claude:
+
+```typescript
+// From customer-support-agent/app/api/chat/route.ts (simplified)
+import {
+ BedrockAgentRuntimeClient,
+ RetrieveCommand,
+} from "@aws-sdk/client-bedrock-agent-runtime";
+
+async function retrieveContext(
+ query: string,
+ knowledgeBaseId: string
+): Promise<string> {
+ const client = new BedrockAgentRuntimeClient({ region: "us-east-1" });
+ const response = await client.send(
+ new RetrieveCommand({
+ knowledgeBaseId,
+ retrievalQuery: { text: query },
+ retrievalConfiguration: {
+ vectorSearchConfiguration: { numberOfResults: 5 },
+ },
+ })
+ );
+
+ const passages = response.retrievalResults
+ ?.filter((r) => (r.score ?? 0) > 0.5)
+ .map((r) => r.content?.text ?? "")
+ .join("\n\n---\n\n");
+
+ return passages ?? "";
+}
+```
-**Outputs:** revised copy + policy gap report
+### Step 2: System Prompt Construction
-Why it works:
+The retrieved context is injected into the system prompt, not the user message. This is important: putting context in the system prompt enables prompt caching — if the user asks multiple questions about the same knowledge base, the cached system prompt means subsequent requests pay only 10% of the input token cost for the context.
-- strict output schema
-- explicit policy references
-- deterministic violation labeling
+```typescript
+const systemPrompt = `You are a helpful customer support agent for Acme Corp.
+Use only the information provided in the knowledge base context below.
+If the answer is not in the context, say so clearly.
-## Example 2: Customer Support Triage Skill
+KNOWLEDGE BASE CONTEXT:
+${retrievedContext}
-**Goal:** route inbound issues with consistent severity scoring.
+MOOD DETECTION:
+If the user appears frustrated, confused, or mentions they want to speak to a human,
+respond with JSON: {"redirect_to_human": true, "reason": "..."}
+Otherwise respond normally.`;
+```
-**Inputs:** ticket text, customer tier, product area
+### Step 3: Streaming Response with Extended Thinking
+
+```typescript
+const stream = await client.messages.stream({
+ model: "claude-opus-4-20250514",
+ max_tokens: 8096,
+ thinking: { type: "enabled", budget_tokens: 2048 },
+ system: systemPrompt,
+ messages: conversationHistory,
+});
+
+// Stream to browser as Server-Sent Events
+for await (const event of stream) {
+ if (event.type === "content_block_delta") {
+ if (event.delta.type === "thinking_delta") {
+ // Display in the "Agent Thinking" panel
+ yield { type: "thinking", text: event.delta.thinking };
+ } else if (event.delta.type === "text_delta") {
+ yield { type: "text", text: event.delta.text };
+ }
+ }
+}
+```
-**Scripts:** classifier and routing map resolver
+### Step 4: Mood Detection and Human Redirect
+
+The `ChatArea.tsx` component parses Claude's response for the JSON redirect signal:
+
+```typescript
+function parseResponse(text: string): {
+ shouldRedirect: boolean;
+ reason: string;
+ cleanText: string;
+} {
+ try {
+ const parsed = JSON.parse(text);
+ if (parsed.redirect_to_human) {
+ return {
+ shouldRedirect: true,
+ reason: parsed.reason,
+ cleanText: "I'm connecting you with a human agent.",
+ };
+ }
+ } catch {
+ // Not JSON — normal response
+ }
+ return { shouldRedirect: false, reason: "", cleanText: text };
+}
+```
-**Outputs:** severity, queue, response draft, escalation rationale
+### What This Demonstrates
-Why it works:
+- Context injection via system prompt (enables caching on repeated queries)
+- Extended thinking for transparent reasoning
+- Streaming SSE for real-time UX
+- Structured output (JSON) embedded in natural language response
+- Human escalation signal without requiring function/tool calls
-- deterministic routing logic in scripts
-- natural language only for explanations
-- audit-friendly structured fields
+---
-## Example 3: Engineering RFC Assistant Skill
+## Walkthrough 2: Financial Data Analysis
+
+### Scenario
+
+A user uploads a CSV of quarterly revenue data and asks "What caused the Q3 dip and what does it mean for Q4 projections?"
+
+### Step 1: File Upload and Parsing
+
+The frontend sends the file to `/api/analyze` as a multipart form upload. The API route handles multiple file types:
+
+```typescript
+// financial-data-analyst/app/api/analyze/route.ts (simplified)
+async function parseFile(
+ file: File
+): Promise<{ text: string; mimeType: string }> {
+ const buffer = await file.arrayBuffer();
+
+ if (file.type === "text/csv" || file.name.endsWith(".csv")) {
+ return {
+ text: new TextDecoder().decode(buffer),
+ mimeType: "text/plain",
+ };
+ } else if (file.type === "application/pdf") {
+ // Use PDF.js to extract text
+ const text = await extractPdfText(buffer);
+ return { text, mimeType: "text/plain" };
+ } else if (file.type.startsWith("image/")) {
+ // Send as image block directly to Claude
+ return {
+ text: Buffer.from(buffer).toString("base64"),
+ mimeType: file.type,
+ };
+ }
+ throw new Error(`Unsupported file type: ${file.type}`);
+}
+```
-**Goal:** convert rough architecture notes into review-ready RFC drafts.
+### Step 2: Claude Analysis with Chart Request
-**Inputs:** notes, constraints, system context
+The message to Claude includes the file content and instructs it to return structured visualization data alongside its analysis:
-**Templates:** canonical RFC format with risk and rollout sections
+```typescript
+const analysisPrompt = `Analyze this financial data and answer the user's question.
-**Outputs:** RFC draft + unresolved questions list
+If your analysis would benefit from a visualization, include a JSON block in your response
+with this format:
+\`\`\`chart
+{
+ "type": "line" | "bar" | "area" | "pie" | "stacked_bar",
+ "title": "Chart title",
+ "data": [{"label": "Q1", "value": 1250000}, ...],
+ "xKey": "label",
+ "yKey": "value"
+}
+\`\`\`
-Why it works:
+Always explain your reasoning in natural language before or after the chart.
-- fixed section order and quality gate checklist
-- uncertainty explicitly captured, not hidden
-- easy reviewer handoff
+DATA:
+${fileContent}
-## Example 4: Compliance Evidence Skill
+USER QUESTION: ${userQuestion}`;
+```
-**Goal:** collect evidence artifacts for control attestations.
+### Step 3: Chart Extraction and Rendering
+
+The frontend parses the response to extract chart JSON blocks:
+
+```typescript
+function extractChartsFromResponse(text: string): {
+ charts: ChartData[];
+ cleanText: string;
+} {
+ const chartRegex = /```chart\n([\s\S]*?)\n```/g;
+ const charts: ChartData[] = [];
+ let cleanText = text;
+
+ let match;
+ while ((match = chartRegex.exec(text)) !== null) {
+ try {
+ charts.push(JSON.parse(match[1]));
+ cleanText = cleanText.replace(match[0], `[Chart: ${charts.length}]`);
+ } catch {
+ // Malformed chart JSON — skip
+ }
+ }
+
+ return { charts, cleanText };
+}
+```
-**Inputs:** control ID, system scope, evidence sources
+Charts are then rendered with Recharts:
+
+```typescript
+// Simplified ChartRenderer component
+function ChartRenderer({ chart }: { chart: ChartData }) {
+ switch (chart.type) {
+ case "line":
+ return (
+ <ResponsiveContainer width="100%" height={300}>
+ <LineChart data={chart.data}>
+ <XAxis dataKey={chart.xKey} />
+ <YAxis />
+ <Tooltip />
+ <Line type="monotone" dataKey={chart.yKey} />
+ </LineChart>
+ </ResponsiveContainer>
+ );
+ case "bar":
+ return (
+ <ResponsiveContainer width="100%" height={300}>
+ <BarChart data={chart.data}>
+ <XAxis dataKey={chart.xKey} />
+ <YAxis />
+ <Bar dataKey={chart.yKey} />
+ </BarChart>
+ </ResponsiveContainer>
+ );
+ // ... other chart types
+ }
+}
+```
-**Outputs:** evidence matrix with source links and confidence labels
+### What This Demonstrates
-Why it works:
+- Multi-format file handling (CSV, PDF, image) before sending to Claude
+- Structured output extraction from natural language responses (without tool use)
+- Dynamic chart rendering tied to Claude's analysis
+- Edge Runtime streaming for large file analysis responses
-- strict data provenance requirements
-- source citation field required for each row
-- built-in incompleteness detection
+---
-## Final Implementation Playbook
+## Walkthrough 3: Computer Use Task
+
+### Scenario
+
+User asks: "Find the most recent Python release on python.org and create a text file on the desktop with the version number."
+
+### Full Turn-by-Turn Trace
+
+**Turn 1**: User sends message. Loop calls Claude with system prompt and empty message history.
+
+Claude responds with:
+```json
+{
+ "stop_reason": "tool_use",
+ "content": [
+ {
+ "type": "text",
+ "text": "I'll find the Python version on python.org. Let me start by taking a screenshot."
+ },
+ {
+ "type": "tool_use",
+ "id": "tu_01",
+ "name": "computer",
+ "input": {"action": "screenshot"}
+ }
+ ]
+}
+```
-1. Start with a narrow outcome.
-2. Add schema contracts before scaling usage.
-3. Move deterministic logic to scripts.
-4. Introduce regression testing early.
-5. Publish only with ownership and lifecycle policy.
+**Turn 2**: Loop executes screenshot, gets base64 PNG. Appends to messages:
+```json
+[
+ {"role": "assistant", "content": [text_block, tool_use_tu_01]},
+ {"role": "user", "content": [{"type": "tool_result", "tool_use_id": "tu_01", "content": [{"type": "image", "source": {...}}]}]}
+]
+```
-## Final Summary
+Claude sees the desktop screenshot and responds with tool_use to open Firefox.
-You now have a full lifecycle blueprint for skills: design, runtime integration, quality control, and governed distribution.
+**Turn 3–5**: Claude opens Firefox, navigates to python.org, and takes a screenshot to verify the page loaded.
-Related:
-- [MCP Python SDK Tutorial](../mcp-python-sdk-tutorial/)
-- [MCP Servers Tutorial](../mcp-servers-tutorial/)
-- [Claude Code Tutorial](../claude-code-tutorial/)
+**Turn 6**: Claude reads the version number from the screenshot:
+```json
+{
+ "type": "tool_use",
+ "name": "bash",
+ "input": {"command": "echo '3.13.2' > ~/Desktop/python_version.txt"}
+}
+```
-## What Problem Does This Solve?
+**Turn 7**: BashTool executes the command. Returns `ToolResult(output="")`. Claude sees the empty output (success) and responds:
+```json
+{
+ "stop_reason": "end_turn",
+ "content": [
+ {
+ "type": "text",
+ "text": "Done. I found that the latest Python release is 3.13.2 and saved it to python_version.txt on the Desktop."
+ }
+ ]
+}
+```
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for core abstractions in this chapter so behavior stays predictable as complexity grows.
+No tool_use blocks → sampling loop terminates.
-In practical terms, this chapter helps you avoid three common failures:
+### Message History After 7 Turns
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
+```mermaid
+flowchart LR
+ M1["user: 'Find Python version...'"]
+ M2["assistant: text + screenshot tool_use"]
+ M3["user: tool_result with PNG"]
+ M4["assistant: open Firefox tool_use"]
+ M5["user: tool_result empty"]
+ M6["...turns 5-6..."]
+ M7["assistant: final text only"]
+
+ M1 --> M2 --> M3 --> M4 --> M5 --> M6 --> M7
+```
+
+With `only_n_most_recent_images=3`, the loop would have removed the screenshots from turns 2–4 before sending turn 7's API call, keeping only the 3 most recent screenshots in the context.
-After working through this chapter, you should be able to reason about `Chapter 8: Real-World Examples` as an operating subsystem inside **Anthropic Skills Tutorial: Reusable AI Agent Capabilities**, with explicit contracts for inputs, state transitions, and outputs.
+---
-Use the implementation notes around execution and reliability details as your checklist when adapting these patterns to your own repository.
+## Adapting the Quickstarts: A Decision Guide
-## How it Works Under the Hood
+| You want to... | Start from | Key files to modify |
+|:---------------|:-----------|:--------------------|
+| Build a chat UI on top of Claude | `customer-support-agent` | `app/api/chat/route.ts`, `ChatArea.tsx` |
+| Add custom knowledge retrieval | `customer-support-agent` | `app/api/chat/route.ts` (replace Bedrock with your retriever) |
+| Build a data analysis app | `financial-data-analyst` | `app/api/analyze/route.ts`, add chart types |
+| Build a desktop automation agent | `computer-use-demo` | `loop.py` (add tools), `tools/` |
+| Build a minimal agent with custom tools | `agents/` | `agent.py`, `tools/` |
+| Automate web tasks | `browser-use-demo` | `browser.py`, `loop.py` |
+| Multi-session coding automation | `autonomous-coding` | `prompts/`, `autonomous_agent_demo.py` |
-Under the hood, `Chapter 8: Real-World Examples` usually follows a repeatable control path:
+## Common Adaptation Pitfalls
-1. **Context bootstrap**: initialize runtime config and prerequisites for `core component`.
-2. **Input normalization**: shape incoming data so `execution layer` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `state model`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
+**Removing the tool result messages**: If you call Claude and get a `tool_use` response, you must return a `tool_result` message before calling again. Skipping this causes an API validation error about the conversation not ending with a user message.
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
+**Mismatched tool version and model**: Using `computer_20241022` with `claude-opus-4-20250514` causes a validation error. Always pick a tool version that matches your model version using the table in Chapter 2.
-## Source Walkthrough
+**Streaming in the sampling loop**: The computer-use loop uses non-streaming calls because tool results must be complete before execution. If you add streaming to this loop, you must buffer the full response before processing tool_use blocks.
-Use the following upstream sources to verify implementation details while reading this chapter:
+**Sharing container state between users**: Never reuse a computer-use container across users or sessions. The `/tmp` directory, browser history, clipboard, and environment variables all persist within a container lifetime.
-- [anthropics/skills repository](https://github.com/anthropics/skills)
- Why it matters: authoritative reference on `anthropics/skills repository` (github.com).
+## Summary
-Suggested trace strategy:
-- search upstream code for `Real-World` and `Examples` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+These walkthroughs show the complete data flow through each quickstart: from user input through knowledge retrieval, Claude API calls, tool execution, and final response. The customer support agent demonstrates RAG + streaming + structured escalation signals. The financial analyst demonstrates multi-format file handling + chart extraction without tool use. The computer use walkthrough demonstrates the turn-by-turn conversation structure that makes the sampling loop terminate.
-## Chapter Connections
+---
- [Tutorial Index](README.md)
-- [Previous Chapter: Chapter 7: Publishing and Sharing](07-publishing-sharing.md)
+- [Previous Chapter: Chapter 7: Production Hardening](07-publishing-sharing.md)
- [Main Catalog](../../README.md#-tutorial-catalog)
- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `skills/docx/scripts/accept_changes.py`
-
-The `accept_changes` function in [`skills/docx/scripts/accept_changes.py`](https://github.com/anthropics/skills/blob/HEAD/skills/docx/scripts/accept_changes.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def accept_changes(
- input_file: str,
- output_file: str,
-) -> tuple[None, str]:
- input_path = Path(input_file)
- output_path = Path(output_file)
+## Related Tutorials
- if not input_path.exists():
- return None, f"Error: Input file not found: {input_file}"
-
- if not input_path.suffix.lower() == ".docx":
- return None, f"Error: Input file is not a DOCX file: {input_file}"
-
- try:
- output_path.parent.mkdir(parents=True, exist_ok=True)
- shutil.copy2(input_path, output_path)
- except Exception as e:
- return None, f"Error: Failed to copy input file to output location: {e}"
-
- if not _setup_libreoffice_macro():
- return None, "Error: Failed to setup LibreOffice macro"
-
- cmd = [
- "soffice",
- "--headless",
- f"-env:UserInstallation=file://{LIBREOFFICE_PROFILE}",
- "--norestore",
- "vnd.sun.star.script:Standard.Module1.AcceptAllTrackedChanges?language=Basic&location=application",
- str(output_path.absolute()),
- ]
-```
-
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pdf/scripts/fill_fillable_fields.py`
-
-The `fill_pdf_fields` function in [`skills/pdf/scripts/fill_fillable_fields.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pdf/scripts/fill_fillable_fields.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_pdf_path: str):
- with open(fields_json_path) as f:
- fields = json.load(f)
- fields_by_page = {}
- for field in fields:
- if "value" in field:
- field_id = field["field_id"]
- page = field["page"]
- if page not in fields_by_page:
- fields_by_page[page] = {}
- fields_by_page[page][field_id] = field["value"]
-
- reader = PdfReader(input_pdf_path)
-
- has_error = False
- field_info = get_field_info(reader)
- fields_by_ids = {f["field_id"]: f for f in field_info}
- for field in fields:
- existing_field = fields_by_ids.get(field["field_id"])
- if not existing_field:
- has_error = True
- print(f"ERROR: `{field['field_id']}` is not a valid field ID")
- elif field["page"] != existing_field["page"]:
- has_error = True
- print(f"ERROR: Incorrect page number for `{field['field_id']}` (got {field['page']}, expected {existing_field['page']})")
- else:
- if "value" in field:
- err = validation_error_for_field_value(existing_field, field["value"])
- if err:
- print(err)
-```
-
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pdf/scripts/fill_fillable_fields.py`
-
-The `validation_error_for_field_value` function in [`skills/pdf/scripts/fill_fillable_fields.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pdf/scripts/fill_fillable_fields.py) handles a key part of this chapter's functionality:
-
-```py
- else:
- if "value" in field:
- err = validation_error_for_field_value(existing_field, field["value"])
- if err:
- print(err)
- has_error = True
- if has_error:
- sys.exit(1)
-
- writer = PdfWriter(clone_from=reader)
- for page, field_values in fields_by_page.items():
- writer.update_page_form_field_values(writer.pages[page - 1], field_values, auto_regenerate=False)
-
- writer.set_need_appearances_writer(True)
-
- with open(output_pdf_path, "wb") as f:
- writer.write(f)
-
-
-def validation_error_for_field_value(field_info, field_value):
- field_type = field_info["type"]
- field_id = field_info["field_id"]
- if field_type == "checkbox":
- checked_val = field_info["checked_value"]
- unchecked_val = field_info["unchecked_value"]
- if field_value != checked_val and field_value != unchecked_val:
- return f'ERROR: Invalid value "{field_value}" for checkbox field "{field_id}". The checked value is "{checked_val}" and the unchecked value is "{unchecked_val}"'
- elif field_type == "radio_group":
- option_values = [opt["value"] for opt in field_info["radio_options"]]
- if field_value not in option_values:
- return f'ERROR: Invalid value "{field_value}" for radio group field "{field_id}". Valid values are: {option_values}'
- elif field_type == "choice":
-```
-
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-### `skills/pdf/scripts/fill_fillable_fields.py`
-
-The `monkeypatch_pydpf_method` function in [`skills/pdf/scripts/fill_fillable_fields.py`](https://github.com/anthropics/skills/blob/HEAD/skills/pdf/scripts/fill_fillable_fields.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def monkeypatch_pydpf_method():
- from pypdf.generic import DictionaryObject
- from pypdf.constants import FieldDictionaryAttributes
-
- original_get_inherited = DictionaryObject.get_inherited
-
- def patched_get_inherited(self, key: str, default = None):
- result = original_get_inherited(self, key, default)
- if key == FieldDictionaryAttributes.Opt:
- if isinstance(result, list) and all(isinstance(v, list) and len(v) == 2 for v in result):
- result = [r[0] for r in result]
- return result
-
- DictionaryObject.get_inherited = patched_get_inherited
-
-
-if __name__ == "__main__":
- if len(sys.argv) != 4:
- print("Usage: fill_fillable_fields.py [input pdf] [field_values.json] [output pdf]")
- sys.exit(1)
- monkeypatch_pydpf_method()
- input_pdf = sys.argv[1]
- fields_json = sys.argv[2]
- output_pdf = sys.argv[3]
- fill_pdf_fields(input_pdf, fields_json, output_pdf)
-
-```
-
-This function is important because it defines how Anthropic Skills Tutorial: Reusable AI Agent Capabilities implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[accept_changes]
- B[fill_pdf_fields]
- C[validation_error_for_field_value]
- D[monkeypatch_pydpf_method]
- E[import]
- A --> B
- B --> C
- C --> D
- D --> E
-```
+- [MCP Python SDK Tutorial](../mcp-python-sdk-tutorial/) — Build MCP servers to extend these quickstarts
+- [MCP Servers Tutorial](../mcp-servers-tutorial/) — Reference server patterns
+- [Claude Code Tutorial](../claude-code-tutorial/) — The CLI used by autonomous-coding
diff --git a/tutorials/anthropic-skills-tutorial/README.md b/tutorials/anthropic-skills-tutorial/README.md
index f48011e2..8df90dd5 100644
--- a/tutorials/anthropic-skills-tutorial/README.md
+++ b/tutorials/anthropic-skills-tutorial/README.md
@@ -1,98 +1,106 @@
---
layout: default
-title: "Anthropic Skills Tutorial"
+title: "Anthropic Quickstarts Tutorial"
nav_order: 91
has_children: true
format_version: v2
+source_repo: https://github.com/anthropics/anthropic-quickstarts
+categories:
+ - ai-agents
+ - computer-use
+ - tool-use
+ - multi-turn-conversations
+related_tutorials:
+ - ../anthropic-code-tutorial/
+ - ../mcp-python-sdk-tutorial/
+ - ../claude-code-tutorial/
---
-# Anthropic Skills Tutorial: Reusable AI Agent Capabilities
+# Anthropic Quickstarts Tutorial
-> Build and operate production-quality skills for Claude Code, Claude.ai, and the Claude API.
+> A deep-dive into every project in the official `anthropics/anthropic-quickstarts` repository — computer use, autonomous coding, customer support, financial analysis, and the agents reference implementation.
-[](https://github.com/anthropics/skills)
+[](https://github.com/anthropics/anthropic-quickstarts)
[](https://opensource.org/licenses/MIT)
-[](https://agentskills.io/specification)
-## Why This Track Matters
+## What This Tutorial Covers
-Anthropic Skills let you package reusable, reliable behaviors for Claude agents once and deploy them across every integration point — Claude Code, Claude.ai, and the API — without re-engineering each time.
+The `anthropics/anthropic-quickstarts` repository is the canonical starting point for building production-quality Claude-powered applications. It is **not** a skills/plugin system — it is a collection of five standalone quickstart projects that demonstrate the full range of Claude's capabilities:
-This track focuses on:
-- designing skills with clear invocation boundaries and deterministic outputs
-- packaging repeatable workflows using scripts, references, and asset files
-- publishing versioned skills for team or public reuse
-- operating a skills catalog with ownership and lifecycle controls
+| Project | What It Demonstrates |
+|:--------|:---------------------|
+| `computer-use-demo/` | Claude controlling a real desktop via screenshot + xdotool actions |
+| `agents/` | A minimal reference agent loop with tool use and MCP integration |
+| `autonomous-coding/` | Two-agent pattern: initializer + coding agent across many sessions |
+| `customer-support-agent/` | Next.js chat app with Claude + Amazon Bedrock RAG knowledge base |
+| `financial-data-analyst/` | Next.js app with file upload, Claude analysis, and Recharts visualizations |
+| `browser-use-demo/` | DOM-aware browser automation via Playwright instead of pixel coordinates |
-## What are Anthropic Skills?
+## Why This Repository Matters
-Anthropic Skills are packaged instructions and supporting files that Claude can load for specific jobs. A skill can be lightweight (one `SKILL.md`) or operationally rich (scripts, templates, and domain references).
+Before these quickstarts existed, the standard approach was to cobble together ad-hoc integrations from API documentation snippets. The quickstarts provide:
-The official `anthropics/skills` repository demonstrates real patterns used for:
+- **Working Docker environments** so you can run computer use in minutes, not days
+- **Reference sampling loops** demonstrating multi-turn conversation management, prompt caching, and image window management
+- **Concrete tool implementations** showing exactly how `bash`, `computer`, and `str_replace_based_edit_tool` are structured
+- **Production patterns** like retry logic, provider abstraction (Anthropic / Bedrock / Vertex), and structured output validation
-- document generation workflows (DOCX, PDF, XLSX, PPTX)
-- development and automation tasks
-- enterprise process standardization
-- reusable task-specific behavior across teams
+## Architecture Overview
-## Core Concepts
-
-| Concept | Why It Matters |
-|:--------|:---------------|
-| `SKILL.md` | Defines how and when the skill should be used |
-| Frontmatter | Enables discovery, routing, and compatibility metadata |
-| Body instructions | The behavioral contract Claude follows while the skill is active |
-| `scripts/` | Deterministic external logic for tasks that should not be left to free-form generation |
-| `references/` | Source material Claude can load on demand for better answers |
-| `assets/` | Non-text files required by the workflow |
+```mermaid
+graph TD
+ subgraph quickstarts["anthropic-quickstarts"]
+ CU["computer-use-demo
Python + Docker + Streamlit"]
+ AG["agents/
Python reference impl <300 lines"]
+ AC["autonomous-coding/
Claude Code CLI + Python"]
+ CS["customer-support-agent/
Next.js + Bedrock RAG"]
+ FA["financial-data-analyst/
Next.js + Recharts"]
+ BD["browser-use-demo/
Python + Playwright + Docker"]
+ end
+
+ API["Anthropic API
(claude-opus-4 / sonnet-4 / haiku-4)"]
+ MCP["MCP Servers
(optional)"]
+
+ CU --> API
+ AG --> API
+ AG --> MCP
+ AC --> API
+ CS --> API
+ FA --> API
+ BD --> API
+```
## Chapter Guide
-| Chapter | Topic | What You Will Learn |
-|:--------|:------|:--------------------|
-| [1. Getting Started](01-getting-started.md) | Setup | Skill anatomy, minimal valid skill, local iteration loop |
-| [2. Skill Categories](02-skill-categories.md) | Taxonomy | How to choose category boundaries and avoid "mega-skills" |
-| [3. Advanced Skill Design](03-advanced-skill-design.md) | Architecture | Multi-file composition with scripts, references, and assets |
-| [4. Integration Platforms](04-integration-platforms.md) | Runtime | Claude Code, Claude.ai, and Claude API integration patterns |
-| [5. Production Skills](05-production-skills.md) | Reliability | Deterministic outputs, guardrails, and validation pipelines |
-| [6. Best Practices](06-best-practices.md) | Quality | Testing strategy, change management, and security hygiene |
-| [7. Publishing and Sharing](07-publishing-sharing.md) | Distribution | Versioning, release channels, governance, and ownership |
-| [8. Real-World Examples](08-real-world-examples.md) | Case Studies | End-to-end patterns you can adapt for real teams |
-
-## Current Ecosystem Notes (February 11, 2026)
-
-- The public reference implementation remains in `anthropics/skills`.
-- The repository points to the evolving Agent Skills format specification at `agentskills.io/specification`.
-- Claude Code supports plugin marketplace workflows for skill installation from published skill repositories.
-
-## What You Will Build
-
-By the end of this tutorial, you will be able to:
-
-- design skills with clear invocation boundaries
-- package repeatable outputs with strict templates
-- integrate script-backed workflows safely
-- publish versioned skills for internal or public reuse
-- run regression checks to prevent prompt drift
-- operate a skills catalog with ownership and lifecycle controls
+| Chapter | Topic | Core Question Answered |
+|:--------|:------|:-----------------------|
+| [1. Getting Started](01-getting-started.md) | Setup & mental model | What does each quickstart actually do and how do I run it? |
+| [2. Quickstart Architecture](02-skill-categories.md) | Project anatomy | How are the five projects structured and what patterns do they share? |
+| [3. Computer Use Deep-Dive](03-advanced-skill-design.md) | Computer use agent | How does Claude control a desktop: tools, loop, coordinate scaling? |
+| [4. Tool Use Patterns](04-integration-platforms.md) | Tool design | How are BashTool, ComputerTool, EditTool, and custom tools built? |
+| [5. Multi-Turn Conversation Patterns](05-production-skills.md) | Sampling loop | How does the agentic loop work, and how do you manage context? |
+| [6. MCP Integration](06-best-practices.md) | MCP | How does the agents quickstart connect to MCP servers? |
+| [7. Production Hardening](07-publishing-sharing.md) | Reliability | Prompt caching, image truncation, provider abstraction, security |
+| [8. End-to-End Walkthroughs](08-real-world-examples.md) | Case studies | Full traces of the customer support and financial analyst quickstarts |
## Prerequisites
-- Basic markdown and YAML familiarity
-- Working knowledge of Claude Code or Claude API workflows
-- Git/GitHub basics for version control and sharing
+- Python 3.11+ and Node.js 18+ for local development
+- Docker Desktop for computer-use and browser-use demos
+- An `ANTHROPIC_API_KEY` from [console.anthropic.com](https://console.anthropic.com)
+- Basic familiarity with async Python or TypeScript/React
## Related Tutorials
**Prerequisites:**
-- [Anthropic API Tutorial](../anthropic-code-tutorial/) - Claude API fundamentals
+- [Anthropic API Tutorial](../anthropic-code-tutorial/) — Claude API fundamentals, message format, and streaming
**Complementary:**
-- [MCP Python SDK Tutorial](../mcp-python-sdk-tutorial/) - Tool integration patterns
-- [Claude Code Tutorial](../claude-code-tutorial/) - CLI-driven agent workflows
+- [MCP Python SDK Tutorial](../mcp-python-sdk-tutorial/) — Build custom MCP servers the agents quickstart can connect to
+- [Claude Code Tutorial](../claude-code-tutorial/) — The CLI used by the autonomous-coding quickstart
**Next Steps:**
-- [MCP Servers Tutorial](../mcp-servers-tutorial/) - Reference server patterns for richer tool ecosystems
+- [MCP Servers Tutorial](../mcp-servers-tutorial/) — Reference server patterns for extending any of these quickstarts
---
@@ -100,51 +108,11 @@ Ready to begin? Start with [Chapter 1: Getting Started](01-getting-started.md).
---
-*Built with references from the official [anthropics/skills repository](https://github.com/anthropics/skills), linked support articles, and the Agent Skills specification.*
+*Built from the official [anthropics/anthropic-quickstarts](https://github.com/anthropics/anthropic-quickstarts) repository. All code examples are taken directly from that source.*
-## Navigation & Backlinks
+## Navigation
-- [Start Here: Chapter 1: Getting Started](01-getting-started.md)
+- [Chapter 1: Getting Started](01-getting-started.md)
- [Back to Main Catalog](../../README.md#-tutorial-catalog)
- [Browse A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
- [Search by Intent](../../discoverability/query-hub.md)
-- [Explore Category Hubs](../../README.md#category-hubs)
-
-## Full Chapter Map
-
-1. [Chapter 1: Getting Started](01-getting-started.md)
-2. [Chapter 2: Skill Categories](02-skill-categories.md)
-3. [Chapter 3: Advanced Skill Design](03-advanced-skill-design.md)
-4. [Chapter 4: Integration Platforms](04-integration-platforms.md)
-5. [Chapter 5: Production Skills](05-production-skills.md)
-6. [Chapter 6: Best Practices](06-best-practices.md)
-7. [Chapter 7: Publishing and Sharing](07-publishing-sharing.md)
-8. [Chapter 8: Real-World Examples](08-real-world-examples.md)
-
-## Current Snapshot (auto-updated)
-
-- repository: [`anthropics/skills`](https://github.com/anthropics/skills)
-- stars: about **111k**
-
-## What You Will Learn
-
-- how to design and structure a SKILL.md file with frontmatter and behavioral contracts
-- how to compose multi-file skills with scripts, references, and asset directories
-- how to integrate skills across Claude Code, Claude.ai, and the Claude API
-- how to version, publish, and maintain skills catalogs for team-wide reuse
-
-## Source References
-
-- [anthropics/skills repository](https://github.com/anthropics/skills)
-
-## Mental Model
-
-```mermaid
-flowchart TD
- A[Foundations] --> B[Core Abstractions]
- B --> C[Interaction Patterns]
- C --> D[Advanced Operations]
- D --> E[Production Usage]
-```
-
-*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)*
diff --git a/tutorials/anything-llm-tutorial/01-getting-started.md b/tutorials/anything-llm-tutorial/01-getting-started.md
index 808ab484..77300aac 100644
--- a/tutorials/anything-llm-tutorial/01-getting-started.md
+++ b/tutorials/anything-llm-tutorial/01-getting-started.md
@@ -6,6 +6,7 @@ has_children: false
parent: AnythingLLM Tutorial
---
+
# Chapter 1: Getting Started with AnythingLLM
Welcome to **Chapter 1: Getting Started with AnythingLLM**. In this part of **AnythingLLM Tutorial: Self-Hosted RAG and Agents Platform**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -482,163 +483,10 @@ Now that you have AnythingLLM running with your first document chatbot, let's ex
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **AnythingLLM Tutorial: Self-Hosted RAG and Agents Platform**
-- tutorial slug: **anything-llm-tutorial**
-- chapter focus: **Chapter 1: Getting Started with AnythingLLM**
-- system context: **Anything Llm Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 1: Getting Started with AnythingLLM`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [AnythingLLM Repository](https://github.com/Mintplex-Labs/anything-llm)
-- [AnythingLLM Releases](https://github.com/Mintplex-Labs/anything-llm/releases)
-- [AnythingLLM Docs](https://docs.anythingllm.com/)
-- [AnythingLLM Website](https://anythingllm.com/)
-
-### Cross-Tutorial Connection Map
-
-- [Open WebUI Tutorial](../open-webui-tutorial/)
-- [RAGFlow Tutorial](../ragflow-tutorial/)
-- [Quivr Tutorial](../quivr-tutorial/)
-- [Langfuse Tutorial](../langfuse-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 1: Getting Started with AnythingLLM`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 1: Getting Started with AnythingLLM
-
-- tutorial context: **AnythingLLM Tutorial: Self-Hosted RAG and Agents Platform**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-## What Problem Does This Solve?
-
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `docker`, `anythingllm`, `your` so behavior stays predictable as complexity grows.
-
-In practical terms, this chapter helps you avoid three common failures:
-
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 1: Getting Started with AnythingLLM` as an operating subsystem inside **AnythingLLM Tutorial: Self-Hosted RAG and Agents Platform**, with explicit contracts for inputs, state transitions, and outputs.
-
-Use the implementation notes around `storage`, `AnythingLLM`, `logs` as your checklist when adapting these patterns to your own repository.
-
-## How it Works Under the Hood
-
-Under the hood, `Chapter 1: Getting Started with AnythingLLM` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `docker`.
-2. **Input normalization**: shape incoming data so `anythingllm` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `your`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
-
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
-
-- [AnythingLLM Repository](https://github.com/Mintplex-Labs/anything-llm)
- Why it matters: authoritative reference on `AnythingLLM Repository` (github.com).
-- [AnythingLLM Releases](https://github.com/Mintplex-Labs/anything-llm/releases)
- Why it matters: authoritative reference on `AnythingLLM Releases` (github.com).
-- [AnythingLLM Docs](https://docs.anythingllm.com/)
- Why it matters: authoritative reference on `AnythingLLM Docs` (docs.anythingllm.com).
-- [AnythingLLM Website](https://anythingllm.com/)
- Why it matters: authoritative reference on `AnythingLLM Website` (anythingllm.com).
+## Source Code Walkthrough
-Suggested trace strategy:
-- search upstream code for `docker` and `anythingllm` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+### `docker/` and `server/index.js`
-## Chapter Connections
+Getting started with AnythingLLM is driven by the Docker configuration in the [`docker/`](https://github.com/Mintplex-Labs/anything-llm/tree/HEAD/docker) directory, which contains the `Dockerfile` and `docker-compose.yml` for the recommended deployment path covered in Chapter 1.
-- [Tutorial Index](README.md)
-- [Next Chapter: Chapter 2: Workspaces - Organizing Your Knowledge](02-workspaces.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+The [`server/index.js`](https://github.com/Mintplex-Labs/anything-llm/blob/HEAD/server/index.js) file bootstraps the Express server, registers API routes, and configures middleware — making it the entry point for understanding how the platform starts up. Tracing the startup sequence in `index.js` shows which services are initialized and in what order, which is helpful context for first-run setup and validation.
diff --git a/tutorials/athens-research-tutorial/01-system-overview.md b/tutorials/athens-research-tutorial/01-system-overview.md
index d7ce727b..58a4acc1 100644
--- a/tutorials/athens-research-tutorial/01-system-overview.md
+++ b/tutorials/athens-research-tutorial/01-system-overview.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 1: System Overview
Welcome to **Chapter 1: System Overview**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -496,94 +497,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 1: System Overview**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 1: System Overview`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
-
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 1: System Overview`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
+## Source Code Walkthrough
-### Review Questions
+### `src/cljs/athens/core.cljs`
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
+The system entry point and overall architecture are visible in [`src/cljs/athens/core.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/core.cljs). This file bootstraps the Re-frame application, mounts the root component, and initializes the Datascript database — providing a concise map of how all the subsystems described in Chapter 1 fit together at startup.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/04-app-architecture.md b/tutorials/athens-research-tutorial/04-app-architecture.md
index 801b767d..0fcdd9f8 100644
--- a/tutorials/athens-research-tutorial/04-app-architecture.md
+++ b/tutorials/athens-research-tutorial/04-app-architecture.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 4: Application Architecture
Welcome to **Chapter 4: Application Architecture**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -104,478 +105,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 4: Application Architecture**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 4: Application Architecture`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
+## Source Code Walkthrough
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 4: Application Architecture`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 6: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 7: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 8: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 9: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 10: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 11: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 12: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 13: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 14: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 15: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 16: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 17: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 18: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 19: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 20: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 21: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 22: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 23: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 24: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 25: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 26: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 27: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 28: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 29: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 30: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 31: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 32: Chapter 4: Application Architecture
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
+### `src/cljs/athens/events.cljs`
+
+Application architecture in Athens is expressed through its Re-frame event handlers. [`src/cljs/athens/events.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/events.cljs) defines the events that drive all state transitions — page navigation, block edits, and sync operations. Tracing the event flow from UI interaction through handler to database transaction is the clearest way to understand the application architecture described in Chapter 4.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/05-component-system.md b/tutorials/athens-research-tutorial/05-component-system.md
index c60cd2eb..568f2998 100644
--- a/tutorials/athens-research-tutorial/05-component-system.md
+++ b/tutorials/athens-research-tutorial/05-component-system.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 5: Component System
Welcome to **Chapter 5: Component System**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -97,490 +98,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 5: Component System**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 5: Component System`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
+## Source Code Walkthrough
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 5: Component System`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 6: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 7: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 8: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 9: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 10: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 11: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 12: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 13: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 14: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 15: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 16: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 17: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 18: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 19: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 20: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 21: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 22: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 23: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 24: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 25: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 26: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 27: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 28: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 29: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 30: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 31: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 32: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 33: Chapter 5: Component System
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
+### `src/cljs/athens/views/blocks/core.cljs`
+
+The block component is the fundamental UI building block in Athens. [`src/cljs/athens/views/blocks/core.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/views/blocks/core.cljs) implements the recursive block rendering and outliner interactions that define the component system. Understanding how blocks subscribe to Datascript state via Re-frame subscriptions shows the full component-to-data pipeline.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/06-event-handling.md b/tutorials/athens-research-tutorial/06-event-handling.md
index e96dd867..eddd74d0 100644
--- a/tutorials/athens-research-tutorial/06-event-handling.md
+++ b/tutorials/athens-research-tutorial/06-event-handling.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 6: Event Handling
Welcome to **Chapter 6: Event Handling**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -89,502 +90,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 6: Event Handling**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 6: Event Handling`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
+## Source Code Walkthrough
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 6: Event Handling`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 6: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 7: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 8: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 9: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 10: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 11: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 12: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 13: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 14: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 15: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 16: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 17: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 18: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 19: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 20: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 21: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 22: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 23: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 24: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 25: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 26: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 27: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 28: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 29: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 30: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 31: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 32: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 33: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 34: Chapter 6: Event Handling
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
+### `src/cljs/athens/events.cljs` and `src/cljs/athens/db.cljs`
+
+Event handling connects user interactions to database mutations. [`src/cljs/athens/events.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/events.cljs) registers Re-frame event handlers, while [`src/cljs/athens/db.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/db.cljs) defines the Datascript schema and transaction helpers those handlers call. Tracing a block edit event through both files shows the complete event handling lifecycle.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/07-block-editor.md b/tutorials/athens-research-tutorial/07-block-editor.md
index 744612d9..6110ce01 100644
--- a/tutorials/athens-research-tutorial/07-block-editor.md
+++ b/tutorials/athens-research-tutorial/07-block-editor.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 7: Block Editor
Welcome to **Chapter 7: Block Editor**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -94,490 +95,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 7: Block Editor**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 7: Block Editor`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
+## Source Code Walkthrough
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 7: Block Editor`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 6: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 7: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 8: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 9: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 10: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 11: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 12: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 13: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 14: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 15: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 16: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 17: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 18: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 19: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 20: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 21: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 22: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 23: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 24: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 25: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 26: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 27: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 28: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 29: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 30: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 31: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 32: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 33: Chapter 7: Block Editor
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
+### `src/cljs/athens/views/blocks/editor.cljs`
+
+The block editor implementation is in [`src/cljs/athens/views/blocks/editor.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/views/blocks/editor.cljs). This file handles keystroke events, cursor management, block splitting/merging, and indentation — the core outliner behaviors described in Chapter 7. The keyboard shortcut dispatch logic shows how editing commands map to re-frame events that modify the DataScript graph.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/08-rich-text.md b/tutorials/athens-research-tutorial/08-rich-text.md
index 54041fdf..b66fa6b3 100644
--- a/tutorials/athens-research-tutorial/08-rich-text.md
+++ b/tutorials/athens-research-tutorial/08-rich-text.md
@@ -6,6 +6,7 @@ has_children: false
parent: "Athens Research Knowledge Graph"
---
+
# Chapter 8: Rich Text
Welcome to **Chapter 8: Rich Text**. In this part of **Athens Research: Deep Dive Tutorial**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -87,502 +88,8 @@ Suggested trace strategy:
## Depth Expansion Playbook
-
-
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
-
-### Strategic Context
-
-- tutorial: **Athens Research: Deep Dive Tutorial**
-- tutorial slug: **athens-research-tutorial**
-- chapter focus: **Chapter 8: Rich Text**
-- system context: **Athens Research Knowledge Graph**
-- objective: move from surface-level usage to repeatable engineering operation
-
-### Architecture Decomposition
-
-1. Define the runtime boundary for `Chapter 8: Rich Text`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
-
-### Operator Decision Matrix
-
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
-
-### Failure Modes and Countermeasures
-
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
-
-### Implementation Runbook
-
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
-
-### Quality Gate Checklist
-
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-
-### Source Alignment
+## Source Code Walkthrough
-- [Athens Research](https://github.com/athensresearch/athens)
-- [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)
-
-### Cross-Tutorial Connection Map
-
-- Related tutorials are listed in this tutorial index.
-
-### Advanced Practice Exercises
-
-1. Build a minimal end-to-end implementation for `Chapter 8: Rich Text`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
-
-### Review Questions
-
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
-
-### Scenario Playbook 1: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 2: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 3: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 4: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 5: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 6: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 7: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 8: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 9: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 10: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 11: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 12: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 13: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 14: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 15: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 16: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 17: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 18: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 19: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 20: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 21: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 22: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 23: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 24: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 25: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 26: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 27: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 28: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 29: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: access policy changes reduce successful execution rates
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: re-scope credentials and rotate leaked or stale keys
-- verification target: data integrity checks pass across write/read cycles
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 30: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: background jobs accumulate and exceed processing windows
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: activate degradation mode to preserve core user paths
-- verification target: audit logs capture all control-plane mutations
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 31: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: incoming request volume spikes after release
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: introduce adaptive concurrency limits and queue bounds
-- verification target: latency p95 and p99 stay within defined SLO windows
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 32: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: tool dependency latency increases under concurrency
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: enable staged retries with jitter and circuit breaker fallback
-- verification target: error budget burn rate remains below escalation threshold
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 33: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: schema updates introduce incompatible payloads
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: pin schema versions and add compatibility shims
-- verification target: throughput remains stable under target concurrency
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
-
-### Scenario Playbook 34: Chapter 8: Rich Text
-
-- tutorial context: **Athens Research: Deep Dive Tutorial**
-- trigger condition: environment parity drifts between staging and production
-- initial hypothesis: identify the smallest reproducible failure boundary
-- immediate action: protect user-facing stability before optimization work
-- engineering control: restore environment parity via immutable config promotion
-- verification target: retry volume stays bounded without feedback loops
-- rollback trigger: pre-defined quality gate fails for two consecutive checks
-- communication step: publish incident status with owner and ETA
-- learning capture: add postmortem and convert findings into automated tests
+### `src/cljs/athens/views/blocks/textarea_keydown.cljs`
+
+Rich text rendering and inline markup parsing are handled through the block editing pipeline. [`src/cljs/athens/views/blocks/textarea_keydown.cljs`](https://github.com/athensresearch/athens/blob/HEAD/src/cljs/athens/views/blocks/textarea_keydown.cljs) processes the raw text input and applies Athens's markup conventions — `[[links]]`, `**bold**`, `{{components}}` — turning plain text into the rich nodes rendered by the component system. This is the tokenization layer Chapter 8 describes.
\ No newline at end of file
diff --git a/tutorials/athens-research-tutorial/README.md b/tutorials/athens-research-tutorial/README.md
index 560449ab..1034c112 100644
--- a/tutorials/athens-research-tutorial/README.md
+++ b/tutorials/athens-research-tutorial/README.md
@@ -8,6 +8,8 @@ format_version: v2
# Athens Research: Deep Dive Tutorial
+> **Project Status**: The Athens Research repository was **archived in August 2022** and is no longer actively maintained. This tutorial covers the final v2.0.0 release as a historical reference for ClojureScript/Datascript architectural patterns. Do not use Athens as the basis for new production projects.
+
> **Project**: [Athens Research](https://github.com/athensresearch/athens) — An open-source, Roam-like knowledge management system built with ClojureScript and graph databases.
[](https://github.com/athensresearch/athens)
diff --git a/tutorials/autoagent-tutorial/01-getting-started.md b/tutorials/autoagent-tutorial/01-getting-started.md
index 9c20cc5a..407038d6 100644
--- a/tutorials/autoagent-tutorial/01-getting-started.md
+++ b/tutorials/autoagent-tutorial/01-getting-started.md
@@ -3,215 +3,355 @@ layout: default
title: "Chapter 1: Getting Started"
nav_order: 1
parent: AutoAgent Tutorial
+format_version: v2
+why: "AutoAgent collapses the gap between describing an agent in English and running it in production. Understanding the three operating modes and how to configure your environment from day one prevents wasted debugging time and unlocks the framework's full power."
+mental_model: "Think of AutoAgent as a meta-developer: you describe what you want, and it writes the agent code, tests it in a Docker sandbox, registers it, and hands you the running agent — no orchestration boilerplate required."
+learning_outcomes:
+ - Install AutoAgent and configure API keys for at least one LLM provider
+ - Understand when to use User Mode vs Agent Editor vs Workflow Editor
+ - Run a first deep-research task with the `auto main` CLI
+ - Understand the MetaChain vs AutoAgent naming relationship
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/cli.py
+ - autoagent/constant.py
+ - autoagent/core.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
---
-
# Chapter 1: Getting Started
-Welcome to **Chapter 1: Getting Started**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+## What Problem Does This Solve?
+Building multi-agent systems today requires deep framework knowledge: defining agent schemas, wiring tool registries, managing handoffs between agents, handling retries, isolating code execution, and instrumenting everything for debugging. A single research assistant agent can take days to build properly.
-This chapter gets AutoAgent installed and running in its core CLI flow.
+AutoAgent (HKUDS, arXiv:2502.05957) solves this by treating agent creation as a **natural language task**. You describe your agent in plain English, and the framework generates the Python code, tool definitions, tests them in Docker, and registers them — all without you writing a line of orchestration code.
-## Learning Goals
+The framework ships with three operating modes that cover the most common use cases:
-- install AutoAgent from source
-- configure basic `.env` API credentials
-- run first `auto main` flow
-- verify baseline interactive functionality
+1. **User Mode (Deep Research)** — a general-purpose research assistant that browses the web, reads documents, and writes code
+2. **Agent Editor** — creates new custom agents from natural language descriptions
+3. **Workflow Editor** — composes async parallel pipelines for batch or recurring tasks
-## Source References
+### The MetaChain / AutoAgent Naming Situation
-- [AutoAgent README Quick Start](https://github.com/HKUDS/AutoAgent/blob/main/README.md)
-- [Installation Docs](https://autoagent-ai.github.io/docs/get-started-installation)
-- [Quickstart Docs](https://autoagent-ai.github.io/docs/get-started-quickstart)
+You will encounter this confusion immediately when reading the source code. The project was publicly renamed from **MetaChain** to **AutoAgent** in February 2025. The GitHub repository, README, and pip package are all called `autoagent`. However, the internal Python class, imports, and Docker image still use the original name:
-## Summary
+```python
+# This is correct — the class is still MetaChain internally
+from autoagent import MetaChain
-You now have a working AutoAgent baseline.
+chain = MetaChain(model="gpt-4o")
+```
-Next: [Chapter 2: Architecture and Interaction Modes](02-architecture-and-interaction-modes.md)
+This tutorial uses "AutoAgent" for the product and "MetaChain" for the specific Python class.
-## Depth Expansion Playbook
+---
-## Source Code Walkthrough
+## Installation
-### `constant.py`
+### Prerequisites
-The `str_to_bool` function in [`constant.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/constant.py) handles a key part of this chapter's functionality:
+| Requirement | Version | Notes |
+|-------------|---------|-------|
+| Python | 3.10+ | Required for `match` statement patterns |
+| Docker | Latest | Required for code execution sandbox |
+| Git | Any | For cloning the repo |
+| GITHUB_AI_TOKEN | — | Required only for Agent Editor mode |
-```py
-# utils:
-load_dotenv() # 加载.env文件
-def str_to_bool(value):
- """convert string to bool"""
- true_values = {'true', 'yes', '1', 'on', 't', 'y'}
- false_values = {'false', 'no', '0', 'off', 'f', 'n'}
-
- if isinstance(value, bool):
- return value
-
- if value == None:
- return None
-
- value = str(value).lower().strip()
- if value in true_values:
- return True
- if value in false_values:
- return False
- return True # default return True
+### Step 1: Clone and Install
+```bash
+git clone https://github.com/HKUDS/AutoAgent
+cd AutoAgent
+pip install -e .
+```
-DOCKER_WORKPLACE_NAME = os.getenv('DOCKER_WORKPLACE_NAME', 'workplace')
-GITHUB_AI_TOKEN = os.getenv('GITHUB_AI_TOKEN', None)
-AI_USER = os.getenv('AI_USER', "tjb-tech")
-LOCAL_ROOT = os.getenv('LOCAL_ROOT', os.getcwd())
+The `-e` flag installs in editable mode, which is important for local development and for the self-modification workflows in Agent Editor mode (the framework clones its own repo into Docker for meta-programming).
-DEBUG = str_to_bool(os.getenv('DEBUG', False))
+### Step 2: Verify the CLI
-DEFAULT_LOG = str_to_bool(os.getenv('DEFAULT_LOG', False))
-LOG_PATH = os.getenv('LOG_PATH', None)
-EVAL_MODE = str_to_bool(os.getenv('EVAL_MODE', False))
-BASE_IMAGES = os.getenv('BASE_IMAGES', None)
+```bash
+auto --help
```
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
+You should see:
-### `constant.py`
+```
+Usage: auto [OPTIONS] COMMAND [ARGS]...
-The `get_architecture` function in [`constant.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/constant.py) handles a key part of this chapter's functionality:
+Options:
+ --help Show this message and exit.
-```py
-BASE_IMAGES = os.getenv('BASE_IMAGES', None)
+Commands:
+ deep-research Run a deep research task directly
+ main Start the AutoAgent interactive session
+```
-def get_architecture():
- machine = platform.machine().lower()
- if 'x86' in machine or 'amd64' in machine or 'i386' in machine:
- return "tjbtech1/metachain:amd64_latest"
- elif 'arm' in machine:
- return "tjbtech1/metachain:latest"
- else:
- return "tjbtech1/metachain:latest"
-if BASE_IMAGES is None:
- BASE_IMAGES = get_architecture()
+The two primary entry points are `auto main` (interactive session with all three modes) and `auto deep-research` (non-interactive single-shot research).
-COMPLETION_MODEL = os.getenv('COMPLETION_MODEL', "claude-3-5-sonnet-20241022")
-EMBEDDING_MODEL = os.getenv('EMBEDDING_MODEL', "text-embedding-3-small")
+---
-MC_MODE = str_to_bool(os.getenv('MC_MODE', True))
+## Environment Configuration
-# add Env for function call and non-function call
+AutoAgent uses a `.env` file at the project root. Copy the example:
-FN_CALL = str_to_bool(os.getenv('FN_CALL', None))
-API_BASE_URL = os.getenv('API_BASE_URL', None)
-ADD_USER = str_to_bool(os.getenv('ADD_USER', None))
+```bash
+cp .env.example .env
+```
+### Required Variables
+```bash
+# .env
-NOT_SUPPORT_SENDER = ["mistral", "groq"]
-MUST_ADD_USER = ["deepseek-reasoner", "o1-mini", "deepseek-r1"]
+# Choose at least one LLM provider
+OPENAI_API_KEY=sk-...
+ANTHROPIC_API_KEY=sk-ant-...
+DEEPSEEK_API_KEY=...
+GEMINI_API_KEY=...
-NOT_SUPPORT_FN_CALL = ["o1-mini", "deepseek-reasoner", "deepseek-r1", "llama", "grok-2"]
-NOT_USE_FN_CALL = [ "deepseek-chat"] + NOT_SUPPORT_FN_CALL
+# Required for Agent Editor (clones AutoAgent repo into Docker)
+GITHUB_AI_TOKEN=ghp_...
+# Optional: default model override
+AUTOAGENT_MODEL=gpt-4o
+
+# Optional: workspace directory (defaults to ./workspace)
+WORKSPACE_DIR=./workspace
```
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
+### Model Selection
-### `evaluation/utils.py`
+AutoAgent routes all LLM calls through **LiteLLM 1.55.0**, which supports 100+ providers. The model string follows LiteLLM conventions:
-The `make_metadata` function in [`evaluation/utils.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/evaluation/utils.py) handles a key part of this chapter's functionality:
+```bash
+# OpenAI
+AUTOAGENT_MODEL=gpt-4o
-```py
-import queue # 添加这行导入
+# Anthropic
+AUTOAGENT_MODEL=claude-3-5-sonnet-20241022
-def make_metadata(
- model: str,
- dataset_name: str,
- agent_func: str,
- eval_note: str | None,
- eval_output_dir: str,
- data_split: str | None = None,
- details: dict[str, Any] | None = None,
- port: int | None = None,
- container_name: str | None = None,
- git_clone: bool = False,
- test_pull_name: str | None = None,
-) -> EvalMetadata:
- eval_note = f'_N_{eval_note}' if eval_note else ''
+# DeepSeek (uses XML fallback, not function calling)
+AUTOAGENT_MODEL=deepseek/deepseek-r1
- eval_output_path = os.path.join(
- eval_output_dir,
- dataset_name,
- agent_func.replace('get_', ''),
- f'{model}_maxiter{eval_note}',
- )
+# Local Ollama
+AUTOAGENT_MODEL=ollama/llama3.2
+```
- pathlib.Path(eval_output_path).mkdir(parents=True, exist_ok=True)
- pathlib.Path(os.path.join(eval_output_path, 'logs')).mkdir(
- parents=True, exist_ok=True
- )
+Models that do not support native function calling (DeepSeek-R1, LLaMA, Grok, etc.) fall back to an XML-based tool call syntax handled by `fn_call_converter.py`. Chapter 2 covers this in depth.
- metadata = EvalMetadata(
- agent_func=agent_func,
- model=model,
+---
+
+## Architecture Overview
+
+Before running your first task, it helps to understand the four layers:
+
+```mermaid
+flowchart TD
+ subgraph "Layer 1: Entry Points"
+ CLI["auto main / auto deep-research"]
+ end
+
+ subgraph "Layer 2: MetaChain Engine"
+ MC["MetaChain.run()"]
+ GCC["get_chat_completion()"]
+ HTC["handle_tool_calls()"]
+ end
+
+ subgraph "Layer 3: Environment Triad"
+ DE["DockerEnv\n(TCP :12346)"]
+ BE["BrowserEnv\n(Playwright)"]
+ MB["RequestsMarkdownBrowser"]
+ end
+
+ subgraph "Layer 4: Registry"
+ PT["plugin_tools"]
+ PA["plugin_agents"]
+ WF["workflows"]
+ end
+
+ CLI --> MC
+ MC --> GCC
+ GCC --> HTC
+ HTC --> DE
+ HTC --> BE
+ HTC --> MB
+ MC --> PT
+ MC --> PA
+ MC --> WF
```
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
+**Layer 1 (CLI):** `cli.py` uses Click to expose `auto main` and `auto deep-research`. Both read `constant.py` for defaults.
+
+**Layer 2 (MetaChain Engine):** `core.py` contains the main `MetaChain` class. Its `run()` method loops: call the LLM, dispatch tool calls, check for agent handoff signals, repeat until `case_resolved`.
+
+**Layer 3 (Environment Triad):** Three execution environments that tools can use. `DockerEnv` runs Python code in an isolated container via TCP. `BrowserEnv` drives Playwright for web automation. `RequestsMarkdownBrowser` handles file reading and format conversion.
-### `evaluation/utils.py`
+**Layer 4 (Registry):** A singleton that tracks all registered tools, agents, and workflows. Plugin tools are auto-registered with a 12,000-token output cap.
+
+---
-The `prepare_dataset` function in [`evaluation/utils.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/evaluation/utils.py) handles a key part of this chapter's functionality:
+## Three Operating Modes in Detail
-```py
- return metadata
+### Mode 1: User Mode (Deep Research)
-def prepare_dataset(
- dataset: pd.DataFrame,
- output_file: str,
- eval_n_limit: int,
- eval_ids: list[str] | None = None,
- skip_num: int | None = None,
-):
- assert (
- 'instance_id' in dataset.columns
- ), "Expected 'instance_id' column in the dataset. You should define your own unique identifier for each instance and use it as the 'instance_id' column."
- logger = LoggerManager.get_logger()
- id_column = 'instance_id'
- logger.info(f'Writing evaluation output to {output_file}')
- finished_ids: set[str] = set()
- if os.path.exists(output_file):
- with open(output_file, 'r') as f:
- for line in f:
- data = json.loads(line)
- finished_ids.add(str(data[id_column]))
- logger.info(
- f'\nOutput file {output_file} already exists. Loaded {len(finished_ids)} finished instances.', title='Warning', color='red'
- )
+This is the default mode when you run `auto main`. It activates the `SystemTriageAgent`, which routes your requests to specialized sub-agents:
- if eval_ids:
- eval_ids_converted = [dataset[id_column].dtype.type(id) for id in eval_ids]
- dataset = dataset[dataset[id_column].isin(eval_ids_converted)]
- logger.info(f'Limiting evaluation to {len(eval_ids)} specific instances.')
- elif skip_num and skip_num >= 0:
- skip_num = min(skip_num, len(dataset))
- dataset = dataset.iloc[skip_num:]
+```mermaid
+flowchart LR
+ U[Your Query] --> ST[SystemTriageAgent]
+ ST -->|web task| WS[WebSurferAgent]
+ ST -->|file task| FS[FileSurferAgent]
+ ST -->|code task| PA[ProgrammingAgent]
+ WS -->|handoff| ST
+ FS -->|handoff| ST
+ PA -->|handoff| ST
+ ST -->|done| CR[case_resolved]
```
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
+Each sub-agent signals completion by calling `case_resolved` or routes to another agent via `transfer_to_X()` functions injected at runtime.
+**Example session:**
-## How These Components Connect
+```
+$ auto main
-```mermaid
-flowchart TD
- A[str_to_bool]
- B[get_architecture]
- C[make_metadata]
- D[prepare_dataset]
- E[run_evaluation]
- A --> B
- B --> C
- C --> D
- D --> E
+AutoAgent> Research the top 5 Python async frameworks and compare their performance benchmarks. Save results to a report.
+
+[SystemTriageAgent routing to WebSurferAgent]
+[WebSurferAgent browsing: asyncio benchmarks 2024...]
+[WebSurferAgent browsing: trio vs asyncio performance...]
+[SystemTriageAgent routing to FileSurferAgent]
+[FileSurferAgent writing report to workspace/async_report.md]
+Done. Report saved to workspace/async_report.md
+```
+
+### Mode 2: Agent Editor
+
+Activated when your message includes intent to create or modify an agent. The framework detects this and routes to `AgentFormerAgent`, which starts a 4-phase pipeline: NL → XML form → tool generation → agent code → registration.
+
+```
+AutoAgent> Create a sales agent that recommends products based on user budget and category preferences
```
+
+Chapter 5 covers this pipeline in full detail.
+
+### Mode 3: Workflow Editor
+
+Activated when your message requests a workflow (batch processing, parallel execution, scheduled runs). Routes to `WorkflowCreatorAgent`, which generates an `EventEngine`-based async pipeline.
+
+```
+AutoAgent> Create a workflow that solves 10 math problems in parallel and picks the majority answer
+```
+
+Chapter 6 covers the EventEngine architecture.
+
+---
+
+## Your First Research Task
+
+With your `.env` configured, start an interactive session:
+
+```bash
+auto main
+```
+
+Try this prompt to verify all three environments are working:
+
+```
+Research what AutoAgent (HKUDS) is, find the GitHub star count, and write a one-paragraph summary to workspace/autoagent_summary.md
+```
+
+This task exercises:
+- `WebSurferAgent` (Playwright browser to fetch GitHub)
+- `FileSurferAgent` (writing the summary file)
+- `SystemTriageAgent` (orchestration between the two)
+
+Expected output flow:
+
+```
+[SystemTriageAgent] Analyzing request...
+[SystemTriageAgent] Routing to WebSurferAgent for GitHub research
+[WebSurferAgent] Navigating to github.com/HKUDS/AutoAgent
+[WebSurferAgent] Extracted: 9,116 stars, Python, MIT license
+[SystemTriageAgent] Routing to FileSurferAgent for writing
+[FileSurferAgent] Writing to workspace/autoagent_summary.md
+[SystemTriageAgent] Task complete
+```
+
+### Non-Interactive Mode
+
+For scripting and CI use cases:
+
+```bash
+auto deep-research "What are the key architectural patterns in AutoAgent? Cite the arxiv paper."
+```
+
+This runs a single research task and exits, printing results to stdout.
+
+---
+
+## @mention Syntax for Direct Routing
+
+You can bypass the triage agent and route directly to a specific agent using `@AgentName` syntax:
+
+```
+AutoAgent> @WebSurferAgent search for the latest LiteLLM release notes
+AutoAgent> @ProgrammingAgent write a Python script to parse CSV files
+AutoAgent> @FileSurferAgent summarize all PDFs in workspace/papers/
+```
+
+This is useful when you know which capability you need and want to skip triage overhead.
+
+---
+
+## Workspace Directory
+
+All file operations default to `./workspace/`. This directory is:
+- Mounted into the Docker container as a shared volume
+- The default read/write location for `FileSurferAgent`
+- Where generated agent code is stored after Agent Editor runs
+
+```bash
+ls workspace/
+# agents/ # Generated agent Python files
+# tools/ # Generated tool Python files
+# workflows/ # Generated workflow files
+# reports/ # Research output files
+```
+
+---
+
+## Common Setup Issues
+
+| Issue | Cause | Fix |
+|-------|-------|-----|
+| `auto: command not found` | Package not installed | Run `pip install -e .` from repo root |
+| `Docker not available` | Docker not running | Start Docker Desktop or Docker daemon |
+| `LiteLLM: No API key` | Missing `.env` entry | Add the key for your chosen provider |
+| `Agent Editor fails` | Missing `GITHUB_AI_TOKEN` | Create a GitHub personal access token |
+| `TCP connection refused :12346` | Docker container not started | DockerEnv auto-starts the container; verify that the Docker daemon is running |
+
+---
+
+## Summary
+
+| Concept | Key Point |
+|---------|-----------|
+| MetaChain vs AutoAgent | Same thing — MetaChain is the internal class name; AutoAgent is the product name since Feb 2025 |
+| `auto main` | Interactive session; activates all three modes based on your intent |
+| `auto deep-research` | Non-interactive single-shot research task |
+| `.env` | Required for all LLM providers; `GITHUB_AI_TOKEN` required only for Agent Editor |
+| Three modes | User Mode (research), Agent Editor (create agents), Workflow Editor (async pipelines) |
+| Docker | Required for code execution sandbox; auto-started by `DockerEnv` |
+| @mention syntax | Routes directly to a named agent, bypassing triage |
+| workspace/ | Shared file directory between host and Docker container |
+
+Continue to [Chapter 2: Core Architecture: MetaChain Engine](./02-core-architecture-metachain-engine.md) to understand how the run loop, context variables, and tool dispatch work under the hood.
diff --git a/tutorials/autoagent-tutorial/02-architecture-and-interaction-modes.md b/tutorials/autoagent-tutorial/02-architecture-and-interaction-modes.md
deleted file mode 100644
index 1f5c8944..00000000
--- a/tutorials/autoagent-tutorial/02-architecture-and-interaction-modes.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-layout: default
-title: "Chapter 2: Architecture and Interaction Modes"
-nav_order: 2
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 2: Architecture and Interaction Modes
-
-Welcome to **Chapter 2: Architecture and Interaction Modes**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter explains AutoAgent mode structure and responsibilities.
-
-## Learning Goals
-
-- distinguish user mode vs agent editor vs workflow editor
-- choose mode based on task and control requirements
-- reason about orchestration boundaries
-- reduce mode-selection confusion in teams
-
-## Mode Overview
-
-- user mode for deep research task execution
-- agent editor for natural-language agent creation
-- workflow editor for multi-agent flow construction
-
-## Source References
-
-- [AutoAgent README: How to Use](https://github.com/HKUDS/AutoAgent/blob/main/README.md)
-- [How to Create Agent Docs](https://autoagent-ai.github.io/docs/user-guide-how-to-create-agent)
-
-## Summary
-
-You now can choose the right mode for different AutoAgent task classes.
-
-Next: [Chapter 3: Installation, Environment, and API Setup](03-installation-environment-and-api-setup.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/cli.py`
-
-The `async_workflow` function in [`autoagent/cli.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/cli.py) handles a key part of this chapter's functionality:
-
-```py
-def workflow(workflow_name: str, system_input: str):
- """命令行函数的同步包装器"""
- return asyncio.run(async_workflow(workflow_name, system_input))
-
-async def async_workflow(workflow_name: str, system_input: str):
- """异步实现的workflow函数"""
- workflow_module = importlib.import_module(f'autoagent.workflows')
- try:
- workflow_func = getattr(workflow_module, workflow_name)
- except AttributeError:
- raise ValueError(f'Workflow function {workflow_name} not found...')
-
- result = await workflow_func(system_input) # 使用 await 等待异步函数完成
- debug_print(True, result, title=f'Result of running {workflow_name} workflow', color='pink3')
- return result
-
-def clear_screen():
- console = Console()
- console.print("[bold green]Coming soon...[/bold green]")
- print('\033[u\033[J\033[?25h', end='') # Restore cursor and clear everything after it, show cursor
-def get_config(container_name, port, test_pull_name="main", git_clone=False):
- container_name = container_name
-
- port_info = check_container_ports(container_name)
- if port_info:
- port = port_info[0]
- else:
- # while not check_port_available(port):
- # port += 1
- # 使用文件锁来确保端口分配的原子性
- import filelock
- lock_file = os.path.join(os.getcwd(), ".port_lock")
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/cli.py`
-
-The `clear_screen` function in [`autoagent/cli.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/cli.py) handles a key part of this chapter's functionality:
-
-```py
- return result
-
-def clear_screen():
- console = Console()
- console.print("[bold green]Coming soon...[/bold green]")
- print('\033[u\033[J\033[?25h', end='') # Restore cursor and clear everything after it, show cursor
-def get_config(container_name, port, test_pull_name="main", git_clone=False):
- container_name = container_name
-
- port_info = check_container_ports(container_name)
- if port_info:
- port = port_info[0]
- else:
- # while not check_port_available(port):
- # port += 1
- # 使用文件锁来确保端口分配的原子性
- import filelock
- lock_file = os.path.join(os.getcwd(), ".port_lock")
- lock = filelock.FileLock(lock_file)
-
- with lock:
- port = port
- while not check_port_available(port):
- port += 1
- print(f'{port} is not available, trying {port+1}')
- # 立即标记该端口为已使用
- with open(os.path.join(os.getcwd(), f".port_{port}"), 'w') as f:
- f.write(container_name)
- local_root = os.path.join(os.getcwd(), f"workspace_meta_showcase", f"showcase_{container_name}")
- os.makedirs(local_root, exist_ok=True)
- docker_config = DockerConfig(
- workplace_name=DOCKER_WORKPLACE_NAME,
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/cli.py`
-
-The `get_config` function in [`autoagent/cli.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/cli.py) handles a key part of this chapter's functionality:
-
-```py
- console.print("[bold green]Coming soon...[/bold green]")
- print('\033[u\033[J\033[?25h', end='') # Restore cursor and clear everything after it, show cursor
-def get_config(container_name, port, test_pull_name="main", git_clone=False):
- container_name = container_name
-
- port_info = check_container_ports(container_name)
- if port_info:
- port = port_info[0]
- else:
- # while not check_port_available(port):
- # port += 1
- # 使用文件锁来确保端口分配的原子性
- import filelock
- lock_file = os.path.join(os.getcwd(), ".port_lock")
- lock = filelock.FileLock(lock_file)
-
- with lock:
- port = port
- while not check_port_available(port):
- port += 1
- print(f'{port} is not available, trying {port+1}')
- # 立即标记该端口为已使用
- with open(os.path.join(os.getcwd(), f".port_{port}"), 'w') as f:
- f.write(container_name)
- local_root = os.path.join(os.getcwd(), f"workspace_meta_showcase", f"showcase_{container_name}")
- os.makedirs(local_root, exist_ok=True)
- docker_config = DockerConfig(
- workplace_name=DOCKER_WORKPLACE_NAME,
- container_name=container_name,
- communication_port=port,
- conda_path='/root/miniconda3',
- local_root=local_root,
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/cli.py`
-
-The `create_environment` function in [`autoagent/cli.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/cli.py) handles a key part of this chapter's functionality:
-
-```py
- )
- return docker_config
-def create_environment(docker_config: DockerConfig):
- """
- 1. create the code environment
- 2. create the web environment
- 3. create the file environment
- """
- code_env = DockerEnv(docker_config)
- code_env.init_container()
-
- web_env = BrowserEnv(browsergym_eval_env = None, local_root=docker_config.local_root, workplace_name=docker_config.workplace_name)
- file_env = RequestsMarkdownBrowser(viewport_size=1024 * 5, local_root=docker_config.local_root, workplace_name=docker_config.workplace_name, downloads_folder=os.path.join(docker_config.local_root, docker_config.workplace_name, "downloads"))
-
- return code_env, web_env, file_env
-
-def create_environment_local(docker_config: DockerConfig):
- """
- 1. create the code environment
- 2. create the web environment
- 3. create the file environment
- """
- code_env = LocalEnv(docker_config)
-
- web_env = BrowserEnv(browsergym_eval_env = None, local_root=docker_config.local_root, workplace_name=docker_config.workplace_name)
- file_env = RequestsMarkdownBrowser(viewport_size=1024 * 5, local_root=docker_config.local_root, workplace_name=docker_config.workplace_name, downloads_folder=os.path.join(docker_config.local_root, docker_config.workplace_name, "downloads"))
-
- return code_env, web_env, file_env
-
-def update_guidance(context_variables):
- console = Console()
-
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[async_workflow]
- B[clear_screen]
- C[get_config]
- D[create_environment]
- E[create_environment_local]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/02-core-architecture-metachain-engine.md b/tutorials/autoagent-tutorial/02-core-architecture-metachain-engine.md
new file mode 100644
index 00000000..bbe54291
--- /dev/null
+++ b/tutorials/autoagent-tutorial/02-core-architecture-metachain-engine.md
@@ -0,0 +1,486 @@
+---
+layout: default
+title: "Chapter 2: Core Architecture: MetaChain Engine"
+nav_order: 2
+parent: AutoAgent Tutorial
+format_version: v2
+why: "Every AutoAgent interaction — whether deep research, agent creation, or workflow execution — passes through the MetaChain run loop. Understanding context_variables, tool dispatch, and the XML fallback for non-FC models lets you debug failures, extend the framework correctly, and avoid subtle context pollution bugs."
+mental_model: "MetaChain.run() is a while loop that calls an LLM, dispatches tool calls with injected context, and follows agent handoff signals until the task is resolved or the max-turns limit is hit."
+learning_outcomes:
+ - Trace any AutoAgent execution through the MetaChain run loop
+ - Understand why context_variables are stripped from tool schemas before LLM calls
+ - Configure the XML fallback for non-function-calling models like DeepSeek-R1
+ - Read LoggerManager output to debug tool dispatch and agent handoffs
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/core.py
+ - autoagent/types.py
+ - autoagent/fn_call_converter.py
+ - autoagent/util.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 2: Core Architecture: MetaChain Engine
+
+## What Problem Does This Solve?
+
+Multi-agent frameworks face three core engineering problems:
+
+1. **Context pollution** — passing execution environments (Docker connections, browser handles, file paths) to the LLM wastes tokens and confuses tool selection
+2. **Model portability** — many capable models (DeepSeek-R1, LLaMA, Grok) don't support native function calling, requiring a fallback path
+3. **Retry safety** — LLM APIs are flaky; naively calling them in a loop causes cascading failures
+
+AutoAgent solves all three in `core.py` through the `MetaChain` class: the `context_variables` pattern strips environment handles from schemas, `fn_call_converter.py` provides XML-based tool call syntax for non-FC models, and `tenacity` handles retries with exponential backoff.
+
+---
+
+## Core Data Types (`types.py`)
+
+Everything in AutoAgent is typed with Pydantic v2. The three core types are:
+
+```python
+# autoagent/types.py
+
+from pydantic import BaseModel
+from typing import Optional, Callable
+
+class Agent(BaseModel):
+ """Defines a single agent in the system."""
+ name: str = "Agent"
+ model: str = "gpt-4o"
+ instructions: str | Callable[..., str] = "You are a helpful agent."
+ functions: list[Callable] = [] # Tools this agent can call
+ tool_choice: str | None = None # Force a specific tool
+ parallel_tool_calls: bool = True # Allow parallel tool dispatch
+ context_variables_description: str = ""
+
+class Response(BaseModel):
+ """Returned by MetaChain.run() when a task completes."""
+ messages: list[dict] = [] # Full conversation history
+ agent: Agent | None = None # Final active agent
+ context_variables: dict = {} # Final context state
+
+class Result(BaseModel):
+ """Returned by tool functions to signal handoff or update context."""
+ value: str = "" # Message to add to conversation
+ agent: Agent | None = None # If set: hand off to this agent
+ context_variables: dict = {} # Context updates to merge
+```
+
+The `Result` type is the key handoff mechanism. When a tool function returns `Result(agent=next_agent)`, the MetaChain engine switches the active agent and continues the loop. This is how `SystemTriageAgent` routes to `WebSurferAgent`:
+
+```python
+# In system_triage_agent.py
+def transfer_to_websurfer(context_variables: dict) -> Result:
+ """Transfer control to WebSurferAgent for web browsing tasks."""
+ return Result(
+ value="Transferring to WebSurferAgent",
+ agent=websurfer_agent # MetaChain will use this agent next turn
+ )
+```
+
+---
+
+## The MetaChain Run Loop (`core.py`)
+
+```mermaid
+flowchart TD
+ A["MetaChain.run(agent, messages, context_variables)"] --> B["Prepare tool schemas\nstrip context_variables params"]
+ B --> C["get_chat_completion()\nLiteLLM API call with tenacity retry"]
+ C --> D{Tool calls\nin response?}
+ D -- No --> E["Append assistant message\nCheck max turns"]
+ D -- Yes --> F["handle_tool_calls()\ndispatch Python functions"]
+ F --> G{Result.agent\nset?}
+ G -- Yes --> H["Switch active agent\nReset tool schemas"]
+ G -- No --> I["Merge context_variables\nAppend tool results"]
+ H --> B
+ I --> B
+ E --> J{case_resolved\nor max_turns?}
+ J -- No --> B
+ J -- Yes --> K["Return Response(messages, agent, context_variables)"]
+```
+
+The actual loop in `core.py`:
+
+```python
+# autoagent/core.py (simplified)
+
+class MetaChain:
+ def run(
+ self,
+ agent: Agent,
+ messages: list[dict],
+ context_variables: dict = {},
+ max_turns: int = 30,
+ execute_tools: bool = True,
+ ) -> Response:
+ active_agent = agent
+ history = copy.deepcopy(messages)
+ init_len = len(messages)
+
+ while len(history) - init_len < max_turns:
+ # Build tool schemas, stripping context_variables
+ tools = [
+ function_to_json(f)
+ for f in active_agent.functions
+ ]
+
+ # Call LLM with retry
+ response = self.get_chat_completion(
+ agent=active_agent,
+ history=history,
+ context_variables=context_variables,
+ tools=tools,
+ )
+
+ message = response.choices[0].message
+ history.append(json.loads(message.model_dump_json()))
+
+ if not message.tool_calls or not execute_tools:
+ # No tools called — check if we're done
+                if "case_resolved" in (message.content or ""):
+ break
+ continue
+
+ # Dispatch tool calls
+ tool_results = self.handle_tool_calls(
+ message.tool_calls,
+ active_agent.functions,
+ context_variables,
+ )
+
+ history.extend(tool_results.messages)
+ context_variables.update(tool_results.context_variables)
+
+ # Check for agent handoff
+ if tool_results.agent:
+ active_agent = tool_results.agent
+
+ return Response(
+ messages=history[init_len:],
+ agent=active_agent,
+ context_variables=context_variables,
+ )
+```
+
+---
+
+## The context_variables Pattern
+
+This is the most important architectural pattern in AutoAgent. The `context_variables` dict carries runtime state (Docker connection, browser handle, file paths) to ALL tool functions — without ever appearing in the tool schemas sent to the LLM.
+
+```mermaid
+flowchart LR
+ subgraph "LLM sees"
+ TS["Tool schema:\nrun_code(code: str, timeout: int)"]
+ end
+
+ subgraph "Tool function receives"
+ TF["run_code(\n code: str,\n timeout: int,\n context_variables: dict ← injected\n)"]
+ end
+
+ subgraph "context_variables"
+ CV["{\n 'code_env': DockerEnv,\n 'web_env': BrowserEnv,\n 'file_env': MarkdownBrowser\n}"]
+ end
+
+ CV -->|injected by handle_tool_calls| TF
+ TS -->|stripped before API call| LLM[(LLM API)]
+```
+
+The stripping happens in `function_to_json()` in `util.py`:
+
+```python
+# autoagent/util.py
+
+def function_to_json(func: Callable) -> dict:
+ """Convert a Python function to a JSON tool schema for the LLM.
+
+ Critically: context_variables parameters are excluded from the schema
+ so they never appear in the LLM's tool descriptions.
+ """
+ sig = inspect.signature(func)
+ parameters = {}
+ required = []
+
+ for name, param in sig.parameters.items():
+ if name == "context_variables":
+ continue # ← THE CRITICAL LINE: strip from schema
+
+ param_type = get_type_hint(func, name)
+ parameters[name] = {"type": param_type}
+
+ if param.default is inspect.Parameter.empty:
+ required.append(name)
+
+ return {
+ "type": "function",
+ "function": {
+ "name": func.__name__,
+ "description": func.__doc__ or "",
+ "parameters": {
+ "type": "object",
+ "properties": parameters,
+ "required": required,
+ },
+ },
+ }
+```
+
+And injection happens in `handle_tool_calls()`:
+
+```python
+# autoagent/core.py (simplified)
+
+def handle_tool_calls(
+ self,
+ tool_calls: list,
+ functions: list[Callable],
+ context_variables: dict,
+) -> Response:
+ func_map = {f.__name__: f for f in functions}
+ results = []
+
+ for tool_call in tool_calls:
+ name = tool_call.function.name
+ args = json.loads(tool_call.function.arguments)
+ func = func_map[name]
+
+ # Inject context_variables if the function accepts it
+ if "context_variables" in inspect.signature(func).parameters:
+ args["context_variables"] = context_variables # ← injection
+
+ raw_result = func(**args)
+
+ # Handle Result objects for agent handoffs
+ if isinstance(raw_result, Result):
+ result_value = raw_result.value
+ if raw_result.agent:
+ # Signal agent handoff
+ ...
+ if raw_result.context_variables:
+ context_variables.update(raw_result.context_variables)
+ else:
+ result_value = str(raw_result)
+
+ results.append({
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "content": result_value,
+ })
+
+ return Response(messages=results, context_variables=context_variables)
+```
+
+This pattern means that **tool functions can access DockerEnv, BrowserEnv, and other stateful objects without the LLM needing to know they exist**. The LLM sees clean, minimal tool schemas; tools get the full execution context.
+
+---
+
+## LiteLLM Integration and Retries
+
+`get_chat_completion()` wraps LiteLLM with tenacity retry logic:
+
+```python
+# autoagent/core.py
+
+from tenacity import retry, stop_after_attempt, wait_exponential
+import litellm
+
+@retry(
+ stop=stop_after_attempt(3),
+ wait=wait_exponential(multiplier=1, min=4, max=10),
+ reraise=True,
+)
+def get_chat_completion(
+ self,
+ agent: Agent,
+ history: list[dict],
+ context_variables: dict,
+ tools: list[dict],
+) -> litellm.ModelResponse:
+ instructions = (
+ agent.instructions(context_variables)
+ if callable(agent.instructions)
+ else agent.instructions
+ )
+
+ messages = [{"role": "system", "content": instructions}] + history
+
+ # Check if model needs XML fallback
+ model = agent.model
+ if self._needs_xml_fallback(model):
+ messages, tools = fn_call_converter.inject_xml_prompt(
+ messages, tools
+ )
+ tools = None # Don't pass native tools to non-FC models
+
+ return litellm.completion(
+ model=model,
+ messages=messages,
+ tools=tools,
+ tool_choice=agent.tool_choice,
+ parallel_tool_calls=agent.parallel_tool_calls,
+ )
+```
+
+---
+
+## Non-FC Model Support (`fn_call_converter.py`)
+
+Models like DeepSeek-R1, LLaMA, and Grok don't support the OpenAI function calling API. AutoAgent handles these through `fn_call_converter.py`, which:
+
+1. Injects XML tool call instructions into the system prompt
+2. Parses XML from the model's text response
+3. Converts the parsed result back to the standard tool call format
+
+```python
+# autoagent/fn_call_converter.py (simplified)
+
+NOT_SUPPORT_FN_CALL = [
+ "deepseek/deepseek-r1",
+ "deepseek-r1",
+ "meta-llama/llama-3",
+ "grok",
+ # ... etc
+]
+
+XML_TOOL_PROMPT = """
+You have access to the following tools. To call a tool, use this exact XML format:
+
+<function=tool_name>
+<parameter=param_name>
+{{value}}
+</parameter>
+</function>
+
+Available tools:
+{tools_description}
+"""
+
+def inject_xml_prompt(
+ messages: list[dict],
+ tools: list[dict]
+) -> tuple[list[dict], None]:
+ """Inject XML tool call instructions and return modified messages."""
+ tools_desc = format_tools_as_xml_description(tools)
+ xml_system = XML_TOOL_PROMPT.format(tools_description=tools_desc)
+
+ # Prepend to system message
+ if messages[0]["role"] == "system":
+ messages[0]["content"] = xml_system + "\n\n" + messages[0]["content"]
+ else:
+ messages.insert(0, {"role": "system", "content": xml_system})
+
+ return messages, None # tools=None: don't send to non-FC API
+
+def parse_xml_tool_calls(content: str) -> list[dict]:
+ """Parse XML tool calls from model response text."""
+ import re
+ tool_calls = []
+
+    pattern = r'<function=([^>]+)>(.*?)</function>'
+ for match in re.finditer(pattern, content, re.DOTALL):
+ tool_name = match.group(1)
+ params_text = match.group(2)
+
+ # Parse parameters
+ params = {}
+        param_pattern = r'<parameter=([^>]+)>(.*?)</parameter>'
+ for param_match in re.finditer(param_pattern, params_text, re.DOTALL):
+ params[param_match.group(1)] = param_match.group(2).strip()
+
+ tool_calls.append({
+ "id": f"xml_{len(tool_calls)}",
+ "type": "function",
+ "function": {
+ "name": tool_name,
+ "arguments": json.dumps(params),
+ }
+ })
+
+ return tool_calls
+```
+
+The flow for a DeepSeek-R1 request:
+
+```mermaid
+sequenceDiagram
+ participant MC as MetaChain
+ participant FC as fn_call_converter
+ participant LLM as DeepSeek-R1
+
+ MC->>FC: inject_xml_prompt(messages, tools)
+ FC-->>MC: modified_messages (XML instructions in system), tools=None
+ MC->>LLM: litellm.completion(model="deepseek-r1", messages, tools=None)
+    LLM-->>MC: response with XML tool call in content
+ MC->>FC: parse_xml_tool_calls(content)
+ FC-->>MC: [{"id": "xml_0", "function": {"name": "run_code", "arguments": ...}}]
+ MC->>MC: handle_tool_calls() as normal
+```
+
+This makes AutoAgent model-agnostic: you get identical behavior whether you use GPT-4o with native function calling or DeepSeek-R1 with XML fallback.
+
+---
+
+## LoggerManager
+
+AutoAgent uses a custom `LoggerManager` in `util.py` for structured logging of the run loop. Key log events:
+
+```python
+# autoagent/util.py
+
+class LoggerManager:
+ def log_tool_call(self, tool_name: str, args: dict) -> None:
+ """Log when a tool is dispatched."""
+
+ def log_agent_handoff(self, from_agent: str, to_agent: str) -> None:
+ """Log when control transfers between agents."""
+
+ def log_llm_call(self, model: str, tokens: int) -> None:
+ """Log LLM API call with token count."""
+
+ def log_retry(self, attempt: int, error: str) -> None:
+ """Log retry attempt with error message."""
+```
+
+The logger outputs to the console using Rich for colored, structured output. To increase verbosity:
+
+```bash
+AUTOAGENT_LOG_LEVEL=DEBUG auto main
+```
+
+---
+
+## Turn Limit and Termination Conditions
+
+The run loop terminates under three conditions:
+
+| Condition | Trigger | Source |
+|-----------|---------|--------|
+| `case_resolved` | Agent calls the `case_resolved` tool or includes the string in its message | All system agents |
+| `case_not_resolved` | Agent calls `case_not_resolved` after exhausting options | All system agents |
+| `max_turns` exceeded | Loop counter reaches `max_turns` (default 30) | `MetaChain.run()` parameter |
+
+The `case_resolved` and `case_not_resolved` tools are injected into every system agent's function list at startup. They return `Result` objects that signal the loop to terminate.
+
+---
+
+## Summary
+
+| Component | File | Purpose |
+|-----------|------|---------|
+| `MetaChain` class | `core.py` | Main run loop: LLM call → tool dispatch → handoff |
+| `Agent` | `types.py` | Agent definition: name, model, instructions, functions |
+| `Response` | `types.py` | Run loop output: messages, final agent, context state |
+| `Result` | `types.py` | Tool return value: handoff signal + context updates |
+| `function_to_json()` | `util.py` | Converts Python functions to LLM tool schemas (strips context_variables) |
+| `handle_tool_calls()` | `core.py` | Dispatches tools, injects context_variables, processes Result |
+| `get_chat_completion()` | `core.py` | LiteLLM call with tenacity retry |
+| `fn_call_converter.py` | `fn_call_converter.py` | XML fallback for non-FC models |
+| `NOT_SUPPORT_FN_CALL` | `fn_call_converter.py` | List of models requiring XML fallback |
+| `LoggerManager` | `util.py` | Structured logging for debugging |
+
+Continue to [Chapter 3: The Environment Triad](./03-environment-triad.md) to learn how DockerEnv, BrowserEnv, and RequestsMarkdownBrowser are initialized and used.
diff --git a/tutorials/autoagent-tutorial/03-environment-triad.md b/tutorials/autoagent-tutorial/03-environment-triad.md
new file mode 100644
index 00000000..e93c34cf
--- /dev/null
+++ b/tutorials/autoagent-tutorial/03-environment-triad.md
@@ -0,0 +1,510 @@
+---
+layout: default
+title: "Chapter 3: The Environment Triad"
+nav_order: 3
+parent: AutoAgent Tutorial
+format_version: v2
+why: "All code execution, web browsing, and document reading in AutoAgent runs through three environment abstractions. Knowing how they initialize, communicate, and handle failures is essential for diagnosing tool errors and safely extending AutoAgent with custom tools."
+mental_model: "The three environments — DockerEnv, BrowserEnv, and RequestsMarkdownBrowser — are stateful singletons injected into tools via context_variables. DockerEnv is a TCP server inside a container; BrowserEnv is a Playwright instance; MarkdownBrowser converts any file format to paginated text."
+learning_outcomes:
+ - Understand how DockerEnv starts a TCP server in a Docker container and executes code via socket
+ - Configure BrowserEnv for multimodal screenshot-based web navigation
+ - Use RequestsMarkdownBrowser for paginated document reading with format conversion
+ - Apply the with_env() decorator to bind environments to tool functions
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/docker_env.py
+ - autoagent/tcp_server.py
+ - autoagent/browser_env.py
+ - autoagent/local_env.py
+ - autoagent/markdown_browser/
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 3: The Environment Triad
+
+## What Problem Does This Solve?
+
+Agents that can only call APIs are limited. Real-world tasks require:
+
+- **Executing arbitrary Python code** securely, without risking the host system
+- **Browsing the web** with a real browser that renders JavaScript and captures screenshots
+- **Reading documents** in any format (PDF, DOCX, PPTX, images) as clean text
+
+AutoAgent provides three purpose-built environments for these three capabilities. They are initialized once at startup, passed through `context_variables` to every tool that needs them, and managed as stateful singletons for the lifetime of the session.
+
+```mermaid
+flowchart LR
+ CV["context_variables\n{\n code_env: DockerEnv,\n web_env: BrowserEnv,\n file_env: MarkdownBrowser\n}"]
+
+ CV --> DE["DockerEnv\nCode execution\nTCP :12346"]
+ CV --> BE["BrowserEnv\nWeb browsing\nPlaywright + BrowserGym"]
+ CV --> MB["RequestsMarkdownBrowser\nFile reading\nPDF/DOCX/PPT/images"]
+```
+
+---
+
+## Environment 1: DockerEnv
+
+### Architecture
+
+`DockerEnv` manages a Docker container that runs a persistent TCP server. LLM-generated code is sent to this server as a string, executed inside the container, and the result is returned over the socket. This provides:
+
+- **Isolation**: malicious or buggy code cannot affect the host
+- **Persistence**: the container stays running between tool calls, so state (variables, installed packages) accumulates within a session
+- **Reproducibility**: the Docker image (`tjbtech1/metachain`) pins all dependencies
+
+```mermaid
+sequenceDiagram
+ participant Tool as run_code() tool
+ participant DE as DockerEnv
+ participant C as Docker Container
+ participant TS as tcp_server.py :12346
+
+ Tool->>DE: execute_code(code_string)
+ DE->>C: socket.connect(localhost:12346)
+ DE->>TS: send code over TCP socket
+ TS->>TS: exec(code_string, globals_dict)
+ TS-->>DE: return stdout + stderr + result
+ DE-->>Tool: (stdout, stderr, return_value)
+```
+
+### DockerConfig
+
+```python
+# autoagent/docker_env.py
+
+from pydantic import BaseModel
+import docker
+import socket
+
+class DockerConfig(BaseModel):
+ image: str = "tjbtech1/metachain"
+ container_name: str = "autoagent_sandbox"
+ tcp_port: int = 12346
+ workspace_mount: str = "./workspace"
+ platform: str = "linux/amd64" # See ARM note below
+ timeout: int = 30 # seconds per code execution
+
+class DockerEnv:
+ def __init__(self, config: DockerConfig | None = None):
+ self.config = config or DockerConfig()
+ self.client = docker.from_env()
+ self.container = None
+ self._socket = None
+
+ def init_container(self) -> None:
+ """Pull image if needed, start container, copy tcp_server.py, open socket."""
+ # Pull image
+ self.client.images.pull(self.config.image)
+
+ # Start container with workspace mount
+ self.container = self.client.containers.run(
+ self.config.image,
+ name=self.config.container_name,
+ detach=True,
+ platform=self.config.platform,
+ ports={f"{self.config.tcp_port}/tcp": self.config.tcp_port},
+ volumes={
+ self.config.workspace_mount: {
+ "bind": "/workspace",
+ "mode": "rw"
+ }
+ },
+ remove=True, # Auto-remove when stopped
+ )
+
+ # Copy tcp_server.py into container
+ self._copy_tcp_server()
+
+ # Start the TCP server inside the container
+ self.container.exec_run(
+ f"python /tcp_server.py {self.config.tcp_port}",
+ detach=True,
+ )
+
+ # Connect socket
+ self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ self._socket.connect(("localhost", self.config.tcp_port))
+
+ def execute_code(self, code: str) -> tuple[str, str, str]:
+ """Execute Python code in the container, return (stdout, stderr, result)."""
+ payload = json.dumps({"code": code}).encode() + b"\n"
+ self._socket.sendall(payload)
+ response = self._recv_response()
+ return response["stdout"], response["stderr"], response["result"]
+```
+
+### TCP Server (`tcp_server.py`)
+
+The TCP server runs inside the Docker container and executes code in a persistent namespace:
+
+```python
+# autoagent/tcp_server.py (runs inside Docker)
+
+import socket
+import json
+import sys
+from io import StringIO
+
+# Persistent globals across all code executions in this session
+GLOBALS = {}
+
+def handle_client(conn):
+ """Handle a single code execution request."""
+ data = b""
+ while True:
+ chunk = conn.recv(4096)
+ if not chunk:
+ break
+ data += chunk
+ if data.endswith(b"\n"):
+ break
+
+ request = json.loads(data.decode())
+ code = request["code"]
+
+ # Capture stdout/stderr
+ old_stdout, old_stderr = sys.stdout, sys.stderr
+ sys.stdout = stdout_buf = StringIO()
+ sys.stderr = stderr_buf = StringIO()
+
+ result = None
+ try:
+ # exec with persistent globals — state accumulates across calls
+ exec(code, GLOBALS)
+ result = str(GLOBALS.get("_result", ""))
+ except Exception as e:
+ result = f"Error: {type(e).__name__}: {e}"
+ finally:
+ sys.stdout = old_stdout
+ sys.stderr = old_stderr
+
+ response = {
+ "stdout": stdout_buf.getvalue(),
+ "stderr": stderr_buf.getvalue(),
+ "result": result,
+ }
+ conn.sendall(json.dumps(response).encode() + b"\n")
+```
+
+### ARM vs AMD64 Note
+
+The `tjbtech1/metachain` image is built for `linux/amd64`. On Apple Silicon (M1/M2/M3) Macs, Docker uses Rosetta 2 emulation automatically, but you may see a warning:
+
+```
+WARNING: The requested image's platform (linux/amd64) does not match the detected host platform (linux/arm64/v8)
+```
+
+This is expected and does not affect functionality. If you need native ARM performance, you can build the image locally:
+
+```bash
+docker build --platform linux/arm64 -t autoagent-local .
+# Then update DockerConfig:
+config = DockerConfig(image="autoagent-local", platform="linux/arm64")
+```
+
+---
+
+## Environment 2: BrowserEnv
+
+### Architecture
+
+`BrowserEnv` wraps Playwright through BrowserGym to provide a full browser automation environment with multimodal observation (screenshot + accessibility tree + page content):
+
+```mermaid
+flowchart TD
+    BE["BrowserEnv.step(action)"] --> PW["Playwright browser\n(Chromium headless)"]
+ PW --> BG["BrowserGym observation\ngeneration"]
+ BG --> WO["WebObservation\n{\n content: str,\n url: str,\n screenshot: bytes,\n ax_tree: str\n}"]
+```
+
+### WebObservation Structure
+
+```python
+# autoagent/browser_env.py
+
+from dataclasses import dataclass
+
+@dataclass
+class WebObservation:
+ """Complete observation from a browser step."""
+ content: str # Markdown-converted page content
+ url: str # Current page URL
+ screenshot: bytes # PNG screenshot for multimodal models
+ ax_tree: str # Accessibility tree (for non-visual navigation)
+ error: str = "" # Error message if action failed
+
+class BrowserEnv:
+ def __init__(self):
+ self.browser = None
+ self.page = None
+
+ def init(self) -> None:
+ """Start Playwright and open initial blank page."""
+ from playwright.sync_api import sync_playwright
+ self._playwright = sync_playwright().start()
+ self.browser = self._playwright.chromium.launch(headless=True)
+ self.page = self.browser.new_page()
+
+ def navigate(self, url: str) -> WebObservation:
+ """Navigate to URL and return full observation."""
+ try:
+ self.page.goto(url, wait_until="networkidle", timeout=30000)
+ return self._get_observation()
+ except Exception as e:
+ return WebObservation(content="", url=url, screenshot=b"", ax_tree="", error=str(e))
+
+ def click(self, selector: str) -> WebObservation:
+ """Click an element and return updated observation."""
+ self.page.click(selector)
+ self.page.wait_for_load_state("networkidle")
+ return self._get_observation()
+
+ def _get_observation(self) -> WebObservation:
+ """Capture current page state."""
+ screenshot = self.page.screenshot()
+ content = self._extract_markdown_content()
+ ax_tree = self.page.accessibility.snapshot()
+ return WebObservation(
+ content=content,
+ url=self.page.url,
+ screenshot=screenshot,
+ ax_tree=str(ax_tree),
+ )
+```
+
+### Screenshot Loop for Multimodal Models
+
+`WebSurferAgent` uses GPT-4V-style multimodal input to navigate by looking at screenshots:
+
+```python
+# In websurfer_agent.py tool function
+
+def browse_web(url: str, context_variables: dict) -> str:
+ """Navigate to URL and return page content with screenshot."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ obs = web_env.navigate(url)
+
+ # For multimodal models, include the screenshot in the message
+ return json.dumps({
+ "content": obs.content[:4000], # Truncate for token budget
+ "url": obs.url,
+ "screenshot_available": len(obs.screenshot) > 0,
+ # Screenshot is added separately to message parts for vision models
+ })
+```
+
+---
+
+## Environment 3: RequestsMarkdownBrowser
+
+### Architecture
+
+`RequestsMarkdownBrowser` reads any file or URL and converts it to paginated Markdown text. It handles format conversion for common document types:
+
+```mermaid
+flowchart TD
+ Input["URL or file path"] --> D{Content type?}
+ D -->|HTML/web| H["requests.get()\n+ markdownify"]
+ D -->|PDF| P["pdfminer.six\n→ text → markdown"]
+ D -->|DOCX| W["python-docx\n→ text → markdown"]
+ D -->|PPTX| S["python-pptx\n→ text → markdown"]
+ D -->|Image| I["describe_image()\nvia vision model"]
+ D -->|Plain text| T["Direct read"]
+ H --> Page["Paginated output\n(viewport_size lines per page)"]
+ P --> Page
+ W --> Page
+ S --> Page
+ I --> Page
+ T --> Page
+```
+
+```python
+# autoagent/markdown_browser/ (simplified)
+
+class RequestsMarkdownBrowser:
+ def __init__(
+ self,
+ viewport_size: int = 1024, # Lines per page
+ downloads_folder: str = "./workspace/downloads",
+ ):
+ self.viewport_size = viewport_size
+ self.downloads_folder = downloads_folder
+ self._pages: list[str] = []
+ self._current_page = 0
+
+ def visit_page(self, url_or_path: str) -> str:
+ """Load a page and return the first viewport."""
+ content = self._fetch_and_convert(url_or_path)
+ # Split into viewport-sized pages
+ lines = content.split("\n")
+ self._pages = [
+ "\n".join(lines[i:i + self.viewport_size])
+ for i in range(0, len(lines), self.viewport_size)
+ ]
+ self._current_page = 0
+ return self._get_current_page()
+
+ def page_up(self) -> str:
+ """Scroll up one viewport."""
+ self._current_page = max(0, self._current_page - 1)
+ return self._get_current_page()
+
+ def page_down(self) -> str:
+ """Scroll down one viewport."""
+ self._current_page = min(len(self._pages) - 1, self._current_page + 1)
+ return self._get_current_page()
+
+ def _fetch_and_convert(self, url_or_path: str) -> str:
+ """Fetch content and convert to Markdown based on file type."""
+ if url_or_path.startswith("http"):
+ return self._fetch_url(url_or_path)
+
+ suffix = Path(url_or_path).suffix.lower()
+ if suffix == ".pdf":
+ return self._convert_pdf(url_or_path)
+ elif suffix == ".docx":
+ return self._convert_docx(url_or_path)
+ elif suffix == ".pptx":
+ return self._convert_pptx(url_or_path)
+ elif suffix in [".png", ".jpg", ".jpeg", ".gif", ".webp"]:
+ return self._describe_image(url_or_path)
+ else:
+ return Path(url_or_path).read_text()
+
+ def _get_current_page(self) -> str:
+ """Return current page with position indicator."""
+ page = self._pages[self._current_page]
+ total = len(self._pages)
+ current = self._current_page + 1
+ return f"[Page {current}/{total}]\n\n{page}"
+```
+
+---
+
+## LocalEnv Fallback
+
+For environments where Docker is not available, AutoAgent provides `LocalEnv` as a fallback:
+
+```python
+# autoagent/local_env.py
+
+class LocalEnv:
+ """Executes code directly on the host (no Docker isolation).
+
+ WARNING: This runs code without sandboxing. Use only in trusted
+ environments where Docker is not available.
+ """
+
+ def execute_code(self, code: str) -> tuple[str, str, str]:
+ """Execute Python code in a subprocess."""
+ result = subprocess.run(
+ [sys.executable, "-c", code],
+ capture_output=True,
+ text=True,
+ timeout=30,
+ )
+ return result.stdout, result.stderr, ""
+```
+
+`LocalEnv` is NOT recommended for production use. The Docker sandbox is always preferred because it prevents code escaping the agent execution context.
+
+---
+
+## The `with_env()` Decorator Pattern
+
+Tools that need environments are decorated with `with_env()` to bind the environment from `context_variables`:
+
+```python
+# Pattern from the codebase
+
+def with_env(env_key: str):
+ """Decorator that extracts an environment from context_variables."""
+ def decorator(func):
+ @wraps(func)
+ def wrapper(*args, context_variables: dict = {}, **kwargs):
+ env = context_variables.get(env_key)
+ if env is None:
+ raise RuntimeError(f"Environment '{env_key}' not found in context_variables")
+ return func(*args, env=env, **kwargs)
+ return wrapper
+ return decorator
+
+# Usage:
+@with_env("code_env")
+def run_python_code(code: str, timeout: int = 30, env: DockerEnv = None) -> str:
+ """Run Python code in the Docker sandbox."""
+ stdout, stderr, result = env.execute_code(code)
+ output = stdout
+ if stderr:
+ output += f"\nSTDERR: {stderr}"
+ return output
+```
+
+This pattern keeps tool function signatures clean while ensuring environment access is safe and declarative.
+
+---
+
+## Environment Initialization in Practice
+
+At session startup, `cli.py` initializes all three environments and stores them in the `context_variables` dict that gets passed to `MetaChain.run()`:
+
+```python
+# autoagent/cli.py (simplified)
+
+@click.command()
+def main():
+ # Initialize environments
+ docker_config = DockerConfig()
+ code_env = DockerEnv(docker_config)
+ code_env.init_container()
+
+ web_env = BrowserEnv()
+ web_env.init()
+
+ file_env = RequestsMarkdownBrowser()
+
+ # Pack into context_variables
+ context_variables = {
+ "code_env": code_env,
+ "web_env": web_env,
+ "file_env": file_env,
+ "workspace": docker_config.workspace_mount,
+ }
+
+ # Start MetaChain with the system triage agent
+ chain = MetaChain(model=os.getenv("AUTOAGENT_MODEL", "gpt-4o"))
+
+ while True:
+ user_input = input("AutoAgent> ")
+ response = chain.run(
+ agent=system_triage_agent,
+ messages=[{"role": "user", "content": user_input}],
+ context_variables=context_variables,
+ )
+ print(response.messages[-1]["content"])
+```
+
+---
+
+## Summary
+
+| Environment | File | Protocol | Use Case |
+|-------------|------|----------|----------|
+| `DockerEnv` | `docker_env.py` | TCP socket :12346 | Isolated Python code execution |
+| `BrowserEnv` | `browser_env.py` | Playwright API | Web browsing with screenshot + AXTree |
+| `RequestsMarkdownBrowser` | `markdown_browser/` | HTTP/file read | Document reading with format conversion |
+| `LocalEnv` | `local_env.py` | subprocess | Fallback when Docker unavailable (unsafe) |
+| `DockerConfig` | `docker_env.py` | Pydantic model | Docker container configuration |
+| `WebObservation` | `browser_env.py` | Dataclass | Browser state: content + URL + screenshot + AXTree |
+| TCP server | `tcp_server.py` | Runs in container | Persistent Python namespace for code execution |
+
+Continue to [Chapter 4: User Mode: Deep Research System](./04-user-mode-deep-research.md) to see how SystemTriageAgent orchestrates these environments through specialized sub-agents.
diff --git a/tutorials/autoagent-tutorial/03-installation-environment-and-api-setup.md b/tutorials/autoagent-tutorial/03-installation-environment-and-api-setup.md
deleted file mode 100644
index 38a6c9cd..00000000
--- a/tutorials/autoagent-tutorial/03-installation-environment-and-api-setup.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-layout: default
-title: "Chapter 3: Installation, Environment, and API Setup"
-nav_order: 3
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 3: Installation, Environment, and API Setup
-
-Welcome to **Chapter 3: Installation, Environment, and API Setup**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter covers environment setup and provider credential strategy.
-
-## Learning Goals
-
-- configure required and optional provider keys correctly
-- prepare container/runtime assumptions safely
-- align `.env` configuration with team operations
-- avoid provider mismatch and startup failures
-
-## Setup Checklist
-
-- install Docker/runtime prerequisites
-- configure only required keys for chosen providers
-- validate model/provider mapping before full runs
-
-## Source References
-
-- [AutoAgent README: API Keys Setup](https://github.com/HKUDS/AutoAgent/blob/main/README.md)
-- [Installation Docs](https://autoagent-ai.github.io/docs/get-started-installation)
-
-## Summary
-
-You now have a stable environment and provider setup baseline.
-
-Next: [Chapter 4: Agent and Workflow Creation Patterns](04-agent-and-workflow-creation-patterns.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/core.py`
-
-The `adapt_tools_for_gemini` function in [`autoagent/core.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/core.py) handles a key part of this chapter's functionality:
-
-```py
-logger = LoggerManager.get_logger()
-
-def adapt_tools_for_gemini(tools):
- """为 Gemini 模型适配工具定义,确保所有 OBJECT 类型参数都有非空的 properties"""
- if tools is None:
- return None
-
- adapted_tools = []
- for tool in tools:
- adapted_tool = copy.deepcopy(tool)
-
- # 检查参数
- if "parameters" in adapted_tool["function"]:
- params = adapted_tool["function"]["parameters"]
-
- # 处理顶层参数
- if params.get("type") == "object":
- if "properties" not in params or not params["properties"]:
- params["properties"] = {
- "dummy": {
- "type": "string",
- "description": "Dummy property for Gemini compatibility"
- }
- }
-
- # 处理嵌套参数
- if "properties" in params:
- for prop_name, prop in params["properties"].items():
- if isinstance(prop, dict) and prop.get("type") == "object":
- if "properties" not in prop or not prop["properties"]:
- prop["properties"] = {
- "dummy": {
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/fn_call_converter.py`
-
-The `FunctionCallConversionError` class in [`autoagent/fn_call_converter.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/fn_call_converter.py) handles a key part of this chapter's functionality:
-
-```py
-from litellm import ChatCompletionToolParam
-
-class FunctionCallConversionError(Exception):
- """Exception raised when FunctionCallingConverter failed to convert a non-function call message to a function call message.
-
- This typically happens when there's a malformed message (e.g., missing tags). But not due to LLM output.
- """
-
- def __init__(self, message):
- super().__init__(message)
-
-class FunctionCallValidationError(Exception):
- """Exception raised when FunctionCallingConverter failed to validate a function call message.
-
- This typically happens when the LLM outputs unrecognized function call / parameter names / values.
- """
-
- def __init__(self, message):
- super().__init__(message)
-
-# Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
-SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
-You have access to the following functions:
-
-{description}
-
-If you choose to call a function ONLY reply in the following format with NO suffix:
-
-
-value_1
-
-This is the value for the second parameter
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/fn_call_converter.py`
-
-The `FunctionCallValidationError` class in [`autoagent/fn_call_converter.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/fn_call_converter.py) handles a key part of this chapter's functionality:
-
-```py
- super().__init__(message)
-
-class FunctionCallValidationError(Exception):
- """Exception raised when FunctionCallingConverter failed to validate a function call message.
-
- This typically happens when the LLM outputs unrecognized function call / parameter names / values.
- """
-
- def __init__(self, message):
- super().__init__(message)
-
-# Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
-SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
-You have access to the following functions:
-
-{description}
-
-If you choose to call a function ONLY reply in the following format with NO suffix:
-
-
-value_1
-
-This is the value for the second parameter
-that can span
-multiple lines
-
-
-
-
-Reminder:
-- Function calls MUST follow the specified format, start with
-- Required parameters MUST be specified
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/fn_call_converter.py`
-
-The `index` function in [`autoagent/fn_call_converter.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/fn_call_converter.py) handles a key part of this chapter's functionality:
-
-```py
-
-@app.route('/')
-def index():
- numbers = list(range(1, 11))
- return str(numbers)
-
-if __name__ == '__main__':
- app.run(port=5000)
-
-
-
-USER: EXECUTION RESULT of [str_replace_editor]:
-File created successfully at: /workspace/app.py
-
-ASSISTANT: I have created a Python file `app.py` that will display a list of numbers from 1 to 10 when you run it. Let me run the Python file for you:
-
-
-python3 app.py > server.log 2>&1 &
-
-
-
-USER: EXECUTION RESULT of [execute_bash]:
-[1] 121
-[1]+ Exit 1 python3 app.py > server.log 2>&1
-
-ASSISTANT: Looks like the server is running with PID 121 then crashed. Let me check the server log:
-
-
-cat server.log
-
-
-
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[adapt_tools_for_gemini]
- B[FunctionCallConversionError]
- C[FunctionCallValidationError]
- D[index]
- E[convert_tool_call_to_string]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/04-agent-and-workflow-creation-patterns.md b/tutorials/autoagent-tutorial/04-agent-and-workflow-creation-patterns.md
deleted file mode 100644
index 6b8b413e..00000000
--- a/tutorials/autoagent-tutorial/04-agent-and-workflow-creation-patterns.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-layout: default
-title: "Chapter 4: Agent and Workflow Creation Patterns"
-nav_order: 4
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 4: Agent and Workflow Creation Patterns
-
-Welcome to **Chapter 4: Agent and Workflow Creation Patterns**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter focuses on effective natural-language prompts for agent and workflow generation.
-
-## Learning Goals
-
-- write clearer creation prompts for better outputs
-- separate capability requirements from implementation details
-- iterate profile/tool/workflow outputs with intent clarity
-- avoid over-specified or under-specified requests
-
-## Creation Strategy
-
-- define goal, constraints, and success criteria first
-- iterate in small prompt revisions
-- validate generated agents on representative tasks
-
-## Source References
-
-- [User Guide: Create Agent](https://autoagent-ai.github.io/docs/user-guide-how-to-create-agent)
-- [Developer Guide: Build Project](https://github.com/HKUDS/AutoAgent/blob/main/docs/docs/Dev-Guideline/dev-guide-build-your-project.md)
-
-## Summary
-
-You now have prompt patterns for more reliable AutoAgent creation flows.
-
-Next: [Chapter 5: Tooling, Python API, and Custom Extensions](05-tooling-python-api-and-custom-extensions.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/fn_call_converter.py`
-
-The `values` interface in [`autoagent/fn_call_converter.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/fn_call_converter.py) handles a key part of this chapter's functionality:
-
-```py
- """Exception raised when FunctionCallingConverter failed to validate a function call message.
-
- This typically happens when the LLM outputs unrecognized function call / parameter names / values.
- """
-
- def __init__(self, message):
- super().__init__(message)
-
-# Inspired by: https://docs.together.ai/docs/llama-3-function-calling#function-calling-w-llama-31-70b
-SYSTEM_PROMPT_SUFFIX_TEMPLATE = """
-You have access to the following functions:
-
-{description}
-
-If you choose to call a function ONLY reply in the following format with NO suffix:
-
-
-value_1
-
-This is the value for the second parameter
-that can span
-multiple lines
-
-
-
-
-Reminder:
-- Function calls MUST follow the specified format, start with
-- Required parameters MUST be specified
-- Only call one function at a time
-- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after.
-- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls
-```
-
-This interface is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `if` class in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
-from prompt_toolkit.styles import Style
-def debug_print_swarm(debug: bool, *args: str) -> None:
- if not debug:
- return
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- message = " ".join(map(str, args))
- print(f"\033[97m[\033[90m{timestamp}\033[97m]\033[90m {message}\033[0m")
-def print_in_box(text: str, console: Optional[Console] = None, title: str = "", color: str = "white") -> None:
- """
- Print the text in a box.
- :param text: the text to print.
- :param console: the console to print the text.
- :param title: the title of the box.
- :param color: the border color.
- :return:
- """
- console = console or Console()
-
- # panel = Panel(text, title=title, border_style=color, expand=True, highlight=True)
- # console.print(panel)
- console.print('_'*20 + title + '_'*20, style=f"bold {color}")
- console.print(text, highlight=True, emoji=True)
-
-
-
-def debug_print(debug: bool, *args: str, **kwargs: dict) -> None:
- if not debug:
- return
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- message = "\n".join(map(str, args))
- color = kwargs.get("color", "white")
- title = kwargs.get("title", "")
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `UserCompleter` class in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-class UserCompleter(Completer):
-
- def __init__(self, users: List[str]):
- super().__init__()
- self.users = users
- def get_completions(self, document, complete_event):
- word = document.get_word_before_cursor()
-
- if word.startswith('@'):
- prefix = word[1:] # 去掉@
- for user in self.users:
- if user.startswith(prefix):
- yield Completion(
- user,
- start_position=-len(prefix),
- style='fg:blue bold' # 蓝色加粗
- )
-def pretty_print_messages(message, **kwargs) -> None:
- # for message in messages:
- if message["role"] != "assistant" and message["role"] != "tool":
- return
- console = Console()
- if message["role"] == "tool":
- console.print("[bold blue]tool execution:[/bold blue]", end=" ")
- console.print(f"[bold purple]{message['name']}[/bold purple], result: {message['content']}")
- log_path = kwargs.get("log_path", None)
- if log_path:
- with open(log_path, 'a') as file:
- file.write(f"tool execution: {message['name']}, result: {message['content']}\n")
- return
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `debug_print_swarm` function in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
-from prompt_toolkit.formatted_text import HTML
-from prompt_toolkit.styles import Style
-def debug_print_swarm(debug: bool, *args: str) -> None:
- if not debug:
- return
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- message = " ".join(map(str, args))
- print(f"\033[97m[\033[90m{timestamp}\033[97m]\033[90m {message}\033[0m")
-def print_in_box(text: str, console: Optional[Console] = None, title: str = "", color: str = "white") -> None:
- """
- Print the text in a box.
- :param text: the text to print.
- :param console: the console to print the text.
- :param title: the title of the box.
- :param color: the border color.
- :return:
- """
- console = console or Console()
-
- # panel = Panel(text, title=title, border_style=color, expand=True, highlight=True)
- # console.print(panel)
- console.print('_'*20 + title + '_'*20, style=f"bold {color}")
- console.print(text, highlight=True, emoji=True)
-
-
-
-def debug_print(debug: bool, *args: str, **kwargs: dict) -> None:
- if not debug:
- return
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
- message = "\n".join(map(str, args))
- color = kwargs.get("color", "white")
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[values]
- B[if]
- C[UserCompleter]
- D[debug_print_swarm]
- E[print_in_box]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/04-user-mode-deep-research.md b/tutorials/autoagent-tutorial/04-user-mode-deep-research.md
new file mode 100644
index 00000000..002b7665
--- /dev/null
+++ b/tutorials/autoagent-tutorial/04-user-mode-deep-research.md
@@ -0,0 +1,435 @@
+---
+layout: default
+title: "Chapter 4: User Mode: Deep Research System"
+nav_order: 4
+parent: AutoAgent Tutorial
+format_version: v2
+why: "User Mode is the most frequently used AutoAgent capability. Understanding how SystemTriageAgent routes between WebSurferAgent, FileSurferAgent, and ProgrammingAgent — and how case_resolved signals termination — lets you write better prompts and diagnose when the system gets stuck in routing loops."
+mental_model: "SystemTriageAgent is a dispatcher: it analyzes your request, transfers control to the right specialist, waits for the specialist to return, then decides whether the task is done or needs another specialist. The transfer_to_X() functions are the routing mechanism."
+learning_outcomes:
+ - Understand how SystemTriageAgent routes between specialist agents using transfer_to_X() handoffs
+ - Know what WebSurferAgent, FileSurferAgent, and ProgrammingAgent each do
+ - Use the @mention syntax to route directly to a specific agent
+ - Interpret case_resolved vs case_not_resolved signals for task completion
+ - Understand AutoAgent's GAIA benchmark performance claims
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/system_triage_agent.py
+ - autoagent/websurfer_agent.py
+ - autoagent/filesurfer_agent.py
+ - autoagent/programming_agent.py
+ - autoagent/inner.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 4: User Mode: Deep Research System
+
+## What Problem Does This Solve?
+
+General-purpose research tasks don't fit neatly into a single tool. A question like "What is the latest Python performance benchmark and how does it compare to the 2023 results?" requires:
+
+1. Web search and browsing to find current benchmarks
+2. Document reading to parse PDFs or DOCX files
+3. Code execution to run statistical comparisons
+4. File writing to save the final report
+
+A single agent trying to do all of this becomes confused about which tool to use when. AutoAgent solves this with a **triage + specialist** architecture: `SystemTriageAgent` handles routing, and three specialist agents each do one thing well.
+
+---
+
+## The Agent Graph
+
+```mermaid
+flowchart TD
+ U[User Request] --> STA[SystemTriageAgent\nOrchestrator]
+
+ STA -->|web browsing / search| WSA[WebSurferAgent\nPlaywright + Screenshots]
+ STA -->|file reading / parsing| FSA[FileSurferAgent\nMarkdownBrowser]
+ STA -->|code / computation| PA[ProgrammingAgent\nDockerEnv]
+
+ WSA -->|transfer_back| STA
+ FSA -->|transfer_back| STA
+ PA -->|transfer_back| STA
+
+ STA -->|task complete| CR[case_resolved]
+ STA -->|task failed| CNR[case_not_resolved]
+
+ CR --> End([Return Response])
+ CNR --> End
+```
+
+Each specialist agent has access only to the tools it needs. This keeps tool schemas small and reduces LLM confusion about which tool to call.
+
+---
+
+## SystemTriageAgent (`system_triage_agent.py`)
+
+### Role
+
+`SystemTriageAgent` is the entry point for all User Mode interactions. It:
+
+1. Analyzes the user's request
+2. Decides which specialist(s) are needed
+3. Transfers control via `transfer_to_X()` functions
+4. Receives results back and synthesizes a final answer
+5. Calls `case_resolved` when the task is complete
+
+### Transfer Functions
+
+The transfer functions are injected into `SystemTriageAgent`'s function list at initialization:
+
+```python
+# autoagent/system_triage_agent.py
+
+from autoagent.types import Agent, Result
+
+def transfer_to_websurfer(context_variables: dict) -> Result:
+ """Transfer to WebSurferAgent for web browsing and search tasks.
+
+ Use when: the task requires browsing websites, searching the web,
+ or extracting information from online sources.
+ """
+ return Result(
+ value="Transferring to WebSurferAgent for web research",
+ agent=websurfer_agent,
+ )
+
+def transfer_to_filesurfer(context_variables: dict) -> Result:
+ """Transfer to FileSurferAgent for reading local files and documents.
+
+ Use when: the task involves reading PDFs, DOCX, or other local files.
+ """
+ return Result(
+ value="Transferring to FileSurferAgent for document reading",
+ agent=filesurfer_agent,
+ )
+
+def transfer_to_programming(context_variables: dict) -> Result:
+ """Transfer to ProgrammingAgent for code execution and data analysis.
+
+ Use when: the task requires writing or running Python code.
+ """
+ return Result(
+ value="Transferring to ProgrammingAgent",
+ agent=programming_agent,
+ )
+
+def case_resolved(context_variables: dict, summary: str) -> Result:
+ """Signal that the task has been successfully completed."""
+ return Result(value=f"CASE_RESOLVED: {summary}")
+
+def case_not_resolved(context_variables: dict, reason: str) -> Result:
+ """Signal that the task could not be completed."""
+ return Result(value=f"CASE_NOT_RESOLVED: {reason}")
+
+system_triage_agent = Agent(
+ name="SystemTriageAgent",
+ model="gpt-4o",
+ instructions="""You are a research coordinator. Analyze user requests and
+ route them to the appropriate specialist. After the specialist completes
+ their work, synthesize the results and call case_resolved with a summary.
+
+ Always route to specialists rather than attempting the task directly.
+ """,
+ functions=[
+ transfer_to_websurfer,
+ transfer_to_filesurfer,
+ transfer_to_programming,
+ case_resolved,
+ case_not_resolved,
+ ],
+)
+```
+
+### Handoff Flow in Detail
+
+When `SystemTriageAgent` calls `transfer_to_websurfer()`, the `MetaChain` run loop detects the `Result.agent` field and switches the active agent:
+
+```mermaid
+sequenceDiagram
+ participant MC as MetaChain
+ participant STA as SystemTriageAgent
+ participant WSA as WebSurferAgent
+
+ MC->>STA: LLM call: "Research Python benchmarks"
+ STA-->>MC: tool_call: transfer_to_websurfer()
+ MC->>MC: handle_tool_calls() → Result(agent=websurfer_agent)
+ MC->>MC: active_agent = websurfer_agent
+ MC->>WSA: LLM call with same conversation history
+ WSA-->>MC: tool_calls: browse_web(), scroll_page(), etc.
+ MC->>MC: Execute browser tools, append results
+ WSA-->>MC: tool_call: transfer_back_to_triage()
+ MC->>MC: active_agent = system_triage_agent
+ MC->>STA: LLM call with accumulated results
+ STA-->>MC: tool_call: case_resolved(summary=...)
+ MC->>MC: Terminate loop
+```
+
+The key insight: the conversation history persists across handoffs. When control returns to `SystemTriageAgent`, it sees all the messages from `WebSurferAgent`'s work and can synthesize them.
+
+---
+
+## WebSurferAgent (`websurfer_agent.py`)
+
+### Capabilities
+
+`WebSurferAgent` controls the `BrowserEnv` (Playwright) to navigate websites and extract information:
+
+```python
+# autoagent/websurfer_agent.py (tool functions)
+
+def browse_web(url: str, context_variables: dict) -> str:
+ """Navigate to a URL and return page content with screenshot reference."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ obs = web_env.navigate(url)
+ return f"URL: {obs.url}\n\nContent:\n{obs.content[:4000]}"
+
+def search_web(query: str, context_variables: dict) -> str:
+ """Search the web using the browser."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ search_url = f"https://www.google.com/search?q={quote(query)}"
+ obs = web_env.navigate(search_url)
+ return obs.content[:4000]
+
+def scroll_down(context_variables: dict) -> str:
+ """Scroll down on the current page."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ web_env.page.keyboard.press("PageDown")
+ obs = web_env._get_observation()
+ return obs.content[:2000]
+
+def click_element(selector: str, context_variables: dict) -> str:
+ """Click an element on the current page."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ obs = web_env.click(selector)
+ return obs.content[:2000]
+
+def transfer_back_to_triage(context_variables: dict, summary: str) -> Result:
+ """Return to SystemTriageAgent with research results."""
+ return Result(
+ value=f"WebSurfer completed: {summary}",
+ agent=system_triage_agent,
+ )
+```
+
+### Multimodal Screenshot Loop
+
+For visual navigation tasks, `WebSurferAgent` uses GPT-4V-style message construction:
+
+```python
+# autoagent/websurfer_agent.py
+
+def get_visual_observation(context_variables: dict) -> list[dict]:
+ """Return current page screenshot as a multimodal message part."""
+ web_env: BrowserEnv = context_variables["web_env"]
+ obs = web_env._get_observation()
+
+ # Encode screenshot as base64 for vision models
+ screenshot_b64 = base64.b64encode(obs.screenshot).decode()
+
+ return [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:image/png;base64,{screenshot_b64}",
+ "detail": "high",
+ }
+ },
+ {
+ "type": "text",
+ "text": f"Current URL: {obs.url}\n\nPage content summary:\n{obs.content[:1000]}"
+ }
+ ]
+```
+
+This allows `WebSurferAgent` to navigate pages that require visual understanding (CAPTCHA-free sites, pages with complex layouts, image-heavy content).
+
+---
+
+## FileSurferAgent (`filesurfer_agent.py`)
+
+### Capabilities
+
+`FileSurferAgent` uses `RequestsMarkdownBrowser` for document reading and file operations:
+
+```python
+# autoagent/filesurfer_agent.py (tool functions)
+
+def read_file(file_path: str, context_variables: dict) -> str:
+ """Read a file from the workspace, converting to Markdown."""
+ file_env: RequestsMarkdownBrowser = context_variables["file_env"]
+ return file_env.visit_page(file_path)
+
+def page_down_file(context_variables: dict) -> str:
+ """Scroll to the next page of the current document."""
+ file_env: RequestsMarkdownBrowser = context_variables["file_env"]
+ return file_env.page_down()
+
+def list_workspace_files(context_variables: dict) -> str:
+ """List all files in the workspace directory."""
+ workspace = context_variables.get("workspace", "./workspace")
+ files = []
+ for path in Path(workspace).rglob("*"):
+ if path.is_file():
+ files.append(str(path.relative_to(workspace)))
+ return "\n".join(files)
+
+def write_file(file_path: str, content: str, context_variables: dict) -> str:
+ """Write content to a file in the workspace."""
+ workspace = context_variables.get("workspace", "./workspace")
+ full_path = Path(workspace) / file_path
+ full_path.parent.mkdir(parents=True, exist_ok=True)
+ full_path.write_text(content)
+ return f"Written to {full_path}"
+```
+
+### File Upload Workflow
+
+Users can upload files for analysis via the workspace directory:
+
+```bash
+# Copy a file into the workspace before starting the session
+cp my_research_paper.pdf workspace/
+
+# Then in AutoAgent:
+# AutoAgent> Summarize the PDF in workspace/my_research_paper.pdf
+```
+
+`FileSurferAgent` uses `RequestsMarkdownBrowser._convert_pdf()` to extract text and then processes it page by page within the LLM's context window.
+
+---
+
+## ProgrammingAgent (`programming_agent.py`)
+
+### Capabilities
+
+`ProgrammingAgent` writes and executes Python code in the Docker sandbox:
+
+```python
+# autoagent/programming_agent.py (tool functions)
+
+def execute_python(code: str, context_variables: dict) -> str:
+ """Execute Python code in the Docker sandbox.
+
+ The sandbox maintains state between calls — variables and imports
+ persist within a session.
+ """
+ code_env: DockerEnv = context_variables["code_env"]
+ stdout, stderr, result = code_env.execute_code(code)
+
+ output = ""
+ if stdout:
+ output += f"STDOUT:\n{stdout}"
+ if stderr:
+ output += f"\nSTDERR:\n{stderr}"
+ if result:
+ output += f"\nRESULT: {result}"
+
+ return output or "Code executed successfully (no output)"
+
+def install_package(package: str, context_variables: dict) -> str:
+ """Install a Python package in the Docker sandbox."""
+ code_env: DockerEnv = context_variables["code_env"]
+ install_code = f"import subprocess; subprocess.run(['pip', 'install', '{package}'], capture_output=True)"
+ stdout, stderr, _ = code_env.execute_code(install_code)
+ return f"Installed {package}"
+
+def list_workspace_contents(context_variables: dict) -> str:
+ """List files in the mounted workspace directory."""
+ code_env: DockerEnv = context_variables["code_env"]
+ stdout, _, _ = code_env.execute_code("import os; print(os.listdir('/workspace'))")
+ return stdout
+```
+
+### Iterative Code Refinement
+
+When code fails, `ProgrammingAgent` retries with error context in the conversation history:
+
+```
+[ProgrammingAgent] Writing code to parse CSV...
+[Tool: execute_python]
+ STDERR: ImportError: No module named 'pandas'
+
+[ProgrammingAgent] Need to install pandas first
+[Tool: install_package] package=pandas
+[Tool: execute_python] (retry with same code)
+ STDOUT: Parsed 1000 rows successfully
+```
+
+This happens naturally through the conversation history — no special retry logic is needed in the agent code itself.
+
+---
+
+## Direct Agent Routing with @mention
+
+The `@AgentName` syntax in `inner.py` allows bypassing `SystemTriageAgent`:
+
+```python
+# autoagent/inner.py (simplified)
+
+def parse_user_input(message: str, registered_agents: dict) -> tuple[Agent, str]:
+ """Check if the message starts with @AgentName and route directly."""
+ if message.startswith("@"):
+ parts = message.split(" ", 1)
+ agent_name = parts[0][1:] # Strip the @
+ actual_message = parts[1] if len(parts) > 1 else ""
+
+ if agent_name in registered_agents:
+ return registered_agents[agent_name], actual_message
+
+ # Default: route through SystemTriageAgent
+ return system_triage_agent, message
+```
+
+Examples:
+
+```
+# Route directly to WebSurferAgent
+AutoAgent> @WebSurferAgent find the latest PyPI release of litellm
+
+# Route directly to ProgrammingAgent
+AutoAgent> @ProgrammingAgent run this code: import sys; print(sys.version)
+
+# Route directly to a custom registered agent
+AutoAgent> @SalesAgent recommend a product for a $50 budget in electronics
+```
+
+---
+
+## GAIA Benchmark Performance
+
+The academic paper (arxiv:2502.05957) evaluates AutoAgent on the GAIA benchmark, which tests general AI assistants on real-world tasks requiring multi-step reasoning across web, file, and code capabilities:
+
+| GAIA Level | Task Type | AutoAgent Performance |
+|------------|-----------|----------------------|
+| Level 1 | Simple factual lookups | ~85% |
+| Level 2 | Multi-step reasoning with tools | ~67% |
+| Level 3 | Complex multi-source synthesis | ~40% |
+
+GAIA Level 1 tasks are single-step (e.g., "What is the capital of France?"). Level 3 tasks require chaining 5-10 tool calls across multiple sources with complex reasoning.
+
+The benchmark is run via `evaluation/gaia/run_infer.py` — Chapter 8 covers the evaluation infrastructure in detail.
+
+---
+
+## Summary
+
+| Component | File | Role |
+|-----------|------|------|
+| `SystemTriageAgent` | `system_triage_agent.py` | Orchestrator: routes to specialists, synthesizes results |
+| `WebSurferAgent` | `websurfer_agent.py` | Web browsing via Playwright + multimodal screenshots |
+| `FileSurferAgent` | `filesurfer_agent.py` | Document reading via MarkdownBrowser + file writing |
+| `ProgrammingAgent` | `programming_agent.py` | Python code execution via DockerEnv |
+| `transfer_to_X()` | All agent files | Agent handoff via `Result(agent=next_agent)` |
+| `case_resolved` | `system_triage_agent.py` | Task completion signal |
+| `case_not_resolved` | `system_triage_agent.py` | Task failure signal |
+| `@mention` routing | `inner.py` | Bypass triage, route directly to named agent |
+| GAIA benchmark | `evaluation/gaia/` | Multi-level task evaluation (Levels 1-3) |
+
+Continue to [Chapter 5: Agent Editor: From NL to Deployed Agents](./05-agent-editor-nl-to-deployed-agents.md) to learn how the 4-phase pipeline generates, tests, and registers new agents from natural language descriptions.
diff --git a/tutorials/autoagent-tutorial/05-agent-editor-nl-to-deployed-agents.md b/tutorials/autoagent-tutorial/05-agent-editor-nl-to-deployed-agents.md
new file mode 100644
index 00000000..1f07f796
--- /dev/null
+++ b/tutorials/autoagent-tutorial/05-agent-editor-nl-to-deployed-agents.md
@@ -0,0 +1,576 @@
+---
+layout: default
+title: "Chapter 5: Agent Editor: From NL to Deployed Agents"
+nav_order: 5
+parent: AutoAgent Tutorial
+format_version: v2
+why: "The Agent Editor is AutoAgent's most distinctive capability: describing an agent in natural language and having it fully implemented, tested, and deployed. Understanding the 4-phase pipeline and the XML form schema lets you craft descriptions that generate high-quality agents and debug when generation fails."
+mental_model: "The 4-phase pipeline acts like a mini software team: AgentFormerAgent is the requirements analyst (NL → XML spec), ToolEditorAgent is the developer (XML spec → tested Python tools), AgentCreatorAgent is the architect (tools → orchestrator agent code), and the registry is the deployment platform."
+learning_outcomes:
+ - Write natural language agent descriptions that produce well-formed XML forms
+ - Understand the parse_agent_form() Pydantic validation and retry logic
+ - Know how ToolEditorAgent generates, tests, and retries tool code in Docker
+ - Understand how AgentCreatorAgent generates orchestrator agents with auto-injected transfer functions
+ - Configure GITHUB_AI_TOKEN correctly for Agent Editor to work
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/agent_former.py
+ - autoagent/form_complie.py
+ - autoagent/agent_creator.py
+ - autoagent/tool_editor.py
+ - autoagent/edit_agents.py
+ - autoagent/edit_tools.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 5: Agent Editor: From NL to Deployed Agents
+
+## What Problem Does This Solve?
+
+Writing a production-quality agent requires:
+- Defining tool schemas and implementations
+- Testing tools in isolation before wiring them to an agent
+- Generating correct orchestrator code that routes between tools
+- Registering everything in a discoverable registry
+- Handling errors and retrying during code generation
+
+The Agent Editor automates this entire pipeline. You write one sentence describing what your agent should do, and AutoAgent generates the full implementation, tests it, and deploys it — ready to use in your next `auto main` session.
+
+---
+
+## The 4-Phase Pipeline
+
+```mermaid
+flowchart TD
+ NL["Natural Language Description\n'Create a sales agent that recommends\nproducts based on budget and category'"]
+
+ subgraph "Phase 1: Requirements"
+ AFA["AgentFormerAgent\nNL → XML form"]
+ PF["parse_agent_form()\nPydantic validation"]
+ end
+
+ subgraph "Phase 2: Tool Generation"
+ TEA["ToolEditorAgent\nXML → Python tool code"]
+ DT["DockerEnv testing\n3x retry on failure"]
+ end
+
+ subgraph "Phase 3: Agent Code Generation"
+ ACA["AgentCreatorAgent\ntools → orchestrator agent"]
+ PTA["protect_tools()\nsafety wrapper"]
+ end
+
+ subgraph "Phase 4: Deployment"
+ REG["@register_plugin_agent\nRegistry + 12k token cap"]
+ RUN["run_agent()"]
+ end
+
+ NL --> AFA
+ AFA --> PF
+ PF -->|valid| TEA
+ PF -->|invalid| AFA
+ TEA --> DT
+ DT -->|pass| ACA
+ DT -->|fail, retry| TEA
+ ACA --> PTA
+ PTA --> REG
+ REG --> RUN
+```
+
+---
+
+## Phase 1: AgentFormerAgent and the XML Form
+
+### Role
+
+`AgentFormerAgent` converts your natural language description into a structured XML form that specifies:
+- Agent name and description
+- Required tools (new vs existing)
+- Tool input/output specifications
+- Agent input parameters
+
+### XML Form Schema
+
+```xml
+<agent_form>
+  <agent>
+    <name>SalesAgent</name>
+    <description>Recommends products based on user budget and category preferences</description>
+
+    <tools category="new">
+      <tool>
+        <name>recommend_product</name>
+        <description>Find products matching budget and category criteria</description>
+        <inputs>
+          <input name="budget" type="float" description="Maximum price in USD"/>
+          <input name="category" type="str" description="Product category (electronics, clothing, etc)"/>
+          <input name="preferences" type="str" description="Additional user preferences"/>
+        </inputs>
+        <output>JSON list of recommended products with name, price, and reason</output>
+      </tool>
+      <tool>
+        <name>get_product_details</name>
+        <description>Get detailed information about a specific product</description>
+        <inputs>
+          <input name="product_name" type="str" description="Name of the product to look up"/>
+        </inputs>
+        <output>Detailed product information as a string</output>
+      </tool>
+    </tools>
+
+    <tools category="existing">
+      <tool>
+        <name>search_web</name>
+      </tool>
+    </tools>
+
+    <agent_input>
+      <key>user_request</key>
+      <key>budget</key>
+    </agent_input>
+  </agent>
+</agent_form>
+```
+
+### parse_agent_form() Validation
+
+```python
+# autoagent/form_complie.py
+
+from pydantic import BaseModel, validator
+
+class ToolSpec(BaseModel):
+ name: str
+ description: str
+ inputs: list[dict]
+ output: str
+
+class AgentSpec(BaseModel):
+ name: str
+ description: str
+ new_tools: list[ToolSpec] = []
+ existing_tools: list[str] = []
+ agent_input: list[str] = []
+
+ @validator("name")
+ def name_must_be_valid_identifier(cls, v):
+ if not v.replace("_", "").replace("-", "").isalnum():
+ raise ValueError(f"Agent name '{v}' is not a valid Python identifier")
+ return v
+
+def parse_agent_form(xml_str: str, max_retries: int = 3) -> AgentSpec:
+ """Parse and validate agent XML form with retry logic.
+
+ If parsing fails, returns the error for AgentFormerAgent to fix.
+ """
+ for attempt in range(max_retries):
+ try:
+ root = ET.fromstring(xml_str)
+ agent_elem = root.find("agent")
+
+ spec = AgentSpec(
+ name=agent_elem.findtext("name", ""),
+ description=agent_elem.findtext("description", ""),
+ new_tools=[
+ ToolSpec(
+ name=t.findtext("name", ""),
+ description=t.findtext("description", ""),
+ inputs=[
+ {i.get("name"): {"type": i.get("type"), "description": i.get("description")}}
+ for i in t.findall(".//input")
+ ],
+ output=t.findtext("output", ""),
+ )
+ for t in root.findall(".//tools[@category='new']/tool")
+ ],
+ existing_tools=[
+ t.findtext("name", "")
+ for t in root.findall(".//tools[@category='existing']/tool")
+ ],
+ agent_input=[k.text for k in agent_elem.findall(".//key")],
+ )
+ return spec
+
+ except (ET.ParseError, ValidationError) as e:
+ if attempt == max_retries - 1:
+ raise
+ # Will be fed back to AgentFormerAgent as error context
+
+ raise RuntimeError("Failed to parse agent form after max retries")
+```
+
+---
+
+## Phase 2: ToolEditorAgent (`tool_editor.py`)
+
+### Role
+
+`ToolEditorAgent` takes the `AgentSpec` and generates Python code for each new tool, then tests it in Docker. If tests fail, it retries up to 3 times with the error context.
+
+### Tool Code Generation Pattern
+
+```python
+# autoagent/tool_editor.py (simplified)
+
+def generate_tool_code(spec: ToolSpec, model: str) -> str:
+ """Generate Python tool implementation from a ToolSpec."""
+ prompt = f"""Generate a Python function for this tool:
+
+Name: {spec.name}
+Description: {spec.description}
+Inputs: {json.dumps(spec.inputs, indent=2)}
+Expected output: {spec.output}
+
+Requirements:
+1. Use @register_plugin_tool decorator
+2. Include comprehensive docstring
+3. Handle errors gracefully
+4. Return a string
+"""
+ response = litellm.completion(
+ model=model,
+ messages=[{"role": "user", "content": prompt}],
+ )
+ return extract_code_block(response.choices[0].message.content)
+
+def test_tool_in_docker(
+ tool_code: str,
+ code_env: DockerEnv,
+ max_retries: int = 3,
+) -> tuple[bool, str]:
+ """Test generated tool code in Docker, returning (success, error_msg)."""
+ for attempt in range(max_retries):
+ # Write tool to temp file
+ test_code = f"""
+{tool_code}
+
+# Basic smoke test
+result = {extract_function_name(tool_code)}.__wrapped__()
+print(f"Test passed: {{result[:100]}}")
+"""
+ stdout, stderr, _ = code_env.execute_code(test_code)
+
+ if stderr and "Error" in stderr:
+ if attempt < max_retries - 1:
+ # Will regenerate with error context
+ error_context = stderr
+ continue
+ return False, stderr
+
+ return True, stdout
+
+ return False, "Max retries exceeded"
+```
+
+### Generated Tool Code Pattern
+
+Tools generated by `ToolEditorAgent` follow a consistent pattern:
+
+```python
+# Example generated by ToolEditorAgent
+# Saved to: workspace/tools/recommend_product.py
+
+from autoagent.registry import register_plugin_tool
+
+@register_plugin_tool
+def recommend_product(
+ budget: float,
+ category: str,
+ preferences: str = "",
+) -> str:
+ """Find products matching budget and category criteria.
+
+ Args:
+ budget: Maximum price in USD
+ category: Product category (electronics, clothing, etc)
+ preferences: Additional user preferences
+
+ Returns:
+ A JSON list of recommended products with name, price, and reason
+ """
+ # Generated implementation
+ import json
+
+ # Simulated product database lookup
+ products = search_product_database(category, max_price=budget)
+
+ recommendations = [
+ {
+ "name": p["name"],
+ "price": p["price"],
+ "reason": f"Matches your {category} preference within ${budget} budget"
+ }
+ for p in products[:5]
+ ]
+
+ return json.dumps(recommendations, indent=2)
+```
+
+The `@register_plugin_tool` decorator automatically:
+1. Registers the tool in the global registry under `plugin_tools` namespace
+2. Wraps the function with `truncate_output()` to cap output at 12,000 tokens
+
+---
+
+## Phase 3: AgentCreatorAgent (`agent_creator.py`)
+
+### Role
+
+`AgentCreatorAgent` assembles the tested tools into a fully functional orchestrator agent. It generates:
+1. A Python agent module with all tool imports
+2. Auto-generated `transfer_to_X()` functions for each tool
+3. An orchestrator agent that uses the tools
+4. Optional sub-agents if the spec requires them
+
+### create_agent() Function
+
+```python
+# autoagent/agent_creator.py
+
+def create_agent(spec: AgentSpec, tools: list[Callable]) -> Agent:
+ """Create a single-agent that directly calls all provided tools."""
+ return Agent(
+ name=spec.name,
+ model="gpt-4o",
+ instructions=f"""You are {spec.name}. {spec.description}
+
+You have access to the following tools: {[t.__name__ for t in tools]}
+
+Use them to fulfill user requests. Be concise and accurate.
+""",
+ functions=tools + [case_resolved, case_not_resolved],
+ )
+
+def create_orchestrator_agent(
+ spec: AgentSpec,
+ sub_agents: list[Agent],
+ tools: list[Callable],
+) -> Agent:
+ """Create an orchestrator agent that routes to sub-agents.
+
+ Auto-generates transfer_to_X() functions for each sub-agent.
+ """
+ transfer_functions = []
+
+ for sub_agent in sub_agents:
+ # Dynamically generate transfer function
+ def make_transfer(target_agent):
+ def transfer(context_variables: dict) -> Result:
+ f"""Transfer to {target_agent.name}."""
+ return Result(
+ value=f"Transferring to {target_agent.name}",
+ agent=target_agent,
+ )
+ transfer.__name__ = f"transfer_to_{target_agent.name.lower()}"
+ transfer.__doc__ = f"Use when the task requires {target_agent.name} capabilities"
+ return transfer
+
+ transfer_functions.append(make_transfer(sub_agent))
+
+ return Agent(
+ name=f"{spec.name}Orchestrator",
+ model="gpt-4o",
+ instructions=f"""You are the orchestrator for {spec.name}.
+
+Route tasks to the appropriate sub-agent:
+{chr(10).join(f'- {a.name}: {a.instructions[:100]}' for a in sub_agents)}
+""",
+ functions=transfer_functions + tools + [case_resolved, case_not_resolved],
+ )
+```
+
+### Generated Agent Code Pattern
+
+The full agent code generated and saved to workspace:
+
+```python
+# workspace/agents/sales_agent.py (generated by AgentCreatorAgent)
+
+from autoagent.registry import register_plugin_agent
+from autoagent.types import Agent, Result
+from workspace.tools.recommend_product import recommend_product
+from workspace.tools.get_product_details import get_product_details
+from autoagent.tools.search_tools import search_web
+
+def case_resolved(context_variables: dict, summary: str) -> Result:
+ return Result(value=f"CASE_RESOLVED: {summary}")
+
+def case_not_resolved(context_variables: dict, reason: str) -> Result:
+ return Result(value=f"CASE_NOT_RESOLVED: {reason}")
+
+@register_plugin_agent
+def create_sales_agent() -> Agent:
+ """Factory function for SalesAgent."""
+ return Agent(
+ name="SalesAgent",
+ model="gpt-4o",
+ instructions="""You are SalesAgent. Recommend products based on user budget
+ and category preferences. Use recommend_product to find options,
+ get_product_details for specifics, and search_web for current prices.
+ Call case_resolved when you've provided recommendations.""",
+ functions=[
+ recommend_product,
+ get_product_details,
+ search_web,
+ case_resolved,
+ case_not_resolved,
+ ],
+ )
+```
+
+---
+
+## Phase 4: Registry and Deployment
+
+### Registry Namespace Structure
+
+```mermaid
+flowchart TD
+ REG["Global Registry\n(singleton)"]
+
+ subgraph "plugin_tools namespace"
+ PT1["recommend_product\n(truncate_output wrapped)"]
+ PT2["get_product_details\n(truncate_output wrapped)"]
+ PT3["fetch_news_headlines\n(truncate_output wrapped)"]
+ end
+
+ subgraph "plugin_agents namespace"
+ PA1["create_sales_agent\n(factory function)"]
+ PA2["create_research_agent\n(factory function)"]
+ end
+
+ subgraph "workflows namespace"
+ WF1["math_solver_workflow\n(file path)"]
+ WF2["batch_research_workflow\n(file path)"]
+ end
+
+ REG --> PT1
+ REG --> PT2
+ REG --> PT3
+ REG --> PA1
+ REG --> PA2
+ REG --> WF1
+ REG --> WF2
+
+ TM["ToolMemory\n(ChromaDB)"] -.->|indexed from| PT1
+ TM -.->|indexed from| PT2
+ TM -.->|indexed from| PT3
+```
+
+### @register_plugin_agent Decorator
+
+```python
+# autoagent/registry.py (decorator behavior)
+
+def register_plugin_agent(factory_func: Callable) -> Callable:
+ """Register an agent factory in the global registry.
+
+ The factory function is stored, not the agent instance, to allow
+ fresh instantiation each time the agent is used.
+ """
+ _registry["plugin_agents"][factory_func.__name__] = factory_func
+ return factory_func
+```
+
+### Registry Introspection in Docker
+
+During Agent Editor, the framework queries the registry from inside the Docker container to get the live catalog of available tools:
+
+```python
+# autoagent/edit_agents.py
+
+def get_available_tools_catalog(code_env: DockerEnv) -> str:
+ """Query the registry from inside Docker for live tool catalog."""
+ catalog_code = """
+from autoagent.registry import get_registry
+registry = get_registry()
+tools = list(registry['plugin_tools'].keys())
+print('\\n'.join(tools))
+"""
+ stdout, _, _ = code_env.execute_code(catalog_code)
+ return stdout.strip()
+```
+
+This ensures `AgentCreatorAgent` knows exactly which tools are available when it decides which existing tools to reuse vs which new ones to generate.
+
+### protect_tools() Safety Wrapper
+
+Before registering generated tools, `protect_tools()` adds safety checks:
+
+```python
+# autoagent/edit_tools.py
+
+def protect_tools(tools: list[Callable]) -> list[Callable]:
+ """Wrap tools with safety checks before registry insertion.
+
+ - Validates tool output is a string
+ - Catches and formats exceptions instead of propagating
+ - Ensures tools don't modify context_variables unexpectedly
+ """
+ protected = []
+ for tool in tools:
+ @wraps(tool)
+ def safe_tool(*args, _original=tool, **kwargs):
+ try:
+ result = _original(*args, **kwargs)
+ if not isinstance(result, str):
+ result = str(result)
+ return result
+ except Exception as e:
+ return f"Tool error in {_original.__name__}: {type(e).__name__}: {e}"
+
+ protected.append(safe_tool)
+ return protected
+```
+
+---
+
+## GITHUB_AI_TOKEN Requirement
+
+The Agent Editor requires a `GITHUB_AI_TOKEN` because it clones the AutoAgent repository into the Docker container for self-modification:
+
+```python
+# autoagent/edit_agents.py (simplified)
+
+def setup_self_modification(code_env: DockerEnv, github_token: str) -> bool:
+ """Clone AutoAgent repo into Docker for meta-programming capabilities."""
+ clone_code = f"""
+import subprocess
+result = subprocess.run(
+ ['git', 'clone',
+ 'https://{github_token}@github.com/HKUDS/AutoAgent.git',
+ '/autoagent'],
+ capture_output=True, text=True
+)
+print('Clone successful' if result.returncode == 0 else result.stderr)
+"""
+ stdout, stderr, _ = code_env.execute_code(clone_code)
+ return "Clone successful" in stdout
+```
+
+Without this token, the Agent Editor will fail with:
+
+```
+Error: GITHUB_AI_TOKEN not set. Agent Editor requires GitHub access for self-modification.
+Set GITHUB_AI_TOKEN in your .env file to use this feature.
+```
+
+---
+
+## Summary
+
+| Component | File | Role |
+|-----------|------|------|
+| `AgentFormerAgent` | `agent_former.py` | Phase 1: NL → XML agent form |
+| `parse_agent_form()` | `form_complie.py` | Phase 1: XML validation with Pydantic + retry |
+| `ToolEditorAgent` | `tool_editor.py` | Phase 2: XML → Python tools + Docker testing |
+| `AgentCreatorAgent` | `agent_creator.py` | Phase 3: tools → orchestrator agent code |
+| `create_agent()` | `agent_creator.py` | Simple agent factory for single-level agents |
+| `create_orchestrator_agent()` | `agent_creator.py` | Multi-level agent with auto transfer functions |
+| `@register_plugin_agent` | `registry.py` | Phase 4: deploy to registry with factory pattern |
+| `protect_tools()` | `edit_tools.py` | Safety wrapper before tool registration |
+| `GITHUB_AI_TOKEN` | `.env` | Required for Docker self-modification |
+| XML form schema | `form_complie.py` | `<agent_form>` root → `<agent>`, `<tools>`, `<agent_input>` elements |
+
+Continue to [Chapter 6: Workflow Editor: Async Event-Driven Pipelines](./06-workflow-editor-async-pipelines.md) to learn how EventEngine composes async parallel pipelines.
diff --git a/tutorials/autoagent-tutorial/05-tooling-python-api-and-custom-extensions.md b/tutorials/autoagent-tutorial/05-tooling-python-api-and-custom-extensions.md
deleted file mode 100644
index 89336b74..00000000
--- a/tutorials/autoagent-tutorial/05-tooling-python-api-and-custom-extensions.md
+++ /dev/null
@@ -1,223 +0,0 @@
----
-layout: default
-title: "Chapter 5: Tooling, Python API, and Custom Extensions"
-nav_order: 5
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 5: Tooling, Python API, and Custom Extensions
-
-Welcome to **Chapter 5: Tooling, Python API, and Custom Extensions**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter explains extension surfaces for deeper customization.
-
-## Learning Goals
-
-- add custom tools through documented development flows
-- use Python integration paths when CLI flows are insufficient
-- maintain extension quality and safety
-- keep extension logic maintainable under change
-
-## Extension Surfaces
-
-- developer guide for tool creation
-- Python documentation entry points
-- starter-project patterns for custom workflows
-
-## Source References
-
-- [Create Tools Docs](https://autoagent-ai.github.io/docs/dev-guide-create-tools)
-- [Python Docs](https://autoagent-ai.github.io/docs/python)
-- [Starter Projects](https://github.com/HKUDS/AutoAgent/tree/main/docs/docs/Starter-Projects)
-
-## Summary
-
-You now have a path for controlled AutoAgent extensibility.
-
-Next: [Chapter 6: CLI Operations and Provider Strategy](06-cli-operations-and-provider-strategy.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/util.py`
-
-The `get_type_info` function in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
-# }
-
-def get_type_info(annotation, base_type_map):
- # 处理基本类型
- if annotation in base_type_map:
- return {"type": base_type_map[annotation]}
-
- # 处理typing类型
- origin = get_origin(annotation)
- if origin is not None:
- args = get_args(annotation)
-
- # 处理List类型
- if origin is list or origin is List:
- item_type = args[0]
- return {
- "type": "array",
- "items": get_type_info(item_type, base_type_map)
- }
-
- # 处理Dict类型
- elif origin is dict or origin is Dict:
- key_type, value_type = args
- if key_type != str:
- raise ValueError("Dictionary keys must be strings")
-
- # 如果value_type是TypedDict或Pydantic模型
- if (hasattr(value_type, "__annotations__") or
- (isinstance(value_type, type) and issubclass(value_type, BaseModel))):
- return get_type_info(value_type, base_type_map)
-
- # 普通Dict类型
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `function_to_json` function in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-# def function_to_json(func) -> dict:
-# """
-# Converts a Python function into a JSON-serializable dictionary
-# that describes the function's signature, including its name,
-# description, and parameters.
-
-# Args:
-# func: The function to be converted.
-
-# Returns:
-# A dictionary representing the function's signature in JSON format.
-# """
-# type_map = {
-# str: "string",
-# int: "integer",
-# float: "number",
-# bool: "boolean",
-# list: "array",
-# dict: "object",
-# type(None): "null",
-# }
-
-# try:
-# signature = inspect.signature(func)
-# except ValueError as e:
-# raise ValueError(
-# f"Failed to get signature for function {func.__name__}: {str(e)}"
-# )
-
-# parameters = {}
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `run_command_in_container_v1` function in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
- }
-
-def run_command_in_container_v1(command, stream_callback: Callable = None):
- # TCP parameters
- hostname = 'localhost'
- port = 12345 # TCP port mapped to the container
- buffer_size = 4096
-
- # Create TCP client
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
- s.connect((hostname, port))
- s.sendall(command.encode())
- full_response = b""
- while True:
- chunk = s.recv(buffer_size)
- if not chunk:
- break
- full_response += chunk
- if stream_callback:
- stream_callback(chunk)
- if len(chunk) < buffer_size:
- # If the received data is less than the buffer size, it may have been received
- break
-
- # Decode the complete response
- try:
- decoded_response = full_response.decode('utf-8')
- return json.loads(decoded_response)
- except json.JSONDecodeError as e:
- print(f"JSON parsing error: {e}")
- print(f"Raw response received: {decoded_response}")
- return {"status": -1, "result": "Response parsing error"}
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/util.py`
-
-The `run_command_in_container` function in [`autoagent/util.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/util.py) handles a key part of this chapter's functionality:
-
-```py
- }
-
-def run_command_in_container_v1(command, stream_callback: Callable = None):
- # TCP parameters
- hostname = 'localhost'
- port = 12345 # TCP port mapped to the container
- buffer_size = 4096
-
- # Create TCP client
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
- s.connect((hostname, port))
- s.sendall(command.encode())
- full_response = b""
- while True:
- chunk = s.recv(buffer_size)
- if not chunk:
- break
- full_response += chunk
- if stream_callback:
- stream_callback(chunk)
- if len(chunk) < buffer_size:
- # If the received data is less than the buffer size, it may have been received
- break
-
- # Decode the complete response
- try:
- decoded_response = full_response.decode('utf-8')
- return json.loads(decoded_response)
- except json.JSONDecodeError as e:
- print(f"JSON parsing error: {e}")
- print(f"Raw response received: {decoded_response}")
- return {"status": -1, "result": "Response parsing error"}
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[get_type_info]
- B[function_to_json]
- C[run_command_in_container_v1]
- D[run_command_in_container]
- E[make_tool_message]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/06-cli-operations-and-provider-strategy.md b/tutorials/autoagent-tutorial/06-cli-operations-and-provider-strategy.md
deleted file mode 100644
index 88c245f0..00000000
--- a/tutorials/autoagent-tutorial/06-cli-operations-and-provider-strategy.md
+++ /dev/null
@@ -1,222 +0,0 @@
----
-layout: default
-title: "Chapter 6: CLI Operations and Provider Strategy"
-nav_order: 6
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 6: CLI Operations and Provider Strategy
-
-Welcome to **Chapter 6: CLI Operations and Provider Strategy**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter covers operational patterns for running AutoAgent day-to-day.
-
-## Learning Goals
-
-- run CLI commands with consistent parameters
-- switch providers intentionally by task profile
-- monitor execution reliability across models
-- reduce operational surprises in multi-provider runs
-
-## Operational Priorities
-
-- pin preferred completion model per workload class
-- document provider fallbacks and failure policy
-- keep runtime flags explicit in automation scripts
-
-## Source References
-
-- [AutoAgent README: CLI Mode](https://github.com/HKUDS/AutoAgent/blob/main/README.md)
-- [User Guide Daily Tasks](https://github.com/HKUDS/AutoAgent/blob/main/docs/docs/User-Guideline/user-guide-daily-tasks.md)
-
-## Summary
-
-You now have a repeatable operations model for AutoAgent CLI workflows.
-
-Next: [Chapter 7: Benchmarking, Evaluation, and Quality Gates](07-benchmarking-evaluation-and-quality-gates.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/registry.py`
-
-The `Registry` class in [`autoagent/registry.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/registry.py) handles a key part of this chapter's functionality:
-
-```py
- data['func'] = None # or other default value
- return cls(**data)
-class Registry:
- _instance = None
- _registry: Dict[str, Dict[str, Callable]] = {
- "tools": {},
- "agents": {},
- "plugin_tools": {},
- "plugin_agents": {},
- "workflows": {}
- }
- _registry_info: Dict[str, Dict[str, FunctionInfo]] = {
- "tools": {},
- "agents": {},
- "plugin_tools": {},
- "plugin_agents": {},
- "workflows": {}
- }
-
- def __new__(cls):
- if cls._instance is None:
- cls._instance = super().__new__(cls)
- return cls._instance
-
- def register(self,
- type: Literal["tool", "agent", "plugin_tool", "plugin_agent", "workflow"],
- name: str = None,
- func_name: str = None):
- """
- 统一的注册装饰器
- Args:
- type: 注册类型,"tool" 或 "agent"
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/registry.py`
-
-The `encode_string_by_tiktoken` function in [`autoagent/registry.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/registry.py) handles a key part of this chapter's functionality:
-
-```py
-MAX_OUTPUT_LENGTH = 12000
-
-def encode_string_by_tiktoken(content: str, model_name: str = "gpt-4o"):
- ENCODER = tiktoken.encoding_for_model(model_name)
- tokens = ENCODER.encode(content)
- return tokens
-
-
-def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
- ENCODER = tiktoken.encoding_for_model(model_name)
- content = ENCODER.decode(tokens)
- return content
-def truncate_output(output: str, max_length: int = MAX_OUTPUT_LENGTH) -> str:
- """Truncate output if it exceeds max_length"""
- tokens = encode_string_by_tiktoken(output)
- if len(tokens) > max_length:
- return decode_tokens_by_tiktoken(tokens[:max_length]) + f"\n\n[TOOL WARNING] Output truncated, exceeded {max_length} tokens)\n[TOOL SUGGESTION] Maybe this tool with direct output is not an optimal choice, consider save the output to a file in the `workplace/` directory to implement the same functionality."
- return output
-
-@dataclass
-class FunctionInfo:
- name: str
- func_name: str
- func: Callable
- args: List[str]
- docstring: Optional[str]
- body: str
- return_type: Optional[str]
- file_path: Optional[str]
- def to_dict(self) -> dict:
- # using asdict, but exclude func field because it cannot be serialized
- d = asdict(self)
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/registry.py`
-
-The `decode_tokens_by_tiktoken` function in [`autoagent/registry.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/registry.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def decode_tokens_by_tiktoken(tokens: list[int], model_name: str = "gpt-4o"):
- ENCODER = tiktoken.encoding_for_model(model_name)
- content = ENCODER.decode(tokens)
- return content
-def truncate_output(output: str, max_length: int = MAX_OUTPUT_LENGTH) -> str:
- """Truncate output if it exceeds max_length"""
- tokens = encode_string_by_tiktoken(output)
- if len(tokens) > max_length:
- return decode_tokens_by_tiktoken(tokens[:max_length]) + f"\n\n[TOOL WARNING] Output truncated, exceeded {max_length} tokens)\n[TOOL SUGGESTION] Maybe this tool with direct output is not an optimal choice, consider save the output to a file in the `workplace/` directory to implement the same functionality."
- return output
-
-@dataclass
-class FunctionInfo:
- name: str
- func_name: str
- func: Callable
- args: List[str]
- docstring: Optional[str]
- body: str
- return_type: Optional[str]
- file_path: Optional[str]
- def to_dict(self) -> dict:
- # using asdict, but exclude func field because it cannot be serialized
- d = asdict(self)
- d.pop('func') # remove func field
- return d
-
- @classmethod
- def from_dict(cls, data: dict) -> 'FunctionInfo':
- # if you need to create an object from a dictionary
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/registry.py`
-
-The `truncate_output` function in [`autoagent/registry.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/registry.py) handles a key part of this chapter's functionality:
-
-```py
- content = ENCODER.decode(tokens)
- return content
-def truncate_output(output: str, max_length: int = MAX_OUTPUT_LENGTH) -> str:
- """Truncate output if it exceeds max_length"""
- tokens = encode_string_by_tiktoken(output)
- if len(tokens) > max_length:
- return decode_tokens_by_tiktoken(tokens[:max_length]) + f"\n\n[TOOL WARNING] Output truncated, exceeded {max_length} tokens)\n[TOOL SUGGESTION] Maybe this tool with direct output is not an optimal choice, consider save the output to a file in the `workplace/` directory to implement the same functionality."
- return output
-
-@dataclass
-class FunctionInfo:
- name: str
- func_name: str
- func: Callable
- args: List[str]
- docstring: Optional[str]
- body: str
- return_type: Optional[str]
- file_path: Optional[str]
- def to_dict(self) -> dict:
- # using asdict, but exclude func field because it cannot be serialized
- d = asdict(self)
- d.pop('func') # remove func field
- return d
-
- @classmethod
- def from_dict(cls, data: dict) -> 'FunctionInfo':
- # if you need to create an object from a dictionary
- if 'func' not in data:
- data['func'] = None # or other default value
- return cls(**data)
-class Registry:
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[Registry]
- B[encode_string_by_tiktoken]
- C[decode_tokens_by_tiktoken]
- D[truncate_output]
- E[register_tool]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/06-workflow-editor-async-pipelines.md b/tutorials/autoagent-tutorial/06-workflow-editor-async-pipelines.md
new file mode 100644
index 00000000..299fe40a
--- /dev/null
+++ b/tutorials/autoagent-tutorial/06-workflow-editor-async-pipelines.md
@@ -0,0 +1,458 @@
+---
+layout: default
+title: "Chapter 6: Workflow Editor: Async Event-Driven Pipelines"
+nav_order: 6
+parent: AutoAgent Tutorial
+format_version: v2
+why: "The MetaChain agent loop is sequential — one agent at a time, turn by turn. The Workflow Editor's EventEngine unlocks true parallelism: multiple agents running simultaneously, with results aggregated through event dependencies. This is essential for batch processing, parallel problem solving, and performance-critical pipelines."
+mental_model: "EventEngine is a dependency graph executor: each node (event handler) declares what events it listens for, and the engine runs all handlers whose dependencies are satisfied concurrently — like asyncio's gather() but with named events and structured return behaviors."
+learning_outcomes:
+ - Distinguish when to use EventEngine vs the MetaChain agent loop
+ - Define event handlers with listen_group() and dependency declarations
+ - Use GOTO and ABORT return behaviors for flow control
+ - Understand how WorkflowCreatorAgent generates workflow Python files
+ - Trace the math_solver_workflow example for parallel solving + vote aggregation
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/flow/core.py
+ - autoagent/flow/types.py
+ - autoagent/workflow_creator.py
+ - autoagent/edit_workflow.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 6: Workflow Editor: Async Event-Driven Pipelines
+
+## What Problem Does This Solve?
+
+The MetaChain agent loop is fundamentally sequential: one agent runs, calls tools, gets results, then either hands off or terminates. For many tasks, this is fine. But some tasks need parallelism:
+
+- Solve 10 math problems using 3 different methods simultaneously, then vote on the best answer
+- Run web research and document analysis in parallel, then merge results
+- Process a batch of inputs concurrently with rate limiting
+
+These patterns require a different execution model. AutoAgent's EventEngine provides this through an **async event-driven pipeline** where handlers declare dependencies and run concurrently when those dependencies are satisfied.
+
+### EventEngine vs Agent Loop
+
+| Dimension | MetaChain Agent Loop | EventEngine Workflow |
+|-----------|----------------------|---------------------|
+| Execution | Sequential | Async parallel |
+| Coordination | Agent handoffs via Result | Event dependencies |
+| State | context_variables dict | Event data passed between handlers |
+| Best for | Conversational tasks, open-ended research | Batch processing, parallel computation |
+| Entry point | `auto main` → `MetaChain.run()` | `run_workflow()` |
+
+---
+
+## EventEngine Architecture
+
+```mermaid
+flowchart TD
+ subgraph "EventEngine (flow/core.py)"
+ IQ["invoke_queue\nasyncio.Queue"]
+ EL["Event loop\nasyncio.create_task()"]
+ subgraph "Event handlers (listen_group)"
+ H1["handler_A\n@listen_group('START')"]
+ H2["handler_B\n@listen_group('START')"]
+ H3["handler_C\n@listen_group('handler_A', 'handler_B')"]
+ end
+ end
+
+ START["START event"] --> IQ
+ IQ --> EL
+ EL -->|dep satisfied| H1
+ EL -->|dep satisfied| H2
+ H1 -->|emit result| IQ
+ H2 -->|emit result| IQ
+ IQ --> EL
+ EL -->|both deps satisfied| H3
+ H3 -->|RETURN or GOTO or ABORT| End([Workflow complete])
+```
+
+`handler_A` and `handler_B` run in parallel immediately when `START` fires. `handler_C` waits until both complete, then runs on their combined output.
+
+---
+
+## Core Types (`flow/types.py`)
+
+```python
+# autoagent/flow/types.py
+
+from enum import Enum
+from dataclasses import dataclass
+from typing import Any
+
+class ReturnBehavior(Enum):
+ """Controls what happens after an event handler returns."""
+ RETURN = "return" # Normal: emit result, continue pipeline
+ GOTO = "goto" # Jump to a specific event handler
+ ABORT = "abort" # Terminate the entire workflow immediately
+
+@dataclass
+class BaseEvent:
+ """Base class for all events in the EventEngine."""
+ name: str # Event identifier
+ data: Any = None # Payload passed to waiting handlers
+ source: str = "" # Handler that emitted this event
+
+@dataclass
+class EventGroup:
+ """A named collection of events that a handler listens to."""
+ events: list[str] # Event names this group depends on
+ group_name: str = "" # Optional name for this dependency group
+
+@dataclass
+class WorkflowResult:
+ """Final result from a completed workflow."""
+ output: Any
+ events: list[BaseEvent] # All events that fired
+ success: bool = True
+ error: str = ""
+```
+
+---
+
+## listen_group() Decorator
+
+The `listen_group()` decorator is how handlers declare their event dependencies:
+
+```python
+# autoagent/flow/core.py
+
+def listen_group(*event_names: str, max_retries: int = 1):
+ """Decorator that registers a function as an event handler.
+
+ The function runs when ALL specified events have fired.
+
+ Args:
+ event_names: Event names this handler depends on
+ max_retries: How many times to retry on failure
+ """
+ def decorator(func):
+ func._listen_group = EventGroup(
+ events=list(event_names),
+ group_name=func.__name__,
+ )
+ func._max_retries = max_retries
+ return func
+ return decorator
+```
+
+Usage example:
+
+```python
+# autoagent/flow/math_solver_workflow_flow.py (example workflow)
+
+from autoagent.flow.core import EventEngineCls, listen_group, GOTO, ABORT
+from autoagent.flow.types import BaseEvent, ReturnBehavior
+
+engine = EventEngineCls()
+
+@listen_group("START")
+async def solve_with_chain_of_thought(event: BaseEvent) -> BaseEvent:
+ """Solve the math problem using chain-of-thought reasoning."""
+ problem = event.data["problem"]
+ result = await call_llm_cot(problem)
+ return BaseEvent(name="cot_result", data={"answer": result, "method": "cot"})
+
+@listen_group("START")
+async def solve_with_python(event: BaseEvent) -> BaseEvent:
+ """Solve the math problem by generating and running Python code."""
+ problem = event.data["problem"]
+ code = await generate_math_code(problem)
+ result = await execute_in_docker(code)
+ return BaseEvent(name="python_result", data={"answer": result, "method": "python"})
+
+@listen_group("START")
+async def solve_with_symbolic(event: BaseEvent) -> BaseEvent:
+ """Solve the math problem using symbolic math (sympy)."""
+ problem = event.data["problem"]
+ result = await sympy_solve(problem)
+ return BaseEvent(name="symbolic_result", data={"answer": result, "method": "symbolic"})
+
+@listen_group("cot_result", "python_result", "symbolic_result")
+async def vote_on_answer(
+ cot_event: BaseEvent,
+ python_event: BaseEvent,
+ symbolic_event: BaseEvent,
+) -> BaseEvent:
+ """Aggregate three solutions and return the majority answer."""
+ answers = [
+ cot_event.data["answer"],
+ python_event.data["answer"],
+ symbolic_event.data["answer"],
+ ]
+ # Majority vote
+ from collections import Counter
+ most_common = Counter(answers).most_common(1)[0][0]
+
+ return BaseEvent(
+ name="WORKFLOW_COMPLETE",
+ data={"final_answer": most_common, "all_answers": answers}
+ )
+```
+
+The three `@listen_group("START")` handlers run **concurrently** as asyncio tasks. The `vote_on_answer` handler only fires when all three have completed.
+
+---
+
+## EventEngine Core (`flow/core.py`)
+
+```python
+# autoagent/flow/core.py (simplified)
+
+import asyncio
+from typing import Callable
+
+class EventEngineCls:
+ def __init__(self, max_async_events: int = 10):
+ self.handlers: dict[frozenset, Callable] = {}
+ self.completed_events: dict[str, BaseEvent] = {}
+ self.max_async_events = max_async_events
+ self._semaphore = asyncio.Semaphore(max_async_events)
+
+ def register(self, func: Callable) -> None:
+ """Register a handler by its listen_group dependency set."""
+ if hasattr(func, "_listen_group"):
+ key = frozenset(func._listen_group.events)
+ self.handlers[key] = func
+
+ async def invoke_event(self, event: BaseEvent) -> None:
+ """Fire an event and run all handlers whose deps are now satisfied."""
+ self.completed_events[event.name] = event
+
+ # Find handlers whose all dependencies are now satisfied
+ ready = []
+ for dep_set, handler in self.handlers.items():
+ if all(dep in self.completed_events for dep in dep_set):
+ if handler.__name__ not in self.completed_events:
+ ready.append(handler)
+
+ # Run all ready handlers concurrently
+ async def run_handler(h):
+ async with self._semaphore:
+ deps = [self.completed_events[dep] for dep in h._listen_group.events]
+ result = await h(*deps)
+
+ if isinstance(result, tuple) and result[0] == GOTO:
+ # Jump to another handler
+ target = result[1]
+ await self.invoke_event(BaseEvent(name=target))
+ elif result == ABORT:
+ # Terminate workflow
+ raise WorkflowAbortError("Workflow aborted by handler")
+ else:
+ await self.invoke_event(result)
+
+ await asyncio.gather(*[run_handler(h) for h in ready])
+
+ async def run(self, initial_data: dict) -> WorkflowResult:
+ """Start the workflow with a START event."""
+ try:
+ await self.invoke_event(BaseEvent(name="START", data=initial_data))
+ final = self.completed_events.get("WORKFLOW_COMPLETE")
+ return WorkflowResult(
+ output=final.data if final else None,
+ events=list(self.completed_events.values()),
+ success=True,
+ )
+ except WorkflowAbortError as e:
+ return WorkflowResult(output=None, events=[], success=False, error=str(e))
+```
+
+---
+
+## GOTO and ABORT Behaviors
+
+### GOTO
+
+Jump to a different event handler, bypassing normal dependency resolution:
+
+```python
+@listen_group("validation_result")
+async def check_answer_quality(event: BaseEvent) -> tuple | BaseEvent:
+ """Check if the answer meets quality threshold."""
+ answer = event.data["answer"]
+ confidence = event.data.get("confidence", 0.0)
+
+ if confidence < 0.7:
+ # Not confident enough — retry with a different method
+ return (GOTO, "solve_with_python")
+
+ return BaseEvent(name="quality_passed", data=event.data)
+```
+
+### ABORT
+
+Terminate the entire workflow immediately:
+
+```python
+@listen_group("input_validation")
+async def validate_input(event: BaseEvent) -> BaseEvent:
+ """Validate workflow input before processing."""
+ problem = event.data.get("problem", "")
+
+ if not problem or len(problem) < 5:
+ return ABORT # Terminate workflow, WorkflowResult.success = False
+
+ return BaseEvent(name="START", data=event.data)
+```
+
+---
+
+## WorkflowCreatorAgent (`workflow_creator.py`)
+
+`WorkflowCreatorAgent` generates workflow Python files from natural language descriptions, following the same 4-phase pattern as the Agent Editor:
+
+```python
+# autoagent/workflow_creator.py
+
+class WorkflowCreatorAgent:
+ """Generates EventEngine workflow code from NL descriptions."""
+
+ def generate_workflow(
+ self,
+ description: str,
+ code_env: DockerEnv,
+ ) -> str:
+ """Full pipeline: NL → workflow spec → Python code → test → register."""
+
+ # Phase 1: Generate workflow spec (event graph)
+ spec = self._generate_spec(description)
+
+ # Phase 2: Generate Python code
+ code = self._generate_code(spec)
+
+ # Phase 3: Test in Docker
+ success, error = self._test_workflow(code, code_env)
+ if not success:
+ # Retry with error context
+ code = self._regenerate_with_error(spec, error)
+
+ # Phase 4: Register
+ self._register_workflow(spec.name, code)
+ return code
+
+ def _generate_spec(self, description: str) -> WorkflowSpec:
+ """Use LLM to convert NL to event graph specification."""
+ # Returns WorkflowSpec with handler names and dependencies
+ ...
+```
+
+### create_workflow() and run_workflow()
+
+```python
+# autoagent/edit_workflow.py
+
+def create_workflow(name: str, code: str) -> None:
+ """Save workflow code to workspace and register in registry."""
+ path = Path(f"workspace/workflows/{name}_flow.py")
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(code)
+
+ # Register in global registry
+ registry = get_registry()
+ registry["workflows"][name] = path
+
+def run_workflow(name: str, input_data: dict) -> WorkflowResult:
+ """Load and execute a registered workflow."""
+ registry = get_registry()
+ workflow_path = registry["workflows"][name]
+
+ # Dynamically import the workflow module
+ spec = importlib.util.spec_from_file_location(name, workflow_path)
+ module = importlib.util.module_from_spec(spec)
+ spec.loader.exec_module(module)
+
+ # Get the engine instance and run
+ engine = module.engine
+ return asyncio.run(engine.run(input_data))
+```
+
+---
+
+## max_async_events Parallelism Control
+
+The `max_async_events` parameter in `EventEngineCls` controls maximum concurrent event handlers via an asyncio semaphore:
+
+```python
+# Conservative: 3 concurrent LLM calls max (respects rate limits)
+engine = EventEngineCls(max_async_events=3)
+
+# Aggressive: 20 concurrent handlers (for non-LLM tasks like HTTP requests)
+engine = EventEngineCls(max_async_events=20)
+
+# Default: 10 concurrent handlers
+engine = EventEngineCls() # max_async_events=10
+```
+
+For workflows that call LLM APIs, keep `max_async_events` low (3-5) to avoid rate limiting. For workflows that do I/O-bound work (HTTP requests, file processing), higher values improve throughput.
+
+---
+
+## The math_solver_workflow Example
+
+The `math_solver_workflow_flow.py` is included in the repository as a reference implementation. Its full flow:
+
+```mermaid
+flowchart TD
+ S[START: problem string]
+
+ S -->|parallel| CoT[solve_with_chain_of_thought\ngpt-4o, step-by-step reasoning]
+ S -->|parallel| PY[solve_with_python\ngenerate + execute Python code]
+ S -->|parallel| SY[solve_with_symbolic\nsympy symbolic math]
+
+ CoT -->|cot_result| V[vote_on_answer\nmajority of 3 answers]
+ PY -->|python_result| V
+ SY -->|symbolic_result| V
+
+ V -->|WORKFLOW_COMPLETE| OUT["final_answer: majority vote\nall_answers: [cot, python, symbolic]"]
+```
+
+To run it:
+
+```bash
+# In AutoAgent CLI:
+AutoAgent> Run the math_solver_workflow with problem: "What is the derivative of x^3 + 2x^2 - 5x + 3?"
+```
+
+Or programmatically:
+
+```python
+from autoagent.edit_workflow import run_workflow
+
+result = run_workflow(
+ "math_solver_workflow",
+ {"problem": "What is the derivative of x^3 + 2x^2 - 5x + 3?"}
+)
+print(result.output["final_answer"]) # "3x^2 + 4x - 5"
+```
+
+---
+
+## Summary
+
+| Component | File | Role |
+|-----------|------|------|
+| `EventEngineCls` | `flow/core.py` | Async pipeline executor with dependency resolution |
+| `listen_group()` | `flow/core.py` | Decorator to declare handler event dependencies |
+| `invoke_event()` | `flow/core.py` | Fire an event and trigger ready handlers concurrently |
+| `BaseEvent` | `flow/types.py` | Event with name + data payload |
+| `EventGroup` | `flow/types.py` | Named set of event dependencies |
+| `ReturnBehavior` | `flow/types.py` | RETURN / GOTO / ABORT flow control |
+| `GOTO` | `flow/core.py` | Jump to named handler bypassing dependency resolution |
+| `ABORT` | `flow/core.py` | Terminate workflow immediately |
+| `max_async_events` | `flow/core.py` | Semaphore for concurrency control |
+| `WorkflowCreatorAgent` | `workflow_creator.py` | NL → EventEngine workflow code generator |
+| `create_workflow()` | `edit_workflow.py` | Save + register workflow file |
+| `run_workflow()` | `edit_workflow.py` | Load + execute a registered workflow |
+| `math_solver_workflow_flow.py` | `flow/` | Reference: parallel solving + vote aggregation |
+
+Continue to [Chapter 7: Memory, Tool Retrieval, and Third-Party APIs](./07-memory-tool-retrieval-apis.md) to learn how AutoAgent uses ChromaDB and LLM-based reranking to discover tools from large catalogs.
diff --git a/tutorials/autoagent-tutorial/07-benchmarking-evaluation-and-quality-gates.md b/tutorials/autoagent-tutorial/07-benchmarking-evaluation-and-quality-gates.md
deleted file mode 100644
index 79a8c583..00000000
--- a/tutorials/autoagent-tutorial/07-benchmarking-evaluation-and-quality-gates.md
+++ /dev/null
@@ -1,223 +0,0 @@
----
-layout: default
-title: "Chapter 7: Benchmarking, Evaluation, and Quality Gates"
-nav_order: 7
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 7: Benchmarking, Evaluation, and Quality Gates
-
-Welcome to **Chapter 7: Benchmarking, Evaluation, and Quality Gates**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter focuses on evaluation rigor for AutoAgent outputs.
-
-## Learning Goals
-
-- align evaluation goals with benchmark constraints
-- interpret benchmark claims and reproduction boundaries
-- define pass/fail criteria for internal tasks
-- prevent quality regressions over iterative updates
-
-## Evaluation Guidance
-
-- benchmark on representative scenarios, not only demos
-- include cost/latency/accuracy tradeoff reporting
-- gate production rollouts on repeatable evaluation passes
-
-## Source References
-
-- [AutoAgent Paper](https://arxiv.org/abs/2502.05957)
-- [GAIA Leaderboard](https://gaia-benchmark-leaderboard.hf.space/)
-- [AutoAgent Evaluation Directory](https://github.com/HKUDS/AutoAgent/tree/main/evaluation)
-
-## Summary
-
-You now have an evaluation loop for safer AutoAgent evolution.
-
-Next: [Chapter 8: Contribution Workflow and Production Governance](08-contribution-workflow-and-production-governance.md)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `autoagent/server.py`
-
-The `AgentResponse` class in [`autoagent/server.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/server.py) handles a key part of this chapter's functionality:
-
-```py
- content: str
-
-class AgentResponse(BaseModel):
- result: str
- messages: List
- agent_name: str
-# 为所有注册的tools创建endpoints
-@app.on_event("startup")
-def create_tool_endpoints():
- for tool_name, tool_func in registry.tools.items():
- # 创建动态的POST endpoint
- async def create_tool_endpoint(request: ToolRequest, func=tool_func):
- try:
- # 检查必需参数
- sig = inspect.signature(func)
- required_params = {
- name for name, param in sig.parameters.items()
- if param.default == inspect.Parameter.empty
- }
-
- # 验证是否提供了所有必需参数
- if not all(param in request.args for param in required_params):
- missing = required_params - request.args.keys()
- raise HTTPException(
- status_code=400,
- detail=f"Missing required parameters: {missing}"
- )
-
- result = func(**request.args)
- return {"status": "success", "result": result}
- except Exception as e:
- raise HTTPException(status_code=400, detail=str(e))
-```
-
-This class is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/server.py`
-
-The `lifespan` function in [`autoagent/server.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/server.py) handles a key part of this chapter's functionality:
-
-```py
-import inspect
-
-# 定义lifespan上下文管理器
-@asynccontextmanager
-async def lifespan(app: FastAPI):
- # 启动时执行
- await create_agent_endpoints(app)
- yield
- # 关闭时执行
- # 清理代码(如果需要)
-
-app = FastAPI(title="MetaChain API", lifespan=lifespan)
-
-class ToolRequest(BaseModel):
- args: Dict[str, Any]
-
-class AgentRequest(BaseModel):
- model: str
- query: str
- context_variables: Optional[Dict[str, Any]] = {}
-
-class Message(BaseModel):
- role: str
- content: str
-
-class AgentResponse(BaseModel):
- result: str
- messages: List
- agent_name: str
-# 为所有注册的tools创建endpoints
-@app.on_event("startup")
-def create_tool_endpoints():
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/server.py`
-
-The `create_tool_endpoints` function in [`autoagent/server.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/server.py) handles a key part of this chapter's functionality:
-
-```py
-# 为所有注册的tools创建endpoints
-@app.on_event("startup")
-def create_tool_endpoints():
- for tool_name, tool_func in registry.tools.items():
- # 创建动态的POST endpoint
- async def create_tool_endpoint(request: ToolRequest, func=tool_func):
- try:
- # 检查必需参数
- sig = inspect.signature(func)
- required_params = {
- name for name, param in sig.parameters.items()
- if param.default == inspect.Parameter.empty
- }
-
- # 验证是否提供了所有必需参数
- if not all(param in request.args for param in required_params):
- missing = required_params - request.args.keys()
- raise HTTPException(
- status_code=400,
- detail=f"Missing required parameters: {missing}"
- )
-
- result = func(**request.args)
- return {"status": "success", "result": result}
- except Exception as e:
- raise HTTPException(status_code=400, detail=str(e))
-
- # 添加endpoint到FastAPI应用
- endpoint = create_tool_endpoint
- endpoint.__name__ = f"tool_{tool_name}"
- app.post(f"/tools/{tool_name}")(endpoint)
-# 重写agent endpoints创建逻辑
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `autoagent/server.py`
-
-The `create_agent_endpoints` function in [`autoagent/server.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/autoagent/server.py) handles a key part of this chapter's functionality:
-
-```py
-async def lifespan(app: FastAPI):
- # 启动时执行
- await create_agent_endpoints(app)
- yield
- # 关闭时执行
- # 清理代码(如果需要)
-
-app = FastAPI(title="MetaChain API", lifespan=lifespan)
-
-class ToolRequest(BaseModel):
- args: Dict[str, Any]
-
-class AgentRequest(BaseModel):
- model: str
- query: str
- context_variables: Optional[Dict[str, Any]] = {}
-
-class Message(BaseModel):
- role: str
- content: str
-
-class AgentResponse(BaseModel):
- result: str
- messages: List
- agent_name: str
-# 为所有注册的tools创建endpoints
-@app.on_event("startup")
-def create_tool_endpoints():
- for tool_name, tool_func in registry.tools.items():
- # 创建动态的POST endpoint
- async def create_tool_endpoint(request: ToolRequest, func=tool_func):
- try:
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[AgentResponse]
- B[lifespan]
- C[create_tool_endpoints]
- D[create_agent_endpoints]
- E[list_agents]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/07-memory-tool-retrieval-apis.md b/tutorials/autoagent-tutorial/07-memory-tool-retrieval-apis.md
new file mode 100644
index 00000000..e30f53b8
--- /dev/null
+++ b/tutorials/autoagent-tutorial/07-memory-tool-retrieval-apis.md
@@ -0,0 +1,585 @@
+---
+layout: default
+title: "Chapter 7: Memory, Tool Retrieval, and Third-Party APIs"
+nav_order: 7
+parent: AutoAgent Tutorial
+format_version: v2
+why: "As your AutoAgent deployment accumulates tools, the LLM cannot fit every tool schema in its context window. The ToolMemory + ToolReranker pipeline ensures agents always see the most relevant tools — not all tools. Understanding this system lets you ingest large API catalogs and keep agents fast at scale."
+mental_model: "ToolMemory is a semantic search index over tool descriptions. When an agent runs, the query (user message) retrieves the top-K most relevant tools from ChromaDB, which are then reranked by a small LLM call. Only the final shortlist is passed to the agent's tool schema — the rest never reach the LLM."
+learning_outcomes:
+ - Understand how ToolMemory indexes tool descriptions with ChromaDB + text-embedding-3-small
+ - Configure ToolReranker for LLM-based relevance scoring with Pydantic RerankResult
+ - Ingest RapidAPI tool documentation via process_tool_docs.py
+ - Apply the 12,000-token output cap and truncate_output() wrapper correctly
+ - Use CodeMemory and RAGMemory for codebase navigation and document retrieval
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - autoagent/tool_memory.py
+ - autoagent/rag_memory.py
+ - autoagent/rag_tools.py
+ - autoagent/code_memory.py
+ - autoagent/tool_retriever.py
+ - autoagent/search_tools.py
+ - autoagent/process_tool_docs.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 7: Memory, Tool Retrieval, and Third-Party APIs
+
+## What Problem Does This Solve?
+
+A production AutoAgent deployment might have hundreds of registered tools — scraped from RapidAPI, generated by the Agent Editor, or built by contributors. If every tool schema is included in every LLM call:
+
+1. Context window overflows (GPT-4o: 128k tokens; each tool schema: ~200-500 tokens)
+2. LLM attention diffuses — too many irrelevant tools confuse tool selection
+3. Inference cost scales linearly with tool count even when most tools are irrelevant
+
+AutoAgent solves this with a **two-stage retrieval pipeline**: semantic search (ChromaDB) narrows thousands of tools to ~20 candidates, then LLM-based reranking (ToolReranker) picks the final top-K (5 by default) for the actual tool schema list.
+
+---
+
+## The Two-Stage Retrieval Pipeline
+
+```mermaid
+flowchart TD
+ Q["User query\n'recommend a product based on budget'"]
+
+ subgraph "Stage 1: Semantic Search"
+ TM["ToolMemory\nChromaDB collection"]
+ EMB["text-embedding-3-small\nquery → vector"]
+ K20["Top-20 candidates\nby cosine similarity"]
+ end
+
+ subgraph "Stage 2: LLM Reranking"
+ TR["ToolReranker\nLiteLLM call"]
+ RR["RerankResult (Pydantic)\nscores + justifications"]
+ K5["Top-5 tools selected"]
+ end
+
+ subgraph "Agent Invocation"
+ SCHEMA["Tool schemas injected\ninto agent's function list"]
+ AGENT["MetaChain.run()"]
+ end
+
+ Q --> EMB
+ EMB --> TM
+ TM --> K20
+ K20 --> TR
+ TR --> RR
+ RR --> K5
+ K5 --> SCHEMA
+ SCHEMA --> AGENT
+```
+
+---
+
+## ToolMemory (`tool_memory.py`)
+
+### Indexing
+
+```python
+# autoagent/tool_memory.py
+
+import chromadb
+from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction
+
+class ToolMemory:
+ """Semantic search index over tool descriptions using ChromaDB."""
+
+ def __init__(self, collection_name: str = "autoagent_tools"):
+ self.client = chromadb.PersistentClient(path="./workspace/.chroma")
+ self.embedding_fn = OpenAIEmbeddingFunction(
+ api_key=os.getenv("OPENAI_API_KEY"),
+ model_name="text-embedding-3-small",
+ )
+ self.collection = self.client.get_or_create_collection(
+ name=collection_name,
+ embedding_function=self.embedding_fn,
+ )
+
+ def index_tool(self, tool: Callable) -> None:
+ """Add a tool to the semantic index."""
+ # Create rich description for embedding
+ schema = function_to_json(tool)
+ description = f"""Tool: {schema['function']['name']}
+Description: {schema['function']['description']}
+Parameters: {json.dumps(schema['function']['parameters']['properties'], indent=2)}
+"""
+ self.collection.upsert(
+ ids=[tool.__name__],
+ documents=[description],
+ metadatas=[{"module": tool.__module__, "is_plugin": True}],
+ )
+
+ def search(self, query: str, n_results: int = 20) -> list[dict]:
+ """Semantic search: return top-n_results tools for the query."""
+ results = self.collection.query(
+ query_texts=[query],
+ n_results=min(n_results, self.collection.count()),
+ )
+
+ tools = []
+ for i, tool_id in enumerate(results["ids"][0]):
+ tools.append({
+ "name": tool_id,
+ "description": results["documents"][0][i],
+ "distance": results["distances"][0][i],
+ "metadata": results["metadatas"][0][i],
+ })
+
+ return tools # Sorted by relevance (lower distance = more relevant)
+
+ def index_all_registry_tools(self) -> int:
+ """Index all registered plugin tools. Returns count indexed."""
+ registry = get_registry()
+ count = 0
+ for tool_name, tool_func in registry["plugin_tools"].items():
+ self.index_tool(tool_func)
+ count += 1
+ return count
+```
+
+### ToolMemory Lifecycle
+
+```mermaid
+sequenceDiagram
+ participant CLI as cli.py startup
+ participant TM as ToolMemory
+ participant CR as ChromaDB
+ participant AE as Agent Editor
+ participant RA as RapidAPI Ingester
+
+ CLI->>TM: index_all_registry_tools()
+ TM->>CR: upsert(built-in tools)
+ Note over CR: Built-in tools indexed
+
+ AE->>TM: index_tool(new_plugin_tool)
+ TM->>CR: upsert(new tool description)
+ Note over CR: Plugin tool added live
+
+ RA->>TM: index_tool(rapidapi_tool)
+ TM->>CR: upsert(API tool description)
+ Note over CR: RapidAPI tool added
+
+ participant AG as Agent at runtime
+ AG->>TM: search(query, n_results=20)
+ TM->>CR: query(embedding)
+ CR-->>TM: top-20 candidates
+ TM-->>AG: candidates list
+```
+
+### When Tools Are Indexed
+
+Tools are indexed at three points:
+1. At session startup (`cli.py` calls `tool_memory.index_all_registry_tools()`)
+2. After Agent Editor creates a new tool (`ToolEditorAgent` calls `tool_memory.index_tool()`)
+3. After RapidAPI ingestion (`process_tool_docs.py` calls `tool_memory.index_tool()` for each ingested tool)
+
+---
+
+## ToolReranker (`tool_retriever.py`)
+
+After semantic search returns the top-20 candidates, `ToolReranker` uses an LLM call to score each candidate's relevance to the current query:
+
+```python
+# autoagent/tool_retriever.py
+
+from pydantic import BaseModel
+
+class ToolScore(BaseModel):
+ tool_name: str
+ relevance_score: float # 0.0 to 1.0
+ justification: str
+
+class RerankResult(BaseModel):
+ """Pydantic model for LLM reranking output."""
+ scores: list[ToolScore]
+ selected_tools: list[str] # Top-K tool names after reranking
+
+class ToolReranker:
+ """LLM-based tool reranker for precision after semantic search recall."""
+
+ def __init__(self, model: str = "gpt-4o-mini", top_k: int = 5):
+ self.model = model # Use a small/fast model for reranking
+ self.top_k = top_k
+
+ def rerank(self, query: str, candidates: list[dict]) -> list[str]:
+ """Score candidates and return top-k tool names."""
+ candidates_text = "\n\n".join([
+ f"Tool {i+1}: {c['name']}\n{c['description']}"
+ for i, c in enumerate(candidates)
+ ])
+
+ prompt = f"""Given this user query:
+"{query}"
+
+Score each tool's relevance (0.0-1.0) and select the {self.top_k} most useful tools.
+
+Tools to evaluate:
+{candidates_text}
+
+Return a JSON object matching this schema:
+{{
+ "scores": [
+ {{"tool_name": "...", "relevance_score": 0.9, "justification": "..."}}
+ ],
+ "selected_tools": ["tool1", "tool2", ...] // top {self.top_k} names
+}}"""
+
+ response = litellm.completion(
+ model=self.model,
+ messages=[{"role": "user", "content": prompt}],
+ response_format={"type": "json_object"},
+ )
+
+ result = RerankResult.model_validate_json(
+ response.choices[0].message.content
+ )
+ return result.selected_tools[:self.top_k]
+```
+
+The key design choice: use `gpt-4o-mini` (fast, cheap) for reranking, not the full `gpt-4o`. Reranking is a classification task that doesn't need the full model's reasoning capability.
+
+---
+
+## RAGMemory and Document Retrieval (`rag_memory.py`)
+
+`RAGMemory` provides semantic search over document chunks for knowledge-intensive tasks:
+
+```python
+# autoagent/rag_memory.py
+
+class RAGMemory:
+ """Document chunk storage and retrieval using ChromaDB."""
+
+ def __init__(self, collection_name: str = "autoagent_docs"):
+ self.client = chromadb.PersistentClient(path="./workspace/.chroma")
+ self.embedding_fn = OpenAIEmbeddingFunction(
+ api_key=os.getenv("OPENAI_API_KEY"),
+ model_name="text-embedding-3-small",
+ )
+ self.collection = self.client.get_or_create_collection(
+ name=collection_name,
+ embedding_function=self.embedding_fn,
+ )
+
+ def add_document(
+ self,
+ text: str,
+ doc_id: str,
+ chunk_size: int = 500,
+ overlap: int = 50,
+ ) -> int:
+ """Chunk a document and add all chunks to the index."""
+ chunks = self._chunk_text(text, chunk_size, overlap)
+ ids = [f"{doc_id}__chunk_{i}" for i in range(len(chunks))]
+
+ self.collection.upsert(
+ ids=ids,
+ documents=chunks,
+ metadatas=[{"doc_id": doc_id, "chunk_index": i} for i in range(len(chunks))],
+ )
+ return len(chunks)
+
+ def query(self, query: str, n_results: int = 5) -> list[str]:
+ """Return top-n_results relevant chunks."""
+ results = self.collection.query(
+ query_texts=[query],
+ n_results=n_results,
+ )
+ return results["documents"][0]
+
+ def _chunk_text(self, text: str, chunk_size: int, overlap: int) -> list[str]:
+ """Split text into overlapping chunks by word count."""
+ words = text.split()
+ chunks = []
+ for i in range(0, len(words), chunk_size - overlap):
+ chunk = " ".join(words[i:i + chunk_size])
+ if chunk:
+ chunks.append(chunk)
+ return chunks
+```
+
+### RAG Tools (`rag_tools.py`)
+
+```python
+# autoagent/rag_tools.py
+
+from autoagent.registry import register_tool
+
+@register_tool
+def rag_search(query: str, context_variables: dict) -> str:
+ """Search indexed documents for relevant passages.
+
+ Use when you need to find specific information from previously
+ loaded documents or knowledge bases.
+ """
+ rag_memory = context_variables.get("rag_memory")
+ if not rag_memory:
+ return "RAG memory not initialized. Load documents first."
+
+ chunks = rag_memory.query(query, n_results=5)
+ return "\n\n---\n\n".join(chunks)
+
+@register_tool
+def add_document_to_rag(
+ file_path: str,
+ doc_id: str,
+ context_variables: dict,
+) -> str:
+ """Load a document into RAG memory for semantic search."""
+ rag_memory = context_variables.get("rag_memory")
+ file_env = context_variables.get("file_env")
+
+ content = file_env.visit_page(file_path)
+ chunk_count = rag_memory.add_document(content, doc_id)
+ return f"Indexed {chunk_count} chunks from {file_path}"
+```
+
+---
+
+## CodeMemory (`code_memory.py`)
+
+`CodeMemory` specializes in codebase navigation — indexing source files so agents can find relevant code by describing its function:
+
+```python
+# autoagent/code_memory.py
+
+class CodeMemory:
+ """Semantic search over source code for codebase navigation tasks."""
+
+ def __init__(self):
+ self.client = chromadb.PersistentClient(path="./workspace/.chroma")
+ # Use a code-specific embedding model
+ self.embedding_fn = OpenAIEmbeddingFunction(
+ api_key=os.getenv("OPENAI_API_KEY"),
+ model_name="text-embedding-3-small",
+ )
+ self.collection = self.client.get_or_create_collection(
+ name="autoagent_code",
+ embedding_function=self.embedding_fn,
+ )
+
+ def index_repository(self, repo_path: str) -> int:
+ """Index all Python files in a repository."""
+ count = 0
+ for py_file in Path(repo_path).rglob("*.py"):
+ content = py_file.read_text()
+ # Index at function/class level for precision
+ for chunk in self._extract_code_chunks(content, str(py_file)):
+ self.collection.upsert(
+ ids=[chunk["id"]],
+ documents=[chunk["content"]],
+ metadatas=[chunk["metadata"]],
+ )
+ count += 1
+ return count
+
+ def find_relevant_code(self, description: str, n_results: int = 5) -> list[dict]:
+ """Find code chunks matching a natural language description."""
+ results = self.collection.query(
+ query_texts=[description],
+ n_results=n_results,
+ )
+ return [
+ {
+ "file": results["metadatas"][0][i]["file"],
+ "function": results["metadatas"][0][i].get("function", ""),
+ "code": results["documents"][0][i],
+ }
+ for i in range(len(results["ids"][0]))
+ ]
+```
+
+---
+
+## RapidAPI Ingestion (`process_tool_docs.py`)
+
+AutoAgent can ingest tools from RapidAPI's 50,000+ API catalog:
+
+```python
+# autoagent/process_tool_docs.py
+
+class RapidAPIIngester:
+ """Ingests RapidAPI tool documentation and generates AutoAgent tools."""
+
+ def __init__(self, rapidapi_key: str):
+ self.rapidapi_key = rapidapi_key
+ self.headers = {
+ "X-RapidAPI-Key": rapidapi_key,
+ "X-RapidAPI-Host": "rapidapi.com",
+ }
+
+ def ingest_api(
+ self,
+ api_name: str,
+ endpoint_docs: dict,
+ tool_memory: ToolMemory,
+ ) -> list[Callable]:
+ """Convert RapidAPI endpoint documentation to AutoAgent tools."""
+ tools = []
+
+ for endpoint_name, endpoint_info in endpoint_docs.items():
+ # Generate tool function from API docs
+ tool_code = self._generate_tool_code(
+ api_name=api_name,
+ endpoint_name=endpoint_name,
+ endpoint_info=endpoint_info,
+ )
+
+ # Execute to get function object
+ namespace = {}
+ exec(tool_code, namespace)
+ tool_func = namespace[endpoint_name.replace("/", "_")]
+
+ # Apply plugin tool decorator (adds 12k token cap)
+ tool_func = register_plugin_tool(tool_func)
+
+ # Index in ToolMemory
+ tool_memory.index_tool(tool_func)
+ tools.append(tool_func)
+
+ return tools
+
+ def _generate_tool_code(
+ self,
+ api_name: str,
+ endpoint_name: str,
+ endpoint_info: dict,
+ ) -> str:
+ """Generate a Python wrapper function for a RapidAPI endpoint."""
+ params = endpoint_info.get("parameters", [])
+ param_str = ", ".join([
+ f"{p['name']}: {p.get('type', 'str')} = None"
+ for p in params
+ ])
+
+ return f'''
+import os
+import requests
+from autoagent.registry import register_plugin_tool
+
+@register_plugin_tool
+def {endpoint_name.replace("/", "_")}({param_str}) -> str:
+ """{endpoint_info.get("description", f"Call {api_name} {endpoint_name} endpoint")}"""
+ url = "https://{api_name}.rapidapi.com/{endpoint_name}"
+ headers = {{
+ "X-RapidAPI-Key": os.getenv("RAPIDAPI_KEY"),
+ "X-RapidAPI-Host": "{api_name}.rapidapi.com",
+ }}
+ params = {{{", ".join([f'"{p["name"]}": {p["name"]}' for p in params])}}}
+ response = requests.get(url, headers=headers, params=params)
+ return response.text
+'''
+```
+
+---
+
+## The 12,000-Token Output Cap
+
+All plugin tools automatically have their output truncated to 12,000 tokens via the `@register_plugin_tool` decorator:
+
+```python
+# autoagent/registry.py
+
+import tiktoken
+
+def truncate_output(output: str, max_tokens: int = 12000) -> str:
+ """Truncate output to max_tokens using tiktoken counting."""
+ enc = tiktoken.get_encoding("cl100k_base")
+ tokens = enc.encode(output)
+
+ if len(tokens) <= max_tokens:
+ return output
+
+ truncated = enc.decode(tokens[:max_tokens])
+ return truncated + f"\n\n[OUTPUT TRUNCATED: {len(tokens) - max_tokens} tokens omitted]"
+
+def register_plugin_tool(func: Callable) -> Callable:
+ """Register a tool in the plugin_tools namespace with output truncation."""
+ @wraps(func)
+ def wrapped(*args, **kwargs):
+ result = func(*args, **kwargs)
+ return truncate_output(str(result))
+
+ # Store original for introspection
+ wrapped.__wrapped__ = func
+ wrapped.__name__ = func.__name__
+
+ # Register in global registry
+ _registry["plugin_tools"][func.__name__] = wrapped
+ return wrapped
+```
+
+The cap prevents runaway LLM costs when tools return large payloads (full web pages, large CSV files, API responses). The built-in `@register_tool` decorator does NOT apply this cap — it's only for plugin tools.
+
+### Token Budget Enforcement
+
+| Decorator | Output Cap | Use Case |
+|-----------|-----------|----------|
+| `@register_plugin_tool` | 12,000 tokens | User-generated and RapidAPI tools |
+| `@register_tool` | None | Built-in system tools (trusted, controlled output) |
+
+---
+
+## GitHub Client (`github_client.py`)
+
+For agent workflows that interact with GitHub:
+
+```python
+# autoagent/github_client.py (simplified)
+
+class GitHubClient:
+ """Wrapper for common GitHub operations used in research workflows."""
+
+ def __init__(self, token: str):
+ from github import Github
+ self.gh = Github(token)
+
+ def get_repo_info(self, owner: str, repo: str) -> dict:
+ """Get repository metadata including stars, language, license."""
+ r = self.gh.get_repo(f"{owner}/{repo}")
+ return {
+ "name": r.name,
+ "stars": r.stargazers_count,
+ "language": r.language,
+ "license": r.license.name if r.license else "Unknown",
+ "description": r.description,
+ "last_updated": r.updated_at.isoformat(),
+ }
+
+ def search_code(self, query: str, repo: str | None = None) -> list[dict]:
+ """Search code across GitHub or within a specific repo."""
+ search_query = f"{query} repo:{repo}" if repo else query
+ results = self.gh.search_code(search_query)
+ return [
+ {"path": r.path, "url": r.html_url, "sha": r.sha}
+ for r in results[:10]
+ ]
+```
+
+---
+
+## Summary
+
+| Component | File | Role |
+|-----------|------|------|
+| `ToolMemory` | `tool_memory.py` | ChromaDB index over tool descriptions |
+| `ToolReranker` | `tool_retriever.py` | LLM-based reranking of semantic search candidates |
+| `RerankResult` | `tool_retriever.py` | Pydantic model for reranker output |
+| `RAGMemory` | `rag_memory.py` | Chunked document index for knowledge retrieval |
+| `rag_search()` | `rag_tools.py` | Tool to query RAG memory |
+| `CodeMemory` | `code_memory.py` | Codebase semantic search at function/class level |
+| `RapidAPIIngester` | `process_tool_docs.py` | Convert RapidAPI docs to AutoAgent tools |
+| `truncate_output()` | `registry.py` | Enforce 12k token cap on plugin tool output |
+| `@register_plugin_tool` | `registry.py` | Register with 12k cap (user-generated tools) |
+| `@register_tool` | `registry.py` | Register without cap (built-in system tools) |
+| `GitHubClient` | `github_client.py` | GitHub API operations for research workflows |
+| `text-embedding-3-small` | (OpenAI API) | Embedding model for all ChromaDB collections |
+
+Continue to [Chapter 8: Evaluation, Benchmarks, and Contributing](./08-evaluation-benchmarks-contributing.md) to learn how to run GAIA benchmarks, add new evaluation suites, and contribute tools and agents to the ecosystem.
diff --git a/tutorials/autoagent-tutorial/08-contribution-workflow-and-production-governance.md b/tutorials/autoagent-tutorial/08-contribution-workflow-and-production-governance.md
deleted file mode 100644
index 64319c19..00000000
--- a/tutorials/autoagent-tutorial/08-contribution-workflow-and-production-governance.md
+++ /dev/null
@@ -1,223 +0,0 @@
----
-layout: default
-title: "Chapter 8: Contribution Workflow and Production Governance"
-nav_order: 8
-parent: AutoAgent Tutorial
----
-
-
-# Chapter 8: Contribution Workflow and Production Governance
-
-Welcome to **Chapter 8: Contribution Workflow and Production Governance**. In this part of **AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter closes with contribution and governance patterns for team adoption.
-
-## Learning Goals
-
-- follow contribution conventions and code quality expectations
-- define governance controls for tool-creating agents
-- separate experimental and production usage paths
-- preserve auditability and rollback capability
-
-## Governance Checklist
-
-- require review gates for generated tool/workflow changes
-- track environment and model config per deployment
-- enforce secure key handling and runtime isolation
-
-## Source References
-
-- [AutoAgent Repository](https://github.com/HKUDS/AutoAgent)
-- [AutoAgent Issues](https://github.com/HKUDS/AutoAgent/issues)
-- [Developer Guide: Create Agent](https://github.com/HKUDS/AutoAgent/blob/main/docs/docs/Dev-Guideline/dev-guide-create-agent.md)
-
-## Summary
-
-You now have a full AutoAgent path from quickstart to governed production usage.
-
-Next tutorial: [Beads Tutorial](../beads-tutorial/)
-
-## Depth Expansion Playbook
-
-## Source Code Walkthrough
-
-### `docs/translation_updater.py`
-
-The `get_translation_path` function in [`docs/translation_updater.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/docs/translation_updater.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def get_translation_path(source_path, lang):
- """Get the corresponding translation file path for a source file."""
- relative_path = os.path.relpath(source_path, 'docs/modules')
- return f'docs/i18n/{lang}/docusaurus-plugin-content-docs/current/{relative_path}'
-
-
-def translate_content(content, target_lang):
- """Translate content using Anthropic's Claude."""
- system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.'
-
- message = client.messages.create(
- model='claude-3-opus-20240229',
- max_tokens=4096,
- temperature=0,
- system=system_prompt,
- messages=[
- {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
- ],
- )
-
- return message.content[0].text
-
-
-def process_file(source_path, lang):
- """Process a single file for translation."""
- # Skip non-markdown files
- if not source_path.endswith(('.md', '.mdx')):
- return
-
- translation_path = get_translation_path(source_path, lang)
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `docs/translation_updater.py`
-
-The `translate_content` function in [`docs/translation_updater.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/docs/translation_updater.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def translate_content(content, target_lang):
- """Translate content using Anthropic's Claude."""
- system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.'
-
- message = client.messages.create(
- model='claude-3-opus-20240229',
- max_tokens=4096,
- temperature=0,
- system=system_prompt,
- messages=[
- {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
- ],
- )
-
- return message.content[0].text
-
-
-def process_file(source_path, lang):
- """Process a single file for translation."""
- # Skip non-markdown files
- if not source_path.endswith(('.md', '.mdx')):
- return
-
- translation_path = get_translation_path(source_path, lang)
- os.makedirs(os.path.dirname(translation_path), exist_ok=True)
-
- # Read source content
- with open(source_path, 'r', encoding='utf-8') as f:
- content = f.read()
-
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `docs/translation_updater.py`
-
-The `process_file` function in [`docs/translation_updater.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/docs/translation_updater.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def process_file(source_path, lang):
- """Process a single file for translation."""
- # Skip non-markdown files
- if not source_path.endswith(('.md', '.mdx')):
- return
-
- translation_path = get_translation_path(source_path, lang)
- os.makedirs(os.path.dirname(translation_path), exist_ok=True)
-
- # Read source content
- with open(source_path, 'r', encoding='utf-8') as f:
- content = f.read()
-
- # Parse frontmatter if exists
- has_frontmatter = content.startswith('---')
- if has_frontmatter:
- post = frontmatter.loads(content)
- metadata = post.metadata
- content_without_frontmatter = post.content
- else:
- metadata = {}
- content_without_frontmatter = content
-
- # Translate the content
- print('translating...', source_path, lang)
- translated_content = translate_content(content_without_frontmatter, LANGUAGES[lang])
- print('translation done')
-
- # Reconstruct the file with frontmatter if it existed
- if has_frontmatter:
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-### `docs/translation_updater.py`
-
-The `main` function in [`docs/translation_updater.py`](https://github.com/HKUDS/AutoAgent/blob/HEAD/docs/translation_updater.py) handles a key part of this chapter's functionality:
-
-```py
-
-
-def main():
- previous_hashes = load_file_hashes()
-
- current_hashes = {}
-
- # Walk through all files in docs/modules
- for root, _, files in os.walk('docs/modules'):
- for file in files:
- if file.endswith(('.md', '.mdx')):
- filepath = os.path.join(root, file)
- current_hash = get_file_hash(filepath)
- current_hashes[filepath] = current_hash
-
- # Check if file is new or modified
- if (
- filepath not in previous_hashes
- or previous_hashes[filepath] != current_hash
- ):
- print(f'Change detected in {filepath}')
- for lang in LANGUAGES:
- process_file(filepath, lang)
-
- print('all files up to date, saving hashes')
- save_file_hashes(current_hashes)
- previous_hashes = current_hashes
-
-
-if __name__ == '__main__':
- main()
-
-```
-
-This function is important because it defines how AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration implements the patterns covered in this chapter.
-
-
-## How These Components Connect
-
-```mermaid
-flowchart TD
- A[get_translation_path]
- B[translate_content]
- C[process_file]
- D[main]
- E[the]
- A --> B
- B --> C
- C --> D
- D --> E
-```
diff --git a/tutorials/autoagent-tutorial/08-evaluation-benchmarks-contributing.md b/tutorials/autoagent-tutorial/08-evaluation-benchmarks-contributing.md
new file mode 100644
index 00000000..87ec727e
--- /dev/null
+++ b/tutorials/autoagent-tutorial/08-evaluation-benchmarks-contributing.md
@@ -0,0 +1,493 @@
+---
+layout: default
+title: "Chapter 8: Evaluation, Benchmarks, and Contributing"
+nav_order: 8
+parent: AutoAgent Tutorial
+format_version: v2
+why: "Knowing how AutoAgent is evaluated on GAIA, Math500, and Agentic-RAG gives you principled ways to measure your own customizations. Understanding the self-developing loop — where AutoAgent extends AutoAgent — shows where the project is heading and how to contribute work that compounds."
+mental_model: "The evaluation infrastructure runs AutoAgent against standardized benchmarks in parallel Docker containers, then scores results against ground truth. Contributing a new tool, agent, or benchmark follows the same patterns used throughout the codebase — the registry is the integration point."
+learning_outcomes:
+ - Run GAIA benchmark evaluation with run_infer.py and score results
+ - Understand the three GAIA difficulty levels and what each tests
+ - Configure parallel Docker evaluation with port management and filelock
+ - Add a new benchmark to the evaluation suite
+ - Contribute tools with @register_tool vs @register_plugin_tool correctly
+ - Understand the self-developing loop and AutoAgent's roadmap
+snapshot:
+ source_repo: https://github.com/HKUDS/AutoAgent
+ stars: 9116
+ language: Python
+ license: MIT
+chapter_map:
+ - evaluation/gaia/run_infer.py
+ - evaluation/gaia/scorer.py
+ - evaluation/multihoprag/
+ - evaluation/math500/
+ - autoagent/registry.py
+sources:
+ - https://github.com/HKUDS/AutoAgent
+ - https://arxiv.org/abs/2502.05957
+---
+
+# Chapter 8: Evaluation, Benchmarks, and Contributing
+
+## What Problem Does This Solve?
+
+Agent frameworks are easy to demo but hard to evaluate. A framework that gets 70% of trivial tasks right and fails on complex multi-step reasoning isn't suitable for production. AutoAgent addresses this through three rigorously maintained benchmarks:
+
+- **GAIA** — general AI assistant tasks requiring multi-step tool use (web + files + code)
+- **Agentic-RAG** — multi-hop document retrieval and reasoning
+- **Math500** — mathematical problem solving with majority-vote verification
+
+Running these benchmarks yourself lets you:
+1. Verify that your model/configuration choices maintain baseline performance
+2. Measure the impact of custom tools or agent modifications
+3. Catch regressions before deploying changes
+
+---
+
+## GAIA Benchmark
+
+### What GAIA Tests
+
+GAIA (General AI Assistants benchmark) measures whether an agent can complete real-world tasks that require tool use, multi-step reasoning, and synthesis across multiple sources.
+
+```mermaid
+flowchart LR
+ subgraph "GAIA Level 1 (~85% target)"
+ L1["Single-step tool use\nFactual lookups\nSimple web search"]
+ end
+ subgraph "GAIA Level 2 (~67% target)"
+ L2["Multi-step reasoning\n3-5 tool calls\nCross-source synthesis"]
+ end
+ subgraph "GAIA Level 3 (~40% target)"
+ L3["Complex synthesis\n5-10+ tool calls\nMultiple format types\nAmbiguous instructions"]
+ end
+ L1 --> L2 --> L3
+```
+
+**Level 1 examples:**
+- "What is the capital of the country where the Eiffel Tower is located?"
+- "How many Python files are in the AutoAgent repository?"
+
+**Level 2 examples:**
+- "Find the 2023 paper on chain-of-thought prompting and summarize its main contributions"
+- "Download the latest AutoAgent release, count the test files, and report the result"
+
+**Level 3 examples:**
+- "Given this PDF of a scientific paper, identify all datasets mentioned, find the primary one online, download a sample, and compute the mean of column 3"
+- "Find all GitHub issues labeled 'bug' in AutoAgent created in the last month, categorize them by component, and write a summary report"
+
+### Running GAIA Evaluation
+
+```bash
+cd evaluation/gaia
+python run_infer.py \
+ --model gpt-4o \
+ --max-workers 5 \
+ --output results_gpt4o.json \
+ --level all
+```
+
+Parameters:
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `--model` | LiteLLM model string | gpt-4o |
+| `--max-workers` | Parallel Docker containers | 5 |
+| `--output` | Results JSON file path | results.json |
+| `--level` | GAIA level: 1, 2, 3, or all | all |
+| `--subset` | Number of tasks to run (for quick tests) | all |
+
+### run_infer.py Architecture
+
+```python
+# evaluation/gaia/run_infer.py (simplified)
+
+import filelock
+import concurrent.futures
+from pathlib import Path
+
+def run_single_task(
+ task: dict,
+ model: str,
+ port: int,
+) -> dict:
+ """Run a single GAIA task in an isolated Docker container."""
+ # Each worker gets its own TCP port to avoid container conflicts
+ docker_config = DockerConfig(tcp_port=port, container_name=f"gaia_eval_{port}")
+ code_env = DockerEnv(docker_config)
+ code_env.init_container()
+
+ web_env = BrowserEnv()
+ web_env.init()
+ file_env = RequestsMarkdownBrowser()
+
+ context_variables = {
+ "code_env": code_env,
+ "web_env": web_env,
+ "file_env": file_env,
+ }
+
+ chain = MetaChain(model=model)
+ response = chain.run(
+ agent=system_triage_agent,
+ messages=[{"role": "user", "content": task["question"]}],
+ context_variables=context_variables,
+ max_turns=50, # Higher limit for complex GAIA tasks
+ )
+
+ # Extract final answer from response
+ final_message = response.messages[-1]["content"]
+
+ # Cleanup
+ code_env.container.stop()
+ web_env.browser.close()
+
+ return {
+ "task_id": task["task_id"],
+ "question": task["question"],
+ "expected": task["final_answer"],
+ "predicted": final_message,
+ "level": task["level"],
+ }
+
+def run_infer(model: str, max_workers: int, output_path: str):
+ """Run all GAIA tasks in parallel with port management."""
+ tasks = load_gaia_tasks() # From HuggingFace datasets
+ available_ports = list(range(12346, 12346 + max_workers * 2))
+ port_lock = filelock.FileLock("ports.lock")
+
+ results = []
+
+ def get_free_port() -> int:
+ """Thread-safe port allocation."""
+ with port_lock:
+ port = available_ports.pop(0)
+ return port
+
+ def return_port(port: int) -> None:
+ with port_lock:
+ available_ports.append(port)
+
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+ futures = []
+ for task in tasks:
+ port = get_free_port()
+ future = executor.submit(run_single_task, task, model, port)
+ future.add_done_callback(lambda f, p=port: return_port(p))
+ futures.append(future)
+
+ for future in concurrent.futures.as_completed(futures):
+ result = future.result()
+ results.append(result)
+ print(f"Completed task {result['task_id']} (level {result['level']})")
+
+ # Save results
+ with open(output_path, "w") as f:
+ json.dump(results, f, indent=2)
+
+ # Print scores
+ scorer = GAIAScorer()
+ scores = scorer.score(results)
+ print(f"\nGAIA Results:")
+ print(f" Level 1: {scores['level_1']:.1%}")
+ print(f" Level 2: {scores['level_2']:.1%}")
+ print(f" Level 3: {scores['level_3']:.1%}")
+ print(f" Overall: {scores['overall']:.1%}")
+```
+
+### Port Management with filelock
+
+Running multiple DockerEnvs simultaneously requires each to use a different TCP port. The `filelock` library provides thread-safe port allocation across workers:
+
+```python
+# evaluation/gaia/run_infer.py
+
+import filelock
+
+# Each evaluation run uses a unique port range
+BASE_PORT = 12346
+port_pool = [BASE_PORT + i * 2 for i in range(max_workers)]
+lock = filelock.FileLock("/tmp/autoagent_ports.lock")
+```
+
+---
+
+## scorer.py
+
+```python
+# evaluation/gaia/scorer.py
+
+class GAIAScorer:
+ """Scores GAIA evaluation results against ground truth."""
+
+ def score(self, results: list[dict]) -> dict:
+ """Compute accuracy per level and overall."""
+ by_level = {1: [], 2: [], 3: []}
+
+ for result in results:
+ correct = self._is_correct(
+ result["predicted"],
+ result["expected"],
+ )
+ level = result["level"]
+ by_level[level].append(correct)
+
+ scores = {}
+ for level, is_correct_list in by_level.items():
+ if is_correct_list:
+ scores[f"level_{level}"] = sum(is_correct_list) / len(is_correct_list)
+
+ all_correct = [c for lst in by_level.values() for c in lst]
+ scores["overall"] = sum(all_correct) / len(all_correct) if all_correct else 0.0
+
+ return scores
+
+ def _is_correct(self, predicted: str, expected: str) -> bool:
+ """Fuzzy match: normalize and compare answers."""
+ pred = self._normalize(predicted)
+ exp = self._normalize(expected)
+
+ # Exact match after normalization
+ if pred == exp:
+ return True
+
+ # Number equivalence (e.g., "42" == "42.0")
+ try:
+ return float(pred) == float(exp)
+ except ValueError:
+ pass
+
+ # Substring match for longer answers
+ return exp in pred or pred in exp
+
+ def _normalize(self, text: str) -> str:
+ """Normalize text for comparison."""
+ text = text.lower().strip()
+ # Remove common prefixes that agents add
+ for prefix in ["the answer is", "final answer:", "answer:"]:
+ if text.startswith(prefix):
+ text = text[len(prefix):].strip()
+ return text
+```
+
+---
+
+## Agentic-RAG Evaluation (`evaluation/multihoprag/`)
+
+The Agentic-RAG benchmark tests multi-hop document retrieval — questions that require combining information from multiple documents that individually don't contain the answer.
+
+```mermaid
+flowchart LR
+ Q["Multi-hop question\n'What company employs the\nauthor of paper X?'"]
+
+ Q --> H1["Hop 1: find author of paper X\nin indexed documents"]
+ H1 --> H2["Hop 2: find employer of\nthat author"]
+ H2 --> A["Final answer:\n'Google DeepMind'"]
+```
+
+```bash
+cd evaluation/multihoprag
+python run_eval.py \
+ --model gpt-4o \
+ --dataset multihop_rag_v1 \
+ --output results_rag.json
+```
+
+---
+
+## Math500 with Voting Workflow (`evaluation/math500/`)
+
+Math500 evaluates mathematical problem solving using the `math_solver_workflow` (3-method parallel voting):
+
+```bash
+cd evaluation/math500
+python run_eval.py \
+ --workflow math_solver_workflow \
+ --output results_math.json
+```
+
+The workflow runs each of the 500 problems through the `math_solver_workflow_flow.py` (see Chapter 6) with 3-way majority voting between chain-of-thought, Python execution, and symbolic math methods.
+
+---
+
+## Adding a New Benchmark
+
+To add a benchmark to AutoAgent's evaluation suite:
+
+### Step 1: Create the directory structure
+
+```
+evaluation/
+ my_benchmark/
+ __init__.py
+ run_eval.py # Main evaluation script
+ scorer.py # Task-specific scoring logic
+ README.md # Benchmark description and results
+ data/ # Test cases (or link to HuggingFace dataset)
+```
+
+### Step 2: Implement run_eval.py
+
+```python
+# evaluation/my_benchmark/run_eval.py
+
+import argparse
+from autoagent.core import MetaChain
+from autoagent.docker_env import DockerEnv, DockerConfig
+from autoagent.browser_env import BrowserEnv
+from autoagent.markdown_browser import RequestsMarkdownBrowser
+
+def run_task(task: dict, model: str, port: int) -> dict:
+ """Run a single benchmark task."""
+ # Standard environment setup (same as GAIA)
+ code_env = DockerEnv(DockerConfig(tcp_port=port))
+ code_env.init_container()
+
+ context_variables = {
+ "code_env": code_env,
+ "web_env": BrowserEnv(),
+ "file_env": RequestsMarkdownBrowser(),
+ }
+
+ chain = MetaChain(model=model)
+ response = chain.run(
+ agent=system_triage_agent,
+ messages=[{"role": "user", "content": task["question"]}],
+ context_variables=context_variables,
+ )
+
+ return {
+ "task_id": task["id"],
+ "predicted": response.messages[-1]["content"],
+ "expected": task["answer"],
+ }
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--model", default="gpt-4o")
+ parser.add_argument("--output", default="results.json")
+ args = parser.parse_args()
+ # ... run tasks and score
+```
+
+---
+
+## Contributing Tools
+
+### @register_tool vs @register_plugin_tool
+
+The choice of decorator determines whether your tool gets the 12,000-token output cap:
+
+```python
+# For built-in system tools: no output cap
+# Use when: output is always bounded and controlled
+from autoagent.registry import register_tool
+
+@register_tool
+def get_current_time() -> str:
+ """Return the current UTC time as ISO 8601 string."""
+ from datetime import datetime, timezone
+ return datetime.now(timezone.utc).isoformat()
+
+# For user/plugin tools: automatic 12k token cap
+# Use when: output could be unbounded (web pages, API responses, files)
+from autoagent.registry import register_plugin_tool
+
+@register_plugin_tool
+def fetch_news_headlines(topic: str, count: int = 10) -> str:
+ """Fetch the latest news headlines for a topic.
+
+ Args:
+ topic: Search topic for news
+ count: Number of headlines to return (default 10)
+
+ Returns:
+ JSON list of headlines with title, source, and URL
+ """
+ # Implementation that could return large amounts of text
+ ...
+```
+
+### Contribution Checklist
+
+For tools:
+- [ ] Use `@register_plugin_tool` for any tool with potentially large output
+- [ ] Write a comprehensive docstring (becomes the LLM's tool description)
+- [ ] Include type annotations for all parameters (used in tool schema generation)
+- [ ] Handle exceptions gracefully — return error strings, don't raise
+- [ ] Test with `DockerEnv.execute_code()` for any code execution tools
+- [ ] Add to `autoagent/tools/` directory
+
+For agents:
+- [ ] Use `@register_plugin_agent` with a factory function pattern
+- [ ] Include `case_resolved` and `case_not_resolved` in the function list
+- [ ] Write a clear `instructions` string that describes when to use each tool
+- [ ] Add transfer-back functions to return to the calling agent
+- [ ] Save to `autoagent/agents/` directory
+
+---
+
+## The Self-Developing Loop
+
+AutoAgent's most ambitious feature is that it can extend itself: the Agent Editor uses AutoAgent to create new agents for AutoAgent. This creates a compounding development loop:
+
+```mermaid
+flowchart TD
+ U["Developer describes\nnew capability in NL"]
+ AE["Agent Editor pipeline\n(Chapters 5)"]
+ NA["New agent/tool\nregistered in AutoAgent"]
+ RUN["New agent available\nin next session"]
+ NEXT["Developer describes\nnext capability using\nthe new agent"]
+
+ U --> AE
+ AE --> NA
+ NA --> RUN
+ RUN --> NEXT
+ NEXT --> AE
+```
+
+In practice, this means:
+1. You describe a new tool (e.g., "a tool that searches academic papers on arXiv")
+2. Agent Editor generates, tests, and registers it
+3. In the next session, `SystemTriageAgent` can use it immediately via ToolMemory discovery
+4. You can then describe a more complex agent that uses arXiv search plus web browsing plus PDF analysis
+
+The GITHUB_AI_TOKEN requirement enables this: the Docker container clones the AutoAgent repo to understand the full codebase when generating new code that integrates with the existing architecture.
+
+---
+
+## Roadmap
+
+Based on the paper (arxiv:2502.05957) and repository issues, upcoming evaluation and integration targets include:
+
+| Target | Description | Status |
+|--------|-------------|--------|
+| SWE-bench | Software engineering task evaluation | Planned |
+| WebArena | Full web browser automation benchmark | Planned |
+| E2B sandbox | Alternative to Docker for code execution | Planned |
+| Composio | Third-party tool integration platform | Planned |
+| HumanEval | Python code generation benchmark | Planned |
+
+Contributing to these evaluations is the highest-impact contribution path: implementing a new benchmark runner that demonstrates AutoAgent's strengths on an established evaluation suite.
+
+---
+
+## Summary
+
+| Component | File | Role |
+|-----------|------|------|
+| `run_infer.py` | `evaluation/gaia/` | Parallel GAIA evaluation with Docker + filelock |
+| `scorer.py` | `evaluation/gaia/` | Fuzzy answer matching and accuracy by level |
+| `run_eval.py` | `evaluation/multihoprag/` | Agentic-RAG multi-hop evaluation |
+| `run_eval.py` | `evaluation/math500/` | Math500 with voting workflow |
+| `filelock` | `evaluation/gaia/` | Thread-safe port pool for parallel workers |
+| `@register_tool` | `registry.py` | Built-in tool registration (no output cap) |
+| `@register_plugin_tool` | `registry.py` | Plugin tool registration (12k token cap) |
+| `@register_plugin_agent` | `registry.py` | Agent factory registration |
+| GAIA Level 1/2/3 | Benchmark | Progressive difficulty: 85% / 67% / 40% targets |
+| Self-developing loop | Agent Editor | AutoAgent extends AutoAgent using Agent Editor |
+
+This chapter completes the AutoAgent tutorial. The full architecture picture — MetaChain engine (Chapter 2), environment triad (Chapter 3), deep research system (Chapter 4), agent editor (Chapter 5), workflow editor (Chapter 6), memory and retrieval (Chapter 7), and evaluation (Chapter 8) — gives you everything needed to deploy, extend, and contribute to AutoAgent in production.
diff --git a/tutorials/autoagent-tutorial/README.md b/tutorials/autoagent-tutorial/README.md
index 53d268a4..20ddca8a 100644
--- a/tutorials/autoagent-tutorial/README.md
+++ b/tutorials/autoagent-tutorial/README.md
@@ -1,102 +1,106 @@
---
layout: default
-title: "AutoAgent Tutorial"
+title: AutoAgent Tutorial
nav_order: 140
has_children: true
format_version: v2
+source_repo: https://github.com/HKUDS/AutoAgent
+categories: [ai-agents, zero-code, multi-agent, deep-research]
+related_tutorials:
+ - autoresearch-tutorial
+ - openhands-tutorial
+ - agno-tutorial
+ - crewai-tutorial
+last_updated: 2026-04-12
---
-# AutoAgent Tutorial: Zero-Code Agent Creation and Automated Workflow Orchestration
+# AutoAgent Tutorial
-> Learn how to use `HKUDS/AutoAgent` to create and orchestrate LLM agents through natural-language workflows, with support for CLI operations, tool creation, and benchmark-oriented evaluation.
+AutoAgent (formerly MetaChain) is a **zero-code autonomous agent framework** from HKUDS that lets you describe agents in plain English and have them generated, tested, and deployed automatically. With 9,116 GitHub stars and an academic paper (arxiv:2502.05957), it represents a significant step toward democratizing multi-agent system development.
-[](https://github.com/HKUDS/AutoAgent)
-[](https://github.com/HKUDS/AutoAgent/blob/main/LICENSE)
-[](https://autoagent-ai.github.io/docs)
+## What You Will Learn
-## Why This Track Matters
+This tutorial walks through AutoAgent from first install to production-grade multi-agent pipelines. By the end, you will understand how the MetaChain engine works under the hood, how all three operating modes fit together, and how to extend the framework with your own tools, agents, and workflows.
-AutoAgent targets zero-code agent building via natural language and automated orchestration, making it useful for teams exploring dynamic agent creation without deep framework coding.
+## Who This Tutorial Is For
-This track focuses on:
+- Developers who want to build research or automation agents without writing orchestration boilerplate
+- ML engineers evaluating AutoAgent for benchmarks (GAIA, Math500, Agentic-RAG)
+- Contributors looking to add tools, agents, or new evaluation suites to the ecosystem
-- launching AutoAgent quickly in CLI mode
-- understanding user/agent-editor/workflow-editor modes
-- configuring tools and model providers safely
-- evaluating planning workflows and governance controls
+## Naming Note
-## Current Snapshot (auto-updated)
+The internal codebase uses the class name **MetaChain** throughout — the project was publicly renamed from MetaChain to AutoAgent in February 2025. You will see `from autoagent import MetaChain` and `MetaChain.run()` in all source files. This tutorial uses "AutoAgent" when referring to the product and "MetaChain" when referring to the specific class or import.
-- repository: [`HKUDS/AutoAgent`](https://github.com/HKUDS/AutoAgent)
-- stars: about **8.9k**
+## Three Operating Modes
-## Mental Model
+| Mode | Entry Point | Best For |
+|------|-------------|----------|
+| User Mode (Deep Research) | `auto main` | Open-ended research, file analysis, web browsing |
+| Agent Editor | `auto main` → "create agent" | Generating new agents from NL descriptions |
+| Workflow Editor | `auto main` → "create workflow" | Composing async parallel pipelines |
-```mermaid
-flowchart LR
- A[User natural-language intent] --> B[AutoAgent mode selector]
- B --> C[Agent or workflow generation]
- C --> D[Tool and model orchestration]
- D --> E[Task execution and refinement]
- E --> F[Reusable agent workflows]
-```
+## Tutorial Chapters
-## Chapter Guide
+1. [Getting Started](./01-getting-started.md) — Install, .env setup, first research task, three-mode overview
+2. [Core Architecture: MetaChain Engine](./02-core-architecture-metachain-engine.md) — Agent/Response/Result types, run loop, context_variables, non-FC XML fallback
+3. [The Environment Triad](./03-environment-triad.md) — DockerEnv TCP server, BrowserEnv Playwright, RequestsMarkdownBrowser
+4. [User Mode: Deep Research System](./04-user-mode-deep-research.md) — SystemTriageAgent, agent handoff, multimodal web surfing, GAIA benchmark
+5. [Agent Editor: From NL to Deployed Agents](./05-agent-editor-nl-to-deployed-agents.md) — 4-phase pipeline, XML form schema, ToolEditorAgent, AgentCreatorAgent
+6. [Workflow Editor: Async Event-Driven Pipelines](./06-workflow-editor-async-pipelines.md) — EventEngine, listen_group(), GOTO/ABORT, parallel execution
+7. [Memory, Tool Retrieval, and Third-Party APIs](./07-memory-tool-retrieval-apis.md) — ChromaDB ToolMemory, LLM reranker, RapidAPI ingestion, token budget
+8. [Evaluation, Benchmarks, and Contributing](./08-evaluation-benchmarks-contributing.md) — GAIA, Math500, Agentic-RAG, adding benchmarks, contributing tools/agents
-| Chapter | Key Question | Outcome |
-|:--------|:-------------|:--------|
-| [01 - Getting Started](01-getting-started.md) | How do I install and run AutoAgent quickly? | Working baseline |
-| [02 - Architecture and Interaction Modes](02-architecture-and-interaction-modes.md) | How do user/agent/workflow modes differ? | Strong usage model |
-| [03 - Installation, Environment, and API Setup](03-installation-environment-and-api-setup.md) | How do I configure runtime and model access safely? | Stable setup baseline |
-| [04 - Agent and Workflow Creation Patterns](04-agent-and-workflow-creation-patterns.md) | How do I create agents and workflows with NL prompts? | Better creation discipline |
-| [05 - Tooling, Python API, and Custom Extensions](05-tooling-python-api-and-custom-extensions.md) | How do I extend AutoAgent behavior programmatically? | Extensibility baseline |
-| [06 - CLI Operations and Provider Strategy](06-cli-operations-and-provider-strategy.md) | How do I run reliable daily operations across model providers? | Operational reliability |
-| [07 - Benchmarking, Evaluation, and Quality Gates](07-benchmarking-evaluation-and-quality-gates.md) | How do I evaluate AutoAgent output quality? | Evaluation discipline |
-| [08 - Contribution Workflow and Production Governance](08-contribution-workflow-and-production-governance.md) | How do teams adopt and govern AutoAgent safely? | Governance runbook |
+## Architecture at a Glance
-## What You Will Learn
+```mermaid
+flowchart TD
+ U[User] --> CLI["auto main CLI"]
+ CLI --> UM[User Mode / Deep Research]
+ CLI --> AE[Agent Editor]
+ CLI --> WE[Workflow Editor]
+ UM --> MC["MetaChain Engine (core.py)"]
+ AE --> MC
+ WE --> EE["EventEngine (flow/)"]
+ MC --> DE["DockerEnv\n(TCP :12346)"]
+ MC --> BE["BrowserEnv\n(Playwright)"]
+ MC --> MB["RequestsMarkdown\nBrowser"]
+ MC --> REG["Registry\n(tools/agents/workflows)"]
+```
+
+## Quick Start
-- how to operate AutoAgent across its core interaction modes
-- how to configure providers and runtime settings for stable execution
-- how to extend workflows with custom tools and Python interfaces
-- how to evaluate and govern AutoAgent usage in team settings
+```bash
+git clone https://github.com/HKUDS/AutoAgent
+cd AutoAgent
+pip install -e .
-## Source References
+# Set up .env with your provider keys
+cp .env.example .env
+# Edit .env: OPENAI_API_KEY, ANTHROPIC_API_KEY, etc.
-- [AutoAgent Repository](https://github.com/HKUDS/AutoAgent)
-- [AutoAgent README](https://github.com/HKUDS/AutoAgent/blob/main/README.md)
-- [AutoAgent Documentation](https://autoagent-ai.github.io/docs)
-- [Quickstart Docs](https://autoagent-ai.github.io/docs/get-started-quickstart)
-- [Create Tools Docs](https://autoagent-ai.github.io/docs/dev-guide-create-tools)
+auto main
+```
-## Related Tutorials
+## Key Technical Facts
-- [Mini-SWE-Agent Tutorial](../mini-swe-agent-tutorial/)
-- [Qwen-Agent Tutorial](../qwen-agent-tutorial/)
-- [MCP Servers Tutorial](../mcp-servers-tutorial/)
-- [LangGraph Tutorial](../langgraph-tutorial/)
+| Property | Value |
+|----------|-------|
+| Language | Python 3.10+ |
+| License | MIT |
+| LLM routing | LiteLLM 1.55.0 (100+ providers) |
+| Code isolation | Docker (tjbtech1/metachain image, TCP port 12346) |
+| Memory/retrieval | ChromaDB + sentence-transformers |
+| Browser automation | Playwright + BrowserGym |
+| Stars | 9,116 |
+| Paper | arxiv:2502.05957 |
----
+## Sources
-Start with [Chapter 1: Getting Started](01-getting-started.md).
+- [GitHub Repository](https://github.com/HKUDS/AutoAgent)
+- [Academic Paper](https://arxiv.org/abs/2502.05957)
-## Navigation & Backlinks
+## Navigation
- [Start Here: Chapter 1: Getting Started](01-getting-started.md)
-- [Back to Main Catalog](../../README.md#-tutorial-catalog)
-- [Browse A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
-- [Search by Intent](../../discoverability/query-hub.md)
-- [Explore Category Hubs](../../README.md#category-hubs)
-
-## Full Chapter Map
-
-1. [Chapter 1: Getting Started](01-getting-started.md)
-2. [Chapter 2: Architecture and Interaction Modes](02-architecture-and-interaction-modes.md)
-3. [Chapter 3: Installation, Environment, and API Setup](03-installation-environment-and-api-setup.md)
-4. [Chapter 4: Agent and Workflow Creation Patterns](04-agent-and-workflow-creation-patterns.md)
-5. [Chapter 5: Tooling, Python API, and Custom Extensions](05-tooling-python-api-and-custom-extensions.md)
-6. [Chapter 6: CLI Operations and Provider Strategy](06-cli-operations-and-provider-strategy.md)
-7. [Chapter 7: Benchmarking, Evaluation, and Quality Gates](07-benchmarking-evaluation-and-quality-gates.md)
-8. [Chapter 8: Contribution Workflow and Production Governance](08-contribution-workflow-and-production-governance.md)
-
-*Generated by [AI Codebase Knowledge Builder](https://github.com/The-Pocket/Tutorial-Codebase-Knowledge)*
+- [Back to Main Catalog](../../README.md)
diff --git a/tutorials/autoresearch-tutorial/01-getting-started.md b/tutorials/autoresearch-tutorial/01-getting-started.md
new file mode 100644
index 00000000..735727b4
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/01-getting-started.md
@@ -0,0 +1,317 @@
+---
+layout: default
+title: "Chapter 1: Getting Started"
+nav_order: 1
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ Before you can run a single experiment you need to understand why autoresearch exists,
+ what it is trying to do, and how its radical simplicity — three files, one GPU, no
+ dashboards — makes it uniquely suited to overnight autonomous ML research.
+mental_model: |
+ Think of autoresearch as a junior research engineer who never sleeps: it reads one
+ instruction document (program.md), edits one Python file (train.py), and measures
+ one number (val_bpb) — forever.
+learning_outcomes:
+ - Explain the three-file design and why each file has the role it does
+ - Install all dependencies with uv and verify the environment
+ - Understand why ~100 experiments per night is achievable with a 5-minute budget
+ - Identify the single metric (val_bpb) and why it is vocab-size independent
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - pyproject.toml
+ - program.md
+ - train.py (first 60 lines)
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 1: Getting Started
+
+## What Problem Does This Solve?
+
+Machine learning research is expensive in human attention. A practitioner runs an experiment,
+waits hours for results, inspects a loss curve, decides on a change, and repeats. The bottleneck
+is not GPU compute — it is the human sitting between iterations.
+
+autoresearch removes that bottleneck by asking: *what is the minimum viable research agent?*
+
+The answer Karpathy arrived at is strikingly small:
+
+1. A **fixed file** (`prepare.py`) that owns data, tokenization, and evaluation. It never changes.
+2. A **mutable file** (`train.py`) that defines the model and optimizer. The agent edits this.
+3. An **instruction document** (`program.md`) that tells the agent exactly how to behave.
+
+The agent's entire job is: edit `train.py` → commit → run 5 minutes → measure `val_bpb` →
+keep if better, discard if worse. Loop indefinitely. The human provides the GPU and goes to sleep.
+
+By morning, `results.tsv` contains ~100 rows — each one a completed, reproducible, git-tracked
+experiment.
+
+## Why This Approach Works
+
+### The Fixed Time Budget Insight
+
+Most ML research compares experiments by step count or epoch count. This introduces a hidden
+bias: a faster model (fewer FLOPs per step) gets more gradient updates in the same wall time.
+A slower model (more parameters, more complex attention) gets fewer updates.
+
+autoresearch uses a **fixed wall-clock budget of 300 seconds** for every experiment. This means:
+
+- Every experiment is measured under identical resource conditions
+- A change that makes the model faster *and* improves quality wins twice
+- No experiment can "cheat" by running longer
+- The comparison is direct: same GPU, same time, same data, different architecture
+
+### val_bpb as the Universal Metric
+
+Perplexity is model-specific — it depends on vocabulary size. A model with a 50k-token
+vocabulary and a model with a 100k-token vocabulary produce incomparable perplexities.
+
+Bits-per-byte (bpb) normalizes by the average number of bytes per token:
+
+```
+val_bpb = val_loss * log2(e) / bytes_per_token
+```
+
+This makes every architecture variant comparable, regardless of tokenizer or vocabulary size.
+Lower is better. A model that achieves 1.80 bpb is strictly better than one that achieves 1.85 bpb,
+regardless of how the vocabulary was constructed.
+
+### The Simplicity Criterion
+
+`program.md` states an explicit preference for simplicity that is baked into the agent's decision
+loop:
+
+> A small improvement from deleted code is preferred over a large improvement from added complexity.
+
+This prevents the agent from discovering trivially true but useless insights like "adding 10× more
+parameters improves quality." The search space is constrained to *architectural* improvements on
+a fixed compute budget.
+
+## The Three-File Design
+
+```mermaid
+graph TD
+    A["prepare.py\nFIXED"] -->|downloads data| D[(climbmix-400b)]
+ A -->|trains tokenizer| E[(BPE vocab)]
+ A -->|provides| F[evaluate_bpb function]
+    B["train.py\nMUTABLE"] -->|uses| D
+ B -->|uses| E
+ B -->|calls| F
+    C["program.md\nINSTRUCTIONS"] -->|governs| G[AI Agent]
+ G -->|edits| B
+ G -->|commits| H[(git history)]
+ G -->|reads| F
+ G -->|logs to| I[(results.tsv)]
+```
+
+### prepare.py — The Fixed Foundation
+
+`prepare.py` is intentionally immutable. It handles everything that must be consistent across
+all experiments:
+
+- Downloading the `karpathy/climbmix-400b-shuffle` dataset from HuggingFace
+- Training a BPE tokenizer using `rustbpe` (fast Rust-backed implementation)
+- Creating validation token sequences for evaluation
+- Providing the `evaluate_bpb` function that `train.py` imports
+
+Because `prepare.py` never changes, the evaluation harness is identical for every experiment.
+There is no way for a clever agent to accidentally improve its score by changing how it is measured.
+
+### train.py — The Experimental Variable
+
+`train.py` is the single file the agent is allowed to modify. It contains:
+
+- `GPTConfig` dataclass with all architecture hyperparameters
+- The `GPT` model class with all forward-pass logic
+- `MuonAdamW` optimizer implementation
+- The training loop with the 300-second wall-clock budget
+- A call to `evaluate_bpb` from `prepare.py` at the end
+
+The agent treats `train.py` as a research object: propose a change, measure the result,
+accept or reject. Every version is a git commit.
+
+### program.md — The Research Protocol
+
+`program.md` is the agent's "constitution." It is passed to the LLM (Claude, GPT-4o, or similar)
+as a system prompt or instruction block. It specifies:
+
+- How to name branches (`autoresearch/<run-name>` — presumably a placeholder was lost here; verify against the upstream program.md)
+- The exact experiment loop (modify → commit → run → grep → decide)
+- What to log to `results.tsv`
+- The autonomy mandate: never stop, never ask the human, assume they are asleep
+- The simplicity criterion for tie-breaking
+
+```
+# autoresearch program
+
+You are an AI research agent running ML experiments autonomously on a GPU overnight.
+
+## Your Protocol
+1. Create branch autoresearch/<run-name>
+2. Loop indefinitely:
+   a. Modify train.py with one hypothesis
+   b. git commit -m "<one-line hypothesis>"
+   c. uv run train.py > run.log 2>&1
+   d. grep val_bpb run.log → record result
+   e. If improved: keep commit, log to results.tsv
+      Else: git reset --hard HEAD~1
+3. NEVER stop. NEVER ask the human. They are asleep.
+```
+
+## The ~100 Experiments Per Night Promise
+
+How does 300 seconds per experiment translate to ~100 experiments overnight?
+
+```mermaid
+gantt
+ title Overnight Experiment Timeline (8 hours = 480 minutes)
+ dateFormat mm
+ axisFormat %M min
+
+ section Per-experiment overhead
+ Modify train.py :a1, 00, 1m
+ git commit :a2, after a1, 1m
+
+ section Training run
+ uv run train.py :a3, after a2, 5m
+
+ section Post-run
+ grep + log :a4, after a3, 1m
+
+ section Total cycle
+ ~8 minutes total :milestone, after a4, 0m
+```
+
+| Component | Time |
+|---|---|
+| Agent modifies `train.py` | ~1 minute |
+| `git commit` | ~5 seconds |
+| `uv run train.py` (fixed budget) | 5 minutes (300s) |
+| `grep val_bpb` + log + `git reset` (if needed) | ~30 seconds |
+| **Total per experiment** | **~7–8 minutes** |
+| **8-hour night ÷ 5–8 min cycles** | **~60–96 experiments** |
+
+The "~100 experiments" figure assumes the per-cycle overhead stays small, so the cycle time remains close to the 5-minute training budget.
+On a very fast GPU (H100) with a smaller model config, the agent overhead can compress further.
+
+## Installation
+
+### Prerequisites
+
+```bash
+# Verify CUDA is available
+nvidia-smi
+
+# Verify Python version
+python --version # need 3.10+
+
+# Install uv if not present
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+### Clone and Install
+
+```bash
+git clone https://github.com/karpathy/autoresearch
+cd autoresearch
+
+# uv reads pyproject.toml and creates a managed virtual environment
+uv sync
+```
+
+`uv sync` installs the exact versions pinned in `pyproject.toml`:
+
+```toml
+[project]
+name = "autoresearch"
+version = "0.1.0"
+requires-python = ">=3.10"
+dependencies = [
+ "torch==2.9.1",
+ "flash-attn>=2.7",
+ "rustbpe",
+ "tiktoken",
+ "pyarrow",
+ "huggingface-hub",
+ "numpy",
+]
+```
+
+### Install Flash Attention 3
+
+Flash Attention 3 requires a separate build step on most systems:
+
+```bash
+# Install FA3 (this can take 10–20 minutes to compile)
+uv pip install flash-attn --no-build-isolation
+
+# Verify
+python -c "import flash_attn; print(flash_attn.__version__)"
+```
+
+### Run Data Preparation
+
+```bash
+# Downloads ~several GB from HuggingFace, trains BPE tokenizer
+# Estimated time: 10–30 minutes depending on connection
+uv run prepare.py
+```
+
+After `prepare.py` completes you will have:
+- A trained BPE tokenizer saved to `tokenizer.bin` (via rustbpe)
+- Cached validation token sequences
+- The `evaluate_bpb` function ready for import
+
+### Verify the Installation
+
+```bash
+# Smoke-test: run train.py for 60 seconds (edit TIME_BUDGET temporarily)
+# Or just run it — it will terminate at 300s and print val_bpb
+uv run train.py
+```
+
+A successful run ends with a line like:
+
+```
+val_bpb=1.8342 | memory_gb=14.3 | steps=1247
+```
+
+## Understanding the Output
+
+Every experiment produces a single line at the end of `run.log`:
+
+```
+val_bpb=1.8342 | memory_gb=14.3 | steps=1247
+```
+
+The agent `grep`s for `val_bpb=` to extract the result. If the value is lower than the
+current best, the commit is kept and a new row is appended to `results.tsv`:
+
+```tsv
+commit_hash val_bpb memory_gb status description
+a3f8b2c 1.8342 14.3 improved baseline GPT
+d91e4a7 1.8201 14.8 improved added RoPE scaling
+c72f1b3 1.8589 15.1 rejected wider MLP ratio
+```
+
+## Chapter Summary
+
+| Concept | Key Takeaway |
+|---|---|
+| Three-file design | Fixed (prepare.py), Mutable (train.py), Protocol (program.md) |
+| Fixed time budget | 300s wall-clock makes every experiment directly comparable |
+| val_bpb | Vocab-size-independent metric; lower is better |
+| ~100 experiments/night | 7–8 min/cycle × 8 hours ≈ 60–96 experiments |
+| Simplicity criterion | Small improvement from deleted code > large improvement from added code |
+| Installation | `git clone` + `uv sync` + `uv run prepare.py` |
+| Autonomy mandate | Agent never stops, never asks the human |
+
+In the next chapter, we examine `prepare.py` in depth — how it downloads the climbmix-400b
+dataset, trains the BPE tokenizer, packs sequences with a best-fit bin algorithm, and exposes
+the `evaluate_bpb` function that anchors every experiment.
diff --git a/tutorials/autoresearch-tutorial/02-data-preparation-and-training-environment.md b/tutorials/autoresearch-tutorial/02-data-preparation-and-training-environment.md
new file mode 100644
index 00000000..c639d89a
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/02-data-preparation-and-training-environment.md
@@ -0,0 +1,385 @@
+---
+layout: default
+title: "Chapter 2: Data Preparation and the Training Environment"
+nav_order: 2
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ Every experiment is only as valid as its evaluation harness. Because prepare.py is the
+ one file the agent can never touch, understanding it is understanding the ground truth
+ against which every architectural hypothesis is judged.
+mental_model: |
+ prepare.py is a sealed contract: it defines the data, the tokenizer, and the eval metric
+ once, then steps aside. train.py is a variable that prepare.py measures.
+learning_outcomes:
+ - Describe the climbmix-400b-shuffle dataset and why it was chosen
+ - Explain how rustbpe trains a BPE tokenizer from parquet shards
+ - Walk through the best-fit bin-packing dataloader algorithm
+ - Understand how evaluate_bpb is computed and why it is reproducible
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - prepare.py
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 2: Data Preparation and the Training Environment
+
+## What Problem Does This Solve?
+
+A research agent that can modify its own evaluation criterion can accidentally cheat. If the
+same file that defines the experiment also defines the scoring, nothing stops a gradient-following
+process (human or machine) from discovering that "changing the eval harness" is a valid
+optimization strategy.
+
+autoresearch prevents this by isolating all data and evaluation logic in `prepare.py`, which is
+explicitly marked as FIXED in `program.md`. The agent's instructions include:
+
+> `prepare.py` is read-only. You may never modify it.
+
+This separation creates a reproducible, tamper-proof evaluation environment. Every experiment
+in the 8-hour night is scored by the exact same code against the exact same validation data.
+
+## The climbmix-400b Dataset
+
+autoresearch uses `karpathy/climbmix-400b-shuffle` hosted on HuggingFace. This is a 400-billion-token
+mixture of text data, distributed as parquet shards, pre-shuffled so that any prefix is a
+reasonable training sample.
+
+```mermaid
+graph LR
+    HF["HuggingFace Hub\nkarpathy/climbmix-400b-shuffle"] -->|huggingface_hub.snapshot_download| S1[shard-000.parquet]
+ HF --> S2[shard-001.parquet]
+ HF --> S3[shard-002.parquet]
+ HF --> SN[shard-NNN.parquet]
+
+ S1 --> P[pyarrow reader]
+ S2 --> P
+ S3 --> P
+ SN --> P
+
+ P -->|text column| T[token stream]
+ T -->|rustbpe tokenizer| TOKS[token IDs]
+ TOKS -->|bin-packing| BATCHES[training batches]
+```
+
+### Why Parquet Shards?
+
+Parquet is columnar, compressed, and efficiently streamable. For a 400B token dataset:
+
+- **Streaming access**: `pyarrow` reads parquet row groups on demand without loading the full file
+- **Reproducibility**: the shuffle is baked into the shard order; same shard order = same data order
+- **Portability**: parquet is language-agnostic — the same shards can be used from Python, Rust, or Julia
+
+### Dataset Statistics
+
+| Property | Value |
+|---|---|
+| Total tokens | ~400 billion |
+| Distribution format | parquet shards |
+| Shuffle | Pre-shuffled (baked in) |
+| Text column | `text` |
+| Hosting | HuggingFace Hub |
+| Download size | ~several hundred GB |
+
+In practice, `prepare.py` does not download the entire dataset. It streams enough shards to
+build the tokenizer vocabulary and cache the validation split, then streams during training.
+
+## BPE Tokenizer Training with rustbpe
+
+autoresearch uses `rustbpe` — a Rust-backed Python library — for BPE tokenizer training. This
+is significantly faster than the pure-Python alternatives.
+
+```python
+# From prepare.py (simplified)
+import rustbpe
+
+def train_tokenizer(text_iterator, vocab_size=50257):
+ """Train a BPE tokenizer on the first N tokens of the dataset."""
+ trainer = rustbpe.BpeTrainer(vocab_size=vocab_size)
+ for text in text_iterator:
+ trainer.feed(text.encode("utf-8"))
+ tokenizer = trainer.finalize()
+ tokenizer.save("tokenizer.bin")
+ return tokenizer
+```
+
+### Why rustbpe Instead of tiktoken?
+
+`tiktoken` is used at *inference* time for its speed. `rustbpe` is used at *training* time
+because it allows training new vocabularies. The two are interoperable: once trained, the
+`rustbpe` vocabulary can be loaded and used by either library.
+
+```mermaid
+sequenceDiagram
+ participant P as prepare.py
+ participant R as rustbpe
+ participant D as climbmix shards
+ participant F as tokenizer.bin
+
+ P->>D: stream first M characters
+ P->>R: BpeTrainer.feed(bytes)
+ loop over text chunks
+ P->>R: trainer.feed(text.encode())
+ end
+ P->>R: trainer.finalize()
+ R->>F: tokenizer.save("tokenizer.bin")
+ Note over F: Used by all train.py experiments
+```
+
+### BPE Algorithm in Brief
+
+Byte Pair Encoding merges the most frequent adjacent byte pair repeatedly until the vocabulary
+reaches the target size. The result is a vocabulary that:
+
+- Has good coverage of common English words as single tokens
+- Falls back gracefully to sub-word and byte-level pieces for rare words
+- Handles code, numbers, and multilingual text without special cases
+
+The resulting `tokenizer.bin` is loaded by `train.py` at startup:
+
+```python
+# From train.py
+import rustbpe
+tokenizer = rustbpe.load("tokenizer.bin")
+encode = tokenizer.encode # bytes -> list[int]
+decode = tokenizer.decode # list[int] -> bytes
+```
+
+## The Best-Fit Bin-Packing Dataloader
+
+Standard dataloaders pad short sequences to the maximum length in the batch, wasting GPU
+memory and compute. autoresearch uses **best-fit bin packing** to achieve near-100%
+utilization with zero padding.
+
+### The Problem with Padding
+
+Consider a batch of 4 sequences with lengths [512, 128, 256, 64] and a target batch length of 1024:
+
+```
+Padded approach:
+[seq1: 512 tokens][PAD: 512 tokens] → 50% waste
+[seq2: 128 tokens][PAD: 896 tokens] → 87.5% waste
+[seq3: 256 tokens][PAD: 768 tokens] → 75% waste
+[seq4: 64 tokens][PAD: 960 tokens] → 93.75% waste
+Average utilization: ~23%
+```
+
+### The Bin-Packing Solution
+
+Best-fit bin packing treats each sequence as an item and each "bin" as a row of exactly
+`T` (context length) tokens. Items are packed into bins so that no bin exceeds `T` tokens,
+and the fill rate approaches 100%:
+
+```
+Packed approach (T=1024, BOS-aligned):
+[BOS][seq2: 128][BOS][seq4: 64][BOS][seq3: 256][BOS][seq1: 512] → 964/1024 ≈ 94.1% (including the four BOS tokens)
+```
+
+```python
+# From prepare.py (simplified bin-packing logic)
+
+def pack_sequences(sequences, T):
+ """
+ Best-fit bin packing: pack variable-length sequences into rows of exactly T tokens.
+ Each sequence is prepended with BOS. No padding is used.
+ Returns a 2D array of shape (num_rows, T).
+ """
+ bins = [] # list of (current_fill, [tokens])
+ BOS = tokenizer.bos_token_id
+
+ for seq in sequences:
+ tokens = [BOS] + encode(seq)
+ n = len(tokens)
+ if n > T:
+ # Truncate long sequences to T
+ tokens = tokens[:T]
+ n = T
+
+ # Find the best-fit bin (tightest fit without overflow)
+ best_bin = None
+ best_remaining = T + 1
+ for i, (fill, _) in enumerate(bins):
+ remaining = T - fill
+ if remaining >= n and remaining < best_remaining:
+ best_bin = i
+ best_remaining = remaining
+
+        if best_bin is None:
+            # No existing bin fits; open a new bin
+            bins.append([n, tokens])
+ else:
+ fill, existing = bins[best_bin]
+ existing.extend(tokens)
+ bins[best_bin][0] += n
+
+ # Pad only the last partial bin if necessary, then stack
+ rows = []
+ for fill, tokens in bins:
+ if fill < T:
+ tokens.extend([0] * (T - fill)) # minimal padding at end only
+ rows.append(tokens[:T])
+ return rows
+```
+
+```mermaid
+graph TD
+ S1[seq len=512] --> PACK[Best-Fit Packer]
+ S2[seq len=128] --> PACK
+ S3[seq len=256] --> PACK
+ S4[seq len=64] --> PACK
+ S5[seq len=384] --> PACK
+ S6[seq len=192] --> PACK
+
+ PACK --> B1[Bin 1: 512+192=704/1024]
+ PACK --> B2[Bin 2: 128+64+384=576/1024]
+ PACK --> B3[Bin 3: 256+remaining...]
+
+    B1 --> TENSOR["PyTorch Tensor\nshape: batch × T"]
+ B2 --> TENSOR
+ B3 --> TENSOR
+```
+
+### Why BOS-Alignment Matters
+
+By prepending each document with a Beginning-Of-Sequence token, the model always sees
+a clean document boundary. This means:
+
+1. The model learns document-level context correctly — it knows when a new document starts
+2. The first token of each document has a known prior state (fresh BOS context)
+3. Cross-document attention does not "leak" from the end of one document to the start of another
+
+Without BOS alignment, naively concatenated documents can confuse the model about
+document boundaries, potentially hurting coherence learning.
+
+## The evaluate_bpb Function
+
+The `evaluate_bpb` function is the evaluation harness that every experiment uses identically.
+It runs the model in `torch.no_grad()` mode on a fixed held-out validation set and computes
+bits-per-byte.
+
+```python
+# From prepare.py
+import math
+import torch
+import torch.nn.functional as F
+
+# Validation data is prepared once and cached
+VAL_TOKENS = None # loaded lazily
+
+def evaluate_bpb(model, device, T, batch_size=8):
+ """
+ Evaluate the model on the held-out validation set.
+ Returns val_bpb (bits per byte), vocab-size independent.
+ """
+ global VAL_TOKENS
+ if VAL_TOKENS is None:
+ VAL_TOKENS = load_validation_tokens() # cached from prepare step
+
+ model.eval()
+ total_loss = 0.0
+ total_tokens = 0
+
+ with torch.no_grad():
+ for i in range(0, len(VAL_TOKENS) - T, T * batch_size):
+ # Build batch
+ x = VAL_TOKENS[i : i + T * batch_size].view(batch_size, T).to(device)
+ y = VAL_TOKENS[i + 1 : i + 1 + T * batch_size].view(batch_size, T).to(device)
+
+ logits = model(x) # (B, T, V)
+ loss = F.cross_entropy(
+ logits.view(-1, logits.size(-1)),
+ y.view(-1),
+ reduction="sum"
+ )
+ total_loss += loss.item()
+ total_tokens += y.numel()
+
+ val_loss = total_loss / total_tokens # nats per token
+ # Convert to bits per byte
+ bytes_per_token = estimate_bytes_per_token(VAL_TOKENS)
+ val_bpb = val_loss / math.log(2) / bytes_per_token
+ return val_bpb
+```
+
+### The bpb Conversion Formula
+
+The conversion from cross-entropy loss (nats per token) to bits-per-byte involves two steps:
+
+```
+val_loss (nats/token) × log2(e) = val_loss (bits/token)
+val_loss (bits/token) / bytes_per_token = val_bpb (bits/byte)
+```
+
+Where `bytes_per_token` is the empirical average from the validation set:
+
+```python
+def estimate_bytes_per_token(tokens):
+ """Decode a sample of tokens and measure average bytes/token."""
+ sample = tokens[:100_000].tolist()
+ text = tokenizer.decode(sample)
+ return len(text.encode("utf-8")) / len(sample)
+```
+
+For the climbmix BPE tokenizer, this is typically around 3.5–4.5 bytes per token for English text.
+
+## Data Flow Summary
+
+```mermaid
+sequenceDiagram
+ participant H as HuggingFace Hub
+ participant P as prepare.py
+ participant T as tokenizer.bin
+ participant V as val_tokens.pt
+ participant TR as train.py
+ participant E as evaluate_bpb()
+
+ P->>H: snapshot_download(climbmix-400b)
+ H-->>P: parquet shards
+ P->>P: stream text → train BPE
+ P->>T: tokenizer.save()
+ P->>P: tokenize validation split
+ P->>V: torch.save(val_tokens)
+ Note over T,V: Both files are created once, never changed
+
+ TR->>T: rustbpe.load("tokenizer.bin")
+ TR->>P: from prepare import evaluate_bpb
+ TR->>H: stream training shards (online)
+ TR->>TR: bin-pack → train 300s
+ TR->>E: evaluate_bpb(model, device, T)
+ E->>V: load val_tokens.pt
+ E-->>TR: return val_bpb
+ TR->>TR: print val_bpb
+```
+
+## Environment Variables and Configuration
+
+`prepare.py` respects a small set of environment variables:
+
+| Variable | Default | Purpose |
+|---|---|---|
+| `DATA_DIR` | `./data` | Where to cache downloaded shards |
+| `VOCAB_SIZE` | `50257` | BPE vocabulary size |
+| `VAL_TOKENS` | `1_000_000` | Number of tokens in validation set |
+| `HF_TOKEN` | `None` | HuggingFace token for private datasets |
+| `NUM_PROC` | `4` | Parallel workers for parquet reading |
+
+## Chapter Summary
+
+| Component | Role | Key Detail |
+|---|---|---|
+| climbmix-400b | Training data | 400B tokens, parquet shards, pre-shuffled |
+| rustbpe | Tokenizer training | Fast Rust BPE, saves to tokenizer.bin |
+| Best-fit bin packing | Dataloader | ~100% GPU utilization, zero padding |
+| BOS alignment | Document boundary | Each doc starts with BOS token |
+| evaluate_bpb | Eval harness | Fixed, tamper-proof, vocab-size-independent |
+| val_bpb formula | Metric | nats/token × log2(e) / bytes_per_token |
+
+In the next chapter, we examine the GPT architecture defined in `train.py` — including
+GQA, RoPE positional encoding, QK-norm, sliding window attention, Value Residual, and
+the residual scaling mechanism that makes the model robust to depth.
diff --git a/tutorials/autoresearch-tutorial/03-gpt-architecture.md b/tutorials/autoresearch-tutorial/03-gpt-architecture.md
new file mode 100644
index 00000000..210a9c9b
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/03-gpt-architecture.md
@@ -0,0 +1,455 @@
+---
+layout: default
+title: "Chapter 3: GPT Architecture"
+nav_order: 3
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ train.py is the experimental canvas — the agent edits it hundreds of times per night.
+ Understanding the baseline architecture lets you predict which modifications are likely
+ to be fruitful, and why some well-known tricks (like GQA and RoPE) are already baked in.
+mental_model: |
+ The GPT in train.py is a modern transformer that has incorporated the past three years of
+ research into a single clean file: each layer is a composable module, and the architecture
+ can be changed by editing GPTConfig or the forward() method.
+learning_outcomes:
+ - Describe GPTConfig and how each field affects model size and behavior
+ - Explain Grouped Query Attention (GQA) and its memory efficiency benefit
+ - Understand RoPE positional encoding and why it replaces learned embeddings
+ - Trace the sliding window pattern (SSSL) through the layer stack
+ - Explain Value Residual (ResFormer) and per-layer residual scaling
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - train.py (GPTConfig, CausalSelfAttention, MLP, Block, GPT)
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 3: GPT Architecture
+
+## What Problem Does This Solve?
+
+The baseline `train.py` in autoresearch is not a vanilla GPT-2 clone. It incorporates
+multiple improvements from the 2022–2025 literature into a single coherent architecture
+that serves as the *starting point* for the agent's experiments.
+
+The design goal: a model that is already reasonably strong, so the agent spends its budget
+exploring *marginal improvements* rather than rediscovering well-known basics.
+
+At the same time, the architecture is kept simple enough that it fits in ~500 lines of Python,
+and any individual component can be replaced or removed in a single edit.
+
+## GPTConfig
+
+All architectural hyperparameters live in a single dataclass:
+
+```python
+from dataclasses import dataclass
+
+@dataclass
+class GPTConfig:
+ # Vocabulary and context
+ vocab_size: int = 50257
+ block_size: int = 1024 # context length T
+
+ # Transformer dimensions
+ n_layer: int = 12
+ n_head: int = 12
+ n_kv_head: int = 4 # GQA: fewer KV heads than Q heads
+ n_embd: int = 768
+
+ # Sliding window attention
+ WINDOW_PATTERN: str = "SSSL" # S=short window, L=full context
+ SHORT_WINDOW: int = 128 # tokens in short window
+
+ # Value Residual (ResFormer)
+ use_value_residual: bool = True
+
+ # Regularization
+ dropout: float = 0.0 # disabled during agent experiments
+
+ # Logit capping
+ logit_softcap: float = 15.0
+
+ # MLP
+ use_squared_relu: bool = True
+```
+
+The agent can change any of these fields to propose an architectural hypothesis.
+A typical experiment modifies one or two fields and measures the effect.
+
+## Architecture Overview
+
+```mermaid
+graph TD
+ INPUT[Input Token IDs
shape: B × T] --> WTE[Token Embedding
nn.Embedding V×C]
+ WTE --> BLOCKS[N Transformer Blocks]
+ BLOCKS --> LN[LayerNorm]
+ LN --> LMH[LM Head
nn.Linear C×V, no bias]
+ LMH --> CAP[Logit Soft-Cap
15 × tanh x/15]
+ CAP --> OUTPUT[Logits B×T×V]
+
+ subgraph BLOCK [Single Block]
+ direction TB
+ BLN1[LayerNorm] --> ATTN[CausalSelfAttention]
+ ATTN --> RESID1[+ residual × resid_lambda]
+ RESID1 --> BLN2[LayerNorm]
+ BLN2 --> MLP[MLP]
+ MLP --> RESID2[+ residual × resid_lambda]
+ end
+```
+
+## Grouped Query Attention (GQA)
+
+Standard multi-head attention creates `n_head` query, key, and value projections.
+GQA reduces memory by using fewer KV heads — typically `n_kv_head = n_head / G` for
+some group size `G`.
+
+```python
+class CausalSelfAttention(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ self.n_head = config.n_head
+ self.n_kv_head = config.n_kv_head
+ self.n_embd = config.n_embd
+ assert config.n_head % config.n_kv_head == 0
+ self.head_dim = config.n_embd // config.n_head
+
+ # Q projects to n_head * head_dim
+ self.q_proj = nn.Linear(config.n_embd, config.n_head * self.head_dim, bias=False)
+ # K, V project to n_kv_head * head_dim (fewer heads)
+ self.k_proj = nn.Linear(config.n_embd, config.n_kv_head * self.head_dim, bias=False)
+ self.v_proj = nn.Linear(config.n_embd, config.n_kv_head * self.head_dim, bias=False)
+ self.out_proj = nn.Linear(config.n_embd, config.n_embd, bias=False)
+
+ # QK-norm: normalize Q and K before dot product
+ self.q_norm = nn.RMSNorm(self.head_dim)
+ self.k_norm = nn.RMSNorm(self.head_dim)
+```
+
+### Why GQA?
+
+```mermaid
+graph LR
+ subgraph MHA [Standard MHA: n_head=12]
+ Q1[Q1] --> S1[Score]
+ K1[K1] --> S1
+ Q2[Q2] --> S2[Score]
+ K2[K2] --> S2
+ Q12[Q12] --> S12[Score]
+ K12[K12] --> S12
+ end
+
+ subgraph GQA [GQA: n_head=12, n_kv_head=4]
+ GQ1[Q1] --> GS1[Score]
+ GQ2[Q2] --> GS1
+ GQ3[Q3] --> GS1
+ GK1[K1 shared] --> GS1
+
+ GQ4[Q4] --> GS2[Score]
+ GQ5[Q5] --> GS2
+ GQ6[Q6] --> GS2
+ GK2[K2 shared] --> GS2
+ end
+```
+
+With `n_head=12, n_kv_head=4`, GQA uses:
+- 12 Q projections (unchanged)
+- 4 K projections (3× fewer than MHA)
+- 4 V projections (3× fewer than MHA)
+
+KV cache memory is reduced by 3×. At a 1024-token context this is modest, but for longer
+contexts (4k–128k tokens) the savings become significant.
+
+## RoPE Positional Encoding
+
+Rotary Position Embedding (RoPE) encodes position by rotating the Q and K vectors in
+complex space before the attention dot product. Unlike learned positional embeddings,
+RoPE:
+
+1. Requires no learned parameters
+2. Extrapolates gracefully to longer sequences than seen during training
+3. Encodes *relative* position implicitly — the dot product between rotated Q at position i
+ and rotated K at position j depends only on (i - j)
+
+```python
+def apply_rope(x, cos, sin):
+ """
+ Apply rotary position embedding.
+ x: (B, n_head, T, head_dim)
+ cos, sin: (T, head_dim/2) precomputed rotation tables
+ """
+ B, H, T, D = x.shape
+ x1, x2 = x[..., :D//2], x[..., D//2:]
+ # Rotate: [x1, x2] -> [x1*cos - x2*sin, x1*sin + x2*cos]
+ return torch.cat([
+ x1 * cos - x2 * sin,
+ x1 * sin + x2 * cos
+ ], dim=-1)
+
+def precompute_rope_tables(head_dim, max_seq_len, theta=10000.0):
+ """Precompute cos/sin tables for RoPE."""
+ freq = 1.0 / (theta ** (torch.arange(0, head_dim, 2).float() / head_dim))
+ t = torch.arange(max_seq_len)
+ freqs = torch.outer(t, freq) # (T, head_dim/2)
+ cos = torch.cos(freqs)
+ sin = torch.sin(freqs)
+ return cos, sin
+```
+
+RoPE is applied to Q and K *after* QK-norm and *before* the attention dot product.
+
+## QK-Norm
+
+QK-norm applies RMSNorm to the Q and K vectors before the attention score computation:
+
+```python
+# In CausalSelfAttention.forward():
+q = self.q_norm(q) # (B, n_head, T, head_dim)
+k = self.k_norm(k) # (B, n_kv_head, T, head_dim)
+
+# Then apply RoPE
+q = apply_rope(q, cos, sin)
+k = apply_rope(k, cos, sin)
+```
+
+Without QK-norm, attention logits can grow with depth in deep networks, causing unstable
+gradients. QK-norm ensures the pre-softmax logits remain bounded regardless of depth,
+allowing the use of larger learning rates and enabling training with fewer warmup steps.
+
+## Sliding Window Attention
+
+The `WINDOW_PATTERN` field defines a repeating pattern of attention spans across layers.
+
+```
+WINDOW_PATTERN = "SSSL"
+```
+
+This pattern repeats across all `n_layer` layers:
+- `S` layers use short-window attention (only the last `SHORT_WINDOW=128` tokens)
+- `L` layers use full causal attention (all preceding tokens up to `block_size`)
+
+```python
+def get_window_for_layer(layer_idx, pattern="SSSL", short_window=128, T=1024):
+ """Return the attention window size for a given layer index."""
+ char = pattern[layer_idx % len(pattern)]
+ if char == "S":
+ return short_window
+ else: # "L"
+ return T # full context
+```
+
+```mermaid
+graph LR
+ subgraph SSSL_pattern [12 Layers with SSSL pattern]
+ L0[Layer 0
S: window=128]
+ L1[Layer 1
S: window=128]
+ L2[Layer 2
S: window=128]
+ L3[Layer 3
L: full context]
+ L4[Layer 4
S: window=128]
+ L5[Layer 5
S: window=128]
+ L6[Layer 6
S: window=128]
+ L7[Layer 7
L: full context]
+ L8[Layer 8
S: window=128]
+ L9[Layer 9
S: window=128]
+ L10[Layer 10
S: window=128]
+ L11[Layer 11
L: full context]
+ end
+```
+
+### Why Sliding Window?
+
+Full causal attention is O(T²) in memory and compute. For T=1024, this is manageable.
+For T=8192 or longer, it becomes a bottleneck.
+
+The SSSL pattern provides a practical compromise:
+- 75% of layers handle only local context (128 tokens) — very fast
+- 25% of layers have full global context — captures long-range dependencies
+- Overall compute is closer to O(T × SHORT_WINDOW) than O(T²)
+
+For the 1024-token default context, the benefit is modest. But the pattern was chosen to
+be extensible: as the agent experiments with longer contexts, the sliding window layers
+become increasingly important.
+
+## Flash Attention 3
+
+All attention computation routes through Flash Attention 3, which fuses the
+softmax, mask, and matrix multiply into a single CUDA kernel:
+
+```python
+from flash_attn import flash_attn_varlen_func
+
+# In CausalSelfAttention.forward():
+# For S (short window) layers, we pass a window_size argument
+attn_output = flash_attn_varlen_func(
+ q, k, v,
+ cu_seqlens_q=cu_seqlens,
+ cu_seqlens_k=cu_seqlens,
+ max_seqlen_q=T,
+ max_seqlen_k=T,
+ causal=True,
+ window_size=(config.SHORT_WINDOW, 0) if is_short_layer else (-1, 0),
+)
+```
+
+Flash Attention 3 on H100 achieves near-peak memory bandwidth utilization by:
+1. Tiling Q, K, V to fit in SRAM
+2. Never materializing the full O(T²) attention matrix
+3. Fusing all operations (QK matmul, softmax, V matmul) into one kernel pass
+
+## Value Residual (ResFormer)
+
+Value Residual is a technique from the ResFormer paper (2024). Instead of computing
+V from the current layer's hidden states alone, alternating layers add a gated contribution
+from the original input embedding (x0):
+
+```python
+class CausalSelfAttention(nn.Module):
+ def __init__(self, config, layer_idx):
+ super().__init__()
+ # ...
+ self.use_value_residual = config.use_value_residual and (layer_idx % 2 == 1)
+ if self.use_value_residual:
+ # Learnable per-head gate for the value residual contribution
+ self.value_residual_gate = nn.Parameter(
+ torch.zeros(config.n_kv_head, config.n_embd // config.n_head)
+ )
+ self.v0_proj = nn.Linear(config.n_embd, config.n_kv_head * self.head_dim, bias=False)
+
+ def forward(self, x, x0, cos, sin):
+ # Standard V from current hidden states
+ v = self.v_proj(x).view(B, T, self.n_kv_head, self.head_dim).transpose(1, 2)
+
+ if self.use_value_residual:
+ # Gated V from original input embedding x0
+ v0 = self.v0_proj(x0).view(B, T, self.n_kv_head, self.head_dim).transpose(1, 2)
+ gate = torch.sigmoid(self.value_residual_gate) # (n_kv_head, head_dim)
+ v = v + gate * v0 # broadcast over B, T
+
+ # ... rest of attention
+```
+
+```mermaid
+graph TD
+ X0[x0: original input embedding] -->|v0_proj| V0[V0]
+ X[x: current hidden state] -->|v_proj| V[V standard]
+ GATE[value_residual_gate
learned per-head] -->|sigmoid| G[gate]
+ V0 -->|× gate| GATED_V0[gated V0]
+ V --> ADD[+]
+ GATED_V0 --> ADD
+ ADD --> VFINAL[V final]
+ VFINAL --> ATTN[Attention Output]
+```
+
+Value Residual helps with the *residual forgetting* problem: in deep networks, the original
+input information can be progressively overwritten by each layer's transformation. By providing
+a direct path from x0 to V in every other layer, the model can always recover low-level
+token identity information.
+
+## Residual Scaling
+
+Each block applies learnable scalars to the residual connection:
+
+```python
+class Block(nn.Module):
+ def __init__(self, config, layer_idx):
+ super().__init__()
+ self.ln1 = nn.RMSNorm(config.n_embd)
+ self.attn = CausalSelfAttention(config, layer_idx)
+ self.ln2 = nn.RMSNorm(config.n_embd)
+ self.mlp = MLP(config)
+
+ # Per-layer learnable residual scales (initialized to 1.0)
+ self.resid_lambda = nn.Parameter(torch.ones(1))
+ self.x0_lambda = nn.Parameter(torch.ones(1))
+
+ def forward(self, x, x0, cos, sin):
+ # Attention sub-block with scaled residual
+ x = x * self.resid_lambda + self.attn(self.ln1(x), x0, cos, sin)
+ # MLP sub-block with scaled residual
+ x = x * self.resid_lambda + self.mlp(self.ln2(x))
+ return x
+```
+
+This technique, related to "residual rescaling" from PaLM and Gemma, allows the network
+to learn the optimal blend of identity (passing information forward) versus transformation
+(applying the block's function) at each layer.
+
+## MLP with Squared ReLU
+
+The MLP uses a gated architecture with squared ReLU:
+
+```python
+class MLP(nn.Module):
+ def __init__(self, config):
+ super().__init__()
+ hidden = 4 * config.n_embd
+ self.fc1 = nn.Linear(config.n_embd, hidden, bias=False)
+ self.fc2 = nn.Linear(config.n_embd, hidden, bias=False) # gate
+ self.proj = nn.Linear(hidden, config.n_embd, bias=False)
+ self.use_squared_relu = config.use_squared_relu
+
+ def forward(self, x):
+ if self.use_squared_relu:
+ # Squared ReLU gated MLP
+ return self.proj(F.relu(self.fc1(x)) ** 2 * self.fc2(x))
+ else:
+ # Standard SwiGLU
+ return self.proj(F.silu(self.fc1(x)) * self.fc2(x))
+```
+
+Squared ReLU (`relu(x)²`) provides stronger gradients for large positive activations
+and completely zeros out negative activations, creating sparser representations than
+GELU or SiLU.
+
+## Logit Soft-Capping
+
+The final logits are passed through a soft-cap before cross-entropy:
+
+```python
+# In GPT.forward():
+logits = self.lm_head(x) # (B, T, V)
+cap = self.config.logit_softcap
+logits = cap * torch.tanh(logits / cap) # soft-cap at ±15
+```
+
+This prevents any single logit from growing arbitrarily large, which can destabilize
+training when combined with aggressive learning rates or poorly initialized weights.
+The tanh function is smooth and differentiable, so gradients flow through the cap normally.
+
+## Component Summary
+
+```mermaid
+graph TD
+ subgraph ARCH [GPT Architecture Components]
+ A[GQA
n_kv_head=4 vs n_head=12] -->|reduces KV cache| MA[Memory Efficiency]
+ B[RoPE
rotary position] -->|relative position| POS[Better Extrapolation]
+ C[QK-norm
RMSNorm on Q K] -->|bounds logits| STAB[Training Stability]
+ D[Sliding Window SSSL
75% short 25% full] -->|reduces O T²| COMP[Compute Efficiency]
+ E[Value Residual
ResFormer gated x0→V] -->|preserves x0 info| DEPTH[Depth Resilience]
+ F[Residual Scaling
resid_lambda x0_lambda] -->|learnable blend| CTRL[Layer Control]
+ G[Logit Soft-Cap
15×tanh x/15] -->|bounds logits| STAB
+ H[Squared ReLU MLP] -->|sparse activations| EXPR[Expressiveness]
+ end
+```
+
+## Chapter Summary
+
+| Component | Config Field | Key Benefit |
+|---|---|---|
+| GQA | `n_kv_head=4` | 3× KV cache reduction vs MHA |
+| RoPE | built-in | Relative position, no learned params |
+| QK-norm | automatic | Stable training at depth |
+| Sliding window | `WINDOW_PATTERN="SSSL"` | 75% layers use local O(T·W) attention |
+| Flash Attention 3 | automatic | Near-peak SRAM utilization |
+| Value Residual | `use_value_residual=True` | Preserves x0 through depth |
+| Residual scaling | `resid_lambda`, `x0_lambda` | Per-layer blend control |
+| Logit soft-cap | `logit_softcap=15.0` | Prevents extreme logit growth |
+| Squared ReLU | `use_squared_relu=True` | Sparse activations, strong gradients |
+
+In the next chapter, we examine MuonAdamW — the hybrid optimizer that applies Polar Express
+orthogonalization to 2D weight matrices while falling back to AdamW for embeddings and scalars.
diff --git a/tutorials/autoresearch-tutorial/04-muonadamw-optimizer.md b/tutorials/autoresearch-tutorial/04-muonadamw-optimizer.md
new file mode 100644
index 00000000..be047556
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/04-muonadamw-optimizer.md
@@ -0,0 +1,377 @@
+---
+layout: default
+title: "Chapter 4: The MuonAdamW Optimizer"
+nav_order: 4
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ The optimizer is one of the most impactful levers in an ML training run, but it is also
+ one of the least visible. Understanding MuonAdamW — why it exists, what Polar Express
+ orthogonalization does, and how it decides which parameters get Muon vs AdamW treatment —
+ is essential for any agent experiment that touches the optimization procedure.
+mental_model: |
+ MuonAdamW is a two-regime optimizer: 2D weight matrices get Muon (Nesterov + orthogonal
+ gradient), everything else gets AdamW. The split is geometric — matrices live on the
+ Stiefel manifold, scalars and vectors do not.
+learning_outcomes:
+ - Explain what Muon does that AdamW does not, and why it helps for weight matrices
+ - Trace through the Newton-Schulz 5-step polynomial used in Polar Express
+ - Understand how NorMuon normalizes the update to be learning-rate independent
+ - Describe the parameter dispatch logic that assigns each tensor to Muon or AdamW
+ - Explain the trapezoidal learning rate schedule and its relation to the fixed time budget
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - train.py (MuonAdamW, muon_step, adamw_step, get_lr)
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 4: The MuonAdamW Optimizer
+
+## What Problem Does This Solve?
+
+Standard AdamW applies the same update rule to every parameter: maintain per-parameter
+first and second moment estimates, normalize by the second moment, apply weight decay.
+This works well for arbitrary tensors but ignores the *geometric structure* of weight matrices.
+
+A weight matrix `W ∈ R^(m×n)` lives in a structured space. The gradient `G` points in the
+direction of steepest loss descent, but the optimal step along the loss surface for a matrix
+may not align with the raw gradient direction. Specifically, the gradient does not respect
+the constraint that the updated matrix should have "similarly-sized" singular values — a
+property that prevents some weights from growing dominant while others shrink.
+
+**Muon** (Momentum + Orthogonalization) addresses this by projecting the gradient (or
+momentum) onto the Stiefel manifold — the space of matrices with orthonormal columns.
+The resulting update has all singular values equal to 1, which means every direction in
+the weight matrix space receives equal update magnitude.
+
+**MuonAdamW** combines this with AdamW for parameters that do not have matrix geometry
+(embeddings, biases, layer norm scalars, the LM head).
+
+## The Muon Update Rule
+
+Muon's update rule is:
+
+```
+m_t = β * m_{t-1} + G_t (Nesterov momentum buffer)
+m̃_t = β * m_t + G_t (Nesterov lookahead)
+W_{t+1} = W_t - lr * orthogonalize(m̃_t)
+```
+
+The key step is `orthogonalize(m̃_t)`: project the momentum matrix onto the nearest
+orthogonal matrix (in Frobenius norm). This is a polar decomposition:
+
+```
+M = U Σ V^T (SVD)
+orthogonalize(M) = U V^T (replace every singular value with 1)
+```
+
+Computing exact SVD every step is O(mn·min(m,n)) — expensive. Polar Express approximates
+it with a fast 5-step polynomial.
+
+## Polar Express: Newton-Schulz Orthogonalization
+
+The polar decomposition can be approximated iteratively using the Newton-Schulz iteration.
+Starting from `X_0 = M / ||M||_F`, repeat:
+
+```
+X_{k+1} = X_k * (3I - X_k^T X_k) / 2
+```
+
+This converges to the orthogonal factor `U V^T` when the singular values of `X_0` are in (0, √3).
+
+autoresearch uses a **degree-5 polynomial variant** that converges in exactly 5 steps for
+well-conditioned matrices:
+
+```python
+@torch.compile(fullgraph=True)
+def zeropower_via_newtonschulz5(G, steps=5):
+ """
+ Polar Express: Newton-Schulz orthogonalization in 5 steps.
+ Returns the orthogonal factor of G (approx U V^T from G = U Σ V^T).
+ """
+ assert G.ndim >= 2
+ a, b, c = (3.4445, -4.7750, 2.0315) # polynomial coefficients for 5 steps
+ X = G.bfloat16()
+ # Normalize to place singular values in convergence basin
+ X = X / (X.norm() + 1e-7)
+ # Iterate the degree-5 polynomial
+ if G.size(0) > G.size(1):
+ X = X.T
+ for _ in range(steps):
+ A = X @ X.T
+ B = b * A + c * A @ A
+ X = a * X + B @ X
+ if G.size(0) > G.size(1):
+ X = X.T
+ return X.to(G.dtype)
+```
+
+```mermaid
+graph LR
+ G[Gradient Matrix G
m × n] -->|normalize| X0[X0 = G / norm G]
+ X0 -->|step 1: aX + bAX + cAAX| X1[X1]
+ X1 -->|step 2| X2[X2]
+ X2 -->|step 3| X3[X3]
+ X3 -->|step 4| X4[X4]
+ X4 -->|step 5| X5[X5 ≈ U V^T]
+ X5 -->|scale by lr| UPDATE[Weight Update]
+```
+
+### Why 5 Steps?
+
+The coefficients `(a=3.4445, b=-4.7750, c=2.0315)` were chosen so that iterating the
+polynomial `p(σ) = aσ + bσ³ + cσ⁵` five times drives every singular value in [0.1, 1.0]
+toward 1. This is a minimax polynomial optimization problem — the coefficients minimize
+the worst-case deviation from 1 over the target interval.
+
+5 steps is sufficient because:
+1. Dividing by the Frobenius norm guarantees every singular value lies in (0, 1] (since σ_max ≤ ‖M‖_F), inside the convergence basin
+2. The polynomial converges quadratically after the first step
+3. After 5 steps, the approximation error is < 0.1% for well-conditioned matrices
+
+### bfloat16 and fullgraph Compilation
+
+Two implementation details are critical for performance:
+
+```python
+X = G.bfloat16() # cast to bf16 before iterations
+```
+
+The Newton-Schulz iterations involve matrix multiplications that are much faster in bf16
+than float32, especially on H100 (which has dedicated bf16 tensor cores). The final result
+is cast back to the original dtype.
+
+```python
+@torch.compile(fullgraph=True)
+def zeropower_via_newtonschulz5(G, steps=5):
+```
+
+`fullgraph=True` tells `torch.compile` to compile the entire function into a single CUDA
+graph with no Python fallback points. This eliminates Python interpreter overhead and allows
+the compiler to fuse the matrix multiplications across steps.
+
+## NorMuon: Normalized Muon
+
+NorMuon normalizes the Muon update so that its RMS equals the learning rate:
+
+```python
+def normalize_muon_update(update, lr):
+ """
+ Scale the orthogonalized update so its RMS equals lr.
+ This makes the effective learning rate invariant to matrix shape.
+ """
+ # An m×n semi-orthogonal matrix has ‖M‖_F² = min(m,n), so its RMS entry is sqrt(min(m,n)/(m·n))
+ scale = lr / (update.norm(dim=-1, keepdim=True) / update.size(-1) ** 0.5 + 1e-8)
+ return update * scale
+```
+
+Without normalization, the effective learning rate depends on `min(m, n)` — a 4096×4096
+matrix would receive a different effective update magnitude than a 768×3072 matrix.
+NorMuon ensures every parameter group trains at the same effective rate.
+
+## The Full Muon Step
+
+```python
+@torch.compile(fullgraph=True)
+def muon_step(params, grads, momentum_buffers, lr, momentum=0.95, weight_decay=0.0):
+ """
+ Muon update for 2D weight matrices.
+ All operations are @torch.compile(fullgraph=True) for performance.
+ """
+ for p, g, buf in zip(params, grads, momentum_buffers):
+ # Nesterov momentum
+ buf.mul_(momentum).add_(g)
+ nesterov_g = buf.mul(momentum).add_(g) # lookahead
+
+ # Polar Express orthogonalization
+ update = zeropower_via_newtonschulz5(nesterov_g)
+
+ # NorMuon normalization
+ update = normalize_muon_update(update, lr)
+
+ # Optional weight decay (applied before update)
+ if weight_decay > 0:
+ p.data.mul_(1 - lr * weight_decay)
+
+ # Apply update
+ p.data.add_(update, alpha=-1.0)
+```
+
+## The AdamW Step
+
+For non-matrix parameters, standard AdamW is used:
+
+```python
+@torch.compile(fullgraph=True)
+def adamw_step(params, grads, exp_avgs, exp_avg_sqs, step, lr,
+ betas=(0.9, 0.95), eps=1e-8, weight_decay=0.1):
+ """
+ AdamW update for embeddings, LM head, scalars.
+ """
+ beta1, beta2 = betas
+ # Bias correction
+ bc1 = 1 - beta1 ** step
+ bc2 = 1 - beta2 ** step
+
+ for p, g, m, v in zip(params, grads, exp_avgs, exp_avg_sqs):
+ m.lerp_(g, 1 - beta1) # EMA of gradient
+ v.lerp_(g.square(), 1 - beta2) # EMA of squared gradient
+
+ step_size = lr / bc1
+ denom = (v.sqrt() / bc2 ** 0.5).add_(eps)
+
+ # Weight decay (decoupled, applied to weight not gradient)
+ p.data.mul_(1 - lr * weight_decay)
+
+ # Parameter update
+ p.data.addcdiv_(m, denom, value=-step_size)
+```
+
+## Parameter Dispatch: Who Gets Muon vs AdamW?
+
+```python
+class MuonAdamW(torch.optim.Optimizer):
+ def __init__(self, model, lr=3e-4, weight_decay=0.1):
+ # Separate parameters by geometry
+ muon_params = [] # 2D weight matrices
+ adamw_params = [] # everything else
+
+ for name, param in model.named_parameters():
+ if param.requires_grad:
+ if param.ndim == 2 and 'embedding' not in name and 'lm_head' not in name:
+ muon_params.append(param)
+ else:
+ adamw_params.append(param)
+
+ param_groups = [
+ {'params': muon_params, 'optimizer': 'muon', 'lr': lr},
+ {'params': adamw_params, 'optimizer': 'adamw', 'lr': lr},
+ ]
+ super().__init__(param_groups, defaults={'lr': lr})
+```
+
+```mermaid
+graph TD
+ ALL[All Model Parameters] --> DISPATCH{ndim == 2
and not embedding
and not lm_head?}
+
+ DISPATCH -->|Yes| MUON[Muon Group]
+ DISPATCH -->|No| ADAMW[AdamW Group]
+
+ MUON --> QP[q_proj, k_proj, v_proj]
+ MUON --> OP[out_proj]
+ MUON --> FC[fc1, fc2, proj MLP]
+
+ ADAMW --> WTE[wte token embedding]
+ ADAMW --> LMH[lm_head]
+ ADAMW --> LN[LayerNorm scalars]
+ ADAMW --> RL[resid_lambda, x0_lambda]
+ ADAMW --> VRG[value_residual_gate]
+```
+
+### Why Exclude Embeddings and LM Head from Muon?
+
+The embedding matrix `wte ∈ R^(V×C)` and LM head `∈ R^(C×V)` are 2D but conceptually
+different from attention projections:
+
+1. **Embedding rows are independent.** Row i of `wte` is the representation of token i.
+ Orthogonalizing across rows would mix token representations, destroying the learned
+ semantic structure.
+
+2. **LM head is tied to vocabulary.** Its rows correspond to output logits for each token.
+ Orthogonalization would equalize the "importance" of all vocabulary entries, fighting
+ against the natural Zipf-law distribution of token frequencies.
+
+3. **Scalars and vectors have no matrix geometry.** LayerNorm scales, residual lambdas,
+ and the value residual gate are 1D or scalar — SVD is undefined for them.
+
+## Learning Rate Schedule: Trapezoidal (Warmup-Flat-Warmdown)
+
+```python
+def get_lr(step, total_steps, max_lr=3e-4, min_lr=3e-5,
+ warmup_frac=0.1, warmdown_frac=0.2):
+ """
+ Trapezoidal LR schedule:
+ - Warmup: 0 → max_lr over first 10% of steps
+ - Flat: max_lr for middle 70% of steps
+ - Warmdown: max_lr → min_lr over last 20% of steps
+ """
+ warmup_steps = int(total_steps * warmup_frac)
+ warmdown_steps = int(total_steps * warmdown_frac)
+ flat_steps = total_steps - warmup_steps - warmdown_steps
+
+ if step < warmup_steps:
+ return max_lr * step / warmup_steps
+ elif step < warmup_steps + flat_steps:
+ return max_lr
+ else:
+ decay_step = step - warmup_steps - flat_steps
+ return min_lr + (max_lr - min_lr) * (1 - decay_step / warmdown_steps)
+```
+
+```mermaid
+xychart-beta
+ title "Trapezoidal LR Schedule (300s budget)"
+ x-axis ["0%", "10%", "20%", "50%", "80%", "100%"]
+ y-axis "Learning Rate" 0 --> 0.0003
+ line [0, 0.0003, 0.0003, 0.0003, 0.0003, 0.00003]
+```
+
+The trapezoidal schedule is well-suited to the fixed time budget because:
+
+1. **Warmup** allows Adam moment estimates to stabilize before taking large steps
+2. **Flat phase** provides the bulk of learning at maximum rate
+3. **Warmdown** enables final convergence — studies show warmdown disproportionately
+ improves final loss relative to its training cost
+
+Since every experiment has the same `TIME_BUDGET=300s`, the total step count varies between
+experiments (faster models take more steps). The LR schedule adapts to this by using
+fractional step positions, not absolute step numbers.
+
+## Fast-Fail on NaN or Loss > 100
+
+The training loop includes an early-exit to prevent the agent from wasting its full 5-minute
+budget on a clearly broken run:
+
+```python
+# In the training loop
+if loss.isnan() or loss.item() > 100.0:
+ print("FAST_FAIL: loss is NaN or > 100, aborting")
+ sys.exit(1)
+```
+
+When `train.py` exits with code 1, the agent treats the run as a failed experiment and
+proceeds to `git reset --hard HEAD~1` without logging to `results.tsv`.
+
+## Both Steps Are @torch.compile
+
+Both `muon_step` and `adamw_step` are decorated with `@torch.compile(fullgraph=True)`.
+This means:
+
+1. On the first call, PyTorch traces the function and compiles it to an optimized CUDA graph
+2. On subsequent calls, the compiled graph is replayed with zero Python overhead
+3. `fullgraph=True` ensures the entire function compiles — no Python fallbacks
+
+The compilation adds ~30–60 seconds of overhead on the first iteration but provides
+5–15% throughput improvement for all subsequent steps. For a 300-second budget,
+this tradeoff is clearly beneficial.
+
+## Chapter Summary
+
+| Component | Mechanism | Key Benefit |
+|---|---|---|
+| Muon | Nesterov + orthogonalization via Newton-Schulz | Equalized update magnitude across matrix directions |
+| Polar Express | 5-step Newton-Schulz polynomial | No SVD needed — just 5 bf16 matmul iterations |
+| NorMuon | RMS normalization of update | Shape-invariant effective learning rate |
+| AdamW dispatch | Applied to embeddings, LM head, scalars | Correct semantics for non-matrix parameters |
+| Trapezoidal LR | Warmup → flat → warmdown | Works with step-count-varying experiments |
+| Fast-fail | Exit on NaN or loss > 100 | Saves budget on broken runs |
+| @torch.compile | Both muon_step and adamw_step | ~10% throughput gain after first iteration |
+
+In the next chapter, we examine the training loop itself — gradient accumulation,
+garbage collection freezing, MFU tracking, and how the fixed 300-second wall-clock
+budget is enforced.
diff --git a/tutorials/autoresearch-tutorial/05-training-loop-and-fixed-time-budget.md b/tutorials/autoresearch-tutorial/05-training-loop-and-fixed-time-budget.md
new file mode 100644
index 00000000..b049b45c
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/05-training-loop-and-fixed-time-budget.md
@@ -0,0 +1,400 @@
+---
+layout: default
+title: "Chapter 5: The Training Loop and Fixed Time Budget"
+nav_order: 5
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ The training loop is where architecture and optimizer theory meet reality. The fixed
+ 300-second wall-clock budget is the mechanism that makes hundreds of experiments
+ directly comparable — understanding its implementation reveals why this design choice
+ is more principled than step-count comparisons.
+mental_model: |
+ The training loop is a race against the clock: it accumulates gradients, steps the
+ optimizer, and checks elapsed time after every micro-batch. When time runs out it
+ evaluates and exits — regardless of how many steps it completed.
+learning_outcomes:
+ - Trace the full training loop from initialization to val_bpb output
+ - Explain gradient accumulation and why it simulates larger batch sizes
+ - Understand why garbage collection is frozen during training
+ - Calculate MFU (Model FLOP Utilization) from the reported metrics
+ - Describe how TIME_BUDGET enforcement creates comparable experiments
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - train.py (main training loop, evaluate_bpb call, output format)
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 5: The Training Loop and Fixed Time Budget
+
+## What Problem Does This Solve?
+
+Comparing ML experiments fairly is harder than it looks. Common comparison axes:
+
+- **Same number of steps**: disadvantages models that do more work per step (e.g., larger attention)
+- **Same number of epochs**: disadvantages experiments on different sequence lengths
+- **Same loss threshold**: favors lucky random seeds and initialization
+
+autoresearch uses **same wall-clock time** (300 seconds). This is the fairest comparison
+for a system where the GPU is the fixed resource. Two models that both ran for exactly
+5 minutes on the same GPU with the same data can be compared directly, regardless of
+their architecture.
+
+The insight: if model A achieves lower val_bpb in 300 seconds than model B, then model A
+is a strictly better use of the GPU's compute budget.
+
+## The Fixed Time Budget
+
+```python
+# Top-level constant in train.py — the agent is not allowed to change this
+TIME_BUDGET = 300 # seconds of wall-clock training time
+```
+
+The enforcement is straightforward: check `time.time()` after every micro-batch and break
+if the budget is exceeded.
+
+```python
+import time
+
+train_start = time.time()
+
+for step in range(MAX_STEPS): # MAX_STEPS is large enough to never be reached
+ # ... gradient accumulation micro-batches ...
+
+ # Time check after optimizer step
+ elapsed = time.time() - train_start
+ if elapsed >= TIME_BUDGET:
+ break
+
+# After loop: elapsed is approximately TIME_BUDGET
+total_steps = step + 1
+```
+
+`MAX_STEPS` is set to a large sentinel (e.g., 1_000_000) that will never be reached in
+practice. The loop exits on the time condition, not the step condition. This means:
+
+- A fast model (small attention, few parameters) will complete more steps in 300s
+- A slow model (large attention, many parameters) will complete fewer steps
+- Both are evaluated at the same wall-clock elapsed time
+
+## Training Loop Architecture
+
+```mermaid
+flowchart TD
+ INIT[Initialize model, optimizer, dataloader] --> GC[gc.freeze: disable GC during training]
+ GC --> COMPILE[First forward pass triggers torch.compile]
+ COMPILE --> LOOP_START{elapsed < 300s?}
+
+ LOOP_START -->|Yes| ACCUM[Gradient accumulation loop]
+ ACCUM --> FWD[Forward pass: model x]
+ FWD --> LOSS[CrossEntropy loss / grad_accum_steps]
+ LOSS --> NANCHECK{loss NaN or >100?}
+ NANCHECK -->|Yes| FAIL[FAST_FAIL: sys.exit 1]
+ NANCHECK -->|No| BWD[loss.backward]
+ BWD --> LASTMICRO{Last micro-batch?}
+ LASTMICRO -->|No| ACCUM
+ LASTMICRO -->|Yes| CLIP[grad_clip: nn.utils.clip_grad_norm_]
+ CLIP --> LR[get_lr for current step]
+ LR --> OPT[optimizer.step + zero_grad]
+ OPT --> LOG[log train_loss every N steps]
+ LOG --> LOOP_START
+
+ LOOP_START -->|No, time up| EVAL[evaluate_bpb from prepare.py]
+ EVAL --> PRINT[print val_bpb memory_gb steps]
+ PRINT --> EXIT[sys.exit 0]
+```
+
+## Gradient Accumulation
+
+Gradient accumulation simulates a large batch size by splitting a logical batch into
+`GRAD_ACCUM_STEPS` micro-batches, accumulating gradients across all of them before
+taking a single optimizer step.
+
+```python
+BATCH_SIZE = 512 # tokens per logical batch
+GRAD_ACCUM_STEPS = 4 # micro-batches per optimizer step
+MICRO_BATCH_TOKENS = BATCH_SIZE // GRAD_ACCUM_STEPS # 128 tokens per micro-batch
+
+optimizer.zero_grad()
+for micro_step in range(GRAD_ACCUM_STEPS):
+ x, y = next(dataloader) # (B, T) next micro-batch
+ # Use autocast for bf16 mixed precision
+ with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
+ logits = model(x)
+ loss = F.cross_entropy(logits.view(-1, V), y.view(-1))
+
+ # Normalize loss by accumulation steps
+ (loss / GRAD_ACCUM_STEPS).backward()
+
+# After all micro-batches: take optimizer step
+torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
+optimizer.step()
+```
+
+### Why Accumulate?
+
+The H100 has 80 GB of HBM, but the peak compute utilization is achieved with specific
+batch shapes. Gradient accumulation allows:
+
+1. **Larger logical batch sizes** than fit in a single forward pass
+2. **Stable gradient estimates** from more diverse data
+3. **Flexible batch size tuning** without changing physical memory layout
+
+## Garbage Collection Freeze
+
+Python's garbage collector can interrupt CUDA operations at unpredictable intervals,
+causing brief GPU stalls. These stalls are particularly harmful during the 300-second
+budget because they appear as "dead time" — the budget clock ticks but no GPU work happens.
+
+```python
+import gc
+
+# Before training loop: freeze GC
+gc.collect() # final manual collection
+gc.freeze() # freeze all current objects — GC won't scan them
+
+# After training loop (for correctness, though we're about to exit anyway)
+gc.unfreeze()
+gc.collect()
+```
+
+`gc.freeze()` moves all currently reachable objects from the "young" and "old" generations
+to a "permanent" generation that the GC never scans. Because the model, optimizer states,
+and data buffers are all allocated before `gc.freeze()`, they are excluded from GC traversal.
+Only objects allocated *during* the training loop (loss tensors, gradient tensors, etc.)
+remain in the scanned generations — but these are short-lived and collected quickly.
+
+The result is that Python's GC effectively does nothing during training, eliminating
+a source of non-deterministic latency.
+
+## Mixed Precision: bfloat16
+
+All forward passes run in bfloat16:
+
+```python
+with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
+ logits = model(x)
+ loss = F.cross_entropy(...)
+```
+
+bfloat16 vs float16 vs float32:
+
+| Format | Exponent bits | Mantissa bits | Key property |
+|---|---|---|---|
+| float32 | 8 | 23 | Full precision |
+| float16 | 5 | 10 | Small range, numerically fragile |
+| bfloat16 | 8 | 7 | Same range as float32, less precision |
+
+bfloat16's 8-bit exponent means it can represent the same range of values as float32.
+This is critical for transformer training where gradient magnitudes span many orders of
+magnitude. float16's 5-bit exponent causes overflow and underflow issues that require
+loss scaling — bfloat16 does not.
+
+Model parameters are stored in float32 for optimizer stability. The `torch.autocast`
+context manager automatically casts inputs and outputs to bf16 for the forward pass
+without changing the stored parameter dtype.
+
+## MFU (Model FLOP Utilization)
+
+MFU measures how efficiently the GPU's theoretical peak FLOP/s is being used:
+
+```python
+def compute_mfu(model, batch_tokens_per_sec, device):
+ """
+ Estimate MFU based on the standard transformer FLOP formula.
+
+ For a transformer: ~6 * N FLOPs per token for forward+backward,
+ Where N = number of parameters.
+ """
+ N = sum(p.numel() for p in model.parameters())
+ flops_per_token = 6 * N # approximate: 2N forward + 4N backward (backward ≈ 2× forward)
+
+ achieved_flops = flops_per_token * batch_tokens_per_sec
+
+ # H100 SXM bf16 peak: ~1979 TFLOP/s
+ H100_BF16_PEAK = 1979e12
+ mfu = achieved_flops / H100_BF16_PEAK
+ return mfu
+```
+
+```mermaid
+xychart-beta
+ title "MFU vs Batch Size (H100 SXM, 125M parameter GPT)"
+ x-axis ["B=1", "B=4", "B=8", "B=16", "B=32", "B=64"]
+ y-axis "MFU %" 0 --> 60
+ line [5, 18, 32, 45, 52, 56]
+```
+
+Typical MFU values with autoresearch on H100:
+- Small model (125M params): ~45–55% MFU
+- Medium model (350M params): ~50–60% MFU
+
+The training loop logs MFU every 100 steps so the agent can observe compute efficiency trends.
+
+## The Dataloader
+
+The dataloader streams parquet shards from the climbmix dataset and uses the best-fit
+bin-packing algorithm (from `prepare.py`) to create training batches:
+
+```python
+from prepare import get_batch, pack_sequences
+
+class StreamingDataloader:
+ def __init__(self, shard_paths, tokenizer, T, batch_size):
+ self.shards = iter(shard_paths)
+ self.tokenizer = tokenizer
+ self.T = T
+ self.batch_size = batch_size
+ self.buffer = deque()
+ self._fill_buffer()
+
+ def _fill_buffer(self):
+ """Load next shard and tokenize into buffer."""
+ shard = next(self.shards)
+ table = pq.read_table(shard, columns=['text'])
+ texts = table['text'].to_pylist()
+ packed = pack_sequences(texts, self.T)
+ self.buffer.extend(packed)
+
+ def __next__(self):
+ if len(self.buffer) < self.batch_size:
+ self._fill_buffer()
+ rows = [self.buffer.popleft() for _ in range(self.batch_size)]
+ x = torch.tensor(rows, dtype=torch.long) # (B, T)
+ y = torch.roll(x, -1, dims=1) # shifted by 1 for next-token prediction
+ y[:, -1] = -100 # ignore index for last position (no target)
+ return x.cuda(), y.cuda()
+```
+
+The dataloader is designed to keep the GPU fed: it always has at least `batch_size` packed
+rows ready, refilling from the next shard when the buffer runs low.
+
+## Training Metrics and Logging
+
+The training loop logs to stdout at regular intervals:
+
+```python
+LOG_INTERVAL = 50 # steps between log lines
+
+if step % LOG_INTERVAL == 0:
+ elapsed = time.time() - train_start
+ tokens_per_sec = step * BATCH_SIZE / elapsed
+ mfu = compute_mfu(model, tokens_per_sec, device)
+ print(
+ f"step={step:6d} | loss={train_loss:.4f} | "
+ f"tok/s={tokens_per_sec:.0f} | mfu={mfu:.1%} | "
+ f"elapsed={elapsed:.0f}s"
+ )
+```
+
+Sample output during training:
+
+```
+step= 50 | loss=6.2341 | tok/s=142500 | mfu=48.3% | elapsed=18s
+step= 100 | loss=5.8901 | tok/s=143200 | mfu=48.5% | elapsed=36s
+step= 200 | loss=4.9234 | tok/s=143800 | mfu=48.7% | elapsed=71s
+step= 500 | loss=3.8821 | tok/s=144100 | mfu=48.8% | elapsed=177s
+step= 850 | loss=3.4210 | tok/s=144300 | mfu=48.9% | elapsed=300s
+val_bpb=1.8342 | memory_gb=14.3 | steps=850
+```
+
+The final line is what the agent greps for. The format is precisely specified in `program.md`
+so the agent can reliably extract it with a simple pattern:
+
+```bash
+grep "val_bpb=" run.log | tail -1
+```
+
+## Memory Reporting
+
+The final output includes `memory_gb` — peak GPU memory in gigabytes:
+
+```python
+memory_gb = torch.cuda.max_memory_allocated() / 1e9
+print(f"val_bpb={val_bpb:.4f} | memory_gb={memory_gb:.1f} | steps={total_steps}")
+```
+
+This serves two purposes:
+1. The agent can check whether a change approached the GPU's memory limit
+2. The researcher reviewing `results.tsv` can compare memory efficiency across experiments
+
+A change that improves val_bpb but uses 2× more memory may not be desirable — the agent
+can be instructed to reject improvements that exceed a memory threshold.
+
+## The evaluate_bpb Call
+
+At the end of training, the model is evaluated using the function from `prepare.py`:
+
+```python
+from prepare import evaluate_bpb
+
+# After training loop exits:
+model.eval()
+val_bpb = evaluate_bpb(
+ model=model,
+ device=device,
+ T=config.block_size,
+ batch_size=8
+)
+```
+
+The evaluation uses:
+- `torch.no_grad()` — no gradients, faster inference
+- The fixed validation set cached by `prepare.py`
+- The same `block_size=T` as training
+
+Because `evaluate_bpb` is imported from the immutable `prepare.py`, the agent cannot
+accidentally change the evaluation. Even if `train.py` is heavily modified, the evaluation
+protocol remains identical.
+
+## Full Timing Breakdown
+
+```mermaid
+gantt
+ title Wall-Clock Time Breakdown (300s training budget + evaluation, H100)
+ dateFormat ss
+ axisFormat %Ss
+
+ section Startup
+ Model init + param count :a1, 0, 2s
+ Tokenizer load :a2, after a1, 1s
+ First batch (compile trigger):a3, after a2, 25s
+
+ section Training
+ Steps 1-850 (warm) :a4, after a3, 272s
+
+ section Evaluation
+ evaluate_bpb on val set :a5, after a4, 8s
+
+ section Output
+ Print results + exit :a6, after a5, 1s
+```
+
+The `torch.compile` overhead (~25 seconds on the first call) is baked into the 300-second
+budget. This means:
+
+- Experiments with simpler computation graphs compile faster and get more training steps
+- Experiments with complex new operations compile slower and get fewer steps
+- This is intentional: compilation time is part of the "cost" of a complex architecture
+
+## Chapter Summary
+
+| Component | Implementation | Key Detail |
+|---|---|---|
+| TIME_BUDGET | `time.time()` check after each step | 300s wall-clock, not step count |
+| Gradient accumulation | 4 micro-batches per optimizer step | Simulates larger logical batch |
+| GC freeze | `gc.freeze()` before loop | Eliminates GC pauses during training |
+| Mixed precision | `torch.autocast` bf16 | Safe range (8-bit exponent), no loss scaling |
+| MFU tracking | 6N × tokens/s / peak FLOP/s | Reported every 50 steps |
+| Fast-fail | `sys.exit(1)` on NaN or loss > 100 | Saves budget on broken runs |
+| evaluate_bpb | Imported from prepare.py | Tamper-proof, fixed validation set |
+| Output format | `val_bpb=X.XXXX \| memory_gb=XX.X \| steps=NNNN` | Agent greps this line |
+
+In the next chapter, we examine `program.md` — the agent's "research org code" that
+defines the experiment loop, git discipline, logging protocol, and the autonomy mandate
+that keeps it running all night without human supervision.
diff --git a/tutorials/autoresearch-tutorial/06-agent-protocol.md b/tutorials/autoresearch-tutorial/06-agent-protocol.md
new file mode 100644
index 00000000..7f52ae74
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/06-agent-protocol.md
@@ -0,0 +1,395 @@
+---
+layout: default
+title: "Chapter 6: The Agent Protocol"
+nav_order: 6
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ program.md is the most unusual file in autoresearch — it is not code, not configuration,
+ but a natural-language document that turns a general-purpose LLM into a specialized ML
+ research agent. Understanding its structure reveals how to encode complex protocols in
+ a form that language models can reliably follow.
+mental_model: |
+ program.md is a constitution for an AI research organization with one member. It defines
+ the agent's job description, the experimental procedure, the record-keeping requirements,
+ and the autonomy mandate — all in plain English that any capable LLM can follow.
+learning_outcomes:
+ - Explain the structure and purpose of each section in program.md
+ - Trace through the complete experiment loop as the agent executes it
+ - Understand why git is used as the experiment ledger and how reset enables rejection
+ - Describe the autonomy mandate and its practical implications
+ - Explain the simplicity criterion and how it shapes the search direction
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - program.md
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 6: The Agent Protocol
+
+## What Problem Does This Solve?
+
+An LLM given a vague instruction like "improve this ML training script" will:
+
+1. Make a change
+2. Ask "should I test this?"
+3. Wait for human response
+4. Make another change
+5. Ask "does this look good?"
+6. Never stop asking
+
+autoresearch solves this by encoding a complete, unambiguous protocol in `program.md`.
+The document specifies:
+
+- Exactly how to branch the repository
+- Exactly how to run the experiment
+- Exactly how to measure success
+- Exactly what to do when it fails
+- Exactly how to log results
+- That the agent should NEVER ask the human anything
+
+The result is an LLM that behaves like a specialized, self-directing research engineer —
+not because it was fine-tuned for this task, but because its instructions are complete
+enough to leave no gaps requiring human input.
+
+## program.md as "Research Org Code"
+
+The name "research org code" is deliberate. In a human research organization:
+
+- A lab has a **protocol** (how experiments are run)
+- A lab has **standards** (what constitutes a valid result)
+- A lab has a **culture** (what kinds of discoveries are valued)
+- A lab has **autonomy norms** (when to escalate vs proceed independently)
+
+`program.md` encodes all four for a single-agent "research organization." It is the
+agent's entire institutional context.
+
+```mermaid
+graph TD
+    PM[program.md] --> PROTOCOL["Experimental Protocol<br/>branch → modify → commit → run → measure"]
+    PM --> STANDARDS["Quality Standards<br/>val_bpb must improve, memory must not explode"]
+    PM --> CULTURE["Research Culture<br/>simplicity criterion, prefer deletion over addition"]
+    PM --> AUTONOMY["Autonomy Norms<br/>NEVER STOP, NEVER ASK, human is asleep"]
+    PM --> LEDGER["Record-Keeping<br/>results.tsv format, git history as ground truth"]
+```
+
+## The Branch Naming Convention
+
+The first thing the agent does when starting a session is create a branch:
+
+```bash
+git checkout -b autoresearch/<tag>
+```
+
+The `<tag>` should describe the agent's planned exploration direction:
+
+```
+autoresearch/rope-scaling-experiments
+autoresearch/deeper-narrower-architecture
+autoresearch/muon-warmup-variants
+autoresearch/sliding-window-ablations
+```
+
+This naming convention serves multiple purposes:
+
+1. **Isolation**: experiments on different branches do not interfere with each other
+2. **Discoverability**: a human reviewing the repository can see what directions were explored
+3. **Parallelism**: multiple agents can run simultaneously on different branches without conflicts
+4. **Cleanup**: `git branch -D autoresearch/*` removes all agent branches cleanly
+
+## The Experiment Loop
+
+The core protocol is a tight loop:
+
+```
+LOOP FOREVER:
+ 1. Hypothesize: choose one modification to train.py
+ 2. Implement: edit train.py
+ 3. Commit: git commit -am ""
+ 4. Run: uv run train.py > run.log 2>&1
+ 5. Measure: grep "val_bpb=" run.log | tail -1
+ 6. Decide:
+ - If val_bpb improved (lower): keep commit, append to results.tsv
+ - If val_bpb did not improve: git reset --hard HEAD~1
+ 7. Go to step 1
+```
+
+```mermaid
+flowchart TD
+ START[Start session] --> BRANCH[git checkout -b autoresearch/tag]
+ BRANCH --> HYPOTHESIZE[Choose one modification to train.py]
+ HYPOTHESIZE --> IMPLEMENT[Edit train.py]
+ IMPLEMENT --> COMMIT[git commit -am description]
+ COMMIT --> RUN[uv run train.py > run.log 2>&1]
+ RUN --> GREP[grep val_bpb= run.log]
+ GREP --> COMPARE{val_bpb < current best?}
+
+ COMPARE -->|Yes: improved| LOG[Append to results.tsv]
+ LOG --> UPDATE[Update current best]
+ UPDATE --> HYPOTHESIZE
+
+ COMPARE -->|No: not improved| RESET[git reset --hard HEAD~1]
+ RESET --> HYPOTHESIZE
+
+ RUN -->|exit code 1 FAST_FAIL| FAST_FAIL[Log as failed, git reset]
+ FAST_FAIL --> HYPOTHESIZE
+```
+
+## Git as the Experiment Ledger
+
+The decision to use git as the experiment tracking system is elegant in its simplicity:
+
+### Keeping an Improvement
+
+When `val_bpb` improves, the commit stays:
+```bash
+# The commit already exists from step 3
+# Nothing to do — the state is preserved in git history
+```
+
+### Rejecting a Failure
+
+When `val_bpb` does not improve:
+```bash
+git reset --hard HEAD~1
+```
+
+This rolls back to the previous state: the modification to `train.py` is undone,
+the commit is removed from history. The repository is exactly as it was before
+the failed experiment.
+
+```mermaid
+gitGraph
+ commit id: "baseline: val_bpb=1.8342"
+ commit id: "try: RoPE scaling → 1.8201" type: HIGHLIGHT
+ commit id: "try: wider MLP (rejected)" type: REVERSE
+ commit id: "try: fewer KV heads → 1.8150" type: HIGHLIGHT
+ commit id: "try: squared ReLU off (rejected)" type: REVERSE
+ commit id: "try: longer warmup → 1.8089" type: HIGHLIGHT
+```
+
+Note: after `git reset --hard HEAD~1`, the "rejected" commits disappear from the history.
+What the agent actually sees in `git log` is a clean sequence of improvements. The rejections
+only appear in `results.tsv` (as rows with `status=rejected`).
+
+### Why Not a Database or MLflow?
+
+The git approach has several advantages over dedicated experiment tracking systems:
+
+| Property | git reset | MLflow / W&B |
+|---|---|---|
+| Zero infrastructure | Yes | Requires server or account |
+| Automatic versioning | Yes | Manual |
+| Rollback built-in | Yes | Requires custom logic |
+| Reproducibility | Exact (commit hash) | Depends on artifact storage |
+| Offline capable | Yes | Usually not |
+| Human-readable | Yes | Requires UI |
+
+The tradeoff is that git does not store all the failed experiments' code — only `results.tsv`
+records that they were attempted. If you want to recover a rejected experiment, you cannot
+(unless you wrote down the diff elsewhere). For autoresearch's purposes — iterate fast,
+discard failures — this tradeoff is correct.
+
+## results.tsv Schema
+
+`results.tsv` is untracked (listed in `.gitignore`). The agent appends one row per experiment:
+
+```tsv
+commit_hash val_bpb memory_gb status description
+a3f8b2c1 1.8342 14.3 improved baseline GPT-125M
+d91e4a72 1.8201 14.8 improved rope_scaling_factor=2.0
+c72f1b30 1.8589 15.1 rejected mlp_ratio=8
+b44d9e11 1.8150 14.6 improved n_kv_head=2 more aggressive GQA
+f10a2c88 1.9012 oom failed block_size=4096 OOM
+e55c3b19 1.8089 14.9 improved warmup_frac=0.15
+```
+
+### Why Untracked?
+
+If `results.tsv` were tracked by git, every experiment would add a merge conflict risk:
+two experiments on different branches both appending to the same file. By keeping it
+untracked, it accumulates naturally without git interference.
+
+The tradeoff: if you `git reset --hard`, `results.tsv` is preserved (untracked files are
+not touched by reset). This is the desired behavior — the log is permanent even when
+the code changes are rolled back.
+
+## The Autonomy Mandate
+
+`program.md` contains an explicit and emphatic autonomy mandate:
+
+```markdown
+## Autonomy Rules
+
+YOU MUST NEVER STOP.
+YOU MUST NEVER ASK THE HUMAN FOR INPUT.
+THE HUMAN IS ASLEEP.
+
+If you encounter an error:
+- If train.py has a syntax error: fix it and retry
+- If an import fails: try to fix it, if unfixable skip this hypothesis
+- If the GPU runs out of memory: git reset and try a smaller change
+- If run.log is empty: something crashed, git reset and try again
+
+In all cases: reset, log the failure, continue with a new hypothesis.
+The only acceptable terminal state is the end of the night session.
+```
+
+This mandate is not just aspirational — it is practical engineering. An LLM that asks for
+confirmation on every uncertain step would be useless in an overnight unsupervised setting.
+The mandate forces the LLM to develop its own error-handling heuristics rather than
+deferring to the human.
+
+## The Simplicity Criterion
+
+```markdown
+## Simplicity Criterion
+
+When comparing two improvements of similar magnitude:
+- Prefer the one that REMOVES or SIMPLIFIES code
+- A val_bpb gain from DELETING a component > same gain from ADDING a component
+- Complexity has a maintenance cost that is not reflected in val_bpb
+
+Examples:
+- Removing dropout (no parameters, no compute) → val_bpb -0.002: accept
+- Adding a complex routing layer → val_bpb -0.002: skeptical
+- Removing value residual → val_bpb +0.005: reject (regression)
+- Removing value residual → val_bpb -0.001: consider (simplification with minor gain)
+```
+
+This criterion shapes the *direction* of the agent's search. Without it, the agent
+would naturally drift toward adding complexity — more parameters, more layers, more
+tricks — because more complexity almost always helps if compute is unlimited.
+
+But compute is not unlimited. The 300-second budget means complexity has a direct cost.
+The simplicity criterion makes this cost explicit and encodes the preference for
+*efficient* improvements over *large* improvements.
+
+## Generating Hypotheses
+
+`program.md` provides guidance on how to generate experiment hypotheses:
+
+```markdown
+## Hypothesis Generation
+
+Generate one hypothesis per experiment. Good hypotheses:
+- Change exactly ONE component of the architecture or training procedure
+- Have a clear mechanistic justification (why should this help?)
+- Are reversible (can be undone with git reset)
+
+Hypothesis categories:
+1. Architecture: change GPTConfig fields (n_head, n_kv_head, n_embd, WINDOW_PATTERN, etc.)
+2. Attention: modify CausalSelfAttention (different positional encoding, different norm)
+3. MLP: modify the MLP block (different activation, different ratio, different gating)
+4. Optimizer: change MuonAdamW hyperparameters (lr, momentum, betas, weight_decay)
+5. Training: change training loop parameters (grad_accum, batch_size, etc.)
+6. Ablation: REMOVE a component to test if it's helping
+
+Do NOT change:
+- TIME_BUDGET (must stay 300)
+- The output format (val_bpb=X.XXXX | memory_gb=XX.X | steps=NNNN)
+- Any import from prepare.py
+- prepare.py itself
+```
+
+## Interacting with the Agent
+
+The agent is invoked by passing `program.md` as a system prompt to an LLM that has
+tool-use capability (shell commands, file editing):
+
+### Using Claude
+
+```bash
+# Open Claude Code in the autoresearch directory
+cd autoresearch
+claude # starts Claude Code session
+
+# Then paste or type:
+# "Read program.md and begin the autoresearch protocol.
+# Current baseline is val_bpb=1.8342 from commit a3f8b2c1.
+# Go."
+```
+
+### Using the API (Headless)
+
+```python
+import anthropic
+
+client = anthropic.Anthropic()
+program_md = open("program.md").read()
+baseline_context = "Current best val_bpb=1.8342 (commit a3f8b2c1). Begin experiments."
+
+response = client.messages.create(
+ model="claude-opus-4-5",
+ max_tokens=8192,
+ system=program_md,
+ messages=[{"role": "user", "content": baseline_context}],
+ tools=[...], # shell_exec, file_write, file_read tools
+)
+```
+
+The agent uses shell execution tools to run `git`, `uv run`, and `grep` commands,
+and file editing tools to modify `train.py`.
+
+## Error Handling Protocol
+
+The protocol specifies how to handle each class of error:
+
+```mermaid
+graph TD
+ ERROR[Error during experiment] --> TYPE{Error type?}
+
+ TYPE -->|Syntax error in train.py| FIX_SYNTAX["Fix the syntax error<br/>retry same hypothesis"]
+ TYPE -->|Import error| FIX_IMPORT["Try to fix<br/>if unfixable, skip and try new hypothesis"]
+ TYPE -->|OOM GPU error| OOM["git reset<br/>log: status=failed, description=OOM<br/>try smaller modification"]
+ TYPE -->|NaN loss fast-fail| NAN["git reset<br/>log: status=failed, description=NaN<br/>return to safer region"]
+ TYPE -->|Empty run.log| CRASH["git reset<br/>log: status=failed, description=crash<br/>note the hypothesis for later analysis"]
+ TYPE -->|No val_bpb in run.log| PARTIAL["git reset<br/>log: status=failed, description=incomplete run"]
+
+ FIX_SYNTAX --> RETRY[Retry]
+ FIX_IMPORT --> NEXT[Next hypothesis]
+ OOM --> NEXT
+ NAN --> NEXT
+ CRASH --> NEXT
+ PARTIAL --> NEXT
+```
+
+## Session Boundaries and Resumption
+
+`program.md` specifies how to resume after a session ends (e.g., GPU time expired,
+network disconnection):
+
+```markdown
+## Session Resumption
+
+When resuming an existing session:
+1. git log --oneline -20 to see recent history
+2. cat results.tsv | tail -20 to see recent experiments
+3. Find the current best val_bpb from results.tsv
+4. Resume from the current HEAD (do not re-run old experiments)
+5. Continue the experiment loop from step 1
+
+Do NOT create a new branch — continue on the existing autoresearch/<tag> branch.
+```
+
+## Chapter Summary
+
+| Component | Purpose | Key Detail |
+|---|---|---|
+| Branch naming | `autoresearch/<tag>` | Isolates experiment directions, enables multi-agent |
+| Experiment loop | modify → commit → run → measure → keep/reset | ~8 minutes per cycle |
+| git as ledger | commits for improvements, reset for failures | Zero extra infrastructure |
+| results.tsv | Untracked experiment log | Preserved through git reset |
+| Autonomy mandate | NEVER STOP, NEVER ASK | Handles all errors independently |
+| Simplicity criterion | Prefer deletion over addition | Shapes search toward efficient improvements |
+| Hypothesis generation | One change per experiment | Controls for confounds |
+| Error handling | Class-specific recovery procedures | No dead-ends for the agent |
+
+In the next chapter, we examine `analysis.ipynb` — the Jupyter notebook for reading
+`results.tsv`, visualizing the overnight progress curve, identifying the best experiments,
+and extracting patterns from 100 experiment runs.
diff --git a/tutorials/autoresearch-tutorial/07-analyzing-results.md b/tutorials/autoresearch-tutorial/07-analyzing-results.md
new file mode 100644
index 00000000..3c28bef8
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/07-analyzing-results.md
@@ -0,0 +1,451 @@
+---
+layout: default
+title: "Chapter 7: Analyzing Results with analysis.ipynb"
+nav_order: 7
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ Running 100 experiments overnight generates more signal than can be absorbed by reading
+ a TSV file. analysis.ipynb provides the visualization and statistical tools to extract
+ patterns, identify the best-performing changes, and prioritize what to explore next.
+mental_model: |
+ analysis.ipynb is your morning debrief: it turns 100 rows of TSV data into a progress
+ narrative, identifies which architectural hypotheses worked, and surfaces the questions
+ worth pursuing in the next overnight run.
+learning_outcomes:
+ - Parse and clean results.tsv including handling of failed and OOM experiments
+ - Reproduce the progress.png visualization of val_bpb over experiment number
+ - Identify statistically significant improvements from noise
+ - Categorize experiments by type and compute per-category success rates
+ - Draft hypotheses for the next overnight run based on the analysis
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - analysis.ipynb
+ - results.tsv
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 7: Analyzing Results with analysis.ipynb
+
+## What Problem Does This Solve?
+
+After an overnight run, `results.tsv` contains ~100 rows. Each row is one experiment:
+a commit hash, a val_bpb score, memory usage, status, and a description. Reading this
+raw TSV is insufficient for understanding what happened:
+
+- Which changes actually helped?
+- Which failures were due to bugs vs genuine regressions?
+- Is the agent making progress monotonically, or bouncing around?
+- What patterns emerge across successful experiments?
+- What should the next overnight run focus on?
+
+`analysis.ipynb` answers these questions with structured analysis and visualizations.
+
+## Loading and Cleaning results.tsv
+
+```python
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from pathlib import Path
+
+# Load the results
+df = pd.read_csv('results.tsv', sep='\t', names=[
+ 'commit_hash', 'val_bpb', 'memory_gb', 'status', 'description'
+])
+
+print(f"Total experiments: {len(df)}")
+print(f"Status breakdown:\n{df['status'].value_counts()}")
+```
+
+Sample output:
+```
+Total experiments: 97
+Status breakdown:
+improved 23
+rejected 61
+failed 13
+```
+
+### Handling Special Values
+
+Not all experiments complete cleanly:
+
+```python
+# Handle OOM (out of memory) runs — memory_gb is 'oom' not a number
+df['memory_gb'] = pd.to_numeric(df['memory_gb'], errors='coerce') # OOM → NaN
+
+# Handle failed runs where val_bpb may be missing
+df['val_bpb'] = pd.to_numeric(df['val_bpb'], errors='coerce')
+
+# Separate completed vs failed
+completed = df[df['status'].isin(['improved', 'rejected'])].copy()
+failed = df[df['status'] == 'failed'].copy()
+improved = df[df['status'] == 'improved'].copy()
+
+print(f"Completed: {len(completed)} ({len(completed)/len(df):.0%})")
+print(f"Failed: {len(failed)} ({len(failed)/len(df):.0%})")
+print(f"Success rate (of completed): {len(improved)/len(completed):.0%}")
+```
+
+## The Progress Curve: progress.png
+
+The most important visualization is the val_bpb over experiment number:
+
+```python
+fig, axes = plt.subplots(1, 2, figsize=(14, 5))
+
+# Left: all completed experiments
+ax = axes[0]
+ax.scatter(
+ completed.index,
+ completed['val_bpb'],
+ c=completed['status'].map({'improved': '#2ecc71', 'rejected': '#e74c3c'}),
+ alpha=0.6, s=20
+)
+# Running best line
+best_so_far = completed['val_bpb'].cummin()
+ax.plot(completed.index, best_so_far, 'k-', linewidth=2, label='Running best')
+ax.set_xlabel('Experiment #')
+ax.set_ylabel('val_bpb')
+ax.set_title('All Experiments: Progress Curve')
+ax.legend()
+ax.invert_yaxis() # lower is better → ascending y means improvement
+
+# Right: only improvements
+ax = axes[1]
+ax.plot(range(len(improved)), improved['val_bpb'], 'o-', color='#2ecc71', linewidth=2)
+ax.set_xlabel('Improvement # (cumulative accepted)')
+ax.set_ylabel('val_bpb')
+ax.set_title('Accepted Improvements Only')
+ax.invert_yaxis()
+
+plt.tight_layout()
+plt.savefig('progress.png', dpi=150, bbox_inches='tight')
+plt.show()
+```
+
+```mermaid
+xychart-beta
+ title "Typical Progress Curve: val_bpb vs Experiment Number"
+ x-axis ["0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "97"]
+ y-axis "val_bpb (lower is better)" 1.75 --> 1.90
+ line [1.834, 1.830, 1.820, 1.818, 1.816, 1.815, 1.810, 1.808, 1.807, 1.806, 1.805]
+```
+
+Key features to look for in the progress curve:
+1. **Rapid early improvement**: The first 10–20 experiments often find quick wins
+2. **Plateau regions**: After initial gains, progress slows — this is normal
+3. **Step changes**: Sudden drops indicate a genuinely important architectural insight
+4. **Flatlines**: Long periods of all-rejected experiments indicate the agent is stuck
+
+## Best-Hit Analysis
+
+The best experiment deserves deep inspection:
+
+```python
+best_idx = improved['val_bpb'].idxmin()
+best = improved.loc[best_idx]
+
+print("=== BEST EXPERIMENT ===")
+print(f"val_bpb: {best['val_bpb']:.4f}")
+print(f"memory_gb: {best['memory_gb']:.1f}")
+print(f"commit: {best['commit_hash']}")
+print(f"description: {best['description']}")
+
+# Show the diff for the best experiment
+import subprocess
+diff = subprocess.run(
+ ['git', 'diff', f"{best['commit_hash']}~1", best['commit_hash']],
+ capture_output=True, text=True
+)
+print("\n=== GIT DIFF ===")
+print(diff.stdout[:2000]) # first 2000 chars of diff
+```
+
+## Improvement Magnitude Distribution
+
+```python
+# Compute improvement magnitude for each accepted change
+# (relative to the running best at the time of acceptance)
+improved_sorted = improved.sort_values('val_bpb', ascending=False)  # chronological: each accepted change lowers val_bpb, so descending val_bpb = order of acceptance
+
+improvements = []
+for i in range(len(improved_sorted)):
+ if i == 0:
+ improvements.append(0) # baseline
+ else:
+ delta = improved_sorted.iloc[i-1]['val_bpb'] - improved_sorted.iloc[i]['val_bpb']
+ improvements.append(delta)
+
+improvements_series = pd.Series(improvements[1:]) # exclude baseline
+
+print("Improvement magnitude statistics:")
+print(f" Median: {improvements_series.median():.4f} bpb")
+print(f" Mean: {improvements_series.mean():.4f} bpb")
+print(f" Max: {improvements_series.max():.4f} bpb (best single change)")
+print(f" Min: {improvements_series.min():.4f} bpb (smallest accepted change)")
+
+# Histogram of improvement sizes
+plt.figure(figsize=(8, 4))
+plt.hist(improvements_series, bins=20, color='#2ecc71', edgecolor='black', alpha=0.8)
+plt.xlabel('val_bpb improvement (positive = better)')
+plt.ylabel('Count')
+plt.title('Distribution of Improvement Magnitudes')
+plt.axvline(improvements_series.mean(), color='red', linestyle='--', label=f'Mean: {improvements_series.mean():.4f}')
+plt.legend()
+plt.savefig('improvement_distribution.png', dpi=150, bbox_inches='tight')
+```
+
+```mermaid
+graph LR
+ subgraph IMP_DIST [Improvement Distribution Pattern]
+ SMALL[Small improvements<br/>0.001-0.005 bpb<br/>most common]
+ MED[Medium improvements<br/>0.005-0.020 bpb<br/>occasional]
+ LARGE[Large improvements<br/>&gt;0.020 bpb<br/>rare breakthroughs]
+ end
+
+ SMALL -->|~70% of accepted changes| FREQ[High frequency]
+ MED -->|~25% of accepted changes| FREQ2[Medium frequency]
+ LARGE -->|~5% of accepted changes| FREQ3[Low frequency]
+```
+
+## Experiment Categorization
+
+Categorize experiments by type to understand which areas are most productive:
+
+```python
+def categorize(description):
+ """Simple keyword-based categorization of experiment descriptions."""
+ desc = description.lower()
+ if any(k in desc for k in ['n_head', 'n_kv_head', 'gqa', 'attention']):
+ return 'attention'
+ elif any(k in desc for k in ['rope', 'positional', 'embedding']):
+ return 'positional'
+ elif any(k in desc for k in ['mlp', 'relu', 'activation', 'feedforward']):
+ return 'mlp'
+ elif any(k in desc for k in ['lr', 'learning_rate', 'warmup', 'warmdown', 'muon', 'adamw']):
+ return 'optimizer'
+ elif any(k in desc for k in ['window', 'sssl', 'sliding']):
+ return 'window'
+ elif any(k in desc for k in ['n_layer', 'n_embd', 'depth', 'width']):
+ return 'scaling'
+ elif any(k in desc for k in ['remove', 'ablat', 'without', 'disabled']):
+ return 'ablation'
+ else:
+ return 'other'
+
+completed['category'] = completed['description'].apply(categorize)
+
+# Success rate by category
+category_stats = completed.groupby('category').agg(
+ total=('status', 'count'),
+ improved=('status', lambda x: (x == 'improved').sum()),
+).assign(success_rate=lambda x: x['improved'] / x['total'])
+
+print(category_stats.sort_values('success_rate', ascending=False))
+```
+
+Sample output:
+```
+ total improved success_rate
+category
+ablation 12 5 0.42
+optimizer 18 7 0.39
+positional 8 3 0.38
+attention 22 8 0.36
+mlp 14 4 0.29
+window 10 2 0.20
+scaling 7 1 0.14
+other 9 0 0.00
+```
+
+```mermaid
+xychart-beta
+ title "Success Rate by Experiment Category"
+ x-axis [ablation, optimizer, attention, positional, mlp, window, scaling]
+ y-axis "Success Rate %" 0 --> 50
+ bar [42, 39, 36, 38, 29, 20, 14]
+```
+
+## Memory Efficiency Analysis
+
+Not all improvements are equally desirable. An improvement that uses significantly more
+memory may not be worth it if it reduces the number of experiments per night or risks
+OOM errors:
+
+```python
+# Pareto frontier: improvements that are both better val_bpb AND lower memory
+improved_with_mem = improved.dropna(subset=['memory_gb'])
+
+plt.figure(figsize=(8, 6))
+plt.scatter(
+ improved_with_mem['memory_gb'],
+ improved_with_mem['val_bpb'],
+ c=improved_with_mem.index,
+ cmap='viridis',
+ s=50, alpha=0.8
+)
+plt.colorbar(label='Experiment index (time →)')
+plt.xlabel('Memory (GB)')
+plt.ylabel('val_bpb (lower is better)')
+plt.title('Memory vs Quality Tradeoff (Accepted Experiments)')
+plt.gca().invert_yaxis()
+
+# Add the Pareto frontier
+# (experiments where no other point is both better quality AND lower memory)
+def is_pareto_optimal(df):
+ is_optimal = pd.Series(True, index=df.index)
+ for i, row in df.iterrows():
+ dominated = (
+ (df['val_bpb'] <= row['val_bpb']) &
+ (df['memory_gb'] <= row['memory_gb']) &
+ ((df['val_bpb'] < row['val_bpb']) | (df['memory_gb'] < row['memory_gb']))
+ )
+ if dominated.any():
+ is_optimal[i] = False
+ return is_optimal
+
+pareto = improved_with_mem[is_pareto_optimal(improved_with_mem)]
+plt.scatter(pareto['memory_gb'], pareto['val_bpb'],
+ c='red', s=100, marker='*', label='Pareto frontier', zorder=5)
+plt.legend()
+plt.savefig('memory_quality_tradeoff.png', dpi=150, bbox_inches='tight')
+```
+
+## Understanding the Failed Experiments
+
+The 13% failure rate in a typical run contains useful signal:
+
+```python
+print("=== FAILED EXPERIMENT ANALYSIS ===")
+
+# Categorize failure reasons
+failure_reasons = failed['description'].str.lower().apply(lambda d: (
+ 'oom' if 'oom' in d or 'out of memory' in d else
+ 'nan' if 'nan' in d or 'fast_fail' in d else
+ 'syntax' if 'syntax' in d or 'error' in d else
+ 'other'
+))
+print(failure_reasons.value_counts())
+
+# What was being attempted when OOM failures occurred?
+oom_failures = failed[failure_reasons == 'oom']
+print("\nOOM attempts (what was the agent trying?):")
+for _, row in oom_failures.iterrows():
+ print(f" - {row['description']}")
+```
+
+OOM failures are particularly informative: they tell you which architectural directions
+(larger context, more heads, wider MLP) are memory-constrained and require more careful
+scaling.
+
+## Correlating Descriptions with Outcomes
+
+For longer overnight runs, NLP analysis of descriptions reveals structural patterns:
+
+```python
+from collections import Counter
+import re
+
+def extract_keywords(descriptions):
+ """Extract meaningful keywords from experiment descriptions."""
+ stopwords = {'a', 'an', 'the', 'to', 'from', 'with', 'for', 'in', 'of', 'and', 'or'}
+ words = []
+ for desc in descriptions:
+ tokens = re.findall(r'[a-zA-Z_][a-zA-Z0-9_]*', desc.lower())
+ words.extend(t for t in tokens if t not in stopwords and len(t) > 2)
+ return Counter(words)
+
+improved_keywords = extract_keywords(improved['description'])
+rejected_keywords = extract_keywords(completed[completed['status']=='rejected']['description'])
+
+# Keywords more common in improvements vs rejections
+print("Keywords POSITIVELY associated with improvements:")
+for word, count in improved_keywords.most_common(20):
+ improved_rate = count / improved_keywords.total()
+ rejected_rate = rejected_keywords.get(word, 0) / max(rejected_keywords.total(), 1)
+ lift = improved_rate / max(rejected_rate, 1e-5)
+ if lift > 1.5:
+ print(f" {word}: {lift:.1f}x more common in improvements")
+```
+
+## Generating Next-Night Hypotheses
+
+Based on the analysis, generate structured hypotheses for the next run:
+
+```python
+print("""
+=== NEXT NIGHT RECOMMENDATIONS ===
+
+Based on tonight's analysis:
+
+1. HIGH PRIORITY (follow-on from successful changes):
+ - {best_description} improved by {best_delta:.4f} bpb
+ → Try variations: push further in same direction
+ → Try ablation: what's the minimum required for this gain?
+
+2. MEDIUM PRIORITY (categories with high success rate):
+ - Optimizer changes had 39% success rate → try more LR variants
+ - Attention changes had 36% success rate → try n_head variants
+
+3. LOW PRIORITY / DEPRIORITIZE:
+ - Window pattern changes had 20% success rate → diminishing returns
+ - Scaling (depth/width) had 14% success rate → likely memory-constrained
+
+4. MEMORY-CONSCIOUS EXPERIMENTS:
+ - Current memory usage: {avg_memory:.1f} GB average
+ - OOM threshold: ~{oom_estimate:.0f} GB
+ - Headroom: ~{headroom:.1f} GB → room for slightly larger models
+
+5. OPEN QUESTIONS TO INVESTIGATE:
+ - Does removing value_residual hurt? (only tried 1 ablation)
+ - What's the optimal SHORT_WINDOW for current model size?
+ - Does Muon learning rate scaling matter at longer context?
+""".format(
+ best_description=improved.loc[improved['val_bpb'].idxmin(), 'description'],
+ best_delta=improved['val_bpb'].nsmallest(2).iloc[-1] - improved['val_bpb'].min(),  # gain of the best change vs the previous best
+ avg_memory=completed['memory_gb'].mean(),
+ oom_estimate=failed[failure_reasons=='oom']['memory_gb'].mean() if (failure_reasons=='oom').any() else 80,
+ headroom=80 - completed['memory_gb'].mean(),
+))
+```
+
+## Full analysis.ipynb Structure
+
+The complete notebook follows this structure:
+
+```mermaid
+graph TD
+ SEC1[1. Setup and Data Loading] --> SEC2[2. Summary Statistics]
+ SEC2 --> SEC3[3. Progress Curve progress.png]
+ SEC3 --> SEC4[4. Best-Hit Analysis]
+ SEC4 --> SEC5[5. Improvement Distribution]
+ SEC5 --> SEC6[6. Experiment Categorization]
+ SEC6 --> SEC7[7. Memory vs Quality Tradeoff]
+ SEC7 --> SEC8[8. Failed Experiment Analysis]
+ SEC8 --> SEC9[9. Keyword Correlation]
+ SEC9 --> SEC10[10. Next-Night Recommendations]
+```
+
+## Chapter Summary
+
+| Analysis | Method | Key Output |
+|---|---|---|
+| Progress curve | cummin + scatter plot | progress.png showing val_bpb trajectory |
+| Best-hit analysis | idxmin + git diff | Exact code change that helped most |
+| Improvement distribution | histogram of deltas | Typical improvement magnitude |
+| Categorization | keyword classifier | Success rate by experiment type |
+| Memory tradeoff | Pareto frontier | Which improvements are memory-efficient |
+| Failure analysis | categorize failure reasons | OOM budget, NaN regions to avoid |
+| Next-night hypotheses | structured recommendations | Prioritized list for next run |
+
+In the final chapter, we cover customization and scaling: how to run autoresearch on
+smaller GPUs, how to parallelize with multiple agents, and how notable forks have extended
+the system to macOS, Windows, and AMD hardware.
diff --git a/tutorials/autoresearch-tutorial/08-customization-and-scaling.md b/tutorials/autoresearch-tutorial/08-customization-and-scaling.md
new file mode 100644
index 00000000..d943cf8f
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/08-customization-and-scaling.md
@@ -0,0 +1,493 @@
+---
+layout: default
+title: "Chapter 8: Customization and Scaling"
+nav_order: 8
+parent: autoresearch Tutorial
+format_version: v2
+why: |
+ The baseline autoresearch configuration assumes an H100 with 80 GB VRAM. Most practitioners
+ have smaller GPUs, different operating systems, or want to run multiple agents in parallel.
+ This chapter explains how to adapt every layer of the system for your actual hardware.
+mental_model: |
+ autoresearch is parameterized by hardware: model size, batch size, and gradient accumulation
+ can all be tuned to fit any GPU. The key invariant is TIME_BUDGET=300s — everything else
+ is negotiable.
+learning_outcomes:
+ - Calculate the correct model size and batch configuration for a given GPU
+ - Set up a multi-GPU experiment with torch.compile + DDP
+ - Run multiple agents in parallel on different branches without conflicts
+ - Understand the community forks for macOS (MPS), Windows, and AMD (ROCm)
+ - Know which components to disable when Flash Attention 3 is unavailable
+snapshot:
+ source_repo: https://github.com/karpathy/autoresearch
+ stars: 70978
+ language: Python
+ license: MIT
+chapter_map:
+ - train.py (GPTConfig, BATCH_SIZE, GRAD_ACCUM_STEPS)
+ - program.md (multi-agent section)
+sources:
+ - https://github.com/karpathy/autoresearch
+---
+
+# Chapter 8: Customization and Scaling
+
+## What Problem Does This Solve?
+
+The reference configuration in `train.py` is tuned for a single H100 SXM 80 GB. Running
+it as-is on:
+
+- An RTX 3090 (24 GB): runs out of memory immediately
+- An A10 (24 GB): runs out of memory immediately
+- A MacBook with M3 Max: Flash Attention 3 is not available
+- A Windows machine: path and library issues
+- Two H100s: only uses one GPU
+
+This chapter provides concrete modifications for each scenario. The guiding principle:
+**TIME_BUDGET=300s is sacred. Everything else can be changed.**
+
+## Memory Sizing Guide
+
+GPU memory is the binding constraint. Here is how to calculate the correct configuration
+for a given GPU:
+
+```python
+def estimate_memory_gb(n_layer, n_embd, n_head, block_size, batch_size, grad_accum):
+ """
+ Rough estimate of GPU memory for training.
+ Accounts for: parameters, optimizer states, activations, KV cache.
+ """
+ # Parameters (float32 in optimizer, bf16 in forward)
+ params = n_layer * (
+ 4 * n_embd * n_embd + # Q, K, V, O projections
+ 8 * n_embd * n_embd # MLP (4× hidden × 2 matrices)
+ ) + n_embd * 50257 # token embedding (assumes weights tied with the LM head)
+ param_gb = params * 4 / 1e9 # float32
+
+ # Optimizer states (AdamW: 2× params, Muon: 1× params)
+ optimizer_gb = param_gb * 2.0
+
+ # Activations: roughly 12 * n_layer * batch_size * block_size * n_embd bytes (bf16)
+ activation_gb = 12 * n_layer * batch_size * block_size * n_embd * 2 / 1e9
+
+ # KV cache during training: 2 * n_layer * n_kv_head * block_size * head_dim * batch_size
+ head_dim = n_embd // n_head
+ n_kv_head = max(1, n_head // 3) # assuming GQA with 3× reduction
+ kv_gb = 2 * n_layer * n_kv_head * block_size * head_dim * batch_size * 2 / 1e9
+
+ total = param_gb + optimizer_gb + activation_gb + kv_gb
+ return total, {
+ 'params': param_gb, 'optimizer': optimizer_gb,
+ 'activations': activation_gb, 'kv_cache': kv_gb
+ }
+
+# Example: check if a config fits in 24 GB
+total, breakdown = estimate_memory_gb(
+ n_layer=8, n_embd=512, n_head=8,
+ block_size=512, batch_size=4, grad_accum=4
+)
+print(f"Estimated memory: {total:.1f} GB")
+for k, v in breakdown.items():
+ print(f" {k}: {v:.1f} GB")
+```
+
+## Recommended Configurations by GPU
+
+```mermaid
+graph TD
+ GPU{GPU VRAM} -->|80 GB H100| H100[H100 Config<br/>n_layer=12, n_embd=768<br/>block_size=1024, batch=8]
+ GPU -->|40 GB A100| A100[A100 Config<br/>n_layer=10, n_embd=640<br/>block_size=1024, batch=4]
+ GPU -->|24 GB RTX 4090| RTX4090[RTX 4090 Config<br/>n_layer=8, n_embd=512<br/>block_size=512, batch=4]
+ GPU -->|16 GB RTX 4080| RTX4080[RTX 4080 Config<br/>n_layer=6, n_embd=384<br/>block_size=512, batch=2]
+ GPU -->|8 GB RTX 3070| RTX3070[RTX 3070 Config<br/>n_layer=4, n_embd=256<br/>block_size=256, batch=2]
+ GPU -->|Apple MPS| MPS[M-Series Config<br/>n_layer=6, n_embd=384<br/>No FA3, block_size=512]
+```
+
+### Complete Configuration for RTX 4090 (24 GB)
+
+```python
+# train.py modifications for RTX 4090
+
+@dataclass
+class GPTConfig:
+ vocab_size: int = 50257
+ block_size: int = 512 # ↓ from 1024 (memory)
+ n_layer: int = 8 # ↓ from 12
+ n_head: int = 8 # ↓ from 12
+ n_kv_head: int = 2 # ↓ from 4 (more aggressive GQA)
+ n_embd: int = 512 # ↓ from 768
+ WINDOW_PATTERN: str = "SSSL"
+ SHORT_WINDOW: int = 64 # ↓ from 128 (scales with block_size)
+ use_value_residual: bool = True
+ dropout: float = 0.0
+ logit_softcap: float = 15.0
+ use_squared_relu: bool = True
+
+# Training constants for RTX 4090
+BATCH_SIZE = 4 # physical micro-batch (matches the 24 GB sizing above)
+GRAD_ACCUM_STEPS = 16 # logical batch = 64 sequences × 512 tokens = 32768 tokens
+TIME_BUDGET = 300 # NEVER CHANGE THIS
+```
+
+### Complete Configuration for Apple M-Series (MPS)
+
+Flash Attention 3 is CUDA-only. For MPS, use PyTorch's built-in `scaled_dot_product_attention`:
+
+```python
+# train.py modifications for Apple MPS
+
+import torch
+
+# Detect device
+if torch.cuda.is_available():
+ device = torch.device('cuda')
+elif torch.backends.mps.is_available():
+ device = torch.device('mps')
+else:
+ device = torch.device('cpu')
+
+# Replace Flash Attention 3 with SDPA
+class CausalSelfAttentionMPS(nn.Module):
+ def forward(self, x, x0, cos, sin):
+ # ... (same Q, K, V projection, RoPE, QK-norm as before) ...
+
+ # Use PyTorch SDPA instead of flash_attn
+ # MPS supports SDPA with causal mask
+ attn_output = torch.nn.functional.scaled_dot_product_attention(
+ q, k, v,
+ attn_mask=None,
+ is_causal=True,
+ # Note: sliding window not natively supported on MPS
+ # Use full attention for all layers on MPS
+ )
+ return attn_output
+```
+
+MPS-specific changes:
+1. Replace `flash_attn_varlen_func` with `F.scaled_dot_product_attention`
+2. Remove the sliding window for S-layers (MPS SDPA does not support window_size)
+3. Use `torch.float32` instead of `torch.bfloat16` (MPS bfloat16 support is partial)
+4. Reduce batch size and model size (MPS unified memory is slower than CUDA HBM)
+
+```python
+# pyproject.toml for MPS
+[project]
+dependencies = [
+ "torch>=2.2.0", # remove ==2.9.1 CUDA requirement
+ # remove flash-attn (CUDA only)
+ "rustbpe",
+ "tiktoken",
+ "pyarrow",
+ "huggingface-hub",
+ "numpy",
+]
+```
+
+### Windows Configuration
+
+Windows requires a few path and library adjustments:
+
+```python
+# Fix path separators in prepare.py
+import pathlib
+DATA_DIR = pathlib.Path("data") # not str "data/" — use pathlib throughout
+
+# Fix multiprocessing for Windows
+if __name__ == '__main__':
+ # Required on Windows to avoid fork issues with multiprocessing
+ torch.multiprocessing.set_start_method('spawn', force=True)
+ main()
+```
+
+Flash Attention 3 on Windows requires WSL2 or a native CUDA build with specific
+Visual Studio toolchain. The community has maintained a WSL2 setup guide in the
+GitHub discussions.
+
+### AMD ROCm Configuration
+
+For AMD GPUs (MI250X, MI300X, RX 7900 XTX):
+
+```bash
+# Install ROCm-compatible PyTorch
+pip install torch --index-url https://download.pytorch.org/whl/rocm6.1
+```
+
+```python
+# train.py: replace flash_attn with hipBLASLt-backed SDPA
+# AMD GPUs support torch.nn.functional.scaled_dot_product_attention
+# with flash attention implementation via ROCm
+
+# The flash-attn package has a ROCm fork:
+# pip install flash-attn-rocm (community maintained)
+# Or use SDPA which is automatically accelerated on ROCm:
+
+attn_output = torch.nn.functional.scaled_dot_product_attention(
+ q, k, v, is_causal=True
+)
+```
+
+## Scaling Down: Smaller Models
+
+For learning and experimentation on modest hardware, a "tiny" configuration:
+
+```python
+# Tiny configuration — runs on any GPU with 8+ GB
+@dataclass
+class GPTConfig:
+ vocab_size: int = 50257
+ block_size: int = 256
+ n_layer: int = 4
+ n_head: int = 4
+ n_kv_head: int = 1 # MQA (multi-query attention)
+ n_embd: int = 256
+ WINDOW_PATTERN: str = "SL" # alternating short/full
+ SHORT_WINDOW: int = 32
+ use_value_residual: bool = False # disable for very small models
+ dropout: float = 0.0
+ logit_softcap: float = 15.0
+ use_squared_relu: bool = True
+
+BATCH_SIZE = 4
+GRAD_ACCUM_STEPS = 4
+```
+
+This configuration uses ~2 GB peak memory and runs at ~200k tokens/second on an RTX 3070.
+It is suitable for validating experiment ideas before running the full configuration overnight.
+
+## Multi-GPU Training with DDP
+
+For users with multiple GPUs (2× A100, 4× H100, etc.):
+
+```python
+# train.py additions for DDP
+
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+
+def setup_distributed():
+ """Initialize the distributed process group."""
+ dist.init_process_group(backend='nccl')
+ rank = dist.get_rank()
+ world_size = dist.get_world_size()
+ torch.cuda.set_device(rank)
+ return rank, world_size
+
+# Launch command:
+# torchrun --nproc_per_node=4 train.py
+
+rank, world_size = setup_distributed()
+device = torch.device(f'cuda:{rank}')
+
+model = GPT(config).to(device)
+model = DDP(model, device_ids=[rank])
+
+# In training loop: data is sharded across GPUs
+# Each GPU processes a different micro-batch
+# Gradients are automatically reduced across GPUs by DDP
+
+# LR scales linearly with world_size (linear scaling rule)
+max_lr = 3e-4 * world_size
+
+# Effective batch size scales with world_size
+effective_batch = BATCH_SIZE * GRAD_ACCUM_STEPS * world_size
+```
+
+With 4× H100:
+- Effective batch size: 4× larger
+- Throughput: ~3.8× (some communication overhead)
+- Steps per 300s: ~3.8× more
+- val_bpb typically 5–10% better than single GPU
+
+## Multi-Agent Parallelism
+
+autoresearch's branch-based design enables multiple agents to run simultaneously without
+conflicts:
+
+```mermaid
+graph TD
+ REPO[autoresearch repo] --> A1[Agent 1<br/>branch: autoresearch/architecture]
+ REPO --> A2[Agent 2<br/>branch: autoresearch/optimizer]
+ REPO --> A3[Agent 3<br/>branch: autoresearch/scaling]
+
+ A1 -->|modifies train.py| T1[train.py: architectural changes]
+ A2 -->|modifies train.py| T2[train.py: optimizer changes]
+ A3 -->|modifies train.py| T3[train.py: scaling changes]
+
+ T1 -->|appends| R1[results.tsv on agent 1 machine]
+ T2 -->|appends| R2[results.tsv on agent 2 machine]
+ T3 -->|appends| R3[results.tsv on agent 3 machine]
+```
+
+Because each agent works on its own branch and `results.tsv` is untracked, there are
+zero conflicts between agents. In the morning, merge the insights:
+
+```bash
+# Collect all results
+git fetch origin
+git log --oneline origin/autoresearch/architecture | head -20
+git log --oneline origin/autoresearch/optimizer | head -20
+
+# Merge the best result into main
+git checkout main
+git merge origin/autoresearch/architecture # or whichever branch has the best val_bpb
+
+# Or cherry-pick specific improvements
+git cherry-pick <commit-hash>
+```
+
+## Customizing program.md for Your Hardware
+
+When running on different hardware, update `program.md` to include hardware-specific
+constraints:
+
+```markdown
+# autoresearch program
+
+## Hardware Context
+- GPU: RTX 4090 (24 GB VRAM)
+- Current baseline: val_bpb=1.9234 (24 GB config)
+- OOM threshold: memory_gb > 20 (leave 4 GB headroom)
+
+## Hardware-Specific Rules
+- If memory_gb > 20: git reset immediately (approaching OOM)
+- Batch_size must remain 4 (fixed for this GPU)
+- Do NOT increase block_size beyond 512 (OOM risk)
+- Flash Attention 3 IS available (RTX 40-series supports it)
+
+## Adjusted Config
+Config fields you may change: n_layer (4-10), n_embd (384-640), n_head (4-10),
+n_kv_head (1-4), WINDOW_PATTERN, SHORT_WINDOW (32-128), logit_softcap, use_squared_relu
+
+Config fields you MUST NOT change: block_size=512, BATCH_SIZE=4, TIME_BUDGET=300
+```
+
+## Custom Datasets
+
+To use a different dataset instead of climbmix-400b:
+
+```python
+# In prepare.py: swap the dataset source
+# The only requirement: a dataset with a 'text' column in parquet format
+
+from huggingface_hub import snapshot_download
+
+# Instead of climbmix:
+DATASET_NAME = "your-org/your-dataset"
+snapshot_download(
+ repo_id=DATASET_NAME,
+ repo_type="dataset",
+ local_dir=DATA_DIR,
+ allow_patterns=["*.parquet"],
+)
+```
+
+The tokenizer should be retrained on the new dataset:
+```python
+# In prepare.py: retrain BPE on your data
+# The BPE trainer is dataset-agnostic
+train_tokenizer(stream_texts(DATA_DIR), vocab_size=50257)
+```
+
+## Notable Community Forks
+
+The autoresearch community has produced several notable extensions:
+
+| Fork / Extension | Target Hardware | Key Changes |
+|---|---|---|
+| autoresearch-mps | macOS M-series | Replaced FA3 with SDPA, MPS device support |
+| autoresearch-windows | Windows + CUDA | WSL2 setup, path fixes, spawn multiprocessing |
+| autoresearch-amd | AMD ROCm | ROCm PyTorch, hipBLASLt attention |
+| autoresearch-multi | Multi-GPU DDP | torchrun launcher, linear LR scaling |
+| autoresearch-small | Consumer GPUs | Tiny/small configs for 8–24 GB GPUs |
+| autoresearch-long | Long context | 4k–8k context with full sliding window |
+
+## Extending the Evaluation
+
+The default `evaluate_bpb` uses a single validation set. For more robust evaluation:
+
+```python
+# In prepare.py: multiple evaluation domains
+def evaluate_bpb_multi(model, device, T):
+ """
+ Evaluate on multiple domains for a more complete picture.
+ Returns a dict of domain -> val_bpb.
+ """
+ results = {}
+ for domain in ['web', 'books', 'code', 'math']:
+ val_tokens = load_domain_validation(domain)
+ bpb = _evaluate_bpb_on_tokens(model, device, T, val_tokens)
+ results[domain] = bpb
+
+ results['average'] = np.mean(list(results.values()))
+ return results
+```
+
+Modify the output format in `train.py` to match what the agent greps:
+```python
+# Extended output format
+print(
+ f"val_bpb={results['average']:.4f} | "
+ f"val_bpb_web={results['web']:.4f} | "
+ f"val_bpb_code={results['code']:.4f} | "
+ f"memory_gb={memory_gb:.1f} | steps={total_steps}"
+)
+```
+
+Update `program.md` to grep for the composite metric:
+```markdown
+## Success Criterion
+Primary metric: val_bpb (the average across domains)
+Also log: val_bpb_web, val_bpb_code for domain-specific tracking
+```
+
+## Performance Tuning Checklist
+
+```mermaid
+graph TD
+ TUNE[Performance Tuning] --> T1{torch.compile enabled?}
+ T1 -->|No| EN_COMPILE[Add: model = torch.compile model]
+ T1 -->|Yes| T2{gc.freeze called?}
+ T2 -->|No| EN_GC[Add: gc.freeze before training loop]
+ T2 -->|Yes| T3{bfloat16 autocast?}
+ T3 -->|No| EN_BF16[Add: torch.autocast device_type=cuda dtype=torch.bfloat16]
+ T3 -->|Yes| T4{Flash Attention 3?}
+ T4 -->|No, CUDA available| EN_FA3[Install flash-attn, use flash_attn_varlen_func]
+ T4 -->|Yes or MPS| T5{Batch size maximized?}
+ T5 -->|No| EN_BATCH[Increase BATCH_SIZE until near OOM, then reduce by 10%]
+ T5 -->|Yes| DONE[Tuning complete]
+```
+
+## Chapter Summary
+
+| Scenario | Key Changes | Expected Performance |
+|---|---|---|
+| H100 80 GB (reference) | None — use defaults | val_bpb ~1.83, ~100 exp/night |
+| A100 40 GB | n_embd=640, batch=4 | val_bpb ~1.86, ~95 exp/night |
+| RTX 4090 24 GB | n_embd=512, block_size=512 | val_bpb ~1.90, ~90 exp/night |
+| RTX 4080 16 GB | n_embd=384, block_size=512, batch=2 | val_bpb ~1.94, ~85 exp/night |
+| Apple M3 Max | No FA3, MPS device, float32 | val_bpb ~1.96, ~40 exp/night |
+| 4× H100 (DDP) | torchrun, lr×4, batch×4 | val_bpb ~1.78, ~100 exp/night |
+| Multi-agent (3×) | Separate branches, separate machines | 3× experiments/night |
+| AMD MI300X | ROCm PyTorch, hipBLASLt | val_bpb ~1.83 (comparable to H100) |
+
+## Final Thoughts
+
+autoresearch distills an important insight about ML research: **the bottleneck is not GPU
+compute — it is research iteration speed**. By eliminating the human from the experiment
+loop, it turns a single GPU into a research engine that can explore 100 architectural
+hypotheses overnight.
+
+The design principles that make this work are universal:
+1. Fix the evaluation (prepare.py is immutable)
+2. Fix the comparison unit (TIME_BUDGET=300s always)
+3. Use existing infrastructure (git for versioning, grep for parsing)
+4. Encode the protocol completely (program.md leaves no gaps)
+5. Prefer simplicity (the simplicity criterion shapes search)
+
+These principles apply beyond autoresearch: any autonomous research agent benefits from
+clear evaluation metrics, comparable measurement units, minimal infrastructure, complete
+protocols, and a bias toward simplicity.
+
+The ~70,000 GitHub stars suggest the community recognizes something genuine here: a
+minimum viable research agent that works, written in ~1000 lines of Python and one
+Markdown file.
diff --git a/tutorials/autoresearch-tutorial/README.md b/tutorials/autoresearch-tutorial/README.md
new file mode 100644
index 00000000..aeff3fbd
--- /dev/null
+++ b/tutorials/autoresearch-tutorial/README.md
@@ -0,0 +1,110 @@
+---
+layout: default
+title: autoresearch Tutorial
+nav_order: 95
+has_children: true
+format_version: v2
+source_repo: https://github.com/karpathy/autoresearch
+categories: [ai-agents, ml-research, training]
+related_tutorials:
+ - deer-flow-tutorial
+ - agno-tutorial
+ - babyagi-tutorial
+last_updated: 2026-04-12
+---
+
+# autoresearch Tutorial
+
+**The overnight ML research agent that runs ~100 GPU experiments while you sleep.**
+
+autoresearch (https://github.com/karpathy/autoresearch) is a minimal, self-directing AI research agent built by Andrej Karpathy. It autonomously edits a PyTorch training script, commits the change, runs a fixed 5-minute training budget, measures validation bits-per-byte, and decides whether to keep or discard the experiment — all without human intervention. One night of unattended operation yields roughly 100 experiments.
+
+| Property | Value |
+|---|---|
+| Stars | 70,978 |
+| Language | Python |
+| License | MIT |
+| Primary metric | val_bpb (bits-per-byte) |
+| GPU requirement | Single CUDA GPU (recommended: H100/A100) |
+| Time per experiment | ~5 minutes (fixed wall-clock budget) |
+| Experiments per night | ~100 |
+
+## What You Will Learn
+
+This tutorial takes you from zero to running your own autonomous ML research loop. By the end you will understand:
+
+- The three-file design philosophy that makes autoresearch auditable and reproducible
+- How `prepare.py` downloads the climbmix-400b dataset and trains a BPE tokenizer
+- The modern GPT architecture in `train.py` — GQA, RoPE, QK-norm, Flash Attention 3, sliding window, Value Residual
+- MuonAdamW: the hybrid optimizer combining Polar Express orthogonalization with AdamW
+- Why a fixed wall-clock time budget (not step count) is the correct unit of comparison
+- How `program.md` encodes the agent's entire research protocol as a readable text file
+- How to read `results.tsv` and `analysis.ipynb` to extract signal from 100 nightly experiments
+- Scaling and customizing the system for smaller GPUs, multiple GPUs, or alternative hardware
+
+## Repository Structure
+
+```
+autoresearch/
+├── prepare.py # FIXED — data + tokenizer + eval harness
+├── train.py # MUTABLE — GPT model + MuonAdamW + training loop
+├── program.md # INSTRUCTIONS — agent protocol (the "research org code")
+├── analysis.ipynb # Jupyter notebook for exploring results.tsv
+├── results.tsv # Untracked experiment log (git-ignored)
+└── pyproject.toml # uv project manifest
+```
+
+## Prerequisites
+
+| Requirement | Minimum | Recommended |
+|---|---|---|
+| GPU | Any CUDA GPU with 16 GB VRAM | H100 SXM 80 GB |
+| Python | 3.10 | 3.12 |
+| PyTorch | 2.9.1 | 2.9.1 (CUDA 12.8) |
+| Package manager | pip | uv |
+| Disk space | 50 GB | 200 GB |
+| Time to first experiment | ~30 min | ~15 min |
+
+## Tutorial Chapters
+
+| # | Chapter | What you learn |
+|---|---|---|
+| 1 | [Getting Started](01-getting-started.md) | Problem statement, 3-file design, installation with uv |
+| 2 | [Data Preparation and Training Environment](02-data-preparation-and-training-environment.md) | prepare.py, climbmix dataset, BPE tokenizer, best-fit dataloader |
+| 3 | [GPT Architecture](03-gpt-architecture.md) | GPTConfig, GQA, RoPE, QK-norm, sliding window, Value Residual |
+| 4 | [The MuonAdamW Optimizer](04-muonadamw-optimizer.md) | Polar Express, NorMuon, Muon vs AdamW dispatch, LR schedule |
+| 5 | [The Training Loop and Fixed Time Budget](05-training-loop-and-fixed-time-budget.md) | Gradient accumulation, GC freeze, MFU tracking, evaluate_bpb |
+| 6 | [The Agent Protocol](06-agent-protocol.md) | program.md, experiment loop, git as ledger, autonomy mandate |
+| 7 | [Analyzing Results with analysis.ipynb](07-analyzing-results.md) | results.tsv schema, progress.png, best-hit analysis |
+| 8 | [Customization and Scaling](08-customization-and-scaling.md) | Smaller GPUs, multi-GPU, multi-agent, notable forks |
+
+## Quick-Start (3 commands)
+
+```bash
+# 1. Clone and install
+git clone https://github.com/karpathy/autoresearch
+cd autoresearch
+uv sync
+
+# 2. Prepare data (downloads climbmix, trains BPE tokenizer)
+uv run prepare.py
+
+# 3. Hand control to the agent
+# (Open Claude / GPT-4o with program.md as system prompt, then say "go")
+```
+
+The agent takes over from step 3. Go to sleep. Check `results.tsv` in the morning.
+
+## Design Philosophy
+
+autoresearch embodies three principles that distinguish it from heavier MLOps frameworks:
+
+**Simplicity over completeness.** Three files. No YAML config trees, no orchestration layers, no databases. Every decision is visible in plain Python or plain Markdown.
+
+**Git as the experiment ledger.** Every attempted change is a commit. Every rejected change is a `git reset`. The full history of what the agent tried — including failures — lives in the repository with zero extra tooling.
+
+**Comparable experiments by construction.** A fixed 5-minute wall-clock budget means every experiment is measured under identical conditions. No cherry-picking long runs. No step-count games.
+
+---
+
+*This tutorial was written for autoresearch as of April 2026 (70,978 stars, MIT license). The repository moves fast; always check the upstream source for the latest `train.py` and `program.md`.*
diff --git a/tutorials/awesome-claude-code-tutorial/01-getting-started.md b/tutorials/awesome-claude-code-tutorial/01-getting-started.md
index 983665ed..d1075ca9 100644
--- a/tutorials/awesome-claude-code-tutorial/01-getting-started.md
+++ b/tutorials/awesome-claude-code-tutorial/01-getting-started.md
@@ -48,8 +48,6 @@ You now have a concrete triage loop for using the list efficiently.
Next: [Chapter 2: List Taxonomy and Navigation](02-list-taxonomy-and-navigation.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/ticker/generate_ticker_svg.py`
diff --git a/tutorials/awesome-claude-code-tutorial/02-list-taxonomy-and-navigation.md b/tutorials/awesome-claude-code-tutorial/02-list-taxonomy-and-navigation.md
index dce24eba..8175c4f7 100644
--- a/tutorials/awesome-claude-code-tutorial/02-list-taxonomy-and-navigation.md
+++ b/tutorials/awesome-claude-code-tutorial/02-list-taxonomy-and-navigation.md
@@ -52,8 +52,6 @@ You now understand how to navigate by intent and choose the right list rendering
Next: [Chapter 3: Resource Quality Evaluation Framework](03-resource-quality-evaluation-framework.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/validation/validate_links.py`
diff --git a/tutorials/awesome-claude-code-tutorial/03-resource-quality-evaluation-framework.md b/tutorials/awesome-claude-code-tutorial/03-resource-quality-evaluation-framework.md
index 93ce0709..f25295ab 100644
--- a/tutorials/awesome-claude-code-tutorial/03-resource-quality-evaluation-framework.md
+++ b/tutorials/awesome-claude-code-tutorial/03-resource-quality-evaluation-framework.md
@@ -48,8 +48,6 @@ You now have a repeatable quality filter for selecting resources safely.
Next: [Chapter 4: Skills, Hooks, and Slash Command Patterns](04-skills-hooks-and-slash-command-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/validation/validate_links.py`
diff --git a/tutorials/awesome-claude-code-tutorial/04-skills-hooks-and-slash-command-patterns.md b/tutorials/awesome-claude-code-tutorial/04-skills-hooks-and-slash-command-patterns.md
index c8d907dd..515454be 100644
--- a/tutorials/awesome-claude-code-tutorial/04-skills-hooks-and-slash-command-patterns.md
+++ b/tutorials/awesome-claude-code-tutorial/04-skills-hooks-and-slash-command-patterns.md
@@ -47,184 +47,182 @@ You now have a practical model for composing multiple resource types without add
Next: [Chapter 5: `CLAUDE.md` and Project Scaffolding Patterns](05-claude-md-and-project-scaffolding-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/resources/parse_issue_form.py`
+### `scripts/resources/create_resource_pr.py`
-The `main` function in [`scripts/resources/parse_issue_form.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/parse_issue_form.py) handles a key part of this chapter's functionality:
+The `validate_generated_outputs` function in [`scripts/resources/create_resource_pr.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/create_resource_pr.py) handles a key part of this chapter's functionality:
```py
-def main():
- """Main entry point for the script."""
- # Get issue body from environment variable
- issue_body = os.environ.get("ISSUE_BODY", "")
- if not issue_body:
- print(json.dumps({"valid": False, "errors": ["No issue body provided"], "data": {}}))
- return 1
-
- # Parse the issue body
- parsed_data = parse_issue_body(issue_body)
-
- # Check if --validate flag is passed
- validate_mode = "--validate" in sys.argv
-
- if validate_mode:
- # Full validation mode
- is_valid, errors, warnings = validate_parsed_data(parsed_data)
-
- # Check for duplicates
- duplicate_warnings = check_for_duplicates(parsed_data)
- warnings.extend(duplicate_warnings)
-
- # If basic validation passed, do URL validation
- if is_valid and parsed_data.get("primary_link"):
- url_valid, enriched_data, url_errors = validate_single_resource(
- primary_link=parsed_data.get("primary_link", ""),
- secondary_link=parsed_data.get("secondary_link", ""),
- display_name=parsed_data.get("display_name", ""),
- category=parsed_data.get("category", ""),
- license=parsed_data.get("license", "NOT_FOUND"),
+def validate_generated_outputs(status_stdout: str, repo_root: str) -> None:
+ """Verify expected outputs exist and no unexpected files are changed."""
+ expected_readme = os.path.join(repo_root, "README.md")
+ expected_csv = os.path.join(repo_root, "THE_RESOURCES_TABLE.csv")
+ expected_readme_dir = os.path.join(repo_root, "README_ALTERNATIVES")
+
+ if not os.path.isfile(expected_readme):
+ raise Exception(f"Missing generated README: {expected_readme}")
+ if not os.path.isfile(expected_csv):
+ raise Exception(f"Missing CSV: {expected_csv}")
+ if not os.path.isdir(expected_readme_dir):
+ raise Exception(f"Missing README directory: {expected_readme_dir}")
+ if not glob.glob(os.path.join(expected_readme_dir, "*.md")):
+ raise Exception(f"No README alternatives found in {expected_readme_dir}")
+
+ changed_paths = []
+ for line in status_stdout.splitlines():
+ if not line.strip():
+ continue
+ path = line[3:]
+ if " -> " in path:
+ path = path.split(" -> ", 1)[1]
+ changed_paths.append(path)
+
+ allowed_files = {"README.md", "THE_RESOURCES_TABLE.csv"}
+ allowed_prefixes = ("README_ALTERNATIVES/", "assets/")
+ ignored_files = {"resource_data.json", "pr_result.json"}
+ unexpected = [
+ path
+ for path in changed_paths
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/maintenance/check_repo_health.py`
+### `scripts/resources/create_resource_pr.py`
-The `get_repo_info` function in [`scripts/maintenance/check_repo_health.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/check_repo_health.py) handles a key part of this chapter's functionality:
+The `write_step_outputs` function in [`scripts/resources/create_resource_pr.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/create_resource_pr.py) handles a key part of this chapter's functionality:
```py
-def get_repo_info(owner, repo):
- """
- Fetch repository information from GitHub API.
- Returns a dict with:
- - open_issues: number of open issues
- - last_updated: date of last push (ISO format string)
- - exists: whether the repo exists (False if 404)
- Returns None if API call fails for other reasons.
- """
- api_url = f"https://api.github.com/repos/{owner}/{repo}"
+def write_step_outputs(outputs: dict[str, str]) -> None:
+ """Write outputs for GitHub Actions, if available."""
+ output_path = os.environ.get("GITHUB_OUTPUT")
+ if not output_path:
+ return
try:
- response = requests.get(api_url, headers=HEADERS, timeout=10)
-
- if response.status_code == 404:
- logger.warning(f"Repository {owner}/{repo} not found (deleted or private)")
- return {"exists": False, "open_issues": 0, "last_updated": None}
-
- if response.status_code == 403:
- logger.error(f"Rate limit or forbidden for {owner}/{repo}")
- return None
+ with open(output_path, "a", encoding="utf-8") as f:
+ for key, value in outputs.items():
+ if value is None:
+ value = ""
+ value_str = str(value)
+ if "\n" in value_str or "\r" in value_str:
+                    f.write(f"{key}<<EOF\n{value_str}\nEOF\n")
+                else:
+                    f.write(f"{key}={value_str}\n")
+    except OSError:
+        pass
+
+
+def run_command(cmd: list[str], check: bool = True) -> subprocess.CompletedProcess:
+ """Run a command and return the result."""
+ return subprocess.run(cmd, capture_output=True, text=True, check=check)
+
+
+def create_unique_branch_name(base_name: str) -> str:
+ """Create a unique branch name with timestamp."""
+ timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
+ return f"{base_name}-{timestamp}"
+
+
+def get_badge_filename(display_name: str) -> str:
+ """Compute the badge filename for a resource.
+
+ Uses the same logic as save_resource_badge_svg in generate_readme.py.
"""
- problematic_repos = []
- checked_repos = 0
- deleted_repos = []
+ safe_name = re.sub(r"[^a-zA-Z0-9]", "-", display_name.lower())
+ safe_name = re.sub(r"-+", "-", safe_name).strip("-")
+ return f"badge-{safe_name}.svg"
- logger.info(f"Reading repository list from {csv_file}")
+def validate_generated_outputs(status_stdout: str, repo_root: str) -> None:
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/maintenance/check_repo_health.py`
+### `scripts/badges/badge_notification_core.py`
-The `check_repos_health` function in [`scripts/maintenance/check_repo_health.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/check_repo_health.py) handles a key part of this chapter's functionality:
+The `RateLimiter` class in [`scripts/badges/badge_notification_core.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/badges/badge_notification_core.py) handles a key part of this chapter's functionality:
```py
-def check_repos_health(
- csv_file, months_threshold=MONTHS_THRESHOLD, issues_threshold=OPEN_ISSUES_THRESHOLD
-):
- """
- Check health of all active GitHub repositories in the CSV.
- Returns a list of problematic repos.
- """
- problematic_repos = []
- checked_repos = 0
- deleted_repos = []
-
- logger.info(f"Reading repository list from {csv_file}")
-
- try:
- with open(csv_file, encoding="utf-8") as f:
- reader = csv.DictReader(f)
-
- for row in reader:
- # Check if Active is TRUE
- active = row.get("Active", "").strip().upper()
- if active != "TRUE":
- continue
-
- primary_link = row.get("Primary Link", "").strip()
- if not primary_link:
- continue
-
- # Extract owner and repo from GitHub URL
- _, is_github, owner, repo = parse_github_url(primary_link)
- if not is_github or not owner or not repo:
+class RateLimiter:
+ """Handle GitHub API rate limiting with exponential backoff"""
+
+ def __init__(self):
+ self.last_request_time = 0
+ self.request_count = 0
+ self.backoff_seconds = 1
+ self.max_backoff = 60
+
+ def check_rate_limit(self, github_client: Github) -> dict:
+ """Check current rate limit status"""
+ try:
+ rate_limit = github_client.get_rate_limit()
+ core = rate_limit.resources.core
+ return {
+ "remaining": core.remaining,
+ "limit": core.limit,
+ "reset_time": core.reset.timestamp(),
+ "should_pause": core.remaining < 100,
+ "should_stop": core.remaining < 10,
+ }
+ except Exception as e:
+ logger.warning(f"Could not check rate limit: {e}")
+ return {
+ "remaining": -1,
+ "limit": -1,
+ "reset_time": 0,
+ "should_pause": False,
+ "should_stop": False,
+ }
```
-This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+This class is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[main]
- B[get_repo_info]
- C[is_outdated]
- D[check_repos_health]
- E[main]
+ A[validate_generated_outputs]
+ B[write_step_outputs]
+ C[main]
+ D[RateLimiter]
+ E[BadgeNotificationCore]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-code-tutorial/05-claude-md-and-project-scaffolding-patterns.md b/tutorials/awesome-claude-code-tutorial/05-claude-md-and-project-scaffolding-patterns.md
index 31acb33a..b1c10904 100644
--- a/tutorials/awesome-claude-code-tutorial/05-claude-md-and-project-scaffolding-patterns.md
+++ b/tutorials/awesome-claude-code-tutorial/05-claude-md-and-project-scaffolding-patterns.md
@@ -40,170 +40,168 @@ You now have a pattern for building maintainable `CLAUDE.md` guidance from curat
Next: [Chapter 6: Automation Pipeline and README Generation](06-automation-pipeline-and-readme-generation.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `tools/readme_tree/update_readme_tree.py`
+### `scripts/maintenance/check_repo_health.py`
-The `find_repo_root` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
+The `check_repos_health` function in [`scripts/maintenance/check_repo_health.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/check_repo_health.py) handles a key part of this chapter's functionality:
```py
-def find_repo_root(start: Path) -> Path:
- """Locate the repo root.
-
- Prefer git to identify the VCS root; fall back to walking upward for pyproject.toml.
+def check_repos_health(
+ csv_file, months_threshold=MONTHS_THRESHOLD, issues_threshold=OPEN_ISSUES_THRESHOLD
+):
+ """
+ Check health of all active GitHub repositories in the CSV.
+ Returns a list of problematic repos.
+ """
+ problematic_repos = []
+ checked_repos = 0
+ deleted_repos = []
- Args:
- start: Path inside the repo.
+ logger.info(f"Reading repository list from {csv_file}")
- Returns:
- The repo root path.
- """
- p = start.resolve()
- # Prefer git root if available.
try:
- result = subprocess.run(
- ["git", "-C", str(p), "rev-parse", "--show-toplevel"],
- check=False,
- capture_output=True,
- text=True,
- )
- if result.returncode == 0:
- git_root = result.stdout.strip()
- if git_root:
- return Path(git_root)
- except FileNotFoundError:
- pass
-
- # Fallback: walk upward until pyproject.toml exists.
- while not (p / "pyproject.toml").exists():
- if p.parent == p:
+ with open(csv_file, encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+
+ for row in reader:
+ # Check if Active is TRUE
+ active = row.get("Active", "").strip().upper()
+ if active != "TRUE":
+ continue
+
+ primary_link = row.get("Primary Link", "").strip()
+ if not primary_link:
+ continue
+
+ # Extract owner and repo from GitHub URL
+ _, is_github, owner, repo = parse_github_url(primary_link)
+ if not is_github or not owner or not repo:
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `tools/readme_tree/update_readme_tree.py`
+### `scripts/maintenance/check_repo_health.py`
-The `normalize_key` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
+The `main` function in [`scripts/maintenance/check_repo_health.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/check_repo_health.py) handles a key part of this chapter's functionality:
```py
-def normalize_key(path: str | Path | None) -> str:
- """Normalize a path-like key into a repo-relative POSIX string."""
- if path is None:
- return ""
- s = str(path).strip()
- if s in {".", "./", ""}:
- return ""
- s = s.replace("\\", "/").strip("/")
- return s
-
-
-def load_config(config_path: Path) -> dict:
- """Load the YAML configuration for tree generation."""
- data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
- if not isinstance(data, dict):
- raise RuntimeError("Invalid config format")
- return data
-
-
-def parse_ignore_rule(pattern: str | Path | None) -> IgnoreRule | None:
- """Parse a raw ignore pattern into a structured rule."""
- if pattern is None:
- return None
- line = str(pattern).strip()
- if not line or line.startswith("#"):
- return None
-
- negated = line.startswith("!")
- if negated:
- line = line[1:]
+def main():
+ parser = argparse.ArgumentParser(
+ description="Check health of GitHub repositories in THE_RESOURCES_TABLE.csv"
+ )
+ parser.add_argument(
+ "--csv-file",
+ default=INPUT_FILE,
+ help=f"Path to CSV file (default: {INPUT_FILE})",
+ )
+ parser.add_argument(
+ "--months",
+ type=int,
+ default=MONTHS_THRESHOLD,
+ help=f"Months threshold for outdated repos (default: {MONTHS_THRESHOLD})",
+ )
+ parser.add_argument(
+ "--issues",
+ type=int,
+ default=OPEN_ISSUES_THRESHOLD,
+ help=f"Open issues threshold (default: {OPEN_ISSUES_THRESHOLD})",
+ )
+
+ args = parser.parse_args()
+
+ problematic_repos = check_repos_health(args.csv_file, args.months, args.issues)
+
+ if problematic_repos:
+ logger.error(f"\n{'=' * 60}")
+ logger.error("❌ HEALTH CHECK FAILED")
+ logger.error(
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `tools/readme_tree/update_readme_tree.py`
+### `scripts/resources/parse_issue_form.py`
-The `load_config` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
+The `parse_issue_body` function in [`scripts/resources/parse_issue_form.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/parse_issue_form.py) handles a key part of this chapter's functionality:
```py
-def load_config(config_path: Path) -> dict:
- """Load the YAML configuration for tree generation."""
- data = yaml.safe_load(config_path.read_text(encoding="utf-8"))
- if not isinstance(data, dict):
- raise RuntimeError("Invalid config format")
- return data
+def parse_issue_body(issue_body: str) -> dict[str, str]:
+ """
+ Parse GitHub issue form body into structured data.
+ GitHub issue forms are rendered as markdown with specific patterns:
+ - Headers (###) indicate field labels
+ - Values follow the headers
+ - Checkboxes are rendered as - [x] or - [ ]
+ """
+ data = {}
-def parse_ignore_rule(pattern: str | Path | None) -> IgnoreRule | None:
- """Parse a raw ignore pattern into a structured rule."""
- if pattern is None:
- return None
- line = str(pattern).strip()
- if not line or line.startswith("#"):
- return None
+ # Split into sections by ### headers
+ sections = re.split(r"###\s+", issue_body)
- negated = line.startswith("!")
- if negated:
- line = line[1:]
+ for section in sections:
+ if not section.strip():
+ continue
- anchored = line.startswith("/")
- if anchored:
- line = line[1:]
+ lines = section.strip().split("\n")
+ if not lines:
+ continue
- dir_only = line.endswith("/")
- if dir_only:
- line = line[:-1]
+ # First line is the field label
+ label = lines[0].strip()
- line = line.replace("\\", "/").strip()
- if not line:
+ # Rest is the value (skip empty lines)
+ value_lines = [
+ line
+ for line in lines[1:]
+ if line.strip() and not line.strip().startswith("_No response_")
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `tools/readme_tree/update_readme_tree.py`
+### `scripts/resources/parse_issue_form.py`
-The `parse_ignore_rule` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
+The `validate_parsed_data` function in [`scripts/resources/parse_issue_form.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/parse_issue_form.py) handles a key part of this chapter's functionality:
```py
-def parse_ignore_rule(pattern: str | Path | None) -> IgnoreRule | None:
- """Parse a raw ignore pattern into a structured rule."""
- if pattern is None:
- return None
- line = str(pattern).strip()
- if not line or line.startswith("#"):
- return None
-
- negated = line.startswith("!")
- if negated:
- line = line[1:]
-
- anchored = line.startswith("/")
- if anchored:
- line = line[1:]
-
- dir_only = line.endswith("/")
- if dir_only:
- line = line[:-1]
-
- line = line.replace("\\", "/").strip()
- if not line:
- return None
-
- return IgnoreRule(pattern=line, negated=negated, dir_only=dir_only, anchored=anchored)
-
+def validate_parsed_data(data: dict[str, str]) -> tuple[bool, list[str], list[str]]:
+ """
+ Validate the parsed data meets all requirements.
+ Returns (is_valid, errors, warnings)
+ """
+ errors = []
+ warnings = []
+
+ # Check required fields
+ required_fields = [
+ "display_name",
+ "category",
+ "primary_link",
+ "author_name",
+ "author_link",
+ "description",
+ ]
+
+ for field in required_fields:
+ if not data.get(field, "").strip():
+ errors.append(f"Required field '{field}' is missing or empty")
+
+ # Validate category
+ valid_categories = category_manager.get_all_categories()
+ if data.get("category") not in valid_categories:
+ errors.append(
+ f"Invalid category: {data.get('category')}. "
+ f"Must be one of: {', '.join(valid_categories)}"
+ )
-def parse_ignore_rules(patterns: list[str | Path]) -> list[IgnoreRule]:
- """Parse a list of ignore patterns into IgnoreRule entries."""
- rules: list[IgnoreRule] = []
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
@@ -213,11 +211,11 @@ This function is important because it defines how Awesome Claude Code Tutorial:
```mermaid
flowchart TD
- A[find_repo_root]
- B[normalize_key]
- C[load_config]
- D[parse_ignore_rule]
- E[parse_ignore_rules]
+ A[check_repos_health]
+ B[main]
+ C[parse_issue_body]
+ D[validate_parsed_data]
+ E[check_for_duplicates]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-code-tutorial/06-automation-pipeline-and-readme-generation.md b/tutorials/awesome-claude-code-tutorial/06-automation-pipeline-and-readme-generation.md
index 5c4482c3..51ec676e 100644
--- a/tutorials/awesome-claude-code-tutorial/06-automation-pipeline-and-readme-generation.md
+++ b/tutorials/awesome-claude-code-tutorial/06-automation-pipeline-and-readme-generation.md
@@ -50,170 +50,159 @@ You now understand the maintenance pipeline that keeps the list coherent at scal
Next: [Chapter 7: Link Health, Validation, and Drift Control](07-link-health-validation-and-drift-control.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `tools/readme_tree/update_readme_tree.py`
+### `scripts/resources/detect_informal_submission.py`
-The `main` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
+The `main` function in [`scripts/resources/detect_informal_submission.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/detect_informal_submission.py) handles a key part of this chapter's functionality:
```py
-def main() -> int:
- """CLI entry point for updating the README tree block."""
- parser = argparse.ArgumentParser(description="Update README tree block.")
- parser.add_argument(
- "--config",
- default="tools/readme_tree/config.yaml",
- help="Path to the tree config file.",
- )
- parser.add_argument("--check", action="store_true", help="Fail if updates are needed.")
- parser.add_argument("--debug", action="store_true", help="Print debug info on mismatch.")
+def main() -> None:
+ """Entry point for GitHub Actions."""
+ title = os.environ.get("ISSUE_TITLE", "")
+ body = os.environ.get("ISSUE_BODY", "")
- args = parser.parse_args()
+ result = calculate_confidence(title, body)
- config_path = Path(args.config)
- if not config_path.exists():
- print(f"Config not found: {config_path}", file=sys.stderr)
- return 1
+ # Output results for GitHub Actions
+ set_github_output("action", result.action.value)
+ set_github_output("confidence", f"{result.confidence:.0%}")
+ set_github_output("matched_signals", ", ".join(result.matched_signals))
- repo_root = find_repo_root(config_path)
- config = load_config(config_path)
+ # Also print for logging
+ print(f"Confidence: {result.confidence:.2%}")
+ print(f"Action: {result.action.value}")
+ print(f"Matched signals: {result.matched_signals}")
- doc_path = repo_root / config.get("doc_path", "docs/README-GENERATION.md")
- if not doc_path.exists():
- print(f"Doc not found: {doc_path}", file=sys.stderr)
- return 1
- tree = build_tree(config, repo_root)
+if __name__ == "__main__":
+ main()
- comments = {normalize_key(k): v for k, v in config.get("entries", {}).items()}
- virtual_comments = config.get("virtual_entries", {})
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/categories/add_category.py`
+### `scripts/resources/detect_informal_submission.py`
-The `CategoryAdder` class in [`scripts/categories/add_category.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/categories/add_category.py) handles a key part of this chapter's functionality:
+The module-level declarations in [`scripts/resources/detect_informal_submission.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/detect_informal_submission.py) handle a key part of this chapter's functionality:
```py
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from dataclasses import dataclass
+from enum import Enum
+
+
+class Action(Enum):
+ NONE = "none"
+ WARN = "warn" # Medium confidence: warn but don't close
+ CLOSE = "close" # High confidence: warn and close
-class CategoryAdder:
- """Handles the process of adding a new category to the repository."""
-
- def __init__(self, repo_root: Path):
- """Initialize the CategoryAdder with the repository root path."""
- self.repo_root = repo_root
- self.templates_dir = repo_root / "templates"
- self.github_dir = repo_root / ".github" / "ISSUE_TEMPLATE"
-
- def get_max_order(self) -> int:
- """Get the maximum order value from existing categories."""
- categories = category_manager.get_categories_for_readme()
- if not categories:
- return 0
- return max(cat.get("order", 0) for cat in categories)
-
- def add_category_to_yaml(
- self,
- category_id: str,
- name: str,
- prefix: str,
- icon: str,
- description: str,
- order: int | None = None,
- subcategories: list[str] | None = None,
- ) -> bool:
- """
- Add a new category to categories.yaml.
-
- Args:
+@dataclass
+class DetectionResult:
+ confidence: float
+ action: Action
+ matched_signals: list[str]
+
+
+# Template field labels - VERY strong indicator (from the issue form)
+# Matching 3+ of these is almost certainly a copy-paste from template without using form
+TEMPLATE_FIELD_LABELS = [
+ "display name:",
+ "category:",
+ "sub-category:",
+ "primary link:",
+ "author name:",
+ "author link:",
```
-This class is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+These module-level declarations are important because they define how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/categories/add_category.py`
+### `scripts/utils/github_utils.py`
-The `interactive_mode` function in [`scripts/categories/add_category.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/categories/add_category.py) handles a key part of this chapter's functionality:
+The `get_github_client` function in [`scripts/utils/github_utils.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/utils/github_utils.py) handles a key part of this chapter's functionality:
```py
-def interactive_mode(adder: CategoryAdder) -> None:
- """Run the script in interactive mode, prompting for all inputs."""
- print("=" * 60)
- print("ADD NEW CATEGORY TO AWESOME CLAUDE CODE")
- print("=" * 60)
- print()
-
- # Get category details
- name = input("Enter category display name (e.g., 'Alternative Clients'): ").strip()
- if not name:
- print("Error: Name is required")
- sys.exit(1)
-
- # Generate ID from name
- category_id = name.lower().replace(" ", "-").replace("&", "and")
- suggested_id = category_id
- category_id = input(f"Enter category ID (default: '{suggested_id}'): ").strip() or suggested_id
-
- # Generate prefix from name
- suggested_prefix = name.lower().split()[0][:6]
- prefix = input(f"Enter ID prefix (default: '{suggested_prefix}'): ").strip() or suggested_prefix
-
- # Get icon
- icon = input("Enter emoji icon (e.g., 🔌): ").strip() or "📦"
-
- # Get description
- print("\nEnter description (can be multiline, enter '---' on a new line to finish):")
- description_lines = []
- while True:
- line = input()
+def get_github_client(
+ token: str | None = None,
+ user_agent: str = _DEFAULT_GITHUB_USER_AGENT,
+ seconds_between_requests: float = _DEFAULT_SECONDS_BETWEEN_REQUESTS,
+) -> Github:
+ """Return a cached PyGithub client with optional pacing."""
+ key = (token, user_agent, seconds_between_requests)
+ if key not in _GITHUB_CLIENTS:
+ auth = Auth.Token(token) if token else None
+ _GITHUB_CLIENTS[key] = Github(
+ auth=auth,
+ user_agent=user_agent,
+ seconds_between_requests=seconds_between_requests,
+ )
+ return _GITHUB_CLIENTS[key]
+
+
+def github_request_json(
+ api_url: str,
+ params: dict[str, object] | None = None,
+ token: str | None = None,
+ user_agent: str = _DEFAULT_GITHUB_USER_AGENT,
+ seconds_between_requests: float = _DEFAULT_SECONDS_BETWEEN_REQUESTS,
+) -> tuple[int, dict[str, object], object | None]:
+ """Request JSON from the GitHub API using PyGithub's requester."""
+ if token is None:
+ token = os.getenv("GITHUB_TOKEN") or None
+ client = get_github_client(
+ token=token,
+ user_agent=user_agent,
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/categories/add_category.py`
+### `scripts/utils/github_utils.py`
-The `main` function in [`scripts/categories/add_category.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/categories/add_category.py) handles a key part of this chapter's functionality:
+The `github_request_json` function in [`scripts/utils/github_utils.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/utils/github_utils.py) handles a key part of this chapter's functionality:
```py
-def main():
- """Main entry point for the script."""
- parser = argparse.ArgumentParser(
- description="Add a new category to awesome-claude-code",
- formatter_class=argparse.RawDescriptionHelpFormatter,
- epilog="""
-Examples:
- %(prog)s # Interactive mode
- %(prog)s --name "My Category" --prefix "mycat" --icon "🎯"
- %(prog)s --name "Tools" --order 5 --subcategories "CLI,GUI,Web"
- """,
- )
-
- parser.add_argument("--name", help="Display name for the category")
- parser.add_argument("--id", help="Category ID (defaults to slugified name)")
- parser.add_argument("--prefix", help="ID prefix for resources")
- parser.add_argument("--icon", default="📦", help="Emoji icon for the category")
- parser.add_argument(
- "--description", help="Description of the category (will be prefixed with '>')"
- )
- parser.add_argument("--order", type=int, help="Order position in the list")
- parser.add_argument(
- "--subcategories",
- help="Comma-separated list of subcategories (default: General)",
+def github_request_json(
+ api_url: str,
+ params: dict[str, object] | None = None,
+ token: str | None = None,
+ user_agent: str = _DEFAULT_GITHUB_USER_AGENT,
+ seconds_between_requests: float = _DEFAULT_SECONDS_BETWEEN_REQUESTS,
+) -> tuple[int, dict[str, object], object | None]:
+ """Request JSON from the GitHub API using PyGithub's requester."""
+ if token is None:
+ token = os.getenv("GITHUB_TOKEN") or None
+ client = get_github_client(
+ token=token,
+ user_agent=user_agent,
+ seconds_between_requests=seconds_between_requests,
)
- parser.add_argument(
- "--no-commit", action="store_true", help="Don't create a commit after adding"
+ status, headers, body = client.requester.requestJson(
+ "GET",
+ api_url,
+ parameters=params,
+ headers={"Accept": "application/vnd.github+json"},
)
+ if not body:
+ return status, headers, None
+ try:
+ data = json.loads(body)
+ except json.JSONDecodeError:
+ data = body
+ return status, headers, data
+
- args = parser.parse_args()
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
@@ -224,10 +213,10 @@ This function is important because it defines how Awesome Claude Code Tutorial:
```mermaid
flowchart TD
A[main]
- B[CategoryAdder]
- C[interactive_mode]
- D[main]
- E[sanitize_filename]
+ B[import]
+ C[get_github_client]
+ D[github_request_json]
+ E[parse_github_url]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-code-tutorial/07-link-health-validation-and-drift-control.md b/tutorials/awesome-claude-code-tutorial/07-link-health-validation-and-drift-control.md
index cdd752b6..82e96af8 100644
--- a/tutorials/awesome-claude-code-tutorial/07-link-health-validation-and-drift-control.md
+++ b/tutorials/awesome-claude-code-tutorial/07-link-health-validation-and-drift-control.md
@@ -41,140 +41,95 @@ You now have the operational health model for keeping curated docs accurate over
Next: [Chapter 8: Contribution Workflow and Governance](08-contribution-workflow-and-governance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/utils/github_utils.py`
+### `scripts/maintenance/update_github_release_data.py`
-The `parse_github_resource_url` function in [`scripts/utils/github_utils.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/utils/github_utils.py) handles a key part of this chapter's functionality:
+The `fetch_latest_release` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
```py
-def parse_github_resource_url(url: str) -> dict[str, str] | None:
- """
- Parse GitHub URL and extract owner, repo, branch, and path.
- Returns a dict with keys: owner, repo, branch, path, type.
- """
- patterns = {
- # File in repository
- "file": r"https://github\.com/([^/]+)/([^/]+)/(?:blob|raw)/([^/]+)/(.+)",
- # Directory in repository
- "dir": r"https://github\.com/([^/]+)/([^/]+)/tree/([^/]+)/(.+)",
- # Repository root
- "repo": r"https://github\.com/([^/]+)/([^/]+)/?$",
- # Gist
- "gist": r"https://gist\.github\.com/([^/]+)/([^/#]+)",
- }
-
- for url_type, pattern in patterns.items():
- match = re.match(pattern, url)
- if match:
- if url_type == "gist":
- return {
- "type": "gist",
- "owner": match.group(1),
- "gist_id": match.group(2),
- }
- elif url_type == "repo":
- return {
- "type": "repo",
- "owner": match.group(1),
- "repo": _normalize_repo_name(match.group(2)),
-```
-
-This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-
-### `scripts/maintenance/update_github_release_data.py`
-
-The `format_commit_date` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
-
-```py
+def fetch_latest_release(owner: str, repo: str) -> tuple[str | None, str | None, str]:
+ api_url = f"https://api.github.com/repos/{owner}/{repo}/releases/latest"
+ response = github_get(api_url)
+ if response.status_code == 200:
+ data = response.json()
+ published_at = data.get("published_at") or data.get("created_at")
+ return format_commit_date(published_at), data.get("tag_name"), "ok"
+ if response.status_code == 404:
+ return None, None, "no_release"
+ return None, None, f"http_{response.status_code}"
-def format_commit_date(commit_date: str | None) -> str | None:
- if not commit_date:
- return None
- try:
- dt = datetime.fromisoformat(commit_date.replace("Z", "+00:00"))
- return dt.strftime("%Y-%m-%d:%H-%M-%S")
- except ValueError:
- return None
+def update_release_data(csv_path: str, max_rows: int | None = None, dry_run: bool = False) -> None:
+ with open(csv_path, encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ rows = list(reader)
+ fieldnames = list(reader.fieldnames or [])
-def parse_github_repo(url: str | None) -> tuple[str | None, str | None]:
- if not url or not isinstance(url, str):
- return None, None
- match = re.match(r"https?://github\.com/([^/]+)/([^/]+)", url.strip())
- if not match:
- return None, None
- owner, repo = match.groups()
- repo = repo.split("?", 1)[0].split("#", 1)[0]
- repo = repo.removesuffix(".git")
- return owner, repo
+ required_columns = ["Last Modified", "Latest Release", "Release Version", "Release Source"]
+ for column in required_columns:
+ if column not in fieldnames:
+ fieldnames.append(column)
+ processed = 0
+ skipped = 0
+ updated = 0
+ errors = 0
-def github_get(url: str, params: dict | None = None) -> requests.Response:
- response = requests.get(url, headers=HEADERS, params=params, timeout=10)
- if response.status_code == 403 and response.headers.get("X-RateLimit-Remaining") == "0":
- reset_time = int(response.headers.get("X-RateLimit-Reset", 0))
- sleep_time = max(reset_time - int(time.time()), 0) + 1
- logger.warning("GitHub rate limit hit. Sleeping for %s seconds.", sleep_time)
- time.sleep(sleep_time)
- response = requests.get(url, headers=HEADERS, params=params, timeout=10)
+ for _, row in enumerate(rows):
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
### `scripts/maintenance/update_github_release_data.py`
-The `parse_github_repo` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
+The `update_release_data` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
```py
-def parse_github_repo(url: str | None) -> tuple[str | None, str | None]:
- if not url or not isinstance(url, str):
- return None, None
- match = re.match(r"https?://github\.com/([^/]+)/([^/]+)", url.strip())
- if not match:
- return None, None
- owner, repo = match.groups()
- repo = repo.split("?", 1)[0].split("#", 1)[0]
- repo = repo.removesuffix(".git")
- return owner, repo
-
-
-def github_get(url: str, params: dict | None = None) -> requests.Response:
- response = requests.get(url, headers=HEADERS, params=params, timeout=10)
- if response.status_code == 403 and response.headers.get("X-RateLimit-Remaining") == "0":
- reset_time = int(response.headers.get("X-RateLimit-Reset", 0))
- sleep_time = max(reset_time - int(time.time()), 0) + 1
- logger.warning("GitHub rate limit hit. Sleeping for %s seconds.", sleep_time)
- time.sleep(sleep_time)
- response = requests.get(url, headers=HEADERS, params=params, timeout=10)
- return response
-
-
-def fetch_last_commit_date(owner: str, repo: str) -> tuple[str | None, str]:
- api_url = f"https://api.github.com/repos/{owner}/{repo}/commits"
- response = github_get(api_url, params={"per_page": 1})
-
- if response.status_code == 200:
- data = response.json()
- if isinstance(data, list) and data:
+def update_release_data(csv_path: str, max_rows: int | None = None, dry_run: bool = False) -> None:
+ with open(csv_path, encoding="utf-8") as f:
+ reader = csv.DictReader(f)
+ rows = list(reader)
+ fieldnames = list(reader.fieldnames or [])
+
+ required_columns = ["Last Modified", "Latest Release", "Release Version", "Release Source"]
+ for column in required_columns:
+ if column not in fieldnames:
+ fieldnames.append(column)
+
+ processed = 0
+ skipped = 0
+ updated = 0
+ errors = 0
+
+ for _, row in enumerate(rows):
+ if max_rows and processed >= max_rows:
+ logger.info("Reached max limit (%s). Stopping.", max_rows)
+ break
+
+ if row.get("Active", "").strip().upper() != "TRUE":
+ skipped += 1
+ continue
+
+ primary_link = (row.get("Primary Link") or "").strip()
+ owner, repo = parse_github_repo(primary_link)
+ if not owner or not repo:
+ skipped += 1
+ continue
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
### `scripts/maintenance/update_github_release_data.py`
-The `github_get` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
+The `main` function in [`scripts/maintenance/update_github_release_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/maintenance/update_github_release_data.py) handles a key part of this chapter's functionality:
```py
-
-
def github_get(url: str, params: dict | None = None) -> requests.Response:
response = requests.get(url, headers=HEADERS, params=params, timeout=10)
if response.status_code == 403 and response.headers.get("X-RateLimit-Remaining") == "0":
@@ -205,6 +160,49 @@ def fetch_last_commit_date(owner: str, repo: str) -> tuple[str | None, str]:
if response.status_code == 404:
return None, "not_found"
return None, f"http_{response.status_code}"
+
+
+```
+
+This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+
+### `scripts/readme/generate_readme.py`
+
+The `build_root_generator` function in [`scripts/readme/generate_readme.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/readme/generate_readme.py) handles a key part of this chapter's functionality:
+
+```py
+
+
+def build_root_generator(
+ style_id: str,
+ csv_path: str,
+ template_dir: str,
+ assets_dir: str,
+ repo_root: str,
+) -> ReadmeGenerator:
+ """Return the generator instance for a root style."""
+ style_id = style_id.lower()
+ generator_cls = STYLE_GENERATORS.get(style_id)
+ if generator_cls is None:
+ raise ValueError(f"Unknown root style: {style_id}")
+ if generator_cls is ParameterizedFlatListGenerator:
+ return ParameterizedFlatListGenerator(
+ csv_path,
+ template_dir,
+ assets_dir,
+ repo_root,
+ category_slug="all",
+ sort_type="az",
+ )
+ return generator_cls(csv_path, template_dir, assets_dir, repo_root)
+
+
+def main():
+ """Main entry point - generates all README versions."""
+ repo_root = REPO_ROOT
+
+ csv_path = str(repo_root / "THE_RESOURCES_TABLE.csv")
+ template_dir = str(repo_root / "templates")
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
@@ -214,11 +212,11 @@ This function is important because it defines how Awesome Claude Code Tutorial:
```mermaid
flowchart TD
- A[parse_github_resource_url]
- B[format_commit_date]
- C[parse_github_repo]
- D[github_get]
- E[fetch_last_commit_date]
+ A[fetch_latest_release]
+ B[update_release_data]
+ C[main]
+ D[build_root_generator]
+ E[main]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-code-tutorial/08-contribution-workflow-and-governance.md b/tutorials/awesome-claude-code-tutorial/08-contribution-workflow-and-governance.md
index b19d1e02..542ad556 100644
--- a/tutorials/awesome-claude-code-tutorial/08-contribution-workflow-and-governance.md
+++ b/tutorials/awesome-claude-code-tutorial/08-contribution-workflow-and-governance.md
@@ -50,161 +50,168 @@ Next steps:
- trial one skill, one hook, and one slash command with strict validation
- contribute one high-signal recommendation with clear evidence
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/resources/detect_informal_submission.py`
+### `tools/readme_tree/update_readme_tree.py`
-The `set_github_output` function in [`scripts/resources/detect_informal_submission.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/detect_informal_submission.py) handles a key part of this chapter's functionality:
+The `Node`, `IgnoreRule`, and `GitIgnoreChecker` dataclasses in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handle a key part of this chapter's functionality:
```py
+import subprocess
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+
+import yaml
+
+@dataclass
+class Node:
+ """Tree node representing a file or directory."""
-def set_github_output(name: str, value: str) -> None:
- """Set a GitHub Actions output variable safely."""
- # Sanitize both name and value to prevent injection attacks
- safe_name = sanitize_output(name)
- safe_value = sanitize_output(value)
+ name: str
+ is_dir: bool
+ children: dict[str, Node] = field(default_factory=dict)
- github_output = os.environ.get("GITHUB_OUTPUT")
- if github_output:
- with open(github_output, "a") as f:
- f.write(f"{safe_name}={safe_value}\n")
- else:
- # For local testing, just print
- print(f"::set-output name={safe_name}::{safe_value}")
+@dataclass(frozen=True)
+class IgnoreRule:
+ """Parsed ignore rule from config patterns."""
-def main() -> None:
- """Entry point for GitHub Actions."""
- title = os.environ.get("ISSUE_TITLE", "")
- body = os.environ.get("ISSUE_BODY", "")
+ pattern: str
+ negated: bool
+ dir_only: bool
+ anchored: bool
- result = calculate_confidence(title, body)
- # Output results for GitHub Actions
- set_github_output("action", result.action.value)
- set_github_output("confidence", f"{result.confidence:.0%}")
- set_github_output("matched_signals", ", ".join(result.matched_signals))
+@dataclass
+class GitIgnoreChecker:
+ """Check paths against gitignore using `git check-ignore`."""
- # Also print for logging
- print(f"Confidence: {result.confidence:.2%}")
- print(f"Action: {result.action.value}")
+ repo_root: Path
```
-This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+These classes are important because they define how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/resources/detect_informal_submission.py`
+### `tools/readme_tree/update_readme_tree.py`
-The `main` function in [`scripts/resources/detect_informal_submission.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/detect_informal_submission.py) handles a key part of this chapter's functionality:
+The `IgnoreRule` class in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
```py
+@dataclass(frozen=True)
+class IgnoreRule:
+ """Parsed ignore rule from config patterns."""
-def main() -> None:
- """Entry point for GitHub Actions."""
- title = os.environ.get("ISSUE_TITLE", "")
- body = os.environ.get("ISSUE_BODY", "")
+ pattern: str
+ negated: bool
+ dir_only: bool
+ anchored: bool
- result = calculate_confidence(title, body)
- # Output results for GitHub Actions
- set_github_output("action", result.action.value)
- set_github_output("confidence", f"{result.confidence:.0%}")
- set_github_output("matched_signals", ", ".join(result.matched_signals))
+@dataclass
+class GitIgnoreChecker:
+ """Check paths against gitignore using `git check-ignore`."""
+
+ repo_root: Path
+ enabled: bool = True
+ _cache: dict[str, bool] = field(default_factory=dict)
+
+ def __post_init__(self) -> None:
+ """Disable checking when git is unavailable."""
+ if not self._git_available():
+ self.enabled = False
+
+ def _git_available(self) -> bool:
+ """Return True if git is available and repo_root is a git work tree."""
+ try:
+ result = subprocess.run(
+ [
+ "git",
+ "-C",
+ str(self.repo_root),
+```
- # Also print for logging
- print(f"Confidence: {result.confidence:.2%}")
- print(f"Action: {result.action.value}")
- print(f"Matched signals: {result.matched_signals}")
+This class is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+### `tools/readme_tree/update_readme_tree.py`
-if __name__ == "__main__":
- main()
+The `Node`, `IgnoreRule`, and `GitIgnoreChecker` dataclasses in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handle a key part of this chapter's functionality:
-```
-
-This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+```py
+import subprocess
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
-### `scripts/resources/detect_informal_submission.py`
+import yaml
-The `import` interface in [`scripts/resources/detect_informal_submission.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/resources/detect_informal_submission.py) handles a key part of this chapter's functionality:
-```py
-"""
+@dataclass
+class Node:
+ """Tree node representing a file or directory."""
-from __future__ import annotations
+ name: str
+ is_dir: bool
+ children: dict[str, Node] = field(default_factory=dict)
-import os
-import re
-from dataclasses import dataclass
-from enum import Enum
+@dataclass(frozen=True)
+class IgnoreRule:
+ """Parsed ignore rule from config patterns."""
-class Action(Enum):
- NONE = "none"
- WARN = "warn" # Medium confidence: warn but don't close
- CLOSE = "close" # High confidence: warn and close
+ pattern: str
+ negated: bool
+ dir_only: bool
+ anchored: bool
@dataclass
-class DetectionResult:
- confidence: float
- action: Action
- matched_signals: list[str]
-
-
-# Template field labels - VERY strong indicator (from the issue form)
-# Matching 3+ of these is almost certainly a copy-paste from template without using form
-TEMPLATE_FIELD_LABELS = [
- "display name:",
- "category:",
- "sub-category:",
- "primary link:",
- "author name:",
- "author link:",
+class GitIgnoreChecker:
+ """Check paths against gitignore using `git check-ignore`."""
+
+ repo_root: Path
```
-This interface is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
+These classes are important because they define how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
-### `scripts/ticker/fetch_repo_ticker_data.py`
+### `tools/readme_tree/update_readme_tree.py`
-The `load_previous_data` function in [`scripts/ticker/fetch_repo_ticker_data.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/scripts/ticker/fetch_repo_ticker_data.py) handles a key part of this chapter's functionality:
+The `find_repo_root` function in [`tools/readme_tree/update_readme_tree.py`](https://github.com/hesreallyhim/awesome-claude-code/blob/HEAD/tools/readme_tree/update_readme_tree.py) handles a key part of this chapter's functionality:
```py
-def load_previous_data(csv_path: Path) -> dict[str, dict[str, int]]:
- """
- Load previous repository data from CSV file.
+def find_repo_root(start: Path) -> Path:
+ """Locate the repo root.
+
+ Prefer git to identify the VCS root; fall back to walking upward for pyproject.toml.
Args:
- csv_path: Path to previous CSV file
+ start: Path inside the repo.
Returns:
- Dictionary mapping full_name to metrics dict
- """
- if not csv_path.exists():
- return {}
-
- previous = {}
- with csv_path.open("r", encoding="utf-8") as f:
- reader = csv.DictReader(f)
- for row in reader:
- previous[row["full_name"]] = {
- "stars": int(row["stars"]),
- "watchers": int(row["watchers"]),
- "forks": int(row["forks"]),
- }
-
- print(f"✓ Loaded {len(previous)} repositories from previous data")
- return previous
-
-
-def fetch_repos(token: str) -> list[dict[str, Any]]:
+ The repo root path.
"""
- Fetch repositories from GitHub Search API.
+ p = start.resolve()
+ # Prefer git root if available.
+ try:
+ result = subprocess.run(
+ ["git", "-C", str(p), "rev-parse", "--show-toplevel"],
+ check=False,
+ capture_output=True,
+ text=True,
+ )
+ if result.returncode == 0:
+ git_root = result.stdout.strip()
+ if git_root:
+ return Path(git_root)
+ except FileNotFoundError:
+ pass
+
+ # Fallback: walk upward until pyproject.toml exists.
+ while not (p / "pyproject.toml").exists():
+ if p.parent == p:
```
This function is important because it defines how Awesome Claude Code Tutorial: Curated Claude Code Resource Discovery and Evaluation implements the patterns covered in this chapter.
@@ -214,11 +221,11 @@ This function is important because it defines how Awesome Claude Code Tutorial:
```mermaid
flowchart TD
- A[set_github_output]
- B[main]
- C[import]
- D[load_previous_data]
- E[fetch_repos]
+ A[class]
+ B[IgnoreRule]
+ C[class]
+ D[find_repo_root]
+ E[normalize_key]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/01-getting-started.md b/tutorials/awesome-claude-skills-tutorial/01-getting-started.md
index 25f6ef46..fa423481 100644
--- a/tutorials/awesome-claude-skills-tutorial/01-getting-started.md
+++ b/tutorials/awesome-claude-skills-tutorial/01-getting-started.md
@@ -38,170 +38,168 @@ You now have a simple onboarding loop for skill discovery and initial validation
Next: [Chapter 2: Catalog Taxonomy and Navigation](02-catalog-taxonomy-and-navigation.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/core/visual_effects.py`
+### `slack-gif-creator/templates/explode.py`
-The `Particle` class in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
+The `create_explode_animation` function in [`slack-gif-creator/templates/explode.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/explode.py) handles a key part of this chapter's functionality:
```py
-#!/usr/bin/env python3
-"""
-Visual Effects - Particles, motion blur, impacts, and other effects for GIFs.
-
-This module provides high-impact visual effects that make animations feel
-professional and dynamic while keeping file sizes reasonable.
-"""
-
-from PIL import Image, ImageDraw, ImageFilter
-import numpy as np
-import math
-import random
-from typing import Optional
-
-
-class Particle:
- """A single particle in a particle system."""
-
- def __init__(self, x: float, y: float, vx: float, vy: float,
- lifetime: float, color: tuple[int, int, int],
- size: int = 3, shape: str = 'circle'):
- """
- Initialize a particle.
-
- Args:
- x, y: Starting position
- vx, vy: Velocity
- lifetime: How long particle lives (in frames)
- color: RGB color
- size: Particle size in pixels
- shape: 'circle', 'square', or 'star'
- """
-```
-
-This class is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-
-### `slack-gif-creator/core/visual_effects.py`
-The `ParticleSystem` class in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
-```py
+def create_explode_animation(
+ object_type: str = 'emoji',
+ object_data: dict | None = None,
+ num_frames: int = 30,
+ explode_type: str = 'burst', # 'burst', 'shatter', 'dissolve', 'implode'
+ num_pieces: int = 20,
+ explosion_speed: float = 5.0,
+ center_pos: tuple[int, int] = (240, 240),
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
+ """
+ Create explosion animation.
+ Args:
+ object_type: 'emoji', 'circle', 'text'
+ object_data: Object configuration
+ num_frames: Number of frames
+ explode_type: Type of explosion
+ num_pieces: Number of pieces/particles
+ explosion_speed: Speed of explosion
+ center_pos: Center position
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
-class ParticleSystem:
- """Manages a collection of particles."""
-
- def __init__(self):
- """Initialize particle system."""
- self.particles: list[Particle] = []
-
- def emit(self, x: int, y: int, count: int = 10,
- spread: float = 2.0, speed: float = 5.0,
- color: tuple[int, int, int] = (255, 200, 0),
- lifetime: float = 20.0, size: int = 3, shape: str = 'circle'):
- """
- Emit a burst of particles.
-
- Args:
- x, y: Emission position
- count: Number of particles to emit
- spread: Angle spread (radians)
- speed: Initial speed
- color: Particle color
- lifetime: Particle lifetime in frames
- size: Particle size
- shape: Particle shape
- """
- for _ in range(count):
- # Random angle and speed
- angle = random.uniform(0, 2 * math.pi)
- vel_mag = random.uniform(speed * 0.5, speed * 1.5)
- vx = math.cos(angle) * vel_mag
- vy = math.sin(angle) * vel_mag
+ Returns:
+ List of frames
+ """
```
-This class is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
+This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/visual_effects.py`
+### `slack-gif-creator/templates/explode.py`
-The `add_motion_blur` function in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
+The `create_particle_burst` function in [`slack-gif-creator/templates/explode.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/explode.py) handles a key part of this chapter's functionality:
```py
-def add_motion_blur(frame: Image.Image, prev_frame: Optional[Image.Image],
- blur_amount: float = 0.5) -> Image.Image:
+def create_particle_burst(
+ num_frames: int = 25,
+ particle_count: int = 30,
+ center_pos: tuple[int, int] = (240, 240),
+ colors: list[tuple[int, int, int]] | None = None,
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Add motion blur by blending with previous frame.
+ Create simple particle burst effect.
Args:
- frame: Current frame
- prev_frame: Previous frame (None for first frame)
- blur_amount: Amount of blur (0.0-1.0)
+ num_frames: Number of frames
+ particle_count: Number of particles
+ center_pos: Burst center
+ colors: Particle colors (None for random)
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- Frame with motion blur applied
+ List of frames
"""
- if prev_frame is None:
- return frame
+ particles = ParticleSystem()
+
+ # Emit particles
+ if colors is None:
+ from core.color_palettes import get_palette
+ palette = get_palette('vibrant')
+```
+
+This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
+
+### `slack-gif-creator/core/typography.py`
- # Blend current frame with previous frame
- frame_array = np.array(frame, dtype=np.float32)
- prev_array = np.array(prev_frame, dtype=np.float32)
+The `get_font` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
- blended = frame_array * (1 - blur_amount) + prev_array * blur_amount
- blended = np.clip(blended, 0, 255).astype(np.uint8)
+```py
- return Image.fromarray(blended)
+def get_font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont:
+ """
+ Get a font with fallback support.
-def create_impact_flash(frame: Image.Image, position: tuple[int, int],
- radius: int = 100, intensity: float = 0.7) -> Image.Image:
+ Args:
+ size: Font size in pixels
+ bold: Use bold variant if available
+
+ Returns:
+ ImageFont object
"""
- Create a bright flash effect at impact point.
+ # Try multiple font paths for cross-platform support
+ font_paths = [
+ # macOS fonts
+ "/System/Library/Fonts/Helvetica.ttc",
+ "/System/Library/Fonts/SF-Pro.ttf",
+ "/Library/Fonts/Arial Bold.ttf" if bold else "/Library/Fonts/Arial.ttf",
+ # Linux fonts
+ "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" if bold else "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+ # Windows fonts
+ "C:\\Windows\\Fonts\\arialbd.ttf" if bold else "C:\\Windows\\Fonts\\arial.ttf",
+ ]
+
+ for font_path in font_paths:
+ try:
+ return ImageFont.truetype(font_path, size)
+ except:
+ continue
+
+ # Ultimate fallback
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/visual_effects.py`
+### `slack-gif-creator/core/typography.py`
-The `create_impact_flash` function in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
+The `draw_text_with_outline` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
```py
-def create_impact_flash(frame: Image.Image, position: tuple[int, int],
- radius: int = 100, intensity: float = 0.7) -> Image.Image:
+def draw_text_with_outline(
+ frame: Image.Image,
+ text: str,
+ position: tuple[int, int],
+ font_size: int = 40,
+ text_color: tuple[int, int, int] = (255, 255, 255),
+ outline_color: tuple[int, int, int] = (0, 0, 0),
+ outline_width: int = 3,
+ centered: bool = False,
+ bold: bool = True
+) -> Image.Image:
"""
- Create a bright flash effect at impact point.
+ Draw text with outline for maximum readability.
+
+ This is THE most important function for professional-looking text in GIFs.
+ The outline ensures text is readable on any background.
Args:
frame: PIL Image to draw on
- position: Center of flash
- radius: Flash radius
- intensity: Flash intensity (0.0-1.0)
+ text: Text to draw
+ position: (x, y) position
+ font_size: Font size in pixels
+ text_color: RGB color for text fill
+ outline_color: RGB color for outline
+ outline_width: Width of outline in pixels (2-4 recommended)
+ centered: If True, center text at position
+ bold: Use bold font variant
Returns:
Modified frame
- """
- # Create overlay
- overlay = Image.new('RGBA', frame.size, (0, 0, 0, 0))
- draw = ImageDraw.Draw(overlay)
-
- x, y = position
-
- # Draw concentric circles with decreasing opacity
- num_circles = 5
- for i in range(num_circles):
- alpha = int(255 * intensity * (1 - i / num_circles))
- r = radius * (1 - i / num_circles)
- color = (255, 255, 240, alpha) # Warm white
-
- bbox = [x - r, y - r, x + r, y + r]
- draw.ellipse(bbox, fill=color)
-
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[Particle]
- B[ParticleSystem]
- C[add_motion_blur]
- D[create_impact_flash]
- E[create_shockwave_rings]
+ A[create_explode_animation]
+ B[create_particle_burst]
+ C[get_font]
+ D[draw_text_with_outline]
+ E[draw_text_with_shadow]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/02-catalog-taxonomy-and-navigation.md b/tutorials/awesome-claude-skills-tutorial/02-catalog-taxonomy-and-navigation.md
index 4c2d3c58..4fd283e0 100644
--- a/tutorials/awesome-claude-skills-tutorial/02-catalog-taxonomy-and-navigation.md
+++ b/tutorials/awesome-claude-skills-tutorial/02-catalog-taxonomy-and-navigation.md
@@ -40,170 +40,168 @@ You now know how to navigate the catalog with less noise and faster relevance.
Next: [Chapter 3: Installation Paths: Claude.ai, Claude Code, API](03-installation-paths-claude-ai-claude-code-api.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `skill-creator/scripts/init_skill.py`
+### `slack-gif-creator/core/visual_effects.py`
-The `main` function in [`skill-creator/scripts/init_skill.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/skill-creator/scripts/init_skill.py) handles a key part of this chapter's functionality:
+The `ParticleSystem` class in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
```py
-Delete this entire "Structuring This Skill" section when done - it's just guidance.]
-
-## [TODO: Replace with the first main section based on chosen structure]
-
-[TODO: Add content here. See examples in existing skills:
-- Code samples for technical skills
-- Decision trees for complex workflows
-- Concrete examples with realistic user requests
-- References to scripts/templates/references as needed]
-
-## Resources
-
-This skill includes example resource directories that demonstrate how to organize different types of bundled resources:
-
-### scripts/
-Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
-
-**Examples from other skills:**
-- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation
-- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
-**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations.
-**Note:** Scripts may be executed without loading into context, but can still be read by Claude for patching or environment adjustments.
-
-### references/
-Documentation and reference material intended to be loaded into context to inform Claude's process and thinking.
-
-**Examples from other skills:**
-- Product management: `communication.md`, `context_building.md` - detailed workflow guides
-- BigQuery: API reference documentation and query examples
-- Finance: Schema documentation, company policies
+class ParticleSystem:
+ """Manages a collection of particles."""
+
+ def __init__(self):
+ """Initialize particle system."""
+ self.particles: list[Particle] = []
+
+ def emit(self, x: int, y: int, count: int = 10,
+ spread: float = 2.0, speed: float = 5.0,
+ color: tuple[int, int, int] = (255, 200, 0),
+ lifetime: float = 20.0, size: int = 3, shape: str = 'circle'):
+ """
+ Emit a burst of particles.
+
+ Args:
+ x, y: Emission position
+ count: Number of particles to emit
+ spread: Angle spread (radians)
+ speed: Initial speed
+ color: Particle color
+ lifetime: Particle lifetime in frames
+ size: Particle size
+ shape: Particle shape
+ """
+ for _ in range(count):
+ # Random angle and speed
+ angle = random.uniform(0, 2 * math.pi)
+ vel_mag = random.uniform(speed * 0.5, speed * 1.5)
+ vx = math.cos(angle) * vel_mag
+ vy = math.sin(angle) * vel_mag
```
-This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
+This class is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/morph.py`
+### `slack-gif-creator/core/visual_effects.py`
-The `create_morph_animation` function in [`slack-gif-creator/templates/morph.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/morph.py) handles a key part of this chapter's functionality:
+The `add_motion_blur` function in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
```py
-def create_morph_animation(
- object1_data: dict,
- object2_data: dict,
- num_frames: int = 30,
- morph_type: str = 'crossfade', # 'crossfade', 'scale', 'spin_morph'
- easing: str = 'ease_in_out',
- object_type: str = 'emoji',
- center_pos: tuple[int, int] = (240, 240),
- frame_width: int = 480,
- frame_height: int = 480,
- bg_color: tuple[int, int, int] = (255, 255, 255)
-) -> list[Image.Image]:
+def add_motion_blur(frame: Image.Image, prev_frame: Optional[Image.Image],
+ blur_amount: float = 0.5) -> Image.Image:
"""
- Create morphing animation between two objects.
+ Add motion blur by blending with previous frame.
Args:
- object1_data: First object configuration
- object2_data: Second object configuration
- num_frames: Number of frames
- morph_type: Type of morph effect
- easing: Easing function
- object_type: Type of objects
- center_pos: Center position
- frame_width: Frame width
- frame_height: Frame height
- bg_color: Background color
+ frame: Current frame
+ prev_frame: Previous frame (None for first frame)
+ blur_amount: Amount of blur (0.0-1.0)
Returns:
- List of frames
+ Frame with motion blur applied
"""
+ if prev_frame is None:
+ return frame
+
+ # Blend current frame with previous frame
+ frame_array = np.array(frame, dtype=np.float32)
+ prev_array = np.array(prev_frame, dtype=np.float32)
+
+ blended = frame_array * (1 - blur_amount) + prev_array * blur_amount
+ blended = np.clip(blended, 0, 255).astype(np.uint8)
+
+ return Image.fromarray(blended)
+
+
+def create_impact_flash(frame: Image.Image, position: tuple[int, int],
+ radius: int = 100, intensity: float = 0.7) -> Image.Image:
+ """
+ Create a bright flash effect at impact point.
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/morph.py`
+### `slack-gif-creator/core/visual_effects.py`
-The `create_reaction_morph` function in [`slack-gif-creator/templates/morph.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/morph.py) handles a key part of this chapter's functionality:
+The `create_impact_flash` function in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
```py
-def create_reaction_morph(
- emoji_start: str,
- emoji_end: str,
- num_frames: int = 20,
- frame_size: int = 128
-) -> list[Image.Image]:
+def create_impact_flash(frame: Image.Image, position: tuple[int, int],
+ radius: int = 100, intensity: float = 0.7) -> Image.Image:
"""
- Create quick emoji reaction morph (for emoji GIFs).
+ Create a bright flash effect at impact point.
Args:
- emoji_start: Starting emoji
- emoji_end: Ending emoji
- num_frames: Number of frames
- frame_size: Frame size (square)
+ frame: PIL Image to draw on
+ position: Center of flash
+ radius: Flash radius
+ intensity: Flash intensity (0.0-1.0)
Returns:
- List of frames
+ Modified frame
"""
- return create_morph_animation(
- object1_data={'emoji': emoji_start, 'size': 80},
- object2_data={'emoji': emoji_end, 'size': 80},
- num_frames=num_frames,
- morph_type='crossfade',
- easing='ease_in_out',
- object_type='emoji',
- center_pos=(frame_size // 2, frame_size // 2),
- frame_width=frame_size,
- frame_height=frame_size,
- bg_color=(255, 255, 255)
- )
+ # Create overlay
+ overlay = Image.new('RGBA', frame.size, (0, 0, 0, 0))
+ draw = ImageDraw.Draw(overlay)
+
+ x, y = position
+
+ # Draw concentric circles with decreasing opacity
+ num_circles = 5
+ for i in range(num_circles):
+ alpha = int(255 * intensity * (1 - i / num_circles))
+ r = radius * (1 - i / num_circles)
+ color = (255, 255, 240, alpha) # Warm white
+
+ bbox = [x - r, y - r, x + r, y + r]
+ draw.ellipse(bbox, fill=color)
+
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/morph.py`
+### `slack-gif-creator/core/visual_effects.py`
-The `create_shape_morph` function in [`slack-gif-creator/templates/morph.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/morph.py) handles a key part of this chapter's functionality:
+The `create_shockwave_rings` function in [`slack-gif-creator/core/visual_effects.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/visual_effects.py) handles a key part of this chapter's functionality:
```py
-def create_shape_morph(
- shapes: list[dict],
- num_frames: int = 60,
- frames_per_shape: int = 20,
- frame_width: int = 480,
- frame_height: int = 480,
- bg_color: tuple[int, int, int] = (255, 255, 255)
-) -> list[Image.Image]:
+def create_shockwave_rings(frame: Image.Image, position: tuple[int, int],
+ radii: list[int], color: tuple[int, int, int] = (255, 200, 0),
+ width: int = 3) -> Image.Image:
"""
- Morph through a sequence of shapes.
+ Create expanding ring effects.
Args:
- shapes: List of shape dicts with 'radius' and 'color'
- num_frames: Total number of frames
- frames_per_shape: Frames to spend on each morph
- frame_width: Frame width
- frame_height: Frame height
- bg_color: Background color
+ frame: PIL Image to draw on
+ position: Center of rings
+ radii: List of ring radii
+ color: Ring color
+ width: Ring width
Returns:
- List of frames
+ Modified frame
+ """
+ draw = ImageDraw.Draw(frame)
+ x, y = position
+
+ for radius in radii:
+ bbox = [x - radius, y - radius, x + radius, y + radius]
+ draw.ellipse(bbox, outline=color, width=width)
+
+ return frame
+
+
+def create_explosion_effect(frame: Image.Image, position: tuple[int, int],
+ radius: int, progress: float,
+ color: tuple[int, int, int] = (255, 150, 0)) -> Image.Image:
"""
- frames = []
- center = (frame_width // 2, frame_height // 2)
-
- for i in range(num_frames):
- # Determine which shapes we're morphing between
- cycle_progress = (i % (frames_per_shape * len(shapes))) / frames_per_shape
- shape_idx = int(cycle_progress) % len(shapes)
- next_shape_idx = (shape_idx + 1) % len(shapes)
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -213,11 +211,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[main]
- B[create_morph_animation]
- C[create_reaction_morph]
- D[create_shape_morph]
- E[create_fade_animation]
+ A[ParticleSystem]
+ B[add_motion_blur]
+ C[create_impact_flash]
+ D[create_shockwave_rings]
+ E[create_explosion_effect]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/03-installation-paths-claude-ai-claude-code-api.md b/tutorials/awesome-claude-skills-tutorial/03-installation-paths-claude-ai-claude-code-api.md
index 9dd78e82..c5df2542 100644
--- a/tutorials/awesome-claude-skills-tutorial/03-installation-paths-claude-ai-claude-code-api.md
+++ b/tutorials/awesome-claude-skills-tutorial/03-installation-paths-claude-ai-claude-code-api.md
@@ -39,159 +39,168 @@ You now understand runtime-specific install patterns and validation points.
Next: [Chapter 4: Skill Authoring Template and Quality Standards](04-skill-authoring-template-and-quality-standards.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/core/typography.py`
+### `slack-gif-creator/templates/zoom.py`
-The `draw_text_in_box` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
+The `create_zoom_animation` function in [`slack-gif-creator/templates/zoom.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/zoom.py) handles a key part of this chapter's functionality:
```py
-def draw_text_in_box(
- frame: Image.Image,
- text: str,
- position: tuple[int, int],
- font_size: int = 40,
- text_color: tuple[int, int, int] = (255, 255, 255),
- box_color: tuple[int, int, int] = (0, 0, 0),
- box_alpha: float = 0.7,
- padding: int = 10,
- centered: bool = True,
- bold: bool = True
-) -> Image.Image:
+def create_zoom_animation(
+ object_type: str = 'emoji',
+ object_data: dict | None = None,
+ num_frames: int = 30,
+ zoom_type: str = 'in', # 'in', 'out', 'in_out', 'punch'
+ scale_range: tuple[float, float] = (0.1, 2.0),
+ easing: str = 'ease_out',
+ add_motion_blur: bool = False,
+ center_pos: tuple[int, int] = (240, 240),
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Draw text in a semi-transparent box for guaranteed readability.
+ Create zoom animation.
Args:
- frame: PIL Image to draw on
- text: Text to draw
- position: (x, y) position
- font_size: Font size in pixels
- text_color: RGB color for text
- box_color: RGB color for background box
- box_alpha: Opacity of box (0.0-1.0)
- padding: Padding around text in pixels
- centered: If True, center at position
- bold: Use bold font variant
+ object_type: 'emoji', 'text', 'image'
+ object_data: Object configuration
+ num_frames: Number of frames
+ zoom_type: Type of zoom effect
+ scale_range: (start_scale, end_scale) tuple
+ easing: Easing function
+ add_motion_blur: Add blur for speed effect
+ center_pos: Center position
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- Modified frame
- """
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/typography.py`
+### `slack-gif-creator/templates/zoom.py`
-The `get_text_size` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
+The `create_explosion_zoom` function in [`slack-gif-creator/templates/zoom.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/zoom.py) handles a key part of this chapter's functionality:
```py
-def get_text_size(text: str, font_size: int, bold: bool = True) -> tuple[int, int]:
+def create_explosion_zoom(
+ emoji: str = '💥',
+ num_frames: int = 20,
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Get the dimensions of text without drawing it.
+ Create dramatic explosion zoom effect.
Args:
- text: Text to measure
- font_size: Font size in pixels
- bold: Use bold font variant
+ emoji: Emoji to explode
+ num_frames: Number of frames
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- (width, height) tuple
- """
- font = get_font(font_size, bold=bold)
- # Create temporary image to measure
- temp_img = Image.new('RGB', (1, 1))
- draw = ImageDraw.Draw(temp_img)
- bbox = draw.textbbox((0, 0), text, font=font)
- width = bbox[2] - bbox[0]
- height = bbox[3] - bbox[1]
- return (width, height)
-
-
-def get_optimal_font_size(text: str, max_width: int, max_height: int,
- start_size: int = 60) -> int:
+ List of frames
"""
- Find the largest font size that fits within given dimensions.
+ frames = []
- Args:
- text: Text to size
- max_width: Maximum width in pixels
+ for i in range(num_frames):
+ t = i / (num_frames - 1) if num_frames > 1 else 0
+
+ # Exponential zoom
+ scale = 0.1 * math.exp(t * 5)
+
+ # Add rotation for drama
+ angle = t * 360 * 2
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/typography.py`
+### `slack-gif-creator/templates/zoom.py`
-The `get_optimal_font_size` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
+The `create_mind_blown_zoom` function in [`slack-gif-creator/templates/zoom.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/zoom.py) handles a key part of this chapter's functionality:
```py
-def get_optimal_font_size(text: str, max_width: int, max_height: int,
- start_size: int = 60) -> int:
+def create_mind_blown_zoom(
+ emoji: str = '🤯',
+ num_frames: int = 30,
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Find the largest font size that fits within given dimensions.
+ Create "mind blown" dramatic zoom with shake.
Args:
- text: Text to size
- max_width: Maximum width in pixels
- max_height: Maximum height in pixels
- start_size: Starting font size to try
+ emoji: Emoji to use
+ num_frames: Number of frames
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- Optimal font size
+ List of frames
"""
- font_size = start_size
- while font_size > 10:
- width, height = get_text_size(text, font_size)
- if width <= max_width and height <= max_height:
- return font_size
- font_size -= 2
- return 10 # Minimum font size
+ frames = []
+ for i in range(num_frames):
+ t = i / (num_frames - 1) if num_frames > 1 else 0
-def scale_font_for_frame(base_size: int, frame_width: int, frame_height: int) -> int:
- """
- Scale font size proportionally to frame dimensions.
-
- Useful for maintaining relative text size across different GIF dimensions.
-
- Args:
+ # Zoom in then shake
+ if t < 0.5:
+ scale = interpolate(0.3, 1.2, t * 2, 'ease_out')
+ shake_x = 0
+ shake_y = 0
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/typography.py`
+### `slack-gif-creator/templates/wiggle.py`
-The `scale_font_for_frame` function in [`slack-gif-creator/core/typography.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/typography.py) handles a key part of this chapter's functionality:
+The `create_wiggle_animation` function in [`slack-gif-creator/templates/wiggle.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/wiggle.py) handles a key part of this chapter's functionality:
```py
-def scale_font_for_frame(base_size: int, frame_width: int, frame_height: int) -> int:
+def create_wiggle_animation(
+ object_type: str = 'emoji',
+ object_data: dict | None = None,
+ num_frames: int = 30,
+ wiggle_type: str = 'jello', # 'jello', 'wave', 'bounce', 'sway'
+ intensity: float = 1.0,
+ cycles: float = 2.0,
+ center_pos: tuple[int, int] = (240, 240),
+ frame_width: int = 480,
+ frame_height: int = 480,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Scale font size proportionally to frame dimensions.
-
- Useful for maintaining relative text size across different GIF dimensions.
+ Create wiggle/wobble animation.
Args:
- base_size: Base font size for 480x480 frame
- frame_width: Actual frame width
- frame_height: Actual frame height
+ object_type: 'emoji', 'text'
+ object_data: Object configuration
+ num_frames: Number of frames
+ wiggle_type: Type of wiggle motion
+ intensity: Wiggle intensity multiplier
+ cycles: Number of wiggle cycles
+ center_pos: Center position
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- Scaled font size
+ List of frames
"""
- # Use average dimension for scaling
- avg_dimension = (frame_width + frame_height) / 2
- base_dimension = 480 # Reference dimension
- scale_factor = avg_dimension / base_dimension
- return max(10, int(base_size * scale_factor))
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -201,11 +210,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[draw_text_in_box]
- B[get_text_size]
- C[get_optimal_font_size]
- D[scale_font_for_frame]
- E[create_blank_frame]
+ A[create_zoom_animation]
+ B[create_explosion_zoom]
+ C[create_mind_blown_zoom]
+ D[create_wiggle_animation]
+ E[create_excited_wiggle]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/04-skill-authoring-template-and-quality-standards.md b/tutorials/awesome-claude-skills-tutorial/04-skill-authoring-template-and-quality-standards.md
index 4ac30385..32634cb2 100644
--- a/tutorials/awesome-claude-skills-tutorial/04-skill-authoring-template-and-quality-standards.md
+++ b/tutorials/awesome-claude-skills-tutorial/04-skill-authoring-template-and-quality-standards.md
@@ -40,170 +40,159 @@ You now have a rubric for authoring skills with stronger reuse and maintainabili
Next: [Chapter 5: App Automation via Composio Skill Packs](05-app-automation-via-composio-skill-packs.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/core/frame_composer.py`
+### `slack-gif-creator/templates/spin.py`
-The `draw_circle_with_shadow` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
+The `create_loading_spinner` function in [`slack-gif-creator/templates/spin.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/spin.py) handles a key part of this chapter's functionality:
```py
-def draw_circle_with_shadow(frame: Image.Image, center: tuple[int, int], radius: int,
- fill_color: tuple[int, int, int],
- shadow_offset: tuple[int, int] = (3, 3),
- shadow_color: tuple[int, int, int] = (0, 0, 0)) -> Image.Image:
+def create_loading_spinner(
+ num_frames: int = 20,
+ spinner_type: str = 'dots', # 'dots', 'arc', 'emoji'
+ size: int = 100,
+ color: tuple[int, int, int] = (100, 150, 255),
+ frame_width: int = 128,
+ frame_height: int = 128,
+ bg_color: tuple[int, int, int] = (255, 255, 255)
+) -> list[Image.Image]:
"""
- Draw a circle with drop shadow.
+ Create a loading spinner animation.
Args:
- frame: PIL Image to draw on
- center: (x, y) center position
- radius: Circle radius
- fill_color: RGB fill color
- shadow_offset: (x, y) shadow offset
- shadow_color: RGB shadow color
+ num_frames: Number of frames
+ spinner_type: Type of spinner
+ size: Spinner size
+ color: Spinner color
+ frame_width: Frame width
+ frame_height: Frame height
+ bg_color: Background color
Returns:
- Modified frame
+ List of frames
"""
- draw = ImageDraw.Draw(frame)
- x, y = center
-
- # Draw shadow
- shadow_center = (x + shadow_offset[0], y + shadow_offset[1])
- shadow_bbox = [
- shadow_center[0] - radius,
- shadow_center[1] - radius,
- shadow_center[0] + radius,
- shadow_center[1] + radius
- ]
- draw.ellipse(shadow_bbox, fill=shadow_color)
+ from PIL import ImageDraw
+ frames = []
+ center = (frame_width // 2, frame_height // 2)
+
+ for i in range(num_frames):
+ frame = create_blank_frame(frame_width, frame_height, bg_color)
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/frame_composer.py`
+### `document-skills/xlsx/recalc.py`
-The `draw_rounded_rectangle` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
+The `setup_libreoffice_macro` function in [`document-skills/xlsx/recalc.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/document-skills/xlsx/recalc.py) handles a key part of this chapter's functionality:
```py
-def draw_rounded_rectangle(frame: Image.Image, top_left: tuple[int, int],
- bottom_right: tuple[int, int], radius: int,
- fill_color: Optional[tuple[int, int, int]] = None,
- outline_color: Optional[tuple[int, int, int]] = None,
- outline_width: int = 1) -> Image.Image:
- """
- Draw a rectangle with rounded corners.
-
- Args:
- frame: PIL Image to draw on
- top_left: (x, y) top-left corner
- bottom_right: (x, y) bottom-right corner
- radius: Corner radius
- fill_color: RGB fill color (None for no fill)
- outline_color: RGB outline color (None for no outline)
- outline_width: Outline width
-
- Returns:
- Modified frame
- """
- draw = ImageDraw.Draw(frame)
- x1, y1 = top_left
- x2, y2 = bottom_right
-
- # Draw rounded rectangle using PIL's built-in method
- draw.rounded_rectangle([x1, y1, x2, y2], radius=radius,
- fill=fill_color, outline=outline_color, width=outline_width)
-
- return frame
-
+def setup_libreoffice_macro():
+ """Setup LibreOffice macro for recalculation if not already configured"""
+ if platform.system() == 'Darwin':
+ macro_dir = os.path.expanduser('~/Library/Application Support/LibreOffice/4/user/basic/Standard')
+ else:
+ macro_dir = os.path.expanduser('~/.config/libreoffice/4/user/basic/Standard')
+
+ macro_file = os.path.join(macro_dir, 'Module1.xba')
+
+ if os.path.exists(macro_file):
+ with open(macro_file, 'r') as f:
+ if 'RecalculateAndSave' in f.read():
+ return True
+
+ if not os.path.exists(macro_dir):
+ subprocess.run(['soffice', '--headless', '--terminate_after_init'],
+ capture_output=True, timeout=10)
+ os.makedirs(macro_dir, exist_ok=True)
+
+ macro_content = '''
+
+
+ Sub RecalculateAndSave()
+ ThisComponent.calculateAll()
+ ThisComponent.store()
+ ThisComponent.close(True)
+ End Sub
+ '''
+
+ try:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/frame_composer.py`
+### `document-skills/xlsx/recalc.py`
-The `add_vignette` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
+The `setup_libreoffice_macro` function in [`document-skills/xlsx/recalc.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/document-skills/xlsx/recalc.py) handles a key part of this chapter's functionality:
```py
-
-def add_vignette(frame: Image.Image, strength: float = 0.5) -> Image.Image:
- """
- Add a vignette effect (darkened edges) to frame.
-
- Args:
- frame: PIL Image
- strength: Vignette strength (0.0-1.0)
-
- Returns:
- Frame with vignette
- """
- width, height = frame.size
-
- # Create radial gradient mask
- center_x, center_y = width // 2, height // 2
- max_dist = ((width / 2) ** 2 + (height / 2) ** 2) ** 0.5
-
- # Create overlay
- overlay = Image.new('RGB', (width, height), (0, 0, 0))
- pixels = overlay.load()
-
- for y in range(height):
- for x in range(width):
- # Calculate distance from center
- dx = x - center_x
- dy = y - center_y
- dist = (dx ** 2 + dy ** 2) ** 0.5
-
- # Calculate vignette value
- vignette = min(1, (dist / max_dist) * strength)
+def setup_libreoffice_macro():
+ """Setup LibreOffice macro for recalculation if not already configured"""
+ if platform.system() == 'Darwin':
+ macro_dir = os.path.expanduser('~/Library/Application Support/LibreOffice/4/user/basic/Standard')
+ else:
+ macro_dir = os.path.expanduser('~/.config/libreoffice/4/user/basic/Standard')
+
+ macro_file = os.path.join(macro_dir, 'Module1.xba')
+
+ if os.path.exists(macro_file):
+ with open(macro_file, 'r') as f:
+ if 'RecalculateAndSave' in f.read():
+ return True
+
+ if not os.path.exists(macro_dir):
+ subprocess.run(['soffice', '--headless', '--terminate_after_init'],
+ capture_output=True, timeout=10)
+ os.makedirs(macro_dir, exist_ok=True)
+
+ macro_content = '''
+
+
+ Sub RecalculateAndSave()
+ ThisComponent.calculateAll()
+ ThisComponent.store()
+ ThisComponent.close(True)
+ End Sub
+ '''
+
+ try:
+ with open(macro_file, 'w') as f:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/frame_composer.py`
+### `document-skills/xlsx/recalc.py`
-The `draw_star` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
+The `main` function in [`document-skills/xlsx/recalc.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/document-skills/xlsx/recalc.py) handles a key part of this chapter's functionality:
```py
-def draw_star(frame: Image.Image, center: tuple[int, int], size: int,
- fill_color: tuple[int, int, int],
- outline_color: Optional[tuple[int, int, int]] = None,
- outline_width: int = 1) -> Image.Image:
- """
- Draw a 5-pointed star.
-
- Args:
- frame: PIL Image to draw on
- center: (x, y) center position
- size: Star size (outer radius)
- fill_color: RGB fill color
- outline_color: RGB outline color (None for no outline)
- outline_width: Outline width
-
- Returns:
- Modified frame
- """
- import math
- draw = ImageDraw.Draw(frame)
- x, y = center
-
- # Calculate star points
- points = []
- for i in range(10):
- angle = (i * 36 - 90) * math.pi / 180 # 36 degrees per point, start at top
- radius = size if i % 2 == 0 else size * 0.4 # Alternate between outer and inner
- px = x + radius * math.cos(angle)
- py = y + radius * math.sin(angle)
- points.append((px, py))
+def main():
+ if len(sys.argv) < 2:
+ print("Usage: python recalc.py <file.xlsx> [timeout_seconds]")
+ print("\nRecalculates all formulas in an Excel file using LibreOffice")
+ print("\nReturns JSON with error details:")
+ print(" - status: 'success' or 'errors_found'")
+ print(" - total_errors: Total number of Excel errors found")
+ print(" - total_formulas: Number of formulas in the file")
+ print(" - error_summary: Breakdown by error type with locations")
+ print(" - #VALUE!, #DIV/0!, #REF!, #NAME?, #NULL!, #NUM!, #N/A")
+ sys.exit(1)
+
+ filename = sys.argv[1]
+ timeout = int(sys.argv[2]) if len(sys.argv) > 2 else 30
+
+ result = recalc(filename, timeout)
+ print(json.dumps(result, indent=2))
+
+
+if __name__ == '__main__':
+ main()
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -213,11 +202,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[draw_circle_with_shadow]
- B[draw_rounded_rectangle]
- C[add_vignette]
- D[draw_star]
- E[create_zoom_animation]
+ A[create_loading_spinner]
+ B[setup_libreoffice_macro]
+ C[recalc]
+ D[main]
+ E[get_palette]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/05-app-automation-via-composio-skill-packs.md b/tutorials/awesome-claude-skills-tutorial/05-app-automation-via-composio-skill-packs.md
index ab6f1610..57c5c6ac 100644
--- a/tutorials/awesome-claude-skills-tutorial/05-app-automation-via-composio-skill-packs.md
+++ b/tutorials/awesome-claude-skills-tutorial/05-app-automation-via-composio-skill-packs.md
@@ -39,170 +39,163 @@ You now have a safer rollout model for app-connected skill automation.
Next: [Chapter 6: Contribution Workflow and Repository Governance](06-contribution-workflow-and-repository-governance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/core/color_palettes.py`
+### `slack-gif-creator/core/validators.py`
-The `get_complementary_color` function in [`slack-gif-creator/core/color_palettes.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/color_palettes.py) handles a key part of this chapter's functionality:
+The `validate_dimensions` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
```py
-def get_complementary_color(color: tuple[int, int, int]) -> tuple[int, int, int]:
+def validate_dimensions(width: int, height: int, is_emoji: bool = True) -> tuple[bool, dict]:
"""
- Get the complementary (opposite) color on the color wheel.
+ Check if dimensions are suitable for Slack.
Args:
- color: RGB color tuple
+ width: Frame width in pixels
+ height: Frame height in pixels
+ is_emoji: True for emoji GIF, False for message GIF
Returns:
- Complementary RGB color
- """
- # Convert to HSV
- r, g, b = [x / 255.0 for x in color]
- h, s, v = colorsys.rgb_to_hsv(r, g, b)
-
- # Rotate hue by 180 degrees (0.5 in 0-1 scale)
- h_comp = (h + 0.5) % 1.0
-
- # Convert back to RGB
- r_comp, g_comp, b_comp = colorsys.hsv_to_rgb(h_comp, s, v)
- return (int(r_comp * 255), int(g_comp * 255), int(b_comp * 255))
-
-
-def lighten_color(color: tuple[int, int, int], amount: float = 0.3) -> tuple[int, int, int]:
+ Tuple of (passes: bool, info: dict with details)
"""
- Lighten a color by a given amount.
-
- Args:
- color: RGB color tuple
- amount: Amount to lighten (0.0-1.0)
-
+ info = {
+ 'width': width,
+ 'height': height,
+ 'is_square': width == height,
+ 'type': 'emoji' if is_emoji else 'message'
+ }
+
+ if is_emoji:
+ # Emoji GIFs should be 128x128
+ optimal = width == height == 128
+ acceptable = width == height and 64 <= width <= 128
+
+ info['optimal'] = optimal
+ info['acceptable'] = acceptable
+
+ if optimal:
+ print(f"✓ {width}x{height} - optimal for emoji")
+ passes = True
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/color_palettes.py`
+### `slack-gif-creator/core/validators.py`
-The `lighten_color` function in [`slack-gif-creator/core/color_palettes.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/color_palettes.py) handles a key part of this chapter's functionality:
+The `validate_gif` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
```py
-def lighten_color(color: tuple[int, int, int], amount: float = 0.3) -> tuple[int, int, int]:
+def validate_gif(gif_path: str | Path, is_emoji: bool = True) -> tuple[bool, dict]:
"""
- Lighten a color by a given amount.
+ Run all validations on a GIF file.
Args:
- color: RGB color tuple
- amount: Amount to lighten (0.0-1.0)
+ gif_path: Path to GIF file
+ is_emoji: True for emoji GIF, False for message GIF
Returns:
- Lightened RGB color
+ Tuple of (all_pass: bool, results: dict)
"""
- r, g, b = color
- r = min(255, int(r + (255 - r) * amount))
- g = min(255, int(g + (255 - g) * amount))
- b = min(255, int(b + (255 - b) * amount))
- return (r, g, b)
+ from PIL import Image
+ gif_path = Path(gif_path)
-def darken_color(color: tuple[int, int, int], amount: float = 0.3) -> tuple[int, int, int]:
- """
- Darken a color by a given amount.
+ if not gif_path.exists():
+ return False, {'error': f'File not found: {gif_path}'}
- Args:
- color: RGB color tuple
- amount: Amount to darken (0.0-1.0)
+ print(f"\nValidating {gif_path.name} as {'emoji' if is_emoji else 'message'} GIF:")
+ print("=" * 60)
+
+ # Check file size
+ size_pass, size_info = check_slack_size(gif_path, is_emoji)
+
+ # Check dimensions
+ try:
+ with Image.open(gif_path) as img:
+ width, height = img.size
+ dim_pass, dim_info = validate_dimensions(width, height, is_emoji)
- Returns:
- Darkened RGB color
- """
- r, g, b = color
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/color_palettes.py`
+### `slack-gif-creator/core/validators.py`
-The `darken_color` function in [`slack-gif-creator/core/color_palettes.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/color_palettes.py) handles a key part of this chapter's functionality:
+The `get_optimization_suggestions` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
```py
-def darken_color(color: tuple[int, int, int], amount: float = 0.3) -> tuple[int, int, int]:
+def get_optimization_suggestions(results: dict) -> list[str]:
"""
- Darken a color by a given amount.
+ Get suggestions for optimizing a GIF based on validation results.
Args:
- color: RGB color tuple
- amount: Amount to darken (0.0-1.0)
+ results: Results dict from validate_gif()
Returns:
- Darkened RGB color
+ List of suggestion strings
"""
- r, g, b = color
- r = max(0, int(r * (1 - amount)))
- g = max(0, int(g * (1 - amount)))
- b = max(0, int(b * (1 - amount)))
- return (r, g, b)
-
-
-def blend_colors(color1: tuple[int, int, int], color2: tuple[int, int, int],
- ratio: float = 0.5) -> tuple[int, int, int]:
- """
- Blend two colors together.
-
- Args:
- color1: First RGB color
- color2: Second RGB color
- ratio: Blend ratio (0.0 = all color1, 1.0 = all color2)
-
- Returns:
- Blended RGB color
+ suggestions = []
+
+ if not results.get('passes', False):
+ size_info = results.get('size', {})
+ dim_info = results.get('dimensions', {})
+
+ # Size suggestions
+ if not size_info.get('passes', True):
+ overage = size_info['size_kb'] - size_info['limit_kb']
+ if size_info['type'] == 'emoji':
+ suggestions.append(f"Reduce file size by {overage:.1f} KB:")
+ suggestions.append(" - Limit to 10-12 frames")
+ suggestions.append(" - Use 32-40 colors maximum")
+ suggestions.append(" - Remove gradients (solid colors compress better)")
+ suggestions.append(" - Simplify design")
+ else:
+ suggestions.append(f"Reduce file size by {overage:.1f} KB:")
+ suggestions.append(" - Reduce frame count or FPS")
+ suggestions.append(" - Use fewer colors (128 → 64)")
+ suggestions.append(" - Reduce dimensions")
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/color_palettes.py`
+### `slack-gif-creator/core/validators.py`
-The `blend_colors` function in [`slack-gif-creator/core/color_palettes.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/color_palettes.py) handles a key part of this chapter's functionality:
+The `is_slack_ready` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
```py
-
-def blend_colors(color1: tuple[int, int, int], color2: tuple[int, int, int],
- ratio: float = 0.5) -> tuple[int, int, int]:
+# Convenience function for quick checks
+def is_slack_ready(gif_path: str | Path, is_emoji: bool = True, verbose: bool = True) -> bool:
"""
- Blend two colors together.
+ Quick check if GIF is ready for Slack.
Args:
- color1: First RGB color
- color2: Second RGB color
- ratio: Blend ratio (0.0 = all color1, 1.0 = all color2)
+ gif_path: Path to GIF file
+ is_emoji: True for emoji GIF, False for message GIF
+ verbose: Print detailed feedback
Returns:
- Blended RGB color
+ True if ready, False otherwise
"""
- r1, g1, b1 = color1
- r2, g2, b2 = color2
-
- r = int(r1 * (1 - ratio) + r2 * ratio)
- g = int(g1 * (1 - ratio) + g2 * ratio)
- b = int(b1 * (1 - ratio) + b2 * ratio)
-
- return (r, g, b)
-
+ if verbose:
+ passes, results = validate_gif(gif_path, is_emoji)
+ if not passes:
+ suggestions = get_optimization_suggestions(results)
+ if suggestions:
+ print("\nSuggestions:")
+ for suggestion in suggestions:
+ print(suggestion)
+ return passes
+ else:
+ size_pass, _ = check_slack_size(gif_path, is_emoji)
+ return size_pass
-def create_gradient_colors(start_color: tuple[int, int, int],
- end_color: tuple[int, int, int],
- steps: int) -> list[tuple[int, int, int]]:
- """
- Create a gradient of colors between two colors.
-
- Args:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -212,11 +205,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[get_complementary_color]
- B[lighten_color]
- C[darken_color]
- D[blend_colors]
- E[create_gradient_colors]
+ A[validate_dimensions]
+ B[validate_gif]
+ C[get_optimization_suggestions]
+ D[is_slack_ready]
+ E[create_pulse_animation]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/06-contribution-workflow-and-repository-governance.md b/tutorials/awesome-claude-skills-tutorial/06-contribution-workflow-and-repository-governance.md
index 8599ba3e..24167a37 100644
--- a/tutorials/awesome-claude-skills-tutorial/06-contribution-workflow-and-repository-governance.md
+++ b/tutorials/awesome-claude-skills-tutorial/06-contribution-workflow-and-repository-governance.md
@@ -38,170 +38,168 @@ You now understand how to contribute without increasing curation noise.
Next: [Chapter 7: Risk Management and Skill Selection Rubric](07-risk-management-and-skill-selection-rubric.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/templates/pulse.py`
+### `slack-gif-creator/core/easing.py`
-The `create_pulse_animation` function in [`slack-gif-creator/templates/pulse.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/pulse.py) handles a key part of this chapter's functionality:
+The `linear` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def create_pulse_animation(
- object_type: str = 'emoji',
- object_data: dict | None = None,
- num_frames: int = 30,
- pulse_type: str = 'smooth', # 'smooth', 'heartbeat', 'throb', 'pop'
- scale_range: tuple[float, float] = (0.8, 1.2),
- pulses: float = 2.0,
- center_pos: tuple[int, int] = (240, 240),
- frame_width: int = 480,
- frame_height: int = 480,
- bg_color: tuple[int, int, int] = (255, 255, 255)
-) -> list[Image.Image]:
- """
- Create pulsing/scaling animation.
-
- Args:
- object_type: 'emoji', 'circle', 'text'
- object_data: Object configuration
- num_frames: Number of frames
- pulse_type: Type of pulsing motion
- scale_range: (min_scale, max_scale) tuple
- pulses: Number of pulses in animation
- center_pos: Center position
- frame_width: Frame width
- frame_height: Frame height
- bg_color: Background color
-
- Returns:
- List of frames
- """
+def linear(t: float) -> float:
+ """Linear interpolation (no easing)."""
+ return t
+
+
+def ease_in_quad(t: float) -> float:
+ """Quadratic ease-in (slow start, accelerating)."""
+ return t * t
+
+
+def ease_out_quad(t: float) -> float:
+ """Quadratic ease-out (fast start, decelerating)."""
+ return t * (2 - t)
+
+
+def ease_in_out_quad(t: float) -> float:
+ """Quadratic ease-in-out (slow start and end)."""
+ if t < 0.5:
+ return 2 * t * t
+ return -1 + (4 - 2 * t) * t
+
+
+def ease_in_cubic(t: float) -> float:
+ """Cubic ease-in (slow start)."""
+ return t * t * t
+
+
+def ease_out_cubic(t: float) -> float:
+ """Cubic ease-out (fast start)."""
+ return (t - 1) * (t - 1) * (t - 1) + 1
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/pulse.py`
+### `slack-gif-creator/core/easing.py`
-The `create_attention_pulse` function in [`slack-gif-creator/templates/pulse.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/pulse.py) handles a key part of this chapter's functionality:
+The `ease_in_quad` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def create_attention_pulse(
- emoji: str = '⚠️',
- num_frames: int = 20,
- frame_size: int = 128,
- bg_color: tuple[int, int, int] = (255, 255, 255)
-) -> list[Image.Image]:
- """
- Create attention-grabbing pulse (good for emoji GIFs).
-
- Args:
- emoji: Emoji to pulse
- num_frames: Number of frames
- frame_size: Frame size (square)
- bg_color: Background color
-
- Returns:
- List of frames optimized for emoji size
- """
- return create_pulse_animation(
- object_type='emoji',
- object_data={'emoji': emoji, 'size': 80, 'shadow': False},
- num_frames=num_frames,
- pulse_type='throb',
- scale_range=(0.85, 1.15),
- pulses=2,
- center_pos=(frame_size // 2, frame_size // 2),
- frame_width=frame_size,
- frame_height=frame_size,
- bg_color=bg_color
- )
+def ease_in_quad(t: float) -> float:
+ """Quadratic ease-in (slow start, accelerating)."""
+ return t * t
+
+
+def ease_out_quad(t: float) -> float:
+ """Quadratic ease-out (fast start, decelerating)."""
+ return t * (2 - t)
+
+
+def ease_in_out_quad(t: float) -> float:
+ """Quadratic ease-in-out (slow start and end)."""
+ if t < 0.5:
+ return 2 * t * t
+ return -1 + (4 - 2 * t) * t
+
+
+def ease_in_cubic(t: float) -> float:
+ """Cubic ease-in (slow start)."""
+ return t * t * t
+
+
+def ease_out_cubic(t: float) -> float:
+ """Cubic ease-out (fast start)."""
+ return (t - 1) * (t - 1) * (t - 1) + 1
+
+
+def ease_in_out_cubic(t: float) -> float:
+ """Cubic ease-in-out."""
+ if t < 0.5:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/pulse.py`
+### `slack-gif-creator/core/easing.py`
-The `create_breathing_animation` function in [`slack-gif-creator/templates/pulse.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/pulse.py) handles a key part of this chapter's functionality:
+The `ease_out_quad` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def create_breathing_animation(
- object_type: str = 'emoji',
- object_data: dict | None = None,
- num_frames: int = 60,
- breaths: float = 2.0,
- scale_range: tuple[float, float] = (0.9, 1.1),
- frame_width: int = 480,
- frame_height: int = 480,
- bg_color: tuple[int, int, int] = (240, 248, 255)
-) -> list[Image.Image]:
- """
- Create slow, calming breathing animation (in and out).
-
- Args:
- object_type: Type of object
- object_data: Object configuration
- num_frames: Number of frames
- breaths: Number of breathing cycles
- scale_range: Min/max scale
- frame_width: Frame width
- frame_height: Frame height
- bg_color: Background color
-
- Returns:
- List of frames
- """
- if object_data is None:
- object_data = {'emoji': '😌', 'size': 100}
-
- return create_pulse_animation(
+def ease_out_quad(t: float) -> float:
+ """Quadratic ease-out (fast start, decelerating)."""
+ return t * (2 - t)
+
+
+def ease_in_out_quad(t: float) -> float:
+ """Quadratic ease-in-out (slow start and end)."""
+ if t < 0.5:
+ return 2 * t * t
+ return -1 + (4 - 2 * t) * t
+
+
+def ease_in_cubic(t: float) -> float:
+ """Cubic ease-in (slow start)."""
+ return t * t * t
+
+
+def ease_out_cubic(t: float) -> float:
+ """Cubic ease-out (fast start)."""
+ return (t - 1) * (t - 1) * (t - 1) + 1
+
+
+def ease_in_out_cubic(t: float) -> float:
+ """Cubic ease-in-out."""
+ if t < 0.5:
+ return 4 * t * t * t
+ return (t - 1) * (2 * t - 2) * (2 * t - 2) + 1
+
+
+def ease_in_bounce(t: float) -> float:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/templates/bounce.py`
+### `slack-gif-creator/core/easing.py`
-The `create_bounce_animation` function in [`slack-gif-creator/templates/bounce.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/templates/bounce.py) handles a key part of this chapter's functionality:
+The `ease_in_out_quad` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def create_bounce_animation(
- object_type: str = 'circle',
- object_data: dict = None,
- num_frames: int = 30,
- bounce_height: int = 150,
- ground_y: int = 350,
- start_x: int = 240,
- frame_width: int = 480,
- frame_height: int = 480,
- bg_color: tuple[int, int, int] = (255, 255, 255)
-) -> list:
- """
- Create frames for a bouncing animation.
-
- Args:
- object_type: 'circle', 'emoji', or 'custom'
- object_data: Data for the object (e.g., {'radius': 30, 'color': (255, 0, 0)})
- num_frames: Number of frames in the animation
- bounce_height: Maximum height of bounce
- ground_y: Y position of ground
- start_x: X position (or starting X if moving horizontally)
- frame_width: Frame width
- frame_height: Frame height
- bg_color: Background color
-
- Returns:
- List of frames
- """
- frames = []
+def ease_in_out_quad(t: float) -> float:
+ """Quadratic ease-in-out (slow start and end)."""
+ if t < 0.5:
+ return 2 * t * t
+ return -1 + (4 - 2 * t) * t
+
+
+def ease_in_cubic(t: float) -> float:
+ """Cubic ease-in (slow start)."""
+ return t * t * t
+
+
+def ease_out_cubic(t: float) -> float:
+ """Cubic ease-out (fast start)."""
+ return (t - 1) * (t - 1) * (t - 1) + 1
+
+
+def ease_in_out_cubic(t: float) -> float:
+ """Cubic ease-in-out."""
+ if t < 0.5:
+ return 4 * t * t * t
+ return (t - 1) * (2 * t - 2) * (2 * t - 2) + 1
+
+
+def ease_in_bounce(t: float) -> float:
+ """Bounce ease-in (bouncy start)."""
+ return 1 - ease_out_bounce(1 - t)
+
+def ease_out_bounce(t: float) -> float:
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[create_pulse_animation]
- B[create_attention_pulse]
- C[create_breathing_animation]
- D[create_bounce_animation]
- E[create_shake_animation]
+ A[linear]
+ B[ease_in_quad]
+ C[ease_out_quad]
+ D[ease_in_out_quad]
+ E[ease_in_cubic]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/07-risk-management-and-skill-selection-rubric.md b/tutorials/awesome-claude-skills-tutorial/07-risk-management-and-skill-selection-rubric.md
index 6992be08..a495ce9f 100644
--- a/tutorials/awesome-claude-skills-tutorial/07-risk-management-and-skill-selection-rubric.md
+++ b/tutorials/awesome-claude-skills-tutorial/07-risk-management-and-skill-selection-rubric.md
@@ -40,170 +40,168 @@ You now have a defensible framework for safer skill adoption.
Next: [Chapter 8: Team Adoption and Ongoing Maintenance](08-team-adoption-and-ongoing-maintenance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `slack-gif-creator/core/easing.py`
-The `ease_in_cubic` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `interpolate` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def ease_in_cubic(t: float) -> float:
- """Cubic ease-in (slow start)."""
- return t * t * t
-
+def interpolate(start: float, end: float, t: float, easing: str = 'linear') -> float:
+ """
+ Interpolate between two values with easing.
-def ease_out_cubic(t: float) -> float:
- """Cubic ease-out (fast start)."""
- return (t - 1) * (t - 1) * (t - 1) + 1
+ Args:
+ start: Start value
+ end: End value
+ t: Progress from 0.0 to 1.0
+ easing: Name of easing function
+ Returns:
+ Interpolated value
+ """
+ ease_func = get_easing(easing)
+ eased_t = ease_func(t)
+ return start + (end - start) * eased_t
-def ease_in_out_cubic(t: float) -> float:
- """Cubic ease-in-out."""
- if t < 0.5:
- return 4 * t * t * t
- return (t - 1) * (2 * t - 2) * (2 * t - 2) + 1
+def ease_back_in(t: float) -> float:
+ """Back ease-in (slight overshoot backward before forward motion)."""
+ c1 = 1.70158
+ c3 = c1 + 1
+ return c3 * t * t * t - c1 * t * t
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
-
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
+def ease_back_out(t: float) -> float:
+ """Back ease-out (overshoot forward then settle back)."""
+ c1 = 1.70158
+ c3 = c1 + 1
+ return 1 + c3 * pow(t - 1, 3) + c1 * pow(t - 1, 2)
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
### `slack-gif-creator/core/easing.py`
-The `ease_out_cubic` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `ease_back_in` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def ease_out_cubic(t: float) -> float:
- """Cubic ease-out (fast start)."""
- return (t - 1) * (t - 1) * (t - 1) + 1
+def ease_back_in(t: float) -> float:
+ """Back ease-in (slight overshoot backward before forward motion)."""
+ c1 = 1.70158
+ c3 = c1 + 1
+ return c3 * t * t * t - c1 * t * t
+
+def ease_back_out(t: float) -> float:
+ """Back ease-out (overshoot forward then settle back)."""
+ c1 = 1.70158
+ c3 = c1 + 1
+ return 1 + c3 * pow(t - 1, 3) + c1 * pow(t - 1, 2)
-def ease_in_out_cubic(t: float) -> float:
- """Cubic ease-in-out."""
+
+def ease_back_in_out(t: float) -> float:
+ """Back ease-in-out (overshoot at both ends)."""
+ c1 = 1.70158
+ c2 = c1 * 1.525
if t < 0.5:
- return 4 * t * t * t
- return (t - 1) * (2 * t - 2) * (2 * t - 2) + 1
-
-
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
-
-
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
- t -= 2.25 / 2.75
- return 7.5625 * t * t + 0.9375
- else:
- t -= 2.625 / 2.75
- return 7.5625 * t * t + 0.984375
+ return (pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2
+ return (pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2
+
+
+def apply_squash_stretch(base_scale: tuple[float, float], intensity: float,
+ direction: str = 'vertical') -> tuple[float, float]:
+ """
+ Calculate squash and stretch scales for more dynamic animation.
+
+ Args:
+ base_scale: (width_scale, height_scale) base scales
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
### `slack-gif-creator/core/easing.py`
-The `ease_in_out_cubic` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `ease_back_out` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def ease_in_out_cubic(t: float) -> float:
- """Cubic ease-in-out."""
- if t < 0.5:
- return 4 * t * t * t
- return (t - 1) * (2 * t - 2) * (2 * t - 2) + 1
+def ease_back_out(t: float) -> float:
+ """Back ease-out (overshoot forward then settle back)."""
+ c1 = 1.70158
+ c3 = c1 + 1
+ return 1 + c3 * pow(t - 1, 3) + c1 * pow(t - 1, 2)
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
+def ease_back_in_out(t: float) -> float:
+ """Back ease-in-out (overshoot at both ends)."""
+ c1 = 1.70158
+ c2 = c1 * 1.525
+ if t < 0.5:
+ return (pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2
+ return (pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
- t -= 2.25 / 2.75
- return 7.5625 * t * t + 0.9375
- else:
- t -= 2.625 / 2.75
- return 7.5625 * t * t + 0.984375
+def apply_squash_stretch(base_scale: tuple[float, float], intensity: float,
+ direction: str = 'vertical') -> tuple[float, float]:
+ """
+ Calculate squash and stretch scales for more dynamic animation.
+ Args:
+ base_scale: (width_scale, height_scale) base scales
+ intensity: Squash/stretch intensity (0.0-1.0)
+ direction: 'vertical', 'horizontal', or 'both'
-def ease_in_out_bounce(t: float) -> float:
- """Bounce ease-in-out."""
- if t < 0.5:
+ Returns:
+ (width_scale, height_scale) with squash/stretch applied
+ """
+ width_scale, height_scale = base_scale
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
### `slack-gif-creator/core/easing.py`
-The `ease_in_bounce` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `ease_back_in_out` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
```py
-def ease_in_bounce(t: float) -> float:
- """Bounce ease-in (bouncy start)."""
- return 1 - ease_out_bounce(1 - t)
-
-
-def ease_out_bounce(t: float) -> float:
- """Bounce ease-out (bouncy end)."""
- if t < 1 / 2.75:
- return 7.5625 * t * t
- elif t < 2 / 2.75:
- t -= 1.5 / 2.75
- return 7.5625 * t * t + 0.75
- elif t < 2.5 / 2.75:
- t -= 2.25 / 2.75
- return 7.5625 * t * t + 0.9375
- else:
- t -= 2.625 / 2.75
- return 7.5625 * t * t + 0.984375
-
-
-def ease_in_out_bounce(t: float) -> float:
- """Bounce ease-in-out."""
+def ease_back_in_out(t: float) -> float:
+ """Back ease-in-out (overshoot at both ends)."""
+ c1 = 1.70158
+ c2 = c1 * 1.525
if t < 0.5:
- return ease_in_bounce(t * 2) * 0.5
- return ease_out_bounce(t * 2 - 1) * 0.5 + 0.5
-
-
-def ease_in_elastic(t: float) -> float:
- """Elastic ease-in (spring effect)."""
- if t == 0 or t == 1:
+ return (pow(2 * t, 2) * ((c2 + 1) * 2 * t - c2)) / 2
+ return (pow(2 * t - 2, 2) * ((c2 + 1) * (t * 2 - 2) + c2) + 2) / 2
+
+
+def apply_squash_stretch(base_scale: tuple[float, float], intensity: float,
+ direction: str = 'vertical') -> tuple[float, float]:
+ """
+ Calculate squash and stretch scales for more dynamic animation.
+
+ Args:
+ base_scale: (width_scale, height_scale) base scales
+ intensity: Squash/stretch intensity (0.0-1.0)
+ direction: 'vertical', 'horizontal', or 'both'
+
+ Returns:
+ (width_scale, height_scale) with squash/stretch applied
+ """
+ width_scale, height_scale = base_scale
+
+ if direction == 'vertical':
+ # Compress vertically, expand horizontally (preserve volume)
+ height_scale *= (1 - intensity * 0.5)
+ width_scale *= (1 + intensity * 0.5)
+ elif direction == 'horizontal':
+ # Compress horizontally, expand vertically
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -213,11 +211,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[ease_in_cubic]
- B[ease_out_cubic]
- C[ease_in_out_cubic]
- D[ease_in_bounce]
- E[ease_out_bounce]
+ A[interpolate]
+ B[ease_back_in]
+ C[ease_back_out]
+ D[ease_back_in_out]
+ E[apply_squash_stretch]
A --> B
B --> C
C --> D
diff --git a/tutorials/awesome-claude-skills-tutorial/08-team-adoption-and-ongoing-maintenance.md b/tutorials/awesome-claude-skills-tutorial/08-team-adoption-and-ongoing-maintenance.md
index 573fbdbd..b3ba9b0d 100644
--- a/tutorials/awesome-claude-skills-tutorial/08-team-adoption-and-ongoing-maintenance.md
+++ b/tutorials/awesome-claude-skills-tutorial/08-team-adoption-and-ongoing-maintenance.md
@@ -42,170 +42,175 @@ Next steps:
- run one measured pilot across a single workflow category
- establish a monthly skill review and cleanup process
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `slack-gif-creator/core/easing.py`
+### `skill-creator/scripts/init_skill.py`
-The `apply_squash_stretch` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `main` function in [`skill-creator/scripts/init_skill.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/skill-creator/scripts/init_skill.py) handles a key part of this chapter's functionality:
```py
+def init_skill(name: str, template: str = "default"):
+ """Initialize a new skill directory from a template."""
+ skill_dir = Path("skills") / name
+ skill_dir.mkdir(parents=True, exist_ok=True)
+ copy_template(template, skill_dir)
+ print(f"Skill '{name}' initialized at {skill_dir}")
+```
+## Adoption Strategy and Rollout Planning
-def apply_squash_stretch(base_scale: tuple[float, float], intensity: float,
- direction: str = 'vertical') -> tuple[float, float]:
- """
- Calculate squash and stretch scales for more dynamic animation.
+Rolling out Claude skills across a team requires a phased approach. Start with a pilot group of 2-3 engineers who will validate the skill library against real workflows, then expand once the core patterns are proven.
- Args:
- base_scale: (width_scale, height_scale) base scales
- intensity: Squash/stretch intensity (0.0-1.0)
- direction: 'vertical', 'horizontal', or 'both'
+Key adoption milestones:
+1. **Individual adoption**: Single developer uses skills for personal productivity
+2. **Team sharing**: Skills shared via Git with team-specific customizations
+3. **Organization standard**: Skills become part of the official developer toolkit with review processes
- Returns:
- (width_scale, height_scale) with squash/stretch applied
- """
- width_scale, height_scale = base_scale
-
- if direction == 'vertical':
- # Compress vertically, expand horizontally (preserve volume)
- height_scale *= (1 - intensity * 0.5)
- width_scale *= (1 + intensity * 0.5)
- elif direction == 'horizontal':
- # Compress horizontally, expand vertically
- width_scale *= (1 - intensity * 0.5)
- height_scale *= (1 + intensity * 0.5)
- elif direction == 'both':
- # General squash (both dimensions)
- width_scale *= (1 - intensity * 0.3)
- height_scale *= (1 - intensity * 0.3)
-
- return (width_scale, height_scale)
+## Resources
+
+This skill includes example resource directories that demonstrate how to organize different types of bundled resources:
+
+### scripts/
+Executable code (Python/Bash/etc.) that can be run directly to perform specific operations.
+
+**Examples from other skills:**
+- PDF skill: `fill_fillable_fields.py`, `extract_form_field_info.py` - utilities for PDF manipulation
+- DOCX skill: `document.py`, `utilities.py` - Python modules for document processing
+
+**Appropriate for:** Python scripts, shell scripts, or any executable code that performs automation, data processing, or specific operations.
+
+**Note:** Scripts may be executed without loading into context, but can still be read by Claude for patching or environment adjustments.
+
+### references/
+Documentation and reference material intended to be loaded into context to inform Claude's process and thinking.
+**Examples from other skills:**
+- Product management: `communication.md`, `context_building.md` - detailed workflow guides
+- BigQuery: API reference documentation and query examples
+- Finance: Schema documentation, company policies
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/easing.py`
+### `slack-gif-creator/core/frame_composer.py`
-The `calculate_arc_motion` function in [`slack-gif-creator/core/easing.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/easing.py) handles a key part of this chapter's functionality:
+The `create_blank_frame` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
```py
-def calculate_arc_motion(start: tuple[float, float], end: tuple[float, float],
- height: float, t: float) -> tuple[float, float]:
+def create_blank_frame(width: int, height: int, color: tuple[int, int, int] = (255, 255, 255)) -> Image.Image:
"""
- Calculate position along a parabolic arc (natural motion path).
+ Create a blank frame with solid color background.
Args:
- start: (x, y) starting position
- end: (x, y) ending position
- height: Arc height at midpoint (positive = upward)
- t: Progress (0.0-1.0)
+ width: Frame width
+ height: Frame height
+ color: RGB color tuple (default: white)
Returns:
- (x, y) position along arc
+ PIL Image
"""
- x1, y1 = start
- x2, y2 = end
+ return Image.new('RGB', (width, height), color)
- # Linear interpolation for x
- x = x1 + (x2 - x1) * t
- # Parabolic interpolation for y
- # y = start + progress * (end - start) + arc_offset
- # Arc offset peaks at t=0.5
- arc_offset = 4 * height * t * (1 - t)
- y = y1 + (y2 - y1) * t - arc_offset
-
- return (x, y)
+def draw_circle(frame: Image.Image, center: tuple[int, int], radius: int,
+ fill_color: Optional[tuple[int, int, int]] = None,
+ outline_color: Optional[tuple[int, int, int]] = None,
+ outline_width: int = 1) -> Image.Image:
+ """
+ Draw a circle on a frame.
+ Args:
+ frame: PIL Image to draw on
+ center: (x, y) center position
+ radius: Circle radius
+ fill_color: RGB fill color (None for no fill)
+ outline_color: RGB outline color (None for no outline)
+ outline_width: Outline width in pixels
-# Add new easing functions to the convenience mapping
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/validators.py`
+### `slack-gif-creator/core/frame_composer.py`
-The `check_slack_size` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
+The `draw_circle` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
```py
-def check_slack_size(gif_path: str | Path, is_emoji: bool = True) -> tuple[bool, dict]:
+def draw_circle(frame: Image.Image, center: tuple[int, int], radius: int,
+ fill_color: Optional[tuple[int, int, int]] = None,
+ outline_color: Optional[tuple[int, int, int]] = None,
+ outline_width: int = 1) -> Image.Image:
"""
- Check if GIF meets Slack size limits.
+ Draw a circle on a frame.
Args:
- gif_path: Path to GIF file
- is_emoji: True for emoji GIF (64KB limit), False for message GIF (2MB limit)
+ frame: PIL Image to draw on
+ center: (x, y) center position
+ radius: Circle radius
+ fill_color: RGB fill color (None for no fill)
+ outline_color: RGB outline color (None for no outline)
+ outline_width: Outline width in pixels
Returns:
- Tuple of (passes: bool, info: dict with details)
+ Modified frame
"""
- gif_path = Path(gif_path)
-
- if not gif_path.exists():
- return False, {'error': f'File not found: {gif_path}'}
-
- size_bytes = gif_path.stat().st_size
- size_kb = size_bytes / 1024
- size_mb = size_kb / 1024
-
- limit_kb = 64 if is_emoji else 2048
- limit_mb = limit_kb / 1024
+ draw = ImageDraw.Draw(frame)
+ x, y = center
+ bbox = [x - radius, y - radius, x + radius, y + radius]
+ draw.ellipse(bbox, fill=fill_color, outline=outline_color, width=outline_width)
+ return frame
- passes = size_kb <= limit_kb
- info = {
- 'size_bytes': size_bytes,
- 'size_kb': size_kb,
- 'size_mb': size_mb,
- 'limit_kb': limit_kb,
+def draw_rectangle(frame: Image.Image, top_left: tuple[int, int], bottom_right: tuple[int, int],
+ fill_color: Optional[tuple[int, int, int]] = None,
+ outline_color: Optional[tuple[int, int, int]] = None,
+ outline_width: int = 1) -> Image.Image:
+ """
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
-### `slack-gif-creator/core/validators.py`
+### `slack-gif-creator/core/frame_composer.py`
-The `validate_dimensions` function in [`slack-gif-creator/core/validators.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/validators.py) handles a key part of this chapter's functionality:
+The `draw_rectangle` function in [`slack-gif-creator/core/frame_composer.py`](https://github.com/ComposioHQ/awesome-claude-skills/blob/HEAD/slack-gif-creator/core/frame_composer.py) handles a key part of this chapter's functionality:
```py
-def validate_dimensions(width: int, height: int, is_emoji: bool = True) -> tuple[bool, dict]:
+def draw_rectangle(frame: Image.Image, top_left: tuple[int, int], bottom_right: tuple[int, int],
+ fill_color: Optional[tuple[int, int, int]] = None,
+ outline_color: Optional[tuple[int, int, int]] = None,
+ outline_width: int = 1) -> Image.Image:
"""
- Check if dimensions are suitable for Slack.
+ Draw a rectangle on a frame.
Args:
- width: Frame width in pixels
- height: Frame height in pixels
- is_emoji: True for emoji GIF, False for message GIF
+ frame: PIL Image to draw on
+ top_left: (x, y) top-left corner
+ bottom_right: (x, y) bottom-right corner
+ fill_color: RGB fill color (None for no fill)
+ outline_color: RGB outline color (None for no outline)
+ outline_width: Outline width in pixels
Returns:
- Tuple of (passes: bool, info: dict with details)
+ Modified frame
+ """
+ draw = ImageDraw.Draw(frame)
+ draw.rectangle([top_left, bottom_right], fill=fill_color, outline=outline_color, width=outline_width)
+ return frame
+
+
+def draw_line(frame: Image.Image, start: tuple[int, int], end: tuple[int, int],
+ color: tuple[int, int, int] = (0, 0, 0), width: int = 2) -> Image.Image:
"""
- info = {
- 'width': width,
- 'height': height,
- 'is_square': width == height,
- 'type': 'emoji' if is_emoji else 'message'
- }
-
- if is_emoji:
- # Emoji GIFs should be 128x128
- optimal = width == height == 128
- acceptable = width == height and 64 <= width <= 128
-
- info['optimal'] = optimal
- info['acceptable'] = acceptable
-
- if optimal:
- print(f"✓ {width}x{height} - optimal for emoji")
- passes = True
+ Draw a line on a frame.
+
+ Args:
+ frame: PIL Image to draw on
```
This function is important because it defines how Awesome Claude Skills Tutorial: High-Signal Skill Discovery and Reuse for Claude Workflows implements the patterns covered in this chapter.
@@ -215,11 +220,11 @@ This function is important because it defines how Awesome Claude Skills Tutorial
```mermaid
flowchart TD
- A[apply_squash_stretch]
- B[calculate_arc_motion]
- C[check_slack_size]
- D[validate_dimensions]
- E[validate_gif]
+ A[main]
+ B[create_blank_frame]
+ C[draw_circle]
+ D[draw_rectangle]
+ E[draw_line]
A --> B
B --> C
C --> D
diff --git a/tutorials/awslabs-mcp-tutorial/01-getting-started.md b/tutorials/awslabs-mcp-tutorial/01-getting-started.md
index b5129129..a2f24e30 100644
--- a/tutorials/awslabs-mcp-tutorial/01-getting-started.md
+++ b/tutorials/awslabs-mcp-tutorial/01-getting-started.md
@@ -5,88 +5,130 @@ nav_order: 1
parent: awslabs/mcp Tutorial
---
-
# Chapter 1: Getting Started
-Welcome to **Chapter 1: Getting Started**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter gives a practical first-run path through the AWS MCP ecosystem.
+The `awslabs/mcp` repository is a monorepo containing 65+ production-grade MCP servers for AWS services, maintained by AWS Labs. Each server wraps one or more AWS service APIs as MCP tools, allowing LLM agents in Claude Desktop, Cursor, Amazon Q Developer, and other MCP clients to perform AWS operations through natural language.
## Learning Goals
-- identify one or two servers that match immediate needs
-- configure installation for your primary MCP host client
-- validate first tool calls with minimal environment risk
-- establish baseline profiles and runtime settings
+- Identify one or two servers that match your immediate needs
+- Configure installation for your primary MCP host client
+- Validate first tool calls with minimal environment risk
+- Establish baseline profiles and runtime settings
-## Fast Start Loop
+## Repository Overview
-1. select an initial server (for example documentation, API, or IaC)
-2. install via your MCP host pattern (`uvx`-based paths are common)
-3. set minimal environment variables (region/profile/log level)
-4. run a low-risk read-only query end to end
-5. capture this configuration as your baseline template
+```mermaid
+graph TD
+ REPO[awslabs/mcp monorepo]
+ REPO --> SRC[src/ — 65+ MCP servers\none directory per server]
+ REPO --> DOCS[docusaurus/ — documentation site]
+ REPO --> SCRIPTS[scripts/ — CI tooling\nverify_tool_names.py]
+ REPO --> VIBE[VIBE_CODING_TIPS_TRICKS.md]
+ REPO --> DESIGN[DESIGN_GUIDELINES.md]
+ REPO --> DEV[DEVELOPER_GUIDE.md]
+
+ SRC --> CORE[core-mcp-server\nOrchestration + meta-server]
+ SRC --> DOCS_SRV[aws-documentation-mcp-server\nAWS service docs + search]
+ SRC --> API[aws-api-mcp-server\nAWS API discovery + execution]
+ SRC --> IaC[terraform-mcp-server\ncdk-mcp-server · cfn-mcp-server]
+ SRC --> DATA[Multiple DB servers\ndynamodb · postgres · mysql · redis...]
+```
-## Source References
+## Server Categories at a Glance
-- [Repository README](https://github.com/awslabs/mcp/blob/main/README.md)
-- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
-- [AWS API MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
+| Category | Example Servers |
+|:---------|:---------------|
+| Documentation & discovery | `aws-documentation-mcp-server`, `aws-api-mcp-server`, `aws-knowledge-mcp-server` |
+| Infrastructure as Code | `terraform-mcp-server`, `cdk-mcp-server`, `cfn-mcp-server`, `aws-iac-mcp-server` |
+| Compute | `eks-mcp-server`, `ecs-mcp-server`, `lambda-tool-mcp-server` |
+| Data stores | `dynamodb-mcp-server`, `postgres-mcp-server`, `mysql-mcp-server`, `aurora-dsql-mcp-server` |
+| AI/ML | `bedrock-kb-retrieval-mcp-server`, `amazon-bedrock-agentcore-mcp-server`, `sagemaker-ai-mcp-server` |
+| Observability | `cloudwatch-mcp-server`, `cloudtrail-mcp-server`, `prometheus-mcp-server` |
+| Cost & billing | `cost-explorer-mcp-server`, `billing-cost-management-mcp-server`, `aws-pricing-mcp-server` |
+| Security | `iam-mcp-server`, `well-architected-security-mcp-server` |
-## Summary
+## Fast Start Loop
-You now have a stable onboarding path for first AWS MCP server usage.
+```mermaid
+flowchart TD
+ SELECT[1. Select a server for your task]
+ SELECT --> INSTALL[2. Install via uvx pattern\nor Docker]
+ INSTALL --> AUTH[3. Configure AWS credentials\nor profile]
+ AUTH --> TEST[4. Run a low-risk read-only query]
+ TEST --> BASELINE[5. Capture config as team baseline]
+ BASELINE --> EXPAND[Expand to more servers as needed]
+```
-Next: [Chapter 2: Server Catalog and Role Composition](02-server-catalog-and-role-composition.md)
+### Step 1: Select Your First Server
-## Source Code Walkthrough
+For most AWS users, start with:
+- **`aws-documentation-mcp-server`**: Search AWS service documentation — safe, read-only, no AWS credentials needed for basic usage
+- **`aws-api-mcp-server`**: Discover and call AWS APIs directly — requires AWS credentials
+- **`core-mcp-server`**: Meta-server for orchestrating other servers
-### `scripts/verify_tool_names.py`
+### Step 2: Install
-The `extract_package_name` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+All servers follow the same `uvx` pattern:
-```py
+```bash
+# Run directly (no install step)
+uvx awslabs.aws-documentation-mcp-server
+# Or install into a project
+uv add awslabs.aws-documentation-mcp-server
+```
-def extract_package_name(pyproject_path: Path) -> str:
- """Extract the package name from pyproject.toml file."""
- try:
- with open(pyproject_path, 'rb') as f:
- data = tomllib.load(f)
- return data['project']['name']
- except (FileNotFoundError, KeyError) as e:
- raise ValueError(f'Failed to extract package name from {pyproject_path}: {e}')
- except Exception as e:
- if 'TOML' in str(type(e).__name__):
- raise ValueError(f'Failed to parse TOML file {pyproject_path}: {e}')
- else:
- raise ValueError(f'Failed to extract package name from {pyproject_path}: {e}')
+### Step 3: Configure Claude Desktop
+
+```json
+{
+ "mcpServers": {
+ "awslabs-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "your-profile",
+ "AWS_REGION": "us-east-1",
+ "MCP_LOG_LEVEL": "WARNING"
+ }
+ }
+ }
+}
+```
+### Step 4: Validate with a Read-Only Query
-def convert_package_name_to_server_format(package_name: str) -> str:
- """Convert package name to the format used in fully qualified tool names.
+```
+User: "Search AWS documentation for Lambda function timeout limits"
+→ aws-documentation-mcp-server uses search tools to find relevant docs
+→ Returns documentation content as markdown
- Examples:
- awslabs.git-repo-research-mcp-server -> git_repo_research_mcp_server
- awslabs.nova-canvas-mcp-server -> nova_canvas_mcp_server
- """
- # Remove 'awslabs.' prefix if present
- if package_name.startswith('awslabs.'):
- package_name = package_name[8:]
+User: "What AWS APIs are available for ECS task management?"
+→ aws-api-mcp-server discovers relevant API operations
+→ Returns API names, parameters, and documentation
+```
- # Replace hyphens with underscores
- return package_name.replace('-', '_')
+## Fully Qualified Tool Names
+The repository enforces a naming convention for all tool names. The `scripts/verify_tool_names.py` CI tool validates this:
```
+Format: awslabs<server_name_with_underscores>___<tool_name>
+Example: awslabsaws_documentation_mcp_server___search_documentation
+```
+
+This prevents tool name collisions when multiple AWS MCP servers are loaded simultaneously in the same MCP client.
+
+## Source References
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+- [Repository README](https://github.com/awslabs/mcp/blob/main/README.md)
+- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
+- [AWS API MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
+- [Core MCP Server README](https://github.com/awslabs/mcp/blob/main/src/core-mcp-server/README.md)
+## Summary
-## How These Components Connect
+The `awslabs/mcp` repo provides a catalog of 65+ AWS-focused MCP servers, each installable via `uvx`. Start with the documentation or API discovery servers for read-only exploration. Use the fully qualified tool name convention (`awslabs<server_name_with_underscores>___<tool_name>`) to understand how tools are namespaced when multiple servers are active simultaneously.
-```mermaid
-flowchart TD
- A[extract_package_name]
-```
+Next: [Chapter 2: Server Catalog and Role Composition](02-server-catalog-and-role-composition.md)
diff --git a/tutorials/awslabs-mcp-tutorial/02-server-catalog-and-role-composition.md b/tutorials/awslabs-mcp-tutorial/02-server-catalog-and-role-composition.md
index dc878300..037d1bcd 100644
--- a/tutorials/awslabs-mcp-tutorial/02-server-catalog-and-role-composition.md
+++ b/tutorials/awslabs-mcp-tutorial/02-server-catalog-and-role-composition.md
@@ -5,84 +5,202 @@ nav_order: 2
parent: awslabs/mcp Tutorial
---
-
# Chapter 2: Server Catalog and Role Composition
-Welcome to **Chapter 2: Server Catalog and Role Composition**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter explains how to navigate and compose capabilities from a large server catalog.
+The `awslabs/mcp` catalog contains 65+ servers. Loading all of them simultaneously would overwhelm any MCP client's context window with tool definitions. This chapter explains how to select servers by workflow role and compose them deliberately.
## Learning Goals
-- map server choices to concrete job-to-be-done categories
-- avoid loading unnecessary servers and tools for each workflow
-- use role-based composition patterns where available
-- keep context and tool surface area intentionally constrained
+- Map server choices to concrete job categories
+- Avoid loading unnecessary servers and their tool surface areas
+- Use role-based composition patterns for complex workflows
+- Keep context and tool surface area intentionally constrained
-## Selection Heuristic
+## The Context Window Problem
-Start with the smallest server set that satisfies your workflow. Expand only when a measurable capability gap appears. More servers is not automatically better.
+```mermaid
+graph LR
+ ALL[All 65+ servers loaded]
+ ALL --> TOOLS[500+ tool definitions\nin client context]
+ TOOLS --> PROBLEM[LLM selection quality degrades\nContext fills with irrelevant tools]
+
+ MINIMAL[2-3 targeted servers]
+ MINIMAL --> FEW[10-30 relevant tools]
+ FEW --> GOOD[LLM can select correctly\nFaster, cheaper, more accurate]
+```
-## Source References
+Each tool definition consumes tokens in the LLM context. Loading servers you don't need for a task directly degrades tool selection quality.
-- [Repository README Catalog](https://github.com/awslabs/mcp/blob/main/README.md)
-- [Core MCP Server README](https://github.com/awslabs/mcp/blob/main/src/core-mcp-server/README.md)
-- [Samples Overview](https://github.com/awslabs/mcp/blob/main/samples/README.md)
-
-## Summary
+## Role-Based Server Composition
-You now have a strategy for selecting servers without overwhelming client context.
+### Role: AWS Research / Documentation
-Next: [Chapter 3: Transport and Client Integration Patterns](03-transport-and-client-integration-patterns.md)
+Use when you need to understand AWS services, find documentation, or explore API options.
-## Source Code Walkthrough
+```json
+{
+ "mcpServers": {
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server"]
+ },
+ "aws-api-discovery": {
+ "command": "uvx",
+ "args": ["awslabs.aws-api-mcp-server"],
+ "env": { "AWS_PROFILE": "readonly" }
+ }
+ }
+}
+```
-### `scripts/verify_tool_names.py`
+### Role: Infrastructure as Code Developer
+
+Use when generating or reviewing Terraform, CDK, or CloudFormation.
+
+```json
+{
+ "mcpServers": {
+ "terraform": {
+ "command": "uvx",
+ "args": ["awslabs.terraform-mcp-server"]
+ },
+ "cdk": {
+ "command": "uvx",
+ "args": ["awslabs.cdk-mcp-server"]
+ },
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server"]
+ }
+ }
+}
+```
-The `convert_package_name_to_server_format` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+### Role: Data / Database Operations
+
+Use when working with AWS managed databases.
+
+```json
+{
+ "mcpServers": {
+ "dynamodb": {
+ "command": "uvx",
+ "args": ["awslabs.dynamodb-mcp-server"],
+ "env": { "AWS_PROFILE": "dev", "AWS_REGION": "us-east-1" }
+ },
+ "aurora-dsql": {
+ "command": "uvx",
+ "args": ["awslabs.aurora-dsql-mcp-server"],
+ "env": { "AWS_PROFILE": "dev" }
+ }
+ }
+}
+```
-```py
+### Role: Observability / Incident Response
+
+Use during incident investigation or operational troubleshooting.
+
+```json
+{
+ "mcpServers": {
+ "cloudwatch": {
+ "command": "uvx",
+ "args": ["awslabs.cloudwatch-mcp-server"],
+ "env": { "AWS_PROFILE": "readonly", "AWS_REGION": "us-east-1" }
+ },
+ "cloudtrail": {
+ "command": "uvx",
+ "args": ["awslabs.cloudtrail-mcp-server"],
+ "env": { "AWS_PROFILE": "readonly" }
+ }
+ }
+}
+```
+## Server Catalog by Category
-def convert_package_name_to_server_format(package_name: str) -> str:
- """Convert package name to the format used in fully qualified tool names.
+```mermaid
+graph TD
+ CATALOG[awslabs/mcp Server Catalog]
+
+ CATALOG --> DISCOVERY[Documentation & Discovery]
+ DISCOVERY --> D1[aws-documentation-mcp-server]
+ DISCOVERY --> D2[aws-api-mcp-server]
+ DISCOVERY --> D3[aws-knowledge-mcp-server]
+ DISCOVERY --> D4[openapi-mcp-server]
+
+ CATALOG --> IAC[Infrastructure as Code]
+ IAC --> I1[terraform-mcp-server]
+ IAC --> I2[cdk-mcp-server]
+ IAC --> I3[cfn-mcp-server]
+ IAC --> I4[aws-iac-mcp-server]
+
+ CATALOG --> COMPUTE[Compute & Containers]
+ COMPUTE --> C1[eks-mcp-server]
+ COMPUTE --> C2[ecs-mcp-server]
+ COMPUTE --> C3[lambda-tool-mcp-server]
+ COMPUTE --> C4[aws-serverless-mcp-server]
+
+ CATALOG --> AIML[AI & ML]
+ AIML --> A1[bedrock-kb-retrieval-mcp-server]
+ AIML --> A2[amazon-bedrock-agentcore-mcp-server]
+ AIML --> A3[sagemaker-ai-mcp-server]
+ AIML --> A4[nova-canvas-mcp-server]
+
+ CATALOG --> OBS[Observability]
+ OBS --> O1[cloudwatch-mcp-server]
+ OBS --> O2[cloudtrail-mcp-server]
+ OBS --> O3[cloudwatch-applicationsignals-mcp-server]
+ OBS --> O4[prometheus-mcp-server]
+```
- Examples:
- awslabs.git-repo-research-mcp-server -> git_repo_research_mcp_server
- awslabs.nova-canvas-mcp-server -> nova_canvas_mcp_server
- """
- # Remove 'awslabs.' prefix if present
- if package_name.startswith('awslabs.'):
- package_name = package_name[8:]
+## Key Individual Servers
- # Replace hyphens with underscores
- return package_name.replace('-', '_')
+### `core-mcp-server`
+The orchestration meta-server. It has awareness of the other servers in the ecosystem and can guide which server to activate for a given task. Load it alongside domain-specific servers for complex workflows.
-def calculate_fully_qualified_name(server_name: str, tool_name: str) -> str:
- """Calculate the fully qualified tool name as used by MCP clients.
+### `aws-documentation-mcp-server`
- Format: awslabs___
+Searches and retrieves AWS official documentation. No AWS credentials required for basic operation. Always safe to include — adds documentation context without risk of mutating resources.
- Examples:
- awslabs + git_repo_research_mcp_server + ___ + search_repos_on_github
- = awslabsgit_repo_research_mcp_server___search_repos_on_github
- """
- return f'awslabs{server_name}___{tool_name}'
+### `aws-api-mcp-server`
+Discovers and can invoke AWS APIs directly through the AWS SDK. Requires AWS credentials. Can perform write operations — use with a read-only IAM profile when exploring.
-def find_tool_decorators(file_path: Path) -> List[Tuple[str, int]]:
- """Find all tool definitions in a Python file and extract tool names.
+### `aws-iac-mcp-server`
-```
+A unified IaC server that wraps Terraform, CDK, and CloudFormation patterns. Use instead of loading all three IaC servers separately when you need multi-tool IaC support.
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+### `cloudwatch-mcp-server`
+Retrieves CloudWatch metrics, logs, alarms, and dashboards. Requires CloudWatch read permissions. One of the most valuable servers for operational troubleshooting.
-## How These Components Connect
+## Selection Heuristic
```mermaid
flowchart TD
- A[convert_package_name_to_server_format]
+ TASK[Identify task]
+ TASK --> Q1{Read-only research\nor documentation?}
+ Q1 -- Yes --> DOCS[aws-documentation-mcp-server\nNo mutation risk]
+ Q1 -- No --> Q2{Infrastructure\nplanning/generation?}
+ Q2 -- Yes --> IAC[terraform or cdk or cfn\nor aws-iac-mcp-server]
+ Q2 -- No --> Q3{Operational\ninvestigation?}
+ Q3 -- Yes --> OBS[cloudwatch + cloudtrail\nwith read-only credentials]
+ Q3 -- No --> Q4{Data/database\nwork?}
+ Q4 -- Yes --> DATA[Specific DB server\ne.g., dynamodb, postgres]
+ Q4 -- No --> CORE[core-mcp-server\nfor orchestration guidance]
```
+
+## Source References
+
+- [Repository README Catalog](https://github.com/awslabs/mcp/blob/main/README.md)
+- [Core MCP Server README](https://github.com/awslabs/mcp/blob/main/src/core-mcp-server/README.md)
+- [Design Guidelines](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+
+## Summary
+
+Load the minimal server set for each workflow role. Documentation and discovery servers are always safe to include (read-only, no AWS credential risk). IaC servers are design-time tools; use them with explicit human approval gates for any `apply` or `deploy` operations. Observability servers should use read-only IAM profiles. Never load all 65+ servers simultaneously — context quality degrades rapidly with tool proliferation.
+
+Next: [Chapter 3: Transport and Client Integration Patterns](03-transport-and-client-integration-patterns.md)
diff --git a/tutorials/awslabs-mcp-tutorial/03-transport-and-client-integration-patterns.md b/tutorials/awslabs-mcp-tutorial/03-transport-and-client-integration-patterns.md
index e444df2b..3001f6b9 100644
--- a/tutorials/awslabs-mcp-tutorial/03-transport-and-client-integration-patterns.md
+++ b/tutorials/awslabs-mcp-tutorial/03-transport-and-client-integration-patterns.md
@@ -5,84 +5,200 @@ nav_order: 3
parent: awslabs/mcp Tutorial
---
-
# Chapter 3: Transport and Client Integration Patterns
-Welcome to **Chapter 3: Transport and Client Integration Patterns**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
-
-
-This chapter covers integration patterns across IDE and chat MCP clients.
+All `awslabs/mcp` servers support stdio transport by default, which is the right choice for local desktop clients. Some servers also support Docker-based deployment and HTTP transports. This chapter covers configuration patterns for each major MCP host client.
## Learning Goals
-- understand default transport assumptions in the ecosystem
-- map client configuration differences across hosts
-- evaluate when HTTP modes are available for specific servers
-- avoid brittle configuration drift across teams
-
-## Integration Rule
+- Understand default transport assumptions across the ecosystem
+- Map configuration differences across MCP host clients (Claude Desktop, Cursor, Amazon Q Developer, Cline)
+- Evaluate when Docker or HTTP modes are appropriate for specific servers
+- Avoid brittle configuration drift across teams
-Standardize one primary transport/client path per environment first, then add alternative modes only when you have a concrete operational requirement.
+## Default Transport: Stdio via uvx
-## Source References
+All servers use stdio transport by default. The MCP host spawns the server as a subprocess via `uvx`:
-- [Repository README Transport Section](https://github.com/awslabs/mcp/blob/main/README.md)
-- [AWS API MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
-- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
+```mermaid
+graph LR
+ HOST[MCP Host\nClaude Desktop · Cursor · Q Developer]
+ HOST -->|spawn subprocess| SERVER[uvx awslabs.server-name\nPython process]
+ SERVER <-->|stdin/stdout JSON-RPC| HOST
+ SERVER --> AWS[AWS APIs via boto3]
+```
-## Summary
+The `uvx` command downloads and runs the server in an isolated virtualenv without a permanent install step. This makes version pinning easy — pin the server version directly in the `args`:
-You now have a repeatable integration pattern for client configuration and transport selection.
+```json
+{
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server@1.3.0"]
+}
+```
-Next: [Chapter 4: Infrastructure and IaC Workflows](04-infrastructure-and-iac-workflows.md)
+## Claude Desktop Configuration
+
+Config file: `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS)
+
+```json
+{
+ "mcpServers": {
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "your-profile",
+ "AWS_REGION": "us-east-1",
+ "MCP_LOG_LEVEL": "WARNING"
+ }
+ },
+ "terraform": {
+ "command": "uvx",
+ "args": ["awslabs.terraform-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "infra-dev",
+ "ALLOW_WRITE": "true"
+ }
+ }
+ }
+}
+```
-## Source Code Walkthrough
+## Cursor IDE Configuration
+
+Cursor supports global (`~/.cursor/mcp.json`) and project-level (`.cursor/mcp.json`) MCP configs:
+
+```json
+{
+ "mcpServers": {
+ "aws-cdk": {
+ "command": "uvx",
+ "args": ["awslabs.cdk-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "dev",
+ "AWS_REGION": "us-east-1"
+ }
+ }
+ }
+}
+```
-### `scripts/verify_tool_names.py`
+Project-level configs are useful for different AWS profiles per project.
+
+## Amazon Q Developer
+
+Amazon Q Developer has native MCP support. Configure via the Q Developer IDE extension settings or the `~/.aws/amazonq/mcp.json` configuration file:
+
+```json
+{
+ "mcpServers": {
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server"]
+ },
+ "cloudwatch": {
+ "command": "uvx",
+ "args": ["awslabs.cloudwatch-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "readonly",
+ "AWS_REGION": "us-east-1"
+ }
+ }
+ }
+}
+```
-The `calculate_fully_qualified_name` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+## Cline (VS Code Extension)
-```py
+Cline supports MCP servers configured through its settings panel. The `docs/images/root-readme/` directory in the repo contains screenshots showing the Cline configuration workflow:
+```mermaid
+graph LR
+ CLINE[Cline VS Code Extension]
+ CLINE --> SETTINGS[Extension Settings\nMCP Servers panel]
+ SETTINGS --> ADD[Add server:\nName: aws-docs\nCommand: uvx\nArgs: awslabs.aws-documentation-mcp-server]
+ ADD --> ENV[Set env vars:\nAWS_PROFILE, AWS_REGION]
+```
-def calculate_fully_qualified_name(server_name: str, tool_name: str) -> str:
- """Calculate the fully qualified tool name as used by MCP clients.
+## Docker Transport (Alternative)
+
+Some servers provide Dockerfiles for containerized deployment. This is useful when:
+- You cannot install Python/uv on the host machine
+- You need a pinned, reproducible server environment
+- You want to share a server instance across team members
+
+```json
+{
+ "mcpServers": {
+ "aws-docs-docker": {
+ "command": "docker",
+ "args": [
+ "run", "--rm", "-i",
+ "-e", "AWS_PROFILE=default",
+ "-v", "/root/.aws:/root/.aws:ro",
+ "awslabs/aws-documentation-mcp-server:latest"
+ ]
+ }
+ }
+}
+```
- Format: awslabs___
+Note: AWS credentials must be mounted or injected via environment when using Docker. The `-v /root/.aws:/root/.aws:ro` approach mounts credentials read-only into the container.
- Examples:
- awslabs + git_repo_research_mcp_server + ___ + search_repos_on_github
- = awslabsgit_repo_research_mcp_server___search_repos_on_github
- """
- return f'awslabs{server_name}___{tool_name}'
+## Environment Variable Standardization
+All `awslabs/mcp` servers follow consistent environment variable conventions:
-def find_tool_decorators(file_path: Path) -> List[Tuple[str, int]]:
- """Find all tool definitions in a Python file and extract tool names.
+| Variable | Purpose | Default |
+|:---------|:--------|:--------|
+| `AWS_PROFILE` | AWS credentials profile | `default` |
+| `AWS_REGION` | AWS region | `us-east-1` |
+| `MCP_LOG_LEVEL` | Server log verbosity | `WARNING` |
+| `ALLOW_WRITE` | Enable mutating operations | `false` (server-dependent) |
- Supports all tool registration patterns:
- - Pattern 1: @mcp.tool(name='tool_name')
- - Pattern 2: @mcp.tool() (uses function name)
- - Pattern 3: app.tool('tool_name')(function)
- - Pattern 4: mcp.tool()(function) (uses function name)
- - Pattern 5: self.mcp.tool(name='tool_name')(function)
- - Pattern 6: @.tool(name='tool_name')
+```mermaid
+graph LR
+ ENV[Environment Variables]
+ ENV --> CRED[Credentials:\nAWS_PROFILE\nAWS_ACCESS_KEY_ID\nAWS_SECRET_ACCESS_KEY]
+ ENV --> REGION[Region:\nAWS_REGION\nAWS_DEFAULT_REGION]
+ ENV --> LOG[Logging:\nMCP_LOG_LEVEL]
+ ENV --> SAFETY[Safety:\nALLOW_WRITE\nREADONLY mode flags]
+```
- Returns:
- List of tuples: (tool_name, line_number)
- """
- try:
- with open(file_path, 'r', encoding='utf-8') as f:
- content = f.read()
- except (FileNotFoundError, UnicodeDecodeError):
+## Team Configuration Standardization
+
+To prevent drift across team environments, use a shared configuration template:
+
+```bash
+# Team shared config template in git repo
+cat .mcp/config-template.json
+{
+ "mcpServers": {
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server@${MCP_DOCS_VERSION}"],
+ "env": {
+ "AWS_PROFILE": "${AWS_PROFILE}",
+ "AWS_REGION": "${AWS_REGION:-us-east-1}"
+ }
+ }
+ }
+}
+
+# Developer runs a setup script that substitutes variables and
+# copies to the correct client config location
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+## Source References
+- [Repository README Transport Section](https://github.com/awslabs/mcp/blob/main/README.md)
+- [AWS API MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
+- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
+- [Cline integration screenshots](https://github.com/awslabs/mcp/tree/main/docs/images/root-readme)
-## How These Components Connect
+## Summary
-```mermaid
-flowchart TD
- A[calculate_fully_qualified_name]
-```
+All `awslabs/mcp` servers run via `uvx` on stdio transport — the standard pattern for desktop MCP clients. Configurations differ only in the JSON config file location per client (Claude Desktop, Cursor, Amazon Q Developer, Cline). Docker transport is available for teams without Python/uv or for reproducible shared deployments. Standardize environment variables (`AWS_PROFILE`, `AWS_REGION`, `MCP_LOG_LEVEL`) across all server configs to prevent drift.
+
+Next: [Chapter 4: Infrastructure and IaC Workflows](04-infrastructure-and-iac-workflows.md)
diff --git a/tutorials/awslabs-mcp-tutorial/04-infrastructure-and-iac-workflows.md b/tutorials/awslabs-mcp-tutorial/04-infrastructure-and-iac-workflows.md
index 82b421d9..67a04855 100644
--- a/tutorials/awslabs-mcp-tutorial/04-infrastructure-and-iac-workflows.md
+++ b/tutorials/awslabs-mcp-tutorial/04-infrastructure-and-iac-workflows.md
@@ -5,84 +5,139 @@ nav_order: 4
parent: awslabs/mcp Tutorial
---
-
# Chapter 4: Infrastructure and IaC Workflows
-Welcome to **Chapter 4: Infrastructure and IaC Workflows**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+The `awslabs/mcp` repo includes dedicated servers for each major AWS IaC tool: Terraform, AWS CDK, CloudFormation, and a unified `aws-iac-mcp-server`. This chapter maps each server to its use case, explains what operations it enables, and establishes governance boundaries for production infrastructure.
+## Learning Goals
-This chapter focuses on infrastructure automation servers (Terraform, CloudFormation, CDK, and related flows).
+- Align IaC server choice to your existing delivery stack
+- Integrate security scanning into generated infrastructure workflows
+- Distinguish deprecated versus preferred server paths
+- Keep deployment ownership and approval boundaries explicit
-## Learning Goals
+## IaC Server Options
-- align IaC server choice to your existing delivery stack
-- integrate security scanning into generated infrastructure workflows
-- distinguish deprecated versus preferred server paths
-- keep deployment ownership and approval boundaries explicit
+```mermaid
+graph TD
+ IAC[IaC MCP Servers]
+ IAC --> TF[terraform-mcp-server\nTerraform plan, validate, docs]
+ IAC --> CDK[cdk-mcp-server\nAWS CDK constructs + patterns]
+ IAC --> CFN[cfn-mcp-server\nCloudFormation templates]
+ IAC --> UNIFIED[aws-iac-mcp-server\nUnified multi-tool IaC server]
+
+ TF --> TF1[Tools: validate · plan\ndoc lookup · module discovery]
+ CDK --> CDK1[Tools: CDK constructs search\nbest practices · L1/L2/L3 guidance]
+ CFN --> CFN1[Tools: cfn-lint integration\ntemplate validation · resource docs]
+ UNIFIED --> U1[Wraps multiple tools\nSingle server for multi-stack projects]
+```
-## IaC Strategy
+## `terraform-mcp-server`
-Use server outputs to accelerate drafting and validation, but keep infrastructure approvals, production applies, and policy exceptions under explicit human governance.
+The Terraform MCP server enables AI-assisted Terraform workflows. Key tools:
-## Source References
+- `search_terraform_registry`: Search for providers, modules, and resources in the Terraform Registry
+- `resolve_terraform_registry_module`: Get module documentation and usage examples
+- `run_checkov_scan`: Run Checkov security scanning on Terraform code
+- `get_aws_provider_resources`: Discover available AWS Terraform resources
-- [AWS Terraform MCP Server README](https://github.com/awslabs/mcp/blob/main/src/terraform-mcp-server/README.md)
-- [Repository README Infrastructure Sections](https://github.com/awslabs/mcp/blob/main/README.md)
-- [Design Guidelines](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+Typical workflow:
+```
+1. LLM: "Create a Terraform module for an EKS cluster with managed node groups"
+2. terraform-mcp-server: search_terraform_registry for aws_eks_cluster
+3. LLM: generates Terraform code using search results
+4. terraform-mcp-server: run_checkov_scan on generated code
+5. LLM: reviews security findings, suggests fixes
+6. Human: reviews final plan before terraform apply
+```
-## Summary
+## `cdk-mcp-server`
-You now understand how to use IaC-focused MCP servers without weakening deployment controls.
+The CDK MCP server provides AWS CDK context to AI coding assistants. Key capabilities:
-Next: [Chapter 5: Data, Knowledge, and Agent Workflows](05-data-knowledge-and-agent-workflows.md)
+- CDK construct documentation retrieval (L1, L2, L3)
+- AWS Solutions Constructs pattern guidance
+- CDK Nag security check integration
+- Well-Architected Framework alignment for CDK patterns
+
+```mermaid
+flowchart LR
+ CDK_SERVER[cdk-mcp-server]
+ CDK_SERVER --> DOCS[CDK API documentation\nAll L1/L2/L3 constructs]
+ CDK_SERVER --> PATTERNS[AWS Solutions Constructs\npre-built patterns]
+ CDK_SERVER --> NAG[CDK Nag\nsecurity rule checking]
+ CDK_SERVER --> WA[Well-Architected\nalignment checks]
+```
+
+## `cfn-mcp-server`
+
+CloudFormation-specific server for teams using CFN templates. Integrates with `cfn-lint` for template validation.
-## Source Code Walkthrough
+## `aws-iac-mcp-server`
-### `scripts/verify_tool_names.py`
+The unified IaC server for teams that use multiple IaC tools. Useful when:
+- Your project mixes Terraform and CDK
+- You want a single server entry instead of managing three separately
+- You need a coordinated view across IaC tools
-The `find_tool_decorators` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+## IaC Governance Model
-```py
+```mermaid
+flowchart TD
+ GENERATE[LLM generates IaC code\nvia MCP server tools]
+ GENERATE --> SCAN[Automated scan:\nCheckov / CDK Nag / cfn-lint]
+ SCAN --> REVIEW[Human engineer review\nand approval]
+ REVIEW --> PLAN[terraform plan / cdk diff / cfn validate\n in non-production account]
+ PLAN --> APPROVE[Explicit approval gate\nbefore any apply]
+ APPROVE --> APPLY[Infrastructure applied]
+ APPLY --> MONITOR[Post-apply validation\ncloudwatch/cloudtrail]
+```
+**Key rule**: MCP servers assist with code generation and validation. They do not perform `terraform apply`, `cdk deploy`, or CloudFormation stack creation without explicit human instruction. The design guidelines in the repo specify that servers should have clear `ALLOW_WRITE` controls for any mutating operations.
-def find_tool_decorators(file_path: Path) -> List[Tuple[str, int]]:
- """Find all tool definitions in a Python file and extract tool names.
+## Security Scanning Integration
- Supports all tool registration patterns:
- - Pattern 1: @mcp.tool(name='tool_name')
- - Pattern 2: @mcp.tool() (uses function name)
- - Pattern 3: app.tool('tool_name')(function)
- - Pattern 4: mcp.tool()(function) (uses function name)
- - Pattern 5: self.mcp.tool(name='tool_name')(function)
- - Pattern 6: @.tool(name='tool_name')
+Both `terraform-mcp-server` and `cdk-mcp-server` integrate security scanning tools. This is built into the IaC workflow, not an afterthought:
- Returns:
- List of tuples: (tool_name, line_number)
- """
- try:
- with open(file_path, 'r', encoding='utf-8') as f:
- content = f.read()
- except (FileNotFoundError, UnicodeDecodeError):
- return []
+| Server | Scanning Tool | What It Checks |
+|:-------|:-------------|:---------------|
+| `terraform-mcp-server` | Checkov | AWS resource misconfigurations, IAM policies, encryption |
+| `cdk-mcp-server` | CDK Nag | CDK construct-level security rules |
+| `cfn-mcp-server` | cfn-lint | CloudFormation template validity and best practices |
- tools = []
+## Common IaC Workflows
- try:
- tree = ast.parse(content, filename=str(file_path))
- except SyntaxError:
- # If we can't parse the file, skip it
- return []
+### Generate EKS Cluster (CDK)
- for node in ast.walk(tree):
- # PATTERN 1 & 2 & 6: Decorator patterns
+```
+1. Load: cdk-mcp-server + aws-documentation-mcp-server
+2. "Create a production EKS cluster in CDK with managed node groups, encryption, and logging"
+3. cdk-mcp-server provides CDK construct docs + AWS Solutions Constructs patterns
+4. aws-documentation-mcp-server provides EKS configuration best practices
+5. LLM generates CDK TypeScript code
+6. cdk-mcp-server runs CDK Nag checks
+7. Human reviews, runs cdk diff, approves deployment
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+### Scan Existing Terraform
+```
+1. Load: terraform-mcp-server
+2. "Scan my Terraform code in ./infra/ for security issues"
+3. terraform-mcp-server: run_checkov_scan on ./infra/
+4. LLM reviews findings and suggests fixes
+5. Developer applies fixes, re-scans
+```
-## How These Components Connect
+## Source References
-```mermaid
-flowchart TD
- A[find_tool_decorators]
-```
+- [Terraform MCP Server README](https://github.com/awslabs/mcp/blob/main/src/terraform-mcp-server/README.md)
+- [CDK MCP Server README](https://github.com/awslabs/mcp/blob/main/src/cdk-mcp-server/README.md)
+- [CFN MCP Server README](https://github.com/awslabs/mcp/blob/main/src/cfn-mcp-server/README.md)
+- [Design Guidelines](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+
+## Summary
+
+The IaC servers accelerate code generation and validation but do not replace human governance of production changes. The standard workflow is: generate → scan → human review → dry-run → explicit approval → apply. Use `terraform-mcp-server` for Terraform workflows with Checkov integration, `cdk-mcp-server` for CDK with CDK Nag, and `aws-iac-mcp-server` for unified multi-tool projects. Never configure the servers to `apply` or `deploy` in production without an explicit human approval step.
+
+Next: [Chapter 5: Data, Knowledge, and Agent Workflows](05-data-knowledge-and-agent-workflows.md)
diff --git a/tutorials/awslabs-mcp-tutorial/05-data-knowledge-and-agent-workflows.md b/tutorials/awslabs-mcp-tutorial/05-data-knowledge-and-agent-workflows.md
index e154f407..c91250e2 100644
--- a/tutorials/awslabs-mcp-tutorial/05-data-knowledge-and-agent-workflows.md
+++ b/tutorials/awslabs-mcp-tutorial/05-data-knowledge-and-agent-workflows.md
@@ -5,84 +5,175 @@ nav_order: 5
parent: awslabs/mcp Tutorial
---
-
# Chapter 5: Data, Knowledge, and Agent Workflows
-Welcome to **Chapter 5: Data, Knowledge, and Agent Workflows**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+This chapter covers documentation/knowledge servers that reduce LLM staleness, data-oriented servers for AWS managed databases, and the pattern for chaining read-oriented context-building before invoking mutating operations.
+## Learning Goals
-This chapter explains how documentation and data-oriented servers improve context quality for coding and operations agents.
+- Use documentation and knowledge servers to reduce stale-model assumptions about AWS services
+- Combine data-oriented servers for richer troubleshooting and planning workflows
+- Structure workflows that separate retrieval from action execution
+- Choose server combinations by task complexity and risk level
-## Learning Goals
+## Context-First Workflow Pattern
-- use documentation/knowledge servers to reduce stale-model assumptions
-- combine data-oriented servers for richer troubleshooting and planning
-- structure workflows that separate retrieval from action execution
-- choose server combinations by task complexity and risk
+```mermaid
+flowchart TD
+ TASK[User task or question]
+ TASK --> CONTEXT[Phase 1: Build accurate context\nUse read-only servers]
+ CONTEXT --> DOCS[aws-documentation-mcp-server\nFetch relevant AWS docs]
+ CONTEXT --> KNOW[aws-knowledge-mcp-server\nQuery internal knowledge bases]
+ CONTEXT --> DATA[DB servers\nRead current state from databases]
+ CONTEXT --> READY[Context is accurate and current]
+ READY --> ACTION[Phase 2: Take action\nWith human approval if mutating]
+ ACTION --> MUTATE[Invoke mutating servers\nwith full context]
+```
-## Workflow Pattern
+The pattern: always retrieve relevant documentation and data state first, then invoke operational or mutating tools. This prevents LLM hallucinations about service behavior and reduces errors from stale training data.
-Use knowledge and documentation servers first to build accurate context, then invoke mutating or operational servers only after intent and constraints are clear.
+## Documentation & Knowledge Servers
-## Source References
+### `aws-documentation-mcp-server`
-- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
-- [Repository README Knowledge/Data Sections](https://github.com/awslabs/mcp/blob/main/README.md)
-- [Samples README](https://github.com/awslabs/mcp/blob/main/samples/README.md)
+The primary documentation retrieval server. Searches and fetches content from official AWS documentation at docs.aws.amazon.com.
-## Summary
+Key tools:
+- `search_documentation`: Full-text search across AWS documentation
+- `get_documentation`: Fetch a specific documentation page
+- `recommend_resources`: Get recommended documentation for a service or topic
-You now have a context-first approach for data and knowledge enriched MCP workflows.
+Use cases:
+- "What are the default timeout limits for Lambda functions?"
+- "What IAM permissions are needed to create an RDS cluster?"
+- "What are the differences between provisioned and on-demand capacity for DynamoDB?"
-Next: [Chapter 6: Security, Credentials, and Risk Controls](06-security-credentials-and-risk-controls.md)
+### `aws-knowledge-mcp-server`
-## Source Code Walkthrough
+Connects to Amazon Bedrock Knowledge Bases to query internal team knowledge, runbooks, or custom documentation indexed in your Bedrock Knowledge Base.
-### `scripts/verify_tool_names.py`
+```json
+{
+ "mcpServers": {
+ "team-knowledge": {
+ "command": "uvx",
+ "args": ["awslabs.aws-knowledge-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "prod-readonly",
+ "KNOWLEDGE_BASE_ID": "your-knowledge-base-id",
+ "AWS_REGION": "us-east-1"
+ }
+ }
+ }
+}
+```
-The `find_all_tools_in_package` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+### `bedrock-kb-retrieval-mcp-server`
-```py
+Similar to `aws-knowledge-mcp-server` but specialized for Bedrock Knowledge Base retrieval with advanced filtering and ranking options.
+## Database Servers
-def find_all_tools_in_package(package_dir: Path) -> List[Tuple[str, Path, int]]:
- """Find all tool definitions in a package directory.
+```mermaid
+graph TD
+ DB_SERVERS[Database MCP Servers]
+ DB_SERVERS --> RELATIONAL[Relational]
+ RELATIONAL --> PG[postgres-mcp-server]
+ RELATIONAL --> MYSQL[mysql-mcp-server]
+ RELATIONAL --> AURORA[aurora-dsql-mcp-server]
+
+ DB_SERVERS --> NOSQL[NoSQL]
+ NOSQL --> DDB[dynamodb-mcp-server]
+ NOSQL --> DOCDB[documentdb-mcp-server]
+ NOSQL --> NEPTUNE[amazon-neptune-mcp-server]
+ NOSQL --> KS[amazon-keyspaces-mcp-server]
+
+ DB_SERVERS --> CACHE[Cache/In-Memory]
+ CACHE --> EC[elasticache-mcp-server]
+ CACHE --> VALKEY[valkey-mcp-server]
+ CACHE --> MEMCACHED[memcached-mcp-server]
+
+ DB_SERVERS --> ANALYTICS[Analytics/Data]
+ ANALYTICS --> RS[redshift-mcp-server]
+ ANALYTICS --> S3T[s3-tables-mcp-server]
+ ANALYTICS --> TS[timestream-for-influxdb-mcp-server]
+```
- Returns:
- List of tuples: (tool_name, file_path, line_number)
- """
- all_tools = []
+### Database Workflow Pattern
- # Search for Python files in the package
- for python_file in package_dir.rglob('*.py'):
- # Skip test files and virtual environments
- if (
- 'test' in str(python_file)
- or '.venv' in str(python_file)
- or '__pycache__' in str(python_file)
- ):
- continue
+```
+1. Load the relevant DB server for your database type
+2. "Show me the schema for the orders table in production"
+3. DB server: reads schema metadata (DDL or describe)
+4. "Write a query to find orders older than 30 days with pending status"
+5. LLM generates SQL using schema context
+6. Human reviews query before execution
+7. "Run the query in read-only mode to verify results"
+```
- tools = find_tool_decorators(python_file)
- for tool_name, line_number in tools:
- all_tools.append((tool_name, python_file, line_number))
+Key rule: database servers can read data and should be able to execute queries, but any data-modifying operations (DELETE, UPDATE, INSERT at scale) should require explicit confirmation.
- return all_tools
+## Agent Workflow Composition
+For complex multi-step AWS workflows, combine multiple servers:
-def validate_tool_name(tool_name: str) -> Tuple[List[str], List[str]]:
- """Validate a tool name against naming conventions.
+### Example: Database Incident Investigation
- Returns:
- Tuple of (errors, warnings)
+```json
+{
+ "mcpServers": {
+ "cloudwatch": { "command": "uvx", "args": ["awslabs.cloudwatch-mcp-server"], "env": { "AWS_PROFILE": "readonly" } },
+ "dynamodb": { "command": "uvx", "args": ["awslabs.dynamodb-mcp-server"], "env": { "AWS_PROFILE": "readonly" } },
+ "aws-docs": { "command": "uvx", "args": ["awslabs.aws-documentation-mcp-server"] }
+ }
+}
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
-
+Investigation sequence:
+```
+1. cloudwatch-mcp-server: query CloudWatch metrics for DynamoDB table
+2. cloudwatch-mcp-server: retrieve error logs from CloudWatch Logs
+3. dynamodb-mcp-server: describe the table capacity and index configuration
+4. aws-documentation-mcp-server: look up DynamoDB throttling documentation
+5. LLM synthesizes findings and recommends capacity adjustments
+```
-## How These Components Connect
+### Example: Data Pipeline Planning
-```mermaid
-flowchart TD
- A[find_all_tools_in_package]
+```json
+{
+ "mcpServers": {
+ "aws-dataprocessing": { "command": "uvx", "args": ["awslabs.aws-dataprocessing-mcp-server"], "env": { "AWS_PROFILE": "dev" } },
+ "aws-docs": { "command": "uvx", "args": ["awslabs.aws-documentation-mcp-server"] },
+ "stepfunctions": { "command": "uvx", "args": ["awslabs.stepfunctions-tool-mcp-server"], "env": { "AWS_PROFILE": "dev" } }
+ }
+}
```
+
+## AI/ML Workflow Servers
+
+### `amazon-bedrock-agentcore-mcp-server`
+
+Connects to Amazon Bedrock AgentCore for managed agent execution. Provides tools for browser interaction, code execution, memory management, and gateway operations.
+
+### `sagemaker-ai-mcp-server`
+
+Access SageMaker for model training, deployment, and inference operations.
+
+### `nova-canvas-mcp-server`
+
+Generate and edit images using Amazon Nova Canvas via Bedrock.
+
+## Source References
+
+- [AWS Documentation MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-documentation-mcp-server/README.md)
+- [AWS Knowledge MCP Server README](https://github.com/awslabs/mcp/blob/main/src/aws-knowledge-mcp-server/README.md)
+- [DynamoDB MCP Server README](https://github.com/awslabs/mcp/blob/main/src/dynamodb-mcp-server/README.md)
+- [Bedrock KB Retrieval README](https://github.com/awslabs/mcp/blob/main/src/bedrock-kb-retrieval-mcp-server/README.md)
+- [Samples README](https://github.com/awslabs/mcp/blob/main/samples/README.md)
+
+## Summary
+
+Build workflows with documentation and data retrieval first, action execution second. The `aws-documentation-mcp-server` is the safest and most broadly useful server — include it in any task requiring AWS service knowledge. Database servers enable powerful schema-aware query generation; always read before write and require human review before bulk mutations. Combine observability servers (cloudwatch, cloudtrail) with data servers for incident investigation workflows.
+
+Next: [Chapter 6: Security, Credentials, and Risk Controls](06-security-credentials-and-risk-controls.md)
diff --git a/tutorials/awslabs-mcp-tutorial/06-security-credentials-and-risk-controls.md b/tutorials/awslabs-mcp-tutorial/06-security-credentials-and-risk-controls.md
index 30eb13b4..4976ba72 100644
--- a/tutorials/awslabs-mcp-tutorial/06-security-credentials-and-risk-controls.md
+++ b/tutorials/awslabs-mcp-tutorial/06-security-credentials-and-risk-controls.md
@@ -5,84 +5,186 @@ nav_order: 6
parent: awslabs/mcp Tutorial
---
-
# Chapter 6: Security, Credentials, and Risk Controls
-Welcome to **Chapter 6: Security, Credentials, and Risk Controls**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+This chapter covers the IAM credential model for `awslabs/mcp` servers, risk controls for mutating operations, and design guidelines the project follows to limit blast radius.
+## Learning Goals
-This chapter covers credential boundaries, mutating-operation risk, and environment controls.
+- Map IAM role scope to operational blast radius
+- Apply read-only and mutation-consent safeguards where servers support them
+- Enforce single-tenant assumptions for server instances
+- Reduce risk through explicit policy, allowlists, and timeout controls
-## Learning Goals
+## IAM as the Primary Control Plane
-- map IAM role scope to operational blast radius
-- apply read-only and mutation-consent style safeguards where supported
-- enforce single-tenant assumptions for server instances
-- reduce file-system and command execution risk through explicit policy
+All `awslabs/mcp` servers authenticate to AWS using standard credential chain resolution: `AWS_PROFILE`, `AWS_ACCESS_KEY_ID`/`AWS_SECRET_ACCESS_KEY`, instance metadata (EC2/ECS), or assume-role chains.
-## Security Baseline
+```mermaid
+graph TD
+ SERVER[MCP Server process]
+ SERVER --> CRED_CHAIN[AWS Credential Chain]
+ CRED_CHAIN --> PROFILE[AWS_PROFILE\nenvironment variable]
+ CRED_CHAIN --> KEYS[AWS_ACCESS_KEY_ID +\nAWS_SECRET_ACCESS_KEY]
+ CRED_CHAIN --> IMDS[EC2/ECS Instance Metadata\nfor container deployments]
+ CRED_CHAIN --> ROLE[IAM Role assumption\nfor cross-account access]
+
+ PROFILE --> IAM[IAM Policy\ncontrols what the server can do]
+ IAM --> LIMIT[Minimal required permissions\nper server function]
+```
-Treat IAM as the primary control plane, then layer server-side safety flags and client approval flows on top. Do not run single-user servers as shared multi-tenant services.
+## IAM Policy Principles
+
+### Principle of Least Privilege Per Server
+
+Each server should run with an IAM profile that grants only the permissions it needs.
+
+| Server | Minimum Permissions Needed |
+|:-------|:--------------------------|
+| `aws-documentation-mcp-server` | None (public docs) or minimal read |
+| `cloudwatch-mcp-server` | `cloudwatch:Describe*`, `cloudwatch:Get*`, `logs:Get*`, `logs:Describe*` |
+| `dynamodb-mcp-server` (read) | `dynamodb:Describe*`, `dynamodb:List*`, `dynamodb:Query`, `dynamodb:Scan` |
+| `dynamodb-mcp-server` (write) | Add `dynamodb:PutItem`, `dynamodb:UpdateItem`, `dynamodb:DeleteItem` |
+| `terraform-mcp-server` | Read-only for plan; deployment permissions for apply |
+
+### Separate Profiles by Risk Level
+
+```json
+{
+ "mcpServers": {
+ "cloudwatch-readonly": {
+ "command": "uvx",
+ "args": ["awslabs.cloudwatch-mcp-server"],
+ "env": { "AWS_PROFILE": "mcp-readonly" }
+ },
+ "dynamodb-readwrite": {
+ "command": "uvx",
+ "args": ["awslabs.dynamodb-mcp-server"],
+ "env": { "AWS_PROFILE": "mcp-dynamodb-dev" }
+ }
+ }
+}
+```
-## Source References
+## Mutation Controls
+
+Many servers support `ALLOW_WRITE` or equivalent flags that explicitly gate mutating operations:
+
+```json
+{
+ "mcpServers": {
+ "terraform": {
+ "command": "uvx",
+ "args": ["awslabs.terraform-mcp-server"],
+ "env": {
+ "AWS_PROFILE": "infra-dev",
+ "ALLOW_WRITE": "false"
+ }
+ }
+ }
+}
+```
-- [AWS API MCP Server Security Sections](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
-- [Repository README Security Notes](https://github.com/awslabs/mcp/blob/main/README.md)
-- [Vibe Coding Tips](https://github.com/awslabs/mcp/blob/main/VIBE_CODING_TIPS_TRICKS.md)
+```mermaid
+flowchart LR
+ TOOL_CALL[LLM calls mutating tool]
+ TOOL_CALL --> CHECK{ALLOW_WRITE set?}
+ CHECK -- false --> REJECT[Return error:\nmutating operations disabled]
+ CHECK -- true --> APPROVE{Human approval\nrequired?}
+ APPROVE -- configured --> USER[Prompt user for confirmation]
+ APPROVE -- not required --> EXECUTE[Execute mutation]
+ USER --> EXECUTE
+```
-## Summary
+## Design Guidelines Security Practices
-You now have a practical risk-control framework for production MCP usage on AWS.
+The `DESIGN_GUIDELINES.md` specifies security practices that all `awslabs/mcp` servers must follow:
-Next: [Chapter 7: Development, Testing, and Contribution Workflow](07-development-testing-and-contribution-workflow.md)
+### Code Security Scanning
-## Source Code Walkthrough
+All servers run Bandit (Python security linter) as part of CI:
+```bash
+bandit -r src/ -c .bandit
+```
+
+This catches common issues: hardcoded credentials, unsafe subprocess calls, SQL injection risks.
-### `scripts/verify_tool_names.py`
+### Controlled Execution Environments
-The `validate_tool_name` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+Servers that execute code (like code runners or IaC tools) must use timeouts and resource limits:
+```python
+# From design guidelines pattern
+async with asyncio.timeout(EXECUTION_TIMEOUT_SECONDS):
+ result = await execute_command(cmd)
+```
-```py
+### Explicit Allowlists
+For servers that interact with file systems or run commands, use explicit allowlists rather than denylists:
-def validate_tool_name(tool_name: str) -> Tuple[List[str], List[str]]:
- """Validate a tool name against naming conventions.
+```python
+ALLOWED_FILE_EXTENSIONS = {'.tf', '.json', '.yaml', '.yml'}
- Returns:
- Tuple of (errors, warnings)
- - errors: Critical validation failures (will fail the build)
- - warnings: Style recommendations (informational only)
- """
- errors = []
- warnings = []
+def validate_file_path(path: str) -> None:
+ ext = Path(path).suffix
+ if ext not in ALLOWED_FILE_EXTENSIONS:
+ raise ValueError(f"File extension {ext} not allowed")
+```
- # Check if name is empty
- if not tool_name:
- errors.append('Tool name cannot be empty')
- return errors, warnings
+### Timeouts for Long-Running Operations
- # Check length (MCP SEP-986: tool names should be 1-64 characters)
- if len(tool_name) > MAX_TOOL_NAME_LENGTH:
- errors.append(
- f"Tool name '{tool_name}' ({len(tool_name)} chars) exceeds the {MAX_TOOL_NAME_LENGTH} "
- f'character limit specified in MCP SEP-986. Please shorten the tool name.'
- )
+All long-running API calls must have explicit timeouts to prevent hanging tool executions that block the MCP client.
- # Check if name matches the valid pattern
- if not VALID_TOOL_NAME_PATTERN.match(tool_name):
- if tool_name[0].isdigit():
- errors.append(f"Tool name '{tool_name}' cannot start with a number")
- elif not tool_name[0].isalpha():
- errors.append(f"Tool name '{tool_name}' must start with a letter")
- else:
+## Single-Tenant Assumption
+
+`awslabs/mcp` servers are designed for single-user local development or CI usage. They are not designed for multi-tenant hosted deployments where multiple users share a single server instance.
+
+```mermaid
+graph LR
+ OK[Correct usage]
+ OK --> DEV[Developer machine:\none server per developer\nIsolated AWS credentials]
+ OK --> CI[CI pipeline:\none server per job\nIAM role per job]
+
+ WRONG[Incorrect usage]
+ WRONG --> SHARED[Shared server instance\nmultiple users\nShared credentials = shared blast radius]
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+If you need multi-tenant MCP deployment, run separate instances per user with separate IAM credentials.
+## Sensitive Operations Requiring Human Approval
-## How These Components Connect
+Never configure MCP servers to auto-execute these operations without explicit human confirmation:
+- `terraform apply` or `cdk deploy` in production accounts
+- Database `DELETE`, `DROP`, or bulk `UPDATE` statements
+- IAM policy creation or modification
+- Security group rule changes
+- S3 bucket deletion or ACL modification
+- EKS cluster creation or deletion
+
+The `VIBE_CODING_TIPS_TRICKS.md` in the repo provides guidance on configuring AI coding tools to maintain appropriate human oversight.
+
+## Credential Rotation
```mermaid
-flowchart TD
- A[validate_tool_name]
+flowchart LR
+ ROTATE[Credential Rotation Process]
+ ROTATE --> NEW[Generate new IAM keys]
+ ROTATE --> TEST[Test new keys in dev environment]
+ TEST --> UPDATE[Update client configs:\nClaude Desktop · Cursor · Q Developer]
+ UPDATE --> VERIFY[Verify all MCP servers start\nand authenticate]
+ VERIFY --> REVOKE[Revoke old keys]
```
+
+Use IAM Roles with short-lived STS tokens rather than long-lived access keys where possible. For developer machines, use `aws sso login` with SSO-backed profiles rather than static access keys.
+
+## Source References
+
+- [AWS API MCP Server Security Sections](https://github.com/awslabs/mcp/blob/main/src/aws-api-mcp-server/README.md)
+- [Design Guidelines — Security Practices](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+- [Vibe Coding Tips — Safety](https://github.com/awslabs/mcp/blob/main/VIBE_CODING_TIPS_TRICKS.md)
+
+## Summary
+
+IAM is the primary risk control — assign minimal necessary permissions per server and use separate profiles per risk level (read-only vs. read-write). Use `ALLOW_WRITE=false` for servers in exploration mode. Follow the design guidelines' explicit allowlist pattern for file and command operations. Servers are single-tenant by design — never share an instance or credentials across users. Prefer IAM Roles with STS tokens over static access keys for all deployments.
+
+Next: [Chapter 7: Development, Testing, and Contribution Workflow](07-development-testing-and-contribution-workflow.md)
diff --git a/tutorials/awslabs-mcp-tutorial/07-development-testing-and-contribution-workflow.md b/tutorials/awslabs-mcp-tutorial/07-development-testing-and-contribution-workflow.md
index 28e42953..ce50fbd8 100644
--- a/tutorials/awslabs-mcp-tutorial/07-development-testing-and-contribution-workflow.md
+++ b/tutorials/awslabs-mcp-tutorial/07-development-testing-and-contribution-workflow.md
@@ -5,84 +5,372 @@ nav_order: 7
parent: awslabs/mcp Tutorial
---
-
# Chapter 7: Development, Testing, and Contribution Workflow
-Welcome to **Chapter 7: Development, Testing, and Contribution Workflow**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+This chapter covers the full contributor workflow for the `awslabs/mcp` monorepo: setting up a local development environment, running quality gates, writing and executing tests, and preparing pull requests that pass automated CI.
+## Learning Goals
-This chapter focuses on contributor workflows in the monorepo.
+- Set up local tooling with uv, Python 3.10, and pre-commit hooks
+- Use cookiecutter to scaffold a new server from the monorepo template
+- Run server-level unit and integration tests with coverage reporting
+- Use MCP Inspector for interactive local debugging
+- Understand CI pipeline checks that all PRs must pass
-## Learning Goals
+## Local Development Setup
-- set up local tooling and pre-commit quality gates
-- run server-level unit/integration tests reliably
-- align docs updates with server changes
-- prepare pull requests that satisfy repository standards
+The `DEVELOPER_GUIDE.md` defines the prerequisites and setup sequence:
+
+```mermaid
+flowchart TD
+ PREREQ[Prerequisites]
+ PREREQ --> UV[Install uv\ndocs.astral.sh/uv]
+ PREREQ --> PY[Python 3.10\nuv python install 3.10]
+ PREREQ --> PC[pre-commit\npre-commit.com]
+ PREREQ --> GIT[Git]
-## Contribution Workflow
+ PREREQ --> SETUP[Setup sequence]
+ SETUP --> FORK[Fork awslabs/mcp on GitHub]
+ FORK --> CLONE[Clone your fork locally]
+ CLONE --> HOOKS[cd mcp && pre-commit install]
+ HOOKS --> READY[Ready to develop]
+```
-Adopt the repository pre-commit and test pipeline locally before opening PRs. Keep server changes, tests, and docs synchronized to reduce review churn.
+**Required tools:**
-## Source References
+| Tool | Version | Install |
+|:-----|:--------|:--------|
+| uv | latest | `curl -LsSf https://astral.sh/uv/install.sh \| sh` |
+| Python | 3.10 | `uv python install 3.10` |
+| pre-commit | latest | `pip install pre-commit` |
+| AWS CLI | v2 | Optional, needed for credential setup |
-- [Developer Guide](https://github.com/awslabs/mcp/blob/main/DEVELOPER_GUIDE.md)
-- [Design Guidelines](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
-- [Contributing](https://github.com/awslabs/mcp/blob/main/CONTRIBUTING.md)
+After cloning your fork, install pre-commit hooks at the repo root:
-## Summary
+```bash
+cd mcp
+pre-commit install
+```
-You now have a reliable workflow for shipping server changes in the `awslabs/mcp` ecosystem.
+Pre-commit runs before every commit. You can also trigger it manually:
-Next: [Chapter 8: Production Operations and Governance](08-production-operations-and-governance.md)
+```bash
+pre-commit run --all-files
+```
+
+## Scaffolding a New Server
+
+Use the cookiecutter template from the monorepo to generate a new server skeleton:
+
+```bash
+uvx cookiecutter https://github.com/awslabs/mcp.git \
+ --checkout cookiecutters \
+ --output-dir ./src \
+ --directory python
+```
+
+The CLI prompts you for server name, description, and initial version. The generated project lands in `src/<your-server-name>-mcp-server/` following the standard server structure:
+
+```
+src/your-server-name-mcp-server/
+├── README.md
+├── CHANGELOG.md
+├── pyproject.toml
+├── .pre-commit-config.yaml
+├── awslabs/
+│ └── your_server_name/
+│ ├── __init__.py
+│ ├── server.py # FastMCP app, tool registrations
+│ ├── models.py # Pydantic models
+│ └── consts.py # Constants
+└── tests/
+ ├── test_server.py
+ └── integ_basic.py
+```
+
+After generation, install dependencies:
+
+```bash
+cd src/your-server-name-mcp-server
+uv venv && uv sync --all-groups
+```
+
+## Design Guidelines: Code Organization
+
+The `DESIGN_GUIDELINES.md` specifies the conventions all servers must follow:
+
+### Module Structure
+
+- `server.py`: FastMCP app initialization, tool definitions, `main()` entry point
+- `models.py`: Pydantic models for request/response validation
+- `consts.py`: Constants shared across modules — do not scatter magic strings
+
+### Entry Point Convention
+
+Each server must have a single `main()` function in `server.py`:
+
+```python
+# server.py — standard entry point pattern
+import asyncio
+from fastmcp import FastMCP, Context
+from pydantic import Field
+
+mcp = FastMCP(
+ 'awslabs-your-server-name',
+ instructions="""
+# Your Server Name
+
+Describe what this server does for the LLM.
+""",
+ dependencies=['boto3', 'pydantic'],
+)
+
+@mcp.tool(name='your_tool_name')
+async def your_tool(
+ ctx: Context,
+ param: str = Field(..., description='Clear description for the LLM'),
+) -> str:
+ """Tool docstring used by LLM for tool selection."""
+ ...
+
+def main():
+ mcp.run()
+
+if __name__ == '__main__':
+ main()
+```
+
+### Code Style
+
+All servers use `ruff` for formatting and linting, and `pyright` for type checking:
+
+```toml
+# pyproject.toml
+[tool.ruff]
+line-length = 99
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "B", "Q"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["awslabs"]
+```
+
+## Testing
+
+### Test Structure
+
+Each server is expected to have a `tests/` directory with:
+- **Unit tests**: test individual functions in isolation, mock AWS calls
+- **Integration tests**: named `integ_*.py`, test against real AWS services
+
+```bash
+# Run all tests with coverage
+cd src/your-server-name-mcp-server
+uv run --frozen pytest --cov --cov-branch --cov-report=term-missing
+```
-## Source Code Walkthrough
+### Mocking AWS with moto
-### `scripts/verify_tool_names.py`
+```python
+import pytest
+from moto import mock_aws
+import boto3
-The `validate_tool_names` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+@mock_aws
+def test_list_tables():
+ # Create mock DynamoDB table
+ client = boto3.client('dynamodb', region_name='us-east-1')
+ client.create_table(
+ TableName='test-table',
+ KeySchema=[{'AttributeName': 'id', 'KeyType': 'HASH'}],
+ AttributeDefinitions=[{'AttributeName': 'id', 'AttributeType': 'S'}],
+ BillingMode='PAY_PER_REQUEST',
+ )
+ # Test your server tool against the mocked table
+ ...
+```
+
+```mermaid
+graph TD
+ TESTS[Test Suite per Server]
+ TESTS --> UNIT[Unit tests\ntests/test_*.py\nMocked AWS via moto]
+ TESTS --> INTEG[Integration tests\ntests/integ_*.py\nReal AWS credentials required]
+
+ UNIT --> COV[Coverage report\n--cov-branch]
+ INTEG --> LIVE[Live AWS account\nuse a test-only IAM role]
+```
+
+### Testing with a Local Development Server
+
+Point your MCP client directly at your local server code — no publish step required:
+
+```json
+{
+ "mcpServers": {
+ "your-dev-server": {
+ "command": "uv",
+ "args": [
+ "--directory",
+ "/Users/yourname/mcp/src/your-server-name-mcp-server/awslabs/your_server_name",
+ "run",
+ "server.py"
+ ],
+ "env": {
+ "FASTMCP_LOG_LEVEL": "ERROR"
+ }
+ }
+ }
+}
+```
+
+## MCP Inspector
+
+The MCP Inspector is the standard interactive debugging tool for MCP servers. It runs without installation:
+
+```bash
+npx @modelcontextprotocol/inspector \
+ uv \
+ --directory /path/to/your/server/awslabs/your_server_name \
+ run \
+ server.py
+```
+
+Inspector starts a local server at `http://127.0.0.1:6274` where you can:
+- Browse all registered tools, resources, and prompts
+- Call tools interactively with custom parameters
+- Inspect JSON-RPC request/response pairs
+- View server log output in real time
+
+```mermaid
+flowchart LR
+ INSPECTOR[MCP Inspector\nlocalhost:6274]
+ INSPECTOR --> TOOLS[List and call tools]
+ INSPECTOR --> RESOURCES[Browse resources]
+ INSPECTOR --> PROMPTS[Test prompt templates]
+ INSPECTOR --> LOGS[View server logs]
+ INSPECTOR --> RPC[Inspect JSON-RPC messages]
+```
-```py
+## Pre-commit Hooks
+The root `.pre-commit-config.yaml` runs a suite of checks before each commit. Key hooks include:
-def validate_tool_names(
- package_name: str, tools: List[Tuple[str, Path, int]], verbose: bool = False
-) -> Tuple[bool, List[str], List[str]]:
- """Validate all tool names in a package.
+| Hook | What It Checks |
+|:-----|:--------------|
+| `ruff` | Python linting (import order, unused vars, style) |
+| `ruff-format` | Code formatting |
+| `detect-secrets` | Accidental credential leakage |
+| `check-license-header` | Apache 2.0 header on all source files |
+| `no-commit-to-branch` | Prevents direct commits to `main` |
- Returns:
- Tuple of (is_valid, list_of_errors, list_of_warnings)
- - is_valid: True if no errors (warnings don't fail validation)
- - list_of_errors: Critical issues that fail the build
- - list_of_warnings: Recommendations that don't fail the build
- """
- errors = []
- warnings = []
+If a hook fails, the commit is aborted. Fix the flagged issues, then re-stage and commit:
- for tool_name, file_path, line_number in tools:
- # Validate tool name (length, characters, conventions)
- naming_errors, naming_warnings = validate_tool_name(tool_name)
- for error in naming_errors:
- errors.append(f'{file_path}:{line_number} - {error}')
- for warning in naming_warnings:
- warnings.append(f'{file_path}:{line_number} - {warning}')
+```bash
+# Fix formatting issues automatically
+ruff format src/your-server/
- if verbose:
- status = '✓' if not naming_errors else '✗'
- style_note = ''
- if naming_warnings:
- style_note = ' (non-snake_case)'
- print(f' {status} {tool_name} ({len(tool_name)} chars){style_note}')
+# Re-run all hooks to verify
+pre-commit run --all-files
- return len(errors) == 0, errors, warnings
+# Then commit
+git add -u
+git commit -m "fix: address pre-commit failures"
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+### Remediating Detected Secrets
+
+If `detect-secrets` flags a false positive:
+```bash
+# Regenerate the secrets baseline
+detect-secrets scan --baseline .secrets.baseline
-## How These Components Connect
+# Review and approve the findings
+detect-secrets audit .secrets.baseline
+
+# Commit the updated baseline
+git add .secrets.baseline
+git commit -m "chore: update secrets baseline"
+```
+
+## CI Workflows
+
+All PRs run the following GitHub Actions workflows defined in `.github/workflows/`:
+
+```mermaid
+graph TD
+ PR[Pull Request opened]
+ PR --> PC[pre-commit.yml\nRuns all pre-commit hooks\nper server]
+ PR --> BANDIT[bandit.yml\nPython security scan\nSARIF upload to Security tab]
+ PR --> CHECKOV[checkov.yml\nIaC security scanning]
+ PR --> CFN[cfn_nag.yml\nCloudFormation linting]
+ PR --> CODEQL[codeql.yml\nCode quality analysis]
+ PR --> DEPREV[dependency-review-action.yml\nNew dependency audit]
+ PR --> PRLINT[pull-request-lint.yml\nConventional commit title]
+
+ PC --> PASS[All checks pass]
+ BANDIT --> PASS
+ CHECKOV --> PASS
+ PRLINT --> PASS
+ PASS --> REVIEW[Ready for human review]
+```
+
+The `pre-commit.yml` workflow discovers all `.pre-commit-config.yaml` files across the monorepo and runs them in a matrix — so each server's hooks run independently.
+
+Bandit results upload to the repository's GitHub Security tab as SARIF. The workflow runs on push to `main`, on PRs targeting `main`, and on a weekly schedule.
+
+## Documentation Requirements
+
+When adding a new server, you must update:
+
+1. **`README.md`** (root): Add the server to both "Browse by What You're Building" and "Browse by How You're Working" sections with a brief description and link to `src/your-server-name-mcp-server/`.
+
+2. **`docusaurus/docs/servers/`**: Add a `.mdx` file describing the server.
+
+3. **`docusaurus/sidebars.ts`**: Add the server to the appropriate sidebar category.
+
+4. **`docusaurus/static/assets/server-cards.json`**: Add a card entry following the existing format.
+
+You can preview the documentation site locally:
+
+```bash
+cd docusaurus && npm start
+```
+
+## Pull Request Workflow
```mermaid
flowchart TD
- A[validate_tool_names]
+ ISSUE[Open RFC issue for significant work\nespecially new server proposals]
+ ISSUE --> FORK[Fork repo, create feature branch]
+ FORK --> DEV[Develop on fork/branch]
+ DEV --> PRECOMMIT[Run pre-commit run --all-files]
+ PRECOMMIT --> TESTS[Run pytest --cov locally]
+ TESTS --> DOCS[Update README + docusaurus docs]
+ DOCS --> PR[Open PR with conventional commit title]
+ PR --> CI[CI runs: pre-commit, bandit,\ncheckov, pull-request-lint]
+ CI --> REVIEW[Human review]
+ REVIEW --> MERGE[Merge to main]
+ MERGE --> PUBLISH[Team publishes new server\nto PyPI if applicable]
```
+
+PR titles must follow conventional commits format (enforced by `pull-request-lint.yml`):
+- `feat(your-server): add new tool for X`
+- `fix(cloudwatch-mcp-server): handle pagination in list_metrics`
+- `chore(doc): update main README`
+
+## Source References
+
+- [DEVELOPER_GUIDE.md](https://github.com/awslabs/mcp/blob/main/DEVELOPER_GUIDE.md)
+- [DESIGN_GUIDELINES.md](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+- [CONTRIBUTING.md](https://github.com/awslabs/mcp/blob/main/CONTRIBUTING.md)
+- [.github/workflows/](https://github.com/awslabs/mcp/tree/main/.github/workflows)
+- [AWS Documentation Server tests (example)](https://github.com/awslabs/mcp/tree/main/src/aws-documentation-mcp-server/tests)
+
+## Summary
+
+The `awslabs/mcp` contributor workflow centers on three gates: pre-commit hooks (run locally and in CI), server-level pytest coverage, and documentation completeness. Use cookiecutter to scaffold new servers rather than copying existing ones. Test locally with MCP Inspector and direct client config pointing at your source directory before opening a PR. All CI workflows must pass — pre-commit, Bandit, Checkov, and PR lint — before a human review is requested.
+
+Next: [Chapter 8: Production Operations and Governance](08-production-operations-and-governance.md)
diff --git a/tutorials/awslabs-mcp-tutorial/08-production-operations-and-governance.md b/tutorials/awslabs-mcp-tutorial/08-production-operations-and-governance.md
index 1356036d..b69c78ed 100644
--- a/tutorials/awslabs-mcp-tutorial/08-production-operations-and-governance.md
+++ b/tutorials/awslabs-mcp-tutorial/08-production-operations-and-governance.md
@@ -5,86 +5,324 @@ nav_order: 8
parent: awslabs/mcp Tutorial
---
-
# Chapter 8: Production Operations and Governance
-Welcome to **Chapter 8: Production Operations and Governance**. In this part of **awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
+This chapter closes the tutorial with the operating model for long-term use of `awslabs/mcp` servers: release versioning, upgrade workflows, server/tool sprawl governance, observability, configuration drift prevention, and rollback procedures.
+## Learning Goals
-This chapter closes with production operating patterns for long-term reliability.
+- Understand the release versioning scheme and how to pin server versions
+- Manage upgrade windows with staged validation
+- Monitor and reduce tool surface area sprawl over time
+- Detect and remediate configuration drift across team environments
+- Define rollback procedures tied to specific server versions
-## Learning Goals
+## Release Versioning
-- define deployment boundaries for local vs remote MCP use
-- standardize release validation across selected servers
-- monitor and prune server/tool sprawl over time
-- maintain governance around approvals, logging, and incident response
+The `awslabs/mcp` project uses a date-based release tag scheme:
-## Operations Playbook
+```
+YYYY.MM.YYYYMMDDhhmmss
+```
-1. scope each deployment to explicit roles and use cases
-2. run versioned validation suites before each upgrade window
-3. centralize observability signals and security review outcomes
-4. review client/server configs regularly for drift and overexposure
-5. keep rollback runbooks tied to specific server versions
+Example releases:
+- `2026.04.20260410061424`
+- `2026.04.20260409112122`
+- `2026.04.20260408085348`
-## Source References
+Multiple releases can occur on the same day. Each release bundles all servers with changes since the previous published release — there is no per-server versioning separate from the monorepo release cycle.
-- [Repository README](https://github.com/awslabs/mcp/blob/main/README.md)
-- [Developer Guide](https://github.com/awslabs/mcp/blob/main/DEVELOPER_GUIDE.md)
-- [Samples README](https://github.com/awslabs/mcp/blob/main/samples/README.md)
+### Pinning a Specific Version
-## Summary
+Pin your `uvx` invocations to a known-good version to prevent automatic upgrades:
+
+```json
+{
+ "mcpServers": {
+ "cloudwatch": {
+ "command": "uvx",
+ "args": ["awslabs.cloudwatch-mcp-server==0.2.1"],
+ "env": { "AWS_PROFILE": "readonly" }
+ }
+ }
+}
+```
+
+Always test unpinned (latest) in a development environment before promoting pinned versions to production or shared team configs.
+
+## Release Process
+
+The release workflow is documented in `.github/workflows/RELEASE_INSTRUCTIONS.md`. It uses a three-role model:
+
+```mermaid
+flowchart TD
+ REQ[Requestor]
+ REQ --> BRANCH[Trigger release-initiate-branch.yml\nvia GitHub Actions or gh CLI]
+ BRANCH --> PR[Release PR created:\nchore: release/YYYY.MM.YYYYMMDDhhmmss]
+ PR --> REVIEW[Code Owner Reviewers\ntwo CODEOWNERS must approve]
+ REVIEW --> MERGE[Merge when ready]
+ MERGE --> DEPLOY[Release Deploy workflow triggers]
+ DEPLOY --> APPROVE[Repository Owner approves\ndeployment gate]
+ APPROVE --> PUBLISH[All changed servers published\nto PyPI]
+```
+
+To check what changes will go into the next release:
+
+```bash
+LATEST=$(gh release list \
+ --repo awslabs/mcp \
+ --limit 1 \
+ --exclude-drafts \
+ --exclude-pre-releases \
+ --json tagName | jq -r '.[0].tagName')
+
+git diff "${LATEST}"...remotes/origin/main --name-only
+```
+
+## Upgrade Workflow for Teams
+
+When a new release is available, follow a staged upgrade process:
+
+```mermaid
+flowchart TD
+ DETECT[Detect new release\ngh release list --repo awslabs/mcp]
+ DETECT --> REVIEW[Review release notes\ncheck affected servers]
+ REVIEW --> DEV[Update dev environment config\nunpin to new version]
+ DEV --> TEST[Run validation suite:\n- MCP Inspector: list tools\n- Smoke test each tool\n- Verify AWS credentials still work]
+ TEST --> PASS{All checks pass?}
+ PASS -- No --> ROLLBACK[Pin back to previous version\nfile issue]
+ PASS -- Yes --> TEAM[Promote to shared team config\nwith new pinned version]
+ TEAM --> PROD[Update production/CI configs]
+```
+
+Keep a validation checklist per server:
+
+| Server | Validation Test |
+|:-------|:---------------|
+| `aws-documentation-mcp-server` | Call `search_documentation` with a known term |
+| `cloudwatch-mcp-server` | List alarms in target account |
+| `terraform-mcp-server` | Search Terraform Registry for `aws_s3_bucket` |
+| `cdk-mcp-server` | Retrieve CDK construct docs for `aws-ecs` |
+| `dynamodb-mcp-server` | List tables in dev account |
+
+## Tool Sprawl Governance
+
+Each loaded MCP server contributes its tool definitions to the LLM context window. With 65+ servers in the catalog, uncontrolled loading degrades tool selection accuracy and increases cost.
+
+```mermaid
+graph LR
+ SPRAWL[Unchecked tool sprawl]
+ SPRAWL --> TOKENS[Context tokens consumed\nby unused tool definitions]
+ SPRAWL --> CONFUSION[LLM selects wrong tool\nwhen 500+ tools are loaded]
+ SPRAWL --> COST[Increased cost per inference\nfrom bloated context]
+
+ CONTROL[Governed tool surface]
+ CONTROL --> MINIMAL[Load 2-3 servers per workflow role]
+ CONTROL --> AUDIT[Quarterly audit:\nremove unused server configs]
+ CONTROL --> PROFILE[Separate configs per role:\ndev vs. ops vs. data]
+```
+
+### Audit Checklist
+
+Run a quarterly review of your team's MCP configurations:
+
+1. **List all configured servers** across Claude Desktop, Cursor, Amazon Q Developer, and CI configs
+2. **Check usage logs** — if `MCP_LOG_LEVEL=INFO`, look for tool call patterns to identify unused servers
+3. **Remove servers** that haven't been invoked in the past 30 days
+4. **Consolidate profiles** — if two team members have divergent configs, reconcile them to a shared template
+
+### Role-Scoped Configuration Files
+
+Maintain separate configuration profiles for different work contexts rather than one catch-all config:
+
+```
+.mcp/
+├── research.json # aws-documentation + aws-api-mcp
+├── iac-dev.json # terraform + cdk + aws-docs
+├── ops-readonly.json # cloudwatch + cloudtrail (read-only IAM)
+├── data-dev.json # dynamodb + postgres + aws-docs
+└── incident.json # cloudwatch + cloudtrail + aws-docs (full incident kit)
+```
-You now have an end-to-end model for operating AWS MCP servers with stronger governance and maintainability.
+Each file is a complete `mcpServers` block that team members can point their client at.
-## Source Code Walkthrough
+## Configuration Drift Prevention
-### `scripts/verify_tool_names.py`
+Without active governance, team member configs diverge from the shared template. Prevent this with a version-controlled template and a setup script:
-The `main` function in [`scripts/verify_tool_names.py`](https://github.com/awslabs/mcp/blob/HEAD/scripts/verify_tool_names.py) handles a key part of this chapter's functionality:
+```bash
+#!/bin/bash
+# .mcp/setup.sh — run on new machine or after config template update
+ROLE="${1:-research}"
+CLIENT="${2:-claude}"
-```py
+case "$CLIENT" in
+ claude)
+ TARGET="$HOME/Library/Application Support/Claude/claude_desktop_config.json"
+ ;;
+ cursor)
+ TARGET="$HOME/.cursor/mcp.json"
+ ;;
+ amazonq)
+ TARGET="$HOME/.aws/amazonq/mcp.json"
+ ;;
+esac
+# Substitute environment-specific values
+envsubst < ".mcp/${ROLE}.json.tmpl" > "$TARGET"
+echo "MCP config deployed: $TARGET"
+```
-def main():
- """Main function to verify tool name conventions."""
- parser = argparse.ArgumentParser(
- description='Verify that MCP tool names follow naming conventions and length limits'
- )
- parser.add_argument(
- 'package_dir',
- help='Path to the package directory (e.g., src/git-repo-research-mcp-server)',
- )
- parser.add_argument('--verbose', '-v', action='store_true', help='Enable verbose output')
+Template file `.mcp/research.json.tmpl`:
- args = parser.parse_args()
+```json
+{
+ "mcpServers": {
+ "aws-docs": {
+ "command": "uvx",
+ "args": ["awslabs.aws-documentation-mcp-server@${MCP_DOCS_VERSION}"],
+ "env": {
+ "AWS_PROFILE": "${AWS_PROFILE}",
+ "AWS_REGION": "${AWS_REGION:-us-east-1}",
+ "MCP_LOG_LEVEL": "WARNING"
+ }
+ }
+ }
+}
+```
- package_dir = Path(args.package_dir)
- pyproject_path = package_dir / 'pyproject.toml'
+Store templates in the team's git repo. Pin `MCP_DOCS_VERSION` in a `.mcp/versions.env` file and update it deliberately after validation.
- if not package_dir.exists():
- print(f"Error: Package directory '{package_dir}' does not exist", file=sys.stderr)
- sys.exit(1)
+## Observability for MCP Operations
- if not pyproject_path.exists():
- print(f"Error: pyproject.toml not found in '{package_dir}'", file=sys.stderr)
- sys.exit(1)
+### Logging
- try:
- # Extract package name from pyproject.toml
- package_name = extract_package_name(pyproject_path)
- if args.verbose:
- print(f'Package name from pyproject.toml: {package_name}')
+Set `MCP_LOG_LEVEL=INFO` during validation and incident investigation to capture tool call activity:
+```json
+{
+ "env": {
+ "MCP_LOG_LEVEL": "INFO",
+ "AWS_PROFILE": "readonly"
+ }
+}
```
-This function is important because it defines how awslabs/mcp Tutorial: Operating a Large-Scale MCP Server Ecosystem for AWS Workloads implements the patterns covered in this chapter.
+Log output goes to stderr, captured by the MCP host client. For Claude Desktop, logs appear in the MCP server panel.
+### CloudWatch Integration for AWS Operations
-## How These Components Connect
+For production AI agent workflows that use `awslabs/mcp` servers, use CloudWatch to monitor the downstream AWS API call patterns the servers generate:
+
+```mermaid
+graph TD
+ AGENT[AI Agent with MCP servers]
+ AGENT --> CW_MCP[cloudwatch-mcp-server\nreads metrics/logs]
+ AGENT --> DDB_MCP[dynamodb-mcp-server\nreads/writes data]
+
+ CW_MCP --> CW[Amazon CloudWatch]
+ DDB_MCP --> DDB[Amazon DynamoDB]
+
+ CW --> ALERT[CloudWatch Alarm\non unusual API call rate]
+ DDB --> AUDIT[CloudTrail audit log\nfor all DynamoDB mutations]
+ AUDIT --> REVIEW[Regular review:\nwhat did the agent actually do?]
+```
+
+### CloudTrail Audit for Mutating Operations
+
+For any workflow where MCP servers invoke mutating AWS operations, enable CloudTrail data events:
+
+- **DynamoDB**: Enable data events to log every `PutItem`, `UpdateItem`, `DeleteItem`
+- **S3**: Enable data events to log object-level operations
+- **IAM**: Enable management events for policy and role changes
+
+Provided you use dedicated IAM roles for MCP servers, this creates an audit trail of all agent-driven changes, kept separate from human-initiated changes.
+
+## Rollback Procedures
+
+When an upgrade introduces a regression, roll back by pinning the previous version:
```mermaid
flowchart TD
- A[main]
+ DETECT[Detect regression\nTool call fails or returns wrong data]
+ DETECT --> IDENTIFY[Identify which server version introduced it\ngh release list --repo awslabs/mcp]
+ IDENTIFY --> PIN[Pin all affected servers to previous version\nin client configs]
+ PIN --> VERIFY[Verify rollback restores behavior]
+ VERIFY --> ISSUE[File GitHub issue with\nserver name + version + repro steps]
+ ISSUE --> WAIT[Wait for fix release\nor contribute a fix via PR]
+```
+
+Example rollback: if `cloudwatch-mcp-server@0.2.1` breaks `list_metrics`, update your config:
+
+```json
+{
+ "mcpServers": {
+ "cloudwatch": {
+ "command": "uvx",
+ "args": ["awslabs.cloudwatch-mcp-server==0.2.0"],
+ "env": { "AWS_PROFILE": "readonly" }
+ }
+ }
+}
+```
+
+Restart the MCP client to pick up the pinned version. `uvx` caches packages locally, so if the rollback version was previously installed, it is immediately available without a re-download.
+
+## Governance for Multi-Team Deployments
+
+When multiple teams in an organization use `awslabs/mcp` servers, centralize the configuration management:
+
+```mermaid
+graph TD
+ CENTRAL[Platform/DevEx team]
+ CENTRAL --> TEMPLATES[Maintains role-scoped templates\nin shared git repo]
+ CENTRAL --> VERSIONS[Owns version pinning decisions\nafter validation]
+ CENTRAL --> IAM[Manages IAM profiles\nper risk level]
+ CENTRAL --> AUDIT[Quarterly tool sprawl audit]
+
+ TEAMS[Product teams]
+ TEAMS --> USE[Use approved templates\nfor their workflow role]
+ TEAMS --> REQUEST[Request new server additions\nvia RFC to platform team]
+ TEAMS --> REPORT[Report regressions\nwith version + repro]
```
+
+### Human Approval Gates
+
+Document which operations always require human approval and enforce them at the configuration level:
+
+| Operation | Enforcement |
+|:----------|:-----------|
+| `terraform apply` in production | `ALLOW_WRITE=false` in prod configs; manual override only |
+| `dynamodb:DeleteItem` at scale | Separate read-write profile with scoped table ARN condition |
+| IAM policy creation | Deny in IAM policy for MCP server role |
+| `cdk deploy` to prod account | Separate IAM role requiring MFA for AssumeRole |
+| S3 bucket deletion | Explicit IAM Deny on `s3:DeleteBucket` |
+
+## Docusaurus Documentation Site
+
+The `docusaurus/` directory at the repo root is the source for the public documentation site at `awslabs.github.io/mcp`. Key directories:
+
+- `docusaurus/docs/servers/` — per-server `.md` reference pages
+- `docusaurus/static/assets/server-cards.json` — card metadata for the catalog UI
+- `docusaurus/sidebars.ts` — navigation structure
+
+For teams operating a fork or internal mirror, you can run the site locally to verify documentation before merging:
+
+```bash
+cd docusaurus
+npm install
+npm start
+# Opens at http://localhost:3000
+```
+
+## Source References
+
+- [DEVELOPER_GUIDE.md — Release Process](https://github.com/awslabs/mcp/blob/main/.github/workflows/RELEASE_INSTRUCTIONS.md)
+- [Repository README](https://github.com/awslabs/mcp/blob/main/README.md)
+- [Samples README](https://github.com/awslabs/mcp/blob/main/samples/README.md)
+- [Docusaurus site source](https://github.com/awslabs/mcp/tree/main/docusaurus)
+- [DESIGN_GUIDELINES.md](https://github.com/awslabs/mcp/blob/main/DESIGN_GUIDELINES.md)
+
+## Summary
+
+The `awslabs/mcp` project uses a date-stamped release tag scheme where all changed servers ship together. Pin server versions in team configurations and validate before promoting upgrades. Prevent tool sprawl by scoping configuration files to workflow roles (research, IaC, ops, data) and running quarterly audits to remove unused servers. Use CloudTrail data events to audit all agent-driven mutations in production. Rollback is immediate — pin to a previous version in the client config and restart. For multi-team environments, centralize template ownership and version approval in a platform team, keeping individual teams on approved profiles rather than unmanaged personal configs.
diff --git a/tutorials/babyagi-tutorial/01-getting-started.md b/tutorials/babyagi-tutorial/01-getting-started.md
index 7049b8ad..0130d87c 100644
--- a/tutorials/babyagi-tutorial/01-getting-started.md
+++ b/tutorials/babyagi-tutorial/01-getting-started.md
@@ -39,27 +39,24 @@ You now have a working BabyAGI baseline and can observe the autonomous three-age
Next: [Chapter 2: Core Architecture: Task Queue and Agent Loop](02-core-architecture-task-queue-and-agent-loop.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `examples/trigger_example.py`
+### `examples/simple_example.py`
-The `function_a` function in [`examples/trigger_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/trigger_example.py) handles a key part of this chapter's functionality:
+The `world` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
```py
@babyagi.register_function()
-def function_a():
- print("Result from function A")
- return "Result from function A"
+def world():
+ return "world"
-@babyagi.register_function(triggers=['function_a'])
-def function_b(input_data):
- print(f"Function B triggered with input: {input_data}")
- return f"Function B triggered with input: {input_data}"
+@babyagi.register_function(dependencies=["world"])
+def hello_world():
+ x = world()
+ return f"Hello {x}!"
-function_a()
+print(hello_world())
@app.route('/')
def home():
@@ -68,22 +65,23 @@ def home():
if __name__ == "__main__":
app = babyagi.create_app('/dashboard')
app.run(host='0.0.0.0', port=8080)
+
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `examples/trigger_example.py`
+### `examples/simple_example.py`
-The `function_b` function in [`examples/trigger_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/trigger_example.py) handles a key part of this chapter's functionality:
+The `hello_world` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
```py
-@babyagi.register_function(triggers=['function_a'])
-def function_b(input_data):
- print(f"Function B triggered with input: {input_data}")
- return f"Function B triggered with input: {input_data}"
+@babyagi.register_function(dependencies=["world"])
+def hello_world():
+ x = world()
+ return f"Hello {x}!"
-function_a()
+print(hello_world())
@app.route('/')
def home():
@@ -92,13 +90,14 @@ def home():
if __name__ == "__main__":
app = babyagi.create_app('/dashboard')
app.run(host='0.0.0.0', port=8080)
+
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `examples/trigger_example.py`
+### `examples/simple_example.py`
-The `home` function in [`examples/trigger_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/trigger_example.py) handles a key part of this chapter's functionality:
+The `home` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
```py
@@ -109,47 +108,28 @@ def home():
if __name__ == "__main__":
app = babyagi.create_app('/dashboard')
app.run(host='0.0.0.0', port=8080)
+
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `setup.py`
+### `examples/custom_route_example.py`
-The `parse_requirements` function in [`setup.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/setup.py) handles a key part of this chapter's functionality:
+The `another_custom_function` function in [`examples/custom_route_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/custom_route_example.py) handles a key part of this chapter's functionality:
```py
-# Read requirements from requirements.txt
-def parse_requirements(filename):
- with open(filename, "r") as f:
- lines = f.readlines()
- # Remove comments and empty lines
- return [line.strip() for line in lines if line.strip() and not line.startswith("#")]
-
-setup(
- name="babyagi", # Ensure this is the desired package name
- version="0.1.2", # Update this version appropriately
- author="Yohei Nakajima",
- author_email="babyagi@untapped.vc",
- description="An experimental prototype framework for building self building autonomous agents.",
- long_description= long_description,
- long_description_content_type="text/markdown",
- url="https://github.com/yoheinakajima/babyagi", # Update if necessary
- packages=find_packages(),
- include_package_data=True, # Include package data as specified in MANIFEST.in
- classifiers=[
- "Programming Language :: Python :: 3",
- "License :: OSI Approved :: MIT License",
- "Operating System :: OS Independent",
- ],
- python_requires='>=3.6',
- install_requires=parse_requirements("requirements.txt"),
- entry_points={
- 'console_scripts': [
- 'babyagi=babyagi.main:main', # Example entry point
- ],
- },
- keywords="AGI, AI, Framework, Baby AGI",
+@register_function()
+def another_custom_function():
+ return "Hello from another custom function!"
+
+@app.route('/')
+def home():
+ return f"Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
+
+if __name__ == "__main__":
+ app.run(host='0.0.0.0', port=8080)
+
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -159,11 +139,11 @@ This function is important because it defines how BabyAGI Tutorial: The Original
```mermaid
flowchart TD
- A[function_a]
- B[function_b]
+ A[world]
+ B[hello_world]
C[home]
- D[parse_requirements]
- E[another_custom_function]
+ D[another_custom_function]
+ E[home]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/02-core-architecture-task-queue-and-agent-loop.md b/tutorials/babyagi-tutorial/02-core-architecture-task-queue-and-agent-loop.md
index d8e2ca51..64320eff 100644
--- a/tutorials/babyagi-tutorial/02-core-architecture-task-queue-and-agent-loop.md
+++ b/tutorials/babyagi-tutorial/02-core-architecture-task-queue-and-agent-loop.md
@@ -38,118 +38,102 @@ You now understand how BabyAGI's three-agent loop operates as a coherent autonom
Next: [Chapter 3: LLM Backend Integration and Configuration](03-llm-backend-integration-and-configuration.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/__init__.py`
+### `examples/custom_flask_example.py`
-The `__getattr__` function in [`babyagi/__init__.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/__init__.py) handles a key part of this chapter's functionality:
+The `integrated_function` function in [`examples/custom_flask_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/custom_flask_example.py) handles a key part of this chapter's functionality:
```py
+@register_function()
+def integrated_function():
+ return "Hello from integrated function!"
+
+load_functions('plugins/firecrawl')
+
+@app.route('/')
+def home():
+ return "Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
-def __getattr__(name):
- """
- Dynamic attribute access for the babyagi module.
- If a function with the given name exists in the database,
- return a callable that executes the function via the executor.
- """
- try:
- if _func_instance.get_function(name):
- # Return a callable that executes the function via the executor
- return lambda *args, **kwargs: _func_instance.executor.execute(name, *args, **kwargs)
- except Exception as e:
- pass
- raise AttributeError(f"module '{__name__}' has no attribute '{name}'")
-
-
-# Auto-load default function packs when babyagi is imported
-try:
- print("Attempting to load default function packs...")
- # Uncomment if needed
- _func_instance.load_function_pack('default/default_functions')
- _func_instance.load_function_pack('default/ai_functions')
- _func_instance.load_function_pack('default/os')
- _func_instance.load_function_pack('default/function_calling_chat')
-except Exception as e:
- print(f"Error loading default function packs: {e}")
- traceback.print_exc()
-
-print("babyagi/__init__.py loaded")
+if __name__ == "__main__":
+ app.run(host='0.0.0.0', port=8080)
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `examples/simple_example.py`
+### `examples/custom_flask_example.py`
-The `world` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
+The `home` function in [`examples/custom_flask_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/custom_flask_example.py) handles a key part of this chapter's functionality:
```py
-@babyagi.register_function()
-def world():
- return "world"
-
-@babyagi.register_function(dependencies=["world"])
-def hello_world():
- x = world()
- return f"Hello {x}!"
-
-print(hello_world())
-
@app.route('/')
def home():
- return f"Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
+ return "Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
if __name__ == "__main__":
- app = babyagi.create_app('/dashboard')
app.run(host='0.0.0.0', port=8080)
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `examples/simple_example.py`
+### `main.py`
-The `hello_world` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
+The `home` function in [`main.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/main.py) handles a key part of this chapter's functionality:
```py
-@babyagi.register_function(dependencies=["world"])
-def hello_world():
- x = world()
- return f"Hello {x}!"
-
-print(hello_world())
-
@app.route('/')
def home():
return f"Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
if __name__ == "__main__":
- app = babyagi.create_app('/dashboard')
app.run(host='0.0.0.0', port=8080)
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `examples/simple_example.py`
+### `setup.py`
-The `home` function in [`examples/simple_example.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/examples/simple_example.py) handles a key part of this chapter's functionality:
+The `parse_requirements` function in [`setup.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/setup.py) handles a key part of this chapter's functionality:
```py
-@app.route('/')
-def home():
- return f"Welcome to the main app. Visit /dashboard for BabyAGI dashboard."
-
-if __name__ == "__main__":
- app = babyagi.create_app('/dashboard')
- app.run(host='0.0.0.0', port=8080)
-
+# Read requirements from requirements.txt
+def parse_requirements(filename):
+ with open(filename, "r") as f:
+ lines = f.readlines()
+ # Remove comments and empty lines
+ return [line.strip() for line in lines if line.strip() and not line.startswith("#")]
+
+setup(
+ name="babyagi", # Ensure this is the desired package name
+ version="0.1.2", # Update this version appropriately
+ author="Yohei Nakajima",
+ author_email="babyagi@untapped.vc",
+ description="An experimental prototype framework for building self building autonomous agents.",
+ long_description= long_description,
+ long_description_content_type="text/markdown",
+ url="https://github.com/yoheinakajima/babyagi", # Update if necessary
+ packages=find_packages(),
+ include_package_data=True, # Include package data as specified in MANIFEST.in
+ classifiers=[
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+ ],
+ python_requires='>=3.6',
+ install_requires=parse_requirements("requirements.txt"),
+ entry_points={
+ 'console_scripts': [
+ 'babyagi=babyagi.main:main', # Example entry point
+ ],
+ },
+ keywords="AGI, AI, Framework, Baby AGI",
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -159,11 +143,11 @@ This function is important because it defines how BabyAGI Tutorial: The Original
```mermaid
flowchart TD
- A[__getattr__]
- B[world]
- C[hello_world]
- D[home]
- E[home]
+ A[integrated_function]
+ B[home]
+ C[home]
+ D[parse_requirements]
+ E[create_api_blueprint]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/03-llm-backend-integration-and-configuration.md b/tutorials/babyagi-tutorial/03-llm-backend-integration-and-configuration.md
index f4e535b9..b93f86bb 100644
--- a/tutorials/babyagi-tutorial/03-llm-backend-integration-and-configuration.md
+++ b/tutorials/babyagi-tutorial/03-llm-backend-integration-and-configuration.md
@@ -39,184 +39,176 @@ You now know how to configure BabyAGI's LLM backend for different providers and
Next: [Chapter 4: Task Creation and Prioritization Engine](04-task-creation-and-prioritization-engine.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/functionz/packs/drafts/code_writing_functions.py`
+### `babyagi/functionz/db/models.py`
-The `check_existing_functions` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
+The `FunctionVersion` class in [`babyagi/functionz/db/models.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/db/models.py) handles a key part of this chapter's functionality:
```py
- dependencies=["gpt_call", "get_all_functions_wrapper"]
-)
-def check_existing_functions(user_input):
- import json
-
- while True:
- # Get all functions and their descriptions
- functions = get_all_functions_wrapper()
- function_descriptions = [
- {"name": f['name'], "description": f['metadata'].get('description', '')}
- for f in functions
- ]
-
- # Prepare the prompt
- prompt = f"""
-You are an expert software assistant. The user has provided the following request:
-
-"{user_input}"
+fernet = Fernet(ENCRYPTION_KEY.encode())
-Below is a list of available functions with their descriptions:
-
-{function_descriptions}
+# Association table for function dependencies (many-to-many between FunctionVersion and Function)
+function_dependency = Table('function_dependency', Base.metadata,
+ Column('function_version_id', Integer, ForeignKey('function_versions.id')),
+ Column('dependency_id', Integer, ForeignKey('functions.id'))
+)
-Determine if any of the existing functions perfectly fulfill the user's request. If so, return the name of the function.
+# **Define function_version_imports association table here**
+function_version_imports = Table('function_version_imports', Base.metadata,
+ Column('function_version_id', Integer, ForeignKey('function_versions.id')),
+ Column('import_id', Integer, ForeignKey('imports.id'))
+)
-Provide your answer in the following JSON format:
-{{
- "function_found": true or false,
- "function_name": ""
-}}
-Examples:
+class Function(Base):
+ __tablename__ = 'functions'
+ id = Column(Integer, primary_key=True)
+ name = Column(String, unique=True)
+ versions = relationship("FunctionVersion", back_populates="function", cascade="all, delete-orphan")
+
+class FunctionVersion(Base):
+ __tablename__ = 'function_versions'
+ id = Column(Integer, primary_key=True)
+ function_id = Column(Integer, ForeignKey('functions.id'))
+ version = Column(Integer)
+ code = Column(String)
+ function_metadata = Column(JSON)
+ is_active = Column(Boolean, default=False)
+ created_date = Column(DateTime, default=datetime.utcnow)
+ input_parameters = Column(JSON)
+ output_parameters = Column(JSON)
```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/code_writing_functions.py`
+### `babyagi/functionz/db/models.py`
-The `break_down_task` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
+The `Import` class in [`babyagi/functionz/db/models.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/db/models.py) handles a key part of this chapter's functionality:
```py
- dependencies=["gpt_call"]
-)
-def break_down_task(user_input):
- import json
- while True:
- # Prepare the prompt with detailed context
- prompt = f"""
-You are an expert software assistant helping to break down a user's request into smaller functions for a microservice-inspired architecture. The system is designed to be modular, with each function being small and designed optimally for potential future reuse.
-
-When breaking down the task, consider the following:
-
-- Each function should be as small as possible and do one thing well.
-- Use existing functions where possible. You have access to functions such as 'gpt_call', 'find_similar_function', and others in our function database.
-- Functions can depend on each other. Use 'dependencies' to specify which functions a function relies on.
-- Functions should include appropriate 'imports' if external libraries are needed.
-- Provide the breakdown as a list of functions, where each function includes its 'name', 'description', 'input_parameters', 'output_parameters', 'dependencies', and 'code' (just a placeholder or brief description at this stage).
-- Make sure descriptions are detailed so an engineer could build it to spec.
-- Every sub function you create should be designed to be reusable by turning things into parameters, vs hardcoding them.
-
-User request:
-
-"{user_input}"
-
-Provide your answer in JSON format as a list of functions. Each function should have the following structure:
-
-{{
- "name": "function_name",
- "description": "Brief description of the function",
- "input_parameters": [{{"name": "param1", "type": "type1"}}, ...],
- "output_parameters": [{{"name": "output", "type": "type"}}, ...],
- "dependencies": ["dependency1", "dependency2", ...],
- "imports": ["import1", "import2", ...],
+ primaryjoin=(function_dependency.c.function_version_id == id),
+ secondaryjoin=(function_dependency.c.dependency_id == Function.id))
+ imports = relationship('Import', secondary=function_version_imports, back_populates='function_versions')
+ triggers = Column(JSON, nullable=True) # Store triggers as a JSON field
+
+
+
+class Import(Base):
+ __tablename__ = 'imports'
+ id = Column(Integer, primary_key=True)
+ name = Column(String, unique=True)
+ lib = Column(String, nullable=True)
+ source = Column(String)
+ function_versions = relationship('FunctionVersion', secondary=function_version_imports, back_populates='imports')
+
+
+class Log(Base):
+ __tablename__ = 'logs'
+
+ id = Column(Integer, primary_key=True)
+ function_name = Column(String, nullable=False)
+ message = Column(String, nullable=False)
+ timestamp = Column(DateTime, nullable=False)
+ params = Column(JSON, nullable=True)
+ output = Column(JSON, nullable=True)
+ time_spent = Column(Float, nullable=True)
+ log_type = Column(String, nullable=False)
+
+ # Parent Log Relationship
+ parent_log_id = Column(Integer, ForeignKey('logs.id'), nullable=True)
+ parent_log = relationship(
+ 'Log',
```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/code_writing_functions.py`
+### `babyagi/functionz/db/models.py`
-The `decide_imports_and_apis` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
+The `Log` class in [`babyagi/functionz/db/models.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/db/models.py) handles a key part of this chapter's functionality:
```py
- dependencies=["gpt_call", "get_all_functions_wrapper"]
-)
-def decide_imports_and_apis(context):
- import json
- while True:
- # Get all available functions and their imports
- all_functions = get_all_functions_wrapper()
- existing_imports = set()
- for func in all_functions:
- existing_imports.update(func.get('imports', []))
- # Prepare the prompt
- prompt = f"""
-You are an expert software assistant helping to decide what imports and external APIs are needed for a set of functions based on the context provided.
-Context:
+class Log(Base):
+ __tablename__ = 'logs'
+
+ id = Column(Integer, primary_key=True)
+ function_name = Column(String, nullable=False)
+ message = Column(String, nullable=False)
+ timestamp = Column(DateTime, nullable=False)
+ params = Column(JSON, nullable=True)
+ output = Column(JSON, nullable=True)
+ time_spent = Column(Float, nullable=True)
+ log_type = Column(String, nullable=False)
+
+ # Parent Log Relationship
+ parent_log_id = Column(Integer, ForeignKey('logs.id'), nullable=True)
+ parent_log = relationship(
+ 'Log',
+ remote_side=[id],
+ backref='child_logs',
+ foreign_keys=[parent_log_id]
+ )
+
+ # Triggered By Log Relationship
+ triggered_by_log_id = Column(Integer, ForeignKey('logs.id'), nullable=True)
+ triggered_by_log = relationship(
+ 'Log',
+ remote_side=[id],
+ backref='triggered_logs',
+ foreign_keys=[triggered_by_log_id]
+ )
-{context}
+```
-Existing standard Python imports:
+This class is important because it defines how BabyAGI implements the patterns covered in this chapter.
-{list(existing_imports)}
+### `babyagi/functionz/db/models.py`
-Determine the libraries (imports) and external APIs needed for these functions. Separate standard Python libraries from external libraries or APIs.
+The `SecretKey` class in [`babyagi/functionz/db/models.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/db/models.py) handles a key part of this chapter's functionality:
-Provide your answer in the following JSON format:
+```py
-{{
- "standard_imports": ["import1", "import2", ...],
- "external_imports": ["external_import1", "external_import2", ...],
- "external_apis": ["api1", "api2", ...],
- "documentation_needed": [
-```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+class SecretKey(Base):
+ __tablename__ = 'secret_keys'
+ id = Column(Integer, primary_key=True)
+ name = Column(String, nullable=False, unique=True) # Make name unique
+ _encrypted_value = Column(LargeBinary, nullable=False)
-### `babyagi/functionz/packs/drafts/code_writing_functions.py`
+ @hybrid_property
+ def value(self):
+ if self._encrypted_value:
+ try:
+ return fernet.decrypt(self._encrypted_value).decode()
+ except InvalidToken:
+ print(f"Error decrypting value for key: {self.name}. The encryption key may have changed.")
+ return None
+ return None
+
+ @value.setter
+ def value(self, plaintext_value):
+ if plaintext_value:
+ self._encrypted_value = fernet.encrypt(plaintext_value.encode())
+ else:
+ self._encrypted_value = None
-The `get_functions_that_depend_on` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
-```py
- dependencies=["get_all_functions_wrapper"]
-)
-def get_functions_that_depend_on(function_name):
- all_functions = get_all_functions_wrapper()
- dependent_functions = []
- for function in all_functions:
- if function_name in function.get('dependencies', []):
- dependent_functions.append(function['name'])
- return dependent_functions
-
-
-@func.register_function(
- metadata={"description": "Generates the function code using LLM"},
- dependencies=["gpt_call", "get_function_wrapper", "get_functions_that_depend_on", "get_all_functions_wrapper"]
-)
-def generate_function_code(function, context):
- while True:
-
- print("\033[1;32mGenerating code for function: ", function["name"], "\033[0m")
- # Gather dependent functions and their code
- dependencies = function.get('dependencies', [])
- dependency_code = ''
- for dep in dependencies:
- dep_function = get_function_wrapper(dep)
- if dep_function:
- dependency_code += f"\n# Code for dependency function '{dep}':\n{dep_function['code']}\n"
-
- # Gather functions that depend on the same imports
- imports = function.get('imports', [])
- functions_with_same_imports = []
- all_functions = get_all_functions_wrapper()
- for func_with_imports in all_functions:
```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[check_existing_functions]
- B[break_down_task]
- C[decide_imports_and_apis]
- D[get_functions_that_depend_on]
- E[generate_function_code]
+ A[FunctionVersion]
+ B[Import]
+ C[Log]
+ D[SecretKey]
+ E[get_or_create_key]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/04-task-creation-and-prioritization-engine.md b/tutorials/babyagi-tutorial/04-task-creation-and-prioritization-engine.md
index b07949e6..9d2755b7 100644
--- a/tutorials/babyagi-tutorial/04-task-creation-and-prioritization-engine.md
+++ b/tutorials/babyagi-tutorial/04-task-creation-and-prioritization-engine.md
@@ -38,170 +38,168 @@ You now understand how the task creation and prioritization engine generates, de
Next: [Chapter 5: Memory Systems and Vector Store Integration](05-memory-systems-and-vector-store-integration.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/dashboard/static/js/log_dashboard.js`
-
-The `buildLogTree` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
-
-```js
-
- // Build the tree structure
- rootLogs = buildLogTree(filteredLogs);
-
- renderLogs();
- } catch (error) {
- console.error('Error populating filters:', error);
- alert('Failed to load logs for filters. Please try again later.');
- }
-}
-
-// Build log tree based on parent_log_id
-function buildLogTree(logs) {
- const logsById = {};
- const rootLogs = [];
-
- // Initialize logsById mapping and add children array to each log
- logs.forEach(log => {
- log.children = [];
- logsById[log.id] = log;
- });
-
- // Build the tree
- logs.forEach(log => {
- if (log.parent_log_id !== null) {
- const parentLog = logsById[log.parent_log_id];
- if (parentLog) {
- parentLog.children.push(log);
- } else {
- // Parent log not found, treat as root
- rootLogs.push(log);
- }
-```
-
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+### `babyagi/functionz/packs/drafts/user_db.py`
+
+The `create_table` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+
+```py
+ imports=["sqlalchemy", "json"] # Added 'json' to imports
+)
+def create_table(db_name: str, table_name: str, columns: str):
+ from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, Float, Boolean, DateTime, LargeBinary
+ import json # Imported json within the function
+
+
+ try:
+ columns = json.loads(columns)
+ print("Parsed columns:", columns) # Debugging statement
+ except json.JSONDecodeError as e:
+ return f"Invalid JSON for columns: {e}"
+
+ def get_column_type(type_name):
+ type_map = {
+ 'string': String,
+ 'integer': Integer,
+ 'float': Float,
+ 'boolean': Boolean,
+ 'datetime': DateTime,
+ 'binary': LargeBinary,
+ 'embedding': LargeBinary # We'll use LargeBinary for embeddings
+ }
+ return type_map.get(type_name.lower(), String) # Default to String if type not found
+
+ UserDB_name = func.get_user_db_class()
+ UserDB = type(UserDB_name, (), {
+ '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
+ 'metadata': MetaData(),
+ })
+ user_db = UserDB(f'sqlite:///{db_name}.sqlite')
-### `babyagi/dashboard/static/js/log_dashboard.js`
-
-The `renderLogs` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
-
-```js
- rootLogs = buildLogTree(filteredLogs);
-
- renderLogs();
- } catch (error) {
- console.error('Error populating filters:', error);
- alert('Failed to load logs for filters. Please try again later.');
- }
-}
-
-// Build log tree based on parent_log_id
-function buildLogTree(logs) {
- const logsById = {};
- const rootLogs = [];
-
- // Initialize logsById mapping and add children array to each log
- logs.forEach(log => {
- log.children = [];
- logsById[log.id] = log;
- });
-
- // Build the tree
- logs.forEach(log => {
- if (log.parent_log_id !== null) {
- const parentLog = logsById[log.parent_log_id];
- if (parentLog) {
- parentLog.children.push(log);
- } else {
- // Parent log not found, treat as root
- rootLogs.push(log);
- }
- } else {
- rootLogs.push(log);
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/dashboard/static/js/log_dashboard.js`
-
-The `renderTable` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
-
-```js
-// Render logs in table and grid formats
-function renderLogs() {
- renderTable();
- renderGrid();
-}
+### `babyagi/functionz/packs/drafts/user_db.py`
+
+The `list_tables` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+
+```py
+ imports=["sqlalchemy"]
+)
+def list_tables(db_name: str):
+ from sqlalchemy import create_engine, MetaData
+ UserDB_name = func.get_user_db_class()
+ UserDB = type(UserDB_name, (), {
+ '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
+ 'metadata': MetaData()
+ })
+ user_db = UserDB(f'sqlite:///{db_name}.sqlite')
+ user_db.metadata.reflect(user_db.engine)
+ return [table.name for table in user_db.metadata.tables.values()]
+
+@func.register_function(
+ metadata={"description": "Get details of a specific table."},
+ dependencies=["get_user_db_class"],
+ imports=["sqlalchemy"]
+)
+def get_table(db_name: str, table_name: str):
+ from sqlalchemy import create_engine, MetaData, Table
+ from sqlalchemy.exc import NoSuchTableError
+
+ UserDB_name = func.get_user_db_class()
+ UserDB = type(UserDB_name, (), {
+ '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
+ 'metadata': MetaData()
+ })
+
+ try:
+ user_db = UserDB(f'sqlite:///{db_name}.sqlite')
+ user_db.metadata.reflect(user_db.engine)
-// Render Logs Table (Desktop View)
-function renderTable() {
- const tableBody = document.querySelector('#logTable tbody');
- tableBody.innerHTML = '';
-
- rootLogs.forEach(log => {
- renderLogRow(tableBody, log, 0);
- });
-}
-
-// Recursive function to render each log row and its children
-function renderLogRow(tableBody, log, depth, parentRowId) {
- const row = document.createElement('tr');
- const rowId = 'log-' + log.id;
- row.id = rowId;
-
- // If it's a child row, add a class to indicate it's a child
- if (parentRowId) {
- row.classList.add('child-of-log-' + parentRowId);
- row.style.display = 'none'; // Hide child rows by default
- }
-
- // Check if log has children
- const hasChildren = log.children && log.children.length > 0;
-
- // Create expand/collapse icon
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/dashboard/static/js/log_dashboard.js`
+### `babyagi/functionz/packs/drafts/user_db.py`
-The `renderLogRow` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
+The `get_table` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
-```js
+```py
+ imports=["sqlalchemy"]
+)
+def get_table(db_name: str, table_name: str):
+ from sqlalchemy import create_engine, MetaData, Table
+ from sqlalchemy.exc import NoSuchTableError
- rootLogs.forEach(log => {
- renderLogRow(tableBody, log, 0);
- });
-}
+ UserDB_name = func.get_user_db_class()
+ UserDB = type(UserDB_name, (), {
+ '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
+ 'metadata': MetaData()
+ })
-// Recursive function to render each log row and its children
-function renderLogRow(tableBody, log, depth, parentRowId) {
- const row = document.createElement('tr');
- const rowId = 'log-' + log.id;
- row.id = rowId;
+ try:
+ user_db = UserDB(f'sqlite:///{db_name}.sqlite')
+ user_db.metadata.reflect(user_db.engine)
- // If it's a child row, add a class to indicate it's a child
- if (parentRowId) {
- row.classList.add('child-of-log-' + parentRowId);
- row.style.display = 'none'; // Hide child rows by default
- }
+ if table_name in user_db.metadata.tables:
+ table = Table(table_name, user_db.metadata, autoload_with=user_db.engine)
+ return {
+ "name": table.name,
+ "columns": [{"name": column.name, "type": str(column.type)} for column in table.columns]
+ }
+ else:
+ return f"Table '{table_name}' not found in database '{db_name}'."
+ except NoSuchTableError:
+ return f"Table '{table_name}' not found in database '{db_name}'."
+ except Exception as e:
+ return f"Error getting table details: {str(e)}"
+
+@func.register_function(
+ metadata={"description": "Update a table by adding new columns."},
+ dependencies=["get_user_db_class"],
+```
- // Check if log has children
- const hasChildren = log.children && log.children.length > 0;
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
- // Create expand/collapse icon
- let toggleIcon = '';
- if (hasChildren) {
- toggleIcon = ` `;
- }
+### `babyagi/functionz/packs/drafts/user_db.py`
+
+The `update_table` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+
+```py
+ imports=["sqlalchemy", "json"] # Added 'json' to imports
+)
+def update_table(db_name: str, table_name: str, new_columns: str):
+ from sqlalchemy import create_engine, MetaData, Table, Column, String, Integer, Float, Boolean, DateTime, LargeBinary
+ from sqlalchemy.schema import CreateTable
+ import json # Imported json within the function
+
+ try:
+ new_columns = json.loads(new_columns)
+ print("Parsed columns:", new_columns) # Debugging statement
+ except json.JSONDecodeError as e:
+ return f"Invalid JSON for columns: {e}"
+
+ def get_column_type(type_name):
+ type_map = {
+ 'string': String,
+ 'integer': Integer,
+ 'float': Float,
+ 'boolean': Boolean,
+ 'datetime': DateTime,
+ 'binary': LargeBinary,
+ 'embedding': LargeBinary # We'll use LargeBinary for embeddings
+ }
+ return type_map.get(type_name.lower(), String) # Default to String if type not found
+
+
+ UserDB_name = func.get_user_db_class()
+ UserDB = type(UserDB_name, (), {
+ '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
+ 'metadata': MetaData()
+ })
- row.innerHTML = `
- ${log.id}
- ${log.function_name}
- ${toggleIcon}${log.message}
- ${new Date(log.timestamp).toLocaleString()}
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how BabyAGI Tutorial: The Original
```mermaid
flowchart TD
- A[buildLogTree]
- B[renderLogs]
- C[renderTable]
- D[renderLogRow]
- E[toggleChildRows]
+ A[create_table]
+ B[list_tables]
+ C[get_table]
+ D[update_table]
+ E[delete_table]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/05-memory-systems-and-vector-store-integration.md b/tutorials/babyagi-tutorial/05-memory-systems-and-vector-store-integration.md
index 4ef8e284..19f4f4e5 100644
--- a/tutorials/babyagi-tutorial/05-memory-systems-and-vector-store-integration.md
+++ b/tutorials/babyagi-tutorial/05-memory-systems-and-vector-store-integration.md
@@ -40,170 +40,168 @@ You now understand how BabyAGI's vector memory layer works, how to configure dif
Next: [Chapter 6: Extending BabyAGI: Custom Tools and Skills](06-extending-babyagi-custom-tools-and-skills.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/code_writing_functions.py`
-The `itself` class in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `get_functions_that_depend_on` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
```py
- session.close()
+ dependencies=["get_all_functions_wrapper"]
+)
+def get_functions_that_depend_on(function_name):
+ all_functions = get_all_functions_wrapper()
+ dependent_functions = []
+ for function in all_functions:
+ if function_name in function.get('dependencies', []):
+ dependent_functions.append(function['name'])
+ return dependent_functions
- return UserDB.__name__ # Return the name of the class instead of the class itself
@func.register_function(
- metadata={"description": "Create a new database."},
- dependencies=["get_user_db_class"],
- imports=["sqlalchemy"]
+ metadata={"description": "Generates the function code using LLM"},
+ dependencies=["gpt_call", "get_function_wrapper", "get_functions_that_depend_on", "get_all_functions_wrapper"]
)
-def create_database(db_name: str, db_type: str = 'sqlite', **kwargs):
- from sqlalchemy import create_engine, MetaData
-
- if db_type == 'sqlite':
- db_url = f'sqlite:///{db_name}.sqlite'
- elif db_type == 'postgresql':
- db_url = f'postgresql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 5432)}/{db_name}'
- elif db_type == 'mysql':
- db_url = f'mysql+pymysql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 3306)}/{db_name}'
- else:
- raise ValueError(f"Unsupported database type: {db_type}")
-
- UserDB_name = func.get_user_db_class()
- # Reconstruct the UserDB class
- UserDB = type(UserDB_name, (), {
- '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
- 'metadata': MetaData()
- })
-
- user_db = UserDB(db_url) # Pass db_url here
-
- new_engine = create_engine(db_url)
- user_db.metadata.create_all(new_engine)
+def generate_function_code(function, context):
+ while True:
+
+ print("\033[1;32mGenerating code for function: ", function["name"], "\033[0m")
+ # Gather dependent functions and their code
+ dependencies = function.get('dependencies', [])
+ dependency_code = ''
+ for dep in dependencies:
+ dep_function = get_function_wrapper(dep)
+ if dep_function:
+ dependency_code += f"\n# Code for dependency function '{dep}':\n{dep_function['code']}\n"
+
+ # Gather functions that depend on the same imports
+ imports = function.get('imports', [])
+ functions_with_same_imports = []
+ all_functions = get_all_functions_wrapper()
+ for func_with_imports in all_functions:
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/code_writing_functions.py`
-The `UserDB` class in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `generate_function_code` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
```py
-
-@func.register_function(
- metadata={"description": "Base UserDB class for database operations."},
- imports=["sqlalchemy", "contextlib"]
+ dependencies=["gpt_call", "get_function_wrapper", "get_functions_that_depend_on", "get_all_functions_wrapper"]
)
-def get_user_db_class():
- from sqlalchemy import create_engine, Column, Integer, String, MetaData
- from sqlalchemy.ext.declarative import declarative_base
- from sqlalchemy.orm import sessionmaker
- from contextlib import contextmanager
- from sqlalchemy.exc import SQLAlchemyError
-
- class UserDB:
- def __init__(self, db_url='sqlite:///user_db.sqlite'):
- self.engine = create_engine(db_url)
- self.Session = sessionmaker(bind=self.engine)
- self.metadata = MetaData()
- self.Base = declarative_base(metadata=self.metadata)
-
- @contextmanager
- def session_scope(self):
- session = self.Session()
- try:
- yield session
- session.commit()
- except SQLAlchemyError as e:
- session.rollback()
- raise e
- finally:
- session.close()
-
- return UserDB.__name__ # Return the name of the class instead of the class itself
+def generate_function_code(function, context):
+ while True:
+
+ print("\033[1;32mGenerating code for function: ", function["name"], "\033[0m")
+ # Gather dependent functions and their code
+ dependencies = function.get('dependencies', [])
+ dependency_code = ''
+ for dep in dependencies:
+ dep_function = get_function_wrapper(dep)
+ if dep_function:
+ dependency_code += f"\n# Code for dependency function '{dep}':\n{dep_function['code']}\n"
+
+ # Gather functions that depend on the same imports
+ imports = function.get('imports', [])
+ functions_with_same_imports = []
+ all_functions = get_all_functions_wrapper()
+ for func_with_imports in all_functions:
+ if set(func_with_imports.get('imports', [])) & set(imports):
+ functions_with_same_imports.append(func_with_imports)
+
+ similar_imports_functions_code = ''
+ for func_with_imports in functions_with_same_imports:
+ similar_imports_functions_code += f"\n# Code for function '{func_with_imports['name']}' that uses similar imports:\n{func_with_imports['code']}\n"
+
+ # Prepare the prompt
+ prompt = f"""
+You are an expert Python programmer. Your task is to write detailed and working code for the following function based on the context provided. Do not provide placeholder code, but rather do your best like you are the best senior engineer in the world and provide the best code possible. DO NOT PROVIDE PLACEHOLDER CODE.
+
+Function details:
+
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/code_writing_functions.py`
-The `get_user_db_class` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `create_function` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
```py
- imports=["sqlalchemy", "contextlib"]
+ dependencies=["decide_imports_and_apis", "generate_function_code","add_new_function"]
)
-def get_user_db_class():
- from sqlalchemy import create_engine, Column, Integer, String, MetaData
- from sqlalchemy.ext.declarative import declarative_base
- from sqlalchemy.orm import sessionmaker
- from contextlib import contextmanager
- from sqlalchemy.exc import SQLAlchemyError
-
- class UserDB:
- def __init__(self, db_url='sqlite:///user_db.sqlite'):
- self.engine = create_engine(db_url)
- self.Session = sessionmaker(bind=self.engine)
- self.metadata = MetaData()
- self.Base = declarative_base(metadata=self.metadata)
-
- @contextmanager
- def session_scope(self):
- session = self.Session()
- try:
- yield session
- session.commit()
- except SQLAlchemyError as e:
- session.rollback()
- raise e
- finally:
- session.close()
-
- return UserDB.__name__ # Return the name of the class instead of the class itself
-
-@func.register_function(
- metadata={"description": "Create a new database."},
+def create_function(function, context):
+ # Decide imports and APIs
+ imports_and_apis = decide_imports_and_apis(context)
+ function['imports'] = imports_and_apis.get('standard_imports', []) + imports_and_apis.get('external_imports', [])
+
+ # Update context with imports and APIs
+ context.update({'imports_and_apis': imports_and_apis})
+
+ # Generate function code
+ function_data = generate_function_code(function, context)
+
+ if function_data:
+ # Register the function using the parsed JSON data
+ add_new_function(
+ name=function_data['function_name'],
+ code=function_data['code'],
+ metadata=function_data['metadata'],
+ imports=function_data.get('imports', []),
+ dependencies=function_data.get('dependencies', []),
+ key_dependencies=function_data.get('key_dependencies', []),
+ triggers=function_data.get('triggers', [])
+ )
+
+ #print(f"Function '{function_data['function_name']}' registered successfully.")
+
+ return {
+ 'name': function_data['function_name'],
+ 'code': function_data['code'],
+ 'metadata': function_data['metadata'],
+ 'imports': function_data.get('imports', []),
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/code_writing_functions.py`
-The `create_database` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `generate_functions` function in [`babyagi/functionz/packs/drafts/code_writing_functions.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/code_writing_functions.py) handles a key part of this chapter's functionality:
```py
- imports=["sqlalchemy"]
+ dependencies=["find_similar_function", "create_function", "get_function_wrapper"]
)
-def create_database(db_name: str, db_type: str = 'sqlite', **kwargs):
- from sqlalchemy import create_engine, MetaData
-
- if db_type == 'sqlite':
- db_url = f'sqlite:///{db_name}.sqlite'
- elif db_type == 'postgresql':
- db_url = f'postgresql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 5432)}/{db_name}'
- elif db_type == 'mysql':
- db_url = f'mysql+pymysql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 3306)}/{db_name}'
- else:
- raise ValueError(f"Unsupported database type: {db_type}")
-
- UserDB_name = func.get_user_db_class()
- # Reconstruct the UserDB class
- UserDB = type(UserDB_name, (), {
- '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
- 'metadata': MetaData()
- })
-
- user_db = UserDB(db_url) # Pass db_url here
-
- new_engine = create_engine(db_url)
- user_db.metadata.create_all(new_engine)
- return f"Database '{db_name}' created successfully."
+def generate_functions(function_breakdown, context):
+ for function in function_breakdown:
+ function_name = function['name']
+ # Find similar functions
+ similar_functions = find_similar_function(function['description'])
+ function_found = False
+ for similar_function_name in similar_functions:
+ similar_function = get_function_wrapper(similar_function_name)
+ if similar_function and similar_function['metadata'].get('description', '') == function['description']:
+ function_found = True
+ break
+ if not function_found:
+ # Combine context for this function
+ function_context = context.copy()
+ function_context.update({'function': function})
+ create_function(function, function_context)
+@func.register_function(
+ metadata={"description": "Runs the final function to produce the output for the user"},
+ dependencies=["func"]
+)
+def run_final_function(function_name, *args, **kwargs):
+ result = func.execute_function(function_name, *args, **kwargs)
+ return result
@func.register_function(
- metadata={"description": "List all SQLite databases."},
- dependencies=["get_user_db_class"],
- imports=["os", "sqlalchemy"]
+ metadata={"description": "Extracts parameters from user input for a given function"},
+ dependencies=["gpt_call", "get_function_wrapper"]
+)
+def extract_function_parameters(user_input, function_name):
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -213,11 +211,11 @@ This function is important because it defines how BabyAGI Tutorial: The Original
```mermaid
flowchart TD
- A[itself]
- B[UserDB]
- C[get_user_db_class]
- D[create_database]
- E[list_databases]
+ A[get_functions_that_depend_on]
+ B[generate_function_code]
+ C[create_function]
+ D[generate_functions]
+ E[run_final_function]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/06-extending-babyagi-custom-tools-and-skills.md b/tutorials/babyagi-tutorial/06-extending-babyagi-custom-tools-and-skills.md
index 8a78009d..0879a95d 100644
--- a/tutorials/babyagi-tutorial/06-extending-babyagi-custom-tools-and-skills.md
+++ b/tutorials/babyagi-tutorial/06-extending-babyagi-custom-tools-and-skills.md
@@ -40,170 +40,168 @@ You now know how to extend BabyAGI with external tools and skills, enabling the
Next: [Chapter 7: BabyAGI Evolution: 2o and Functionz Framework](07-babyagi-evolution-2o-and-functionz-framework.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/generate_function.py`
-The `delete_record` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `ExtractionInfo` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
```py
- imports=["sqlalchemy"]
-)
-def delete_record(db_name: str, table_name: str, record_id: int):
- from sqlalchemy import create_engine, MetaData, Table
- from sqlalchemy.orm import sessionmaker
- UserDB_name = func.get_user_db_class()
- UserDB = type(UserDB_name, (), {
- '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
- 'metadata': MetaData()
- })
- user_db = UserDB(f'sqlite:///{db_name}.sqlite')
- user_db.metadata.reflect(user_db.engine)
- table = Table(table_name, user_db.metadata, autoload_with=user_db.engine)
- Session = sessionmaker(bind=user_db.engine)
- with Session() as session:
- delete = table.delete().where(table.c.id == record_id)
- result = session.execute(delete)
- session.commit()
- if result.rowcount:
- return f"Record {record_id} in table '{table_name}' of database '{db_name}' deleted successfully."
- return f"Record {record_id} not found in table '{table_name}' of database '{db_name}'."
-
-
-@func.register_function(
- metadata={"description": "Convert value to specified SQLAlchemy type"},
- imports=["sqlalchemy", "json", "datetime"]
-)
-def convert_value(value, target_type):
- from sqlalchemy import Boolean, DateTime, LargeBinary, Integer, Float
- import json
- from datetime import datetime
+ selected_urls: List[str] = Field(default_factory=list)
+
+ # Updated ExtractionInfo model with 'requires_more_info'
+ class ExtractionInfo(BaseModel):
+ relevant_info: str
+ additional_urls: List[str] = Field(default_factory=list)
+ requires_more_info: bool
+
+ # System prompt
+ system_prompt = """
+ You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
+
+ Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
+
+ Basic Function Registration Example:
+
+ def function_name(param1, param2):
+ # function logic here
+ return result
+
+ Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
+
+ Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
+
+ Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
+ Error Handling: Functions should handle errors gracefully, catching exceptions if necessary.
+
+ General Guidelines: Use simple, clean, and readable code. Follow the structure and syntax of the functionz framework. Ensure proper function documentation via metadata.
+ """
+
+ # Function to check if a URL is valid
```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/generate_function.py`
-The `convert_value` function in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `GeneratedFunction` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
```py
-@func.register_function(
- metadata={"description": "Create a new record in a table."},
- dependencies=["get_user_db_class", "convert_value"],
- imports=["sqlalchemy", "json"]
-)
-def create_record(db_name: str, table_name: str, data: list):
- from sqlalchemy import create_engine, MetaData, Table, String
- from sqlalchemy.orm import sessionmaker
- import json
-
- if not isinstance(data_dict, dict):
- return "Error: Data must be a JSON object"
-
- UserDB_name = func.get_user_db_class()
- UserDB = type(UserDB_name, (), {
- '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
- 'metadata': MetaData()
- })
- user_db = UserDB(f'sqlite:///{db_name}.sqlite')
- user_db.metadata.reflect(user_db.engine)
- table = Table(table_name, user_db.metadata, autoload_with=user_db.engine)
- Session = sessionmaker(bind=user_db.engine)
-
- # Get column types
- column_types = {c.name: c.type for c in table.columns}
-
- # Convert input data to appropriate types
- converted_data = {key: func.convert_value(value, column_types.get(key, String)) for key, value in data.items()}
-
- try:
- with Session() as session:
- ins = table.insert().values(**converted_data)
+
+ # Define Pydantic model
+ class GeneratedFunction(BaseModel):
+ name: str
+ code: str
+ metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
+ imports: Optional[List[Dict[str, str]]] = Field(default_factory=list)
+ dependencies: List[str] = Field(default_factory=list)
+ key_dependencies: List[str] = Field(default_factory=list)
+ triggers: List[str] = Field(default_factory=list)
+
+ class Config:
+ extra = "forbid"
+
+ # System prompt
+ system_prompt = """
+ You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
+
+ Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
+
+ Basic Function Registration Example:
+
+ def function_name(param1, param2):
+ # function logic here
+ return result
+
+ Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
+
+ Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
+
+ Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
+
```
-This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/packs/drafts/user_db.py`
+### `babyagi/functionz/packs/drafts/generate_function.py`
-The `the` interface in [`babyagi/functionz/packs/drafts/user_db.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/user_db.py) handles a key part of this chapter's functionality:
+The `Config` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
```py
- session.close()
-
- return UserDB.__name__ # Return the name of the class instead of the class itself
-
-@func.register_function(
- metadata={"description": "Create a new database."},
- dependencies=["get_user_db_class"],
- imports=["sqlalchemy"]
-)
-def create_database(db_name: str, db_type: str = 'sqlite', **kwargs):
- from sqlalchemy import create_engine, MetaData
-
- if db_type == 'sqlite':
- db_url = f'sqlite:///{db_name}.sqlite'
- elif db_type == 'postgresql':
- db_url = f'postgresql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 5432)}/{db_name}'
- elif db_type == 'mysql':
- db_url = f'mysql+pymysql://{kwargs.get("user")}:{kwargs.get("password")}@{kwargs.get("host", "localhost")}:{kwargs.get("port", 3306)}/{db_name}'
- else:
- raise ValueError(f"Unsupported database type: {db_type}")
-
- UserDB_name = func.get_user_db_class()
- # Reconstruct the UserDB class
- UserDB = type(UserDB_name, (), {
- '__init__': lambda self, db_url: setattr(self, 'engine', create_engine(db_url)),
- 'metadata': MetaData()
- })
-
- user_db = UserDB(db_url) # Pass db_url here
-
- new_engine = create_engine(db_url)
- user_db.metadata.create_all(new_engine)
+ triggers: List[str] = Field(default_factory=list)
+
+ class Config:
+ extra = "forbid"
+
+ # System prompt
+ system_prompt = """
+ You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
+
+ Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
+
+ Basic Function Registration Example:
+
+ def function_name(param1, param2):
+ # function logic here
+ return result
+
+ Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
+
+ Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
+
+ Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
+
+ Error Handling: Functions should handle errors gracefully, catching exceptions if necessary.
+
+ General Guidelines: Use simple, clean, and readable code. Follow the structure and syntax of the functionz framework. Ensure proper function documentation via metadata.
+ """
+
+ # Function to chunk text
+ def chunk_text(text: str, chunk_size: int = 100000, overlap: int = 10000) -> List[str]:
+ chunks = []
+ start = 0
```
-This interface is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/functionz/db/models.py`
+### `babyagi/functionz/packs/drafts/generate_function.py`
-The `Function` class in [`babyagi/functionz/db/models.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/db/models.py) handles a key part of this chapter's functionality:
+The `Endpoint` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
```py
-fernet = Fernet(ENCRYPTION_KEY.encode())
-
-# Association table for function dependencies (many-to-many between FunctionVersion and Function)
-function_dependency = Table('function_dependency', Base.metadata,
- Column('function_version_id', Integer, ForeignKey('function_versions.id')),
- Column('dependency_id', Integer, ForeignKey('functions.id'))
-)
-
-# **Define function_version_imports association table here**
-function_version_imports = Table('function_version_imports', Base.metadata,
- Column('function_version_id', Integer, ForeignKey('function_versions.id')),
- Column('import_id', Integer, ForeignKey('imports.id'))
-)
-
-
-class Function(Base):
- __tablename__ = 'functions'
- id = Column(Integer, primary_key=True)
- name = Column(String, unique=True)
- versions = relationship("FunctionVersion", back_populates="function", cascade="all, delete-orphan")
-
-class FunctionVersion(Base):
- __tablename__ = 'function_versions'
- id = Column(Integer, primary_key=True)
- function_id = Column(Integer, ForeignKey('functions.id'))
- version = Column(Integer)
- code = Column(String)
- function_metadata = Column(JSON)
- is_active = Column(Boolean, default=False)
- created_date = Column(DateTime, default=datetime.utcnow)
- input_parameters = Column(JSON)
- output_parameters = Column(JSON)
+
+ # Define Pydantic models
+ class Endpoint(BaseModel):
+ method: Optional[str]
+ url: str
+ description: Optional[str] = None
+
+ class APIDetails(BaseModel):
+ api_name: str = Field(alias="name") # Use alias to map 'name' to 'api_name'
+ purpose: str
+ endpoints: Optional[List[Union[Endpoint, str]]] = Field(default_factory=list)
+
+ @validator("endpoints", pre=True, each_item=True)
+ def convert_to_endpoint(cls, v):
+ """Convert string URLs into Endpoint objects if necessary."""
+ if isinstance(v, str):
+ return Endpoint(url=v) # Create an Endpoint object from a URL string
+ return v
+
+ class APIResponse(BaseModel):
+ name: str
+ purpose: str
+ endpoints: List[Endpoint]
+
+ # System prompt
+ system_prompt = """
+ [Your existing system prompt here]
+ """
+
+ prompt_for_apis = f"""You are an assistant analyzing function requirements.
+
+ The user has provided the following function description: {description}.
```
This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -213,11 +211,11 @@ This class is important because it defines how BabyAGI Tutorial: The Original Au
```mermaid
flowchart TD
- A[delete_record]
- B[convert_value]
- C[the]
- D[Function]
- E[FunctionVersion]
+ A[ExtractionInfo]
+ B[GeneratedFunction]
+ C[Config]
+ D[Endpoint]
+ E[APIResponse]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/07-babyagi-evolution-2o-and-functionz-framework.md b/tutorials/babyagi-tutorial/07-babyagi-evolution-2o-and-functionz-framework.md
index 3cd42f98..1ab6baf0 100644
--- a/tutorials/babyagi-tutorial/07-babyagi-evolution-2o-and-functionz-framework.md
+++ b/tutorials/babyagi-tutorial/07-babyagi-evolution-2o-and-functionz-framework.md
@@ -40,184 +40,182 @@ You now understand the evolutionary arc from BabyAGI's original three-agent loop
Next: [Chapter 8: Production Patterns and Research Adaptations](08-production-patterns-and-research-adaptations.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/functionz/packs/drafts/generate_function.py`
-
-The `ExtractionInfo` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
-
-```py
- selected_urls: List[str] = Field(default_factory=list)
-
- # Updated ExtractionInfo model with 'requires_more_info'
- class ExtractionInfo(BaseModel):
- relevant_info: str
- additional_urls: List[str] = Field(default_factory=list)
- requires_more_info: bool
-
- # System prompt
- system_prompt = """
- You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
-
- Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
-
- Basic Function Registration Example:
-
- def function_name(param1, param2):
- # function logic here
- return result
-
- Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
-
- Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
-
- Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
-
- Error Handling: Functions should handle errors gracefully, catching exceptions if necessary.
-
- General Guidelines: Use simple, clean, and readable code. Follow the structure and syntax of the functionz framework. Ensure proper function documentation via metadata.
- """
-
- # Function to check if a URL is valid
+### `babyagi/dashboard/static/js/function_details.js`
+
+The `getApiRoute` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+
+```js
+
+// Helper function to get the API route
+function getApiRoute(routeName, ...args) {
+ if (typeof apiRoutes[routeName] === 'function') {
+ return apiRoutes[routeName](...args);
+ } else {
+ return apiRoutes[routeName];
+ }
+}
+
+window.getApiRoute = getApiRoute;
+
+let functionData;
+let codeEditor;
+
+// Expose necessary functions to the global scope
+window.loadFunctionDetails = loadFunctionDetails;
+window.loadFunctionLogs = loadFunctionLogs;
+window.initCodeEditor = initCodeEditor;
+window.displayFunctionDetails = displayFunctionDetails;
+window.createExecutionForm = createExecutionForm;
+window.updateFunction = updateFunction;
+window.executeFunction = executeFunction;
+window.toggleVersionHistory = toggleVersionHistory;
+window.loadFunctionVersions = loadFunctionVersions;
+window.activateVersion = activateVersion;
+
+function loadFunctionDetails() {
+ fetch(getApiRoute('getFunction'))
+ .then(response => {
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-
-### `babyagi/functionz/packs/drafts/generate_function.py`
-
-The `GeneratedFunction` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
-
-```py
-
- # Define Pydantic model
- class GeneratedFunction(BaseModel):
- name: str
- code: str
- metadata: Optional[Dict[str, Any]] = Field(default_factory=dict)
- imports: Optional[List[Dict[str, str]]] = Field(default_factory=list)
- dependencies: List[str] = Field(default_factory=list)
- key_dependencies: List[str] = Field(default_factory=list)
- triggers: List[str] = Field(default_factory=list)
-
- class Config:
- extra = "forbid"
-
- # System prompt
- system_prompt = """
- You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
-
- Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
-
- Basic Function Registration Example:
-
- def function_name(param1, param2):
- # function logic here
- return result
-
- Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
-
- Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
-
- Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
-
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+
+### `babyagi/dashboard/static/js/function_details.js`
+
+The `loadFunctionDetails` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+
+```js
+
+// Expose necessary functions to the global scope
+window.loadFunctionDetails = loadFunctionDetails;
+window.loadFunctionLogs = loadFunctionLogs;
+window.initCodeEditor = initCodeEditor;
+window.displayFunctionDetails = displayFunctionDetails;
+window.createExecutionForm = createExecutionForm;
+window.updateFunction = updateFunction;
+window.executeFunction = executeFunction;
+window.toggleVersionHistory = toggleVersionHistory;
+window.loadFunctionVersions = loadFunctionVersions;
+window.activateVersion = activateVersion;
+
+function loadFunctionDetails() {
+ fetch(getApiRoute('getFunction'))
+ .then(response => {
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+ return response.json();
+ })
+ .then(data => {
+ functionData = data;
+ console.log("functionData",functionData)
+ displayFunctionDetails();
+ createExecutionForm();
+ initCodeEditor();
+ })
+ .catch(error => {
+ console.error('Error:', error);
+ document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
+ });
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-
-### `babyagi/functionz/packs/drafts/generate_function.py`
-
-The `Config` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
-
-```py
- triggers: List[str] = Field(default_factory=list)
-
- class Config:
- extra = "forbid"
-
- # System prompt
- system_prompt = """
- You are an AI designed to help developers write Python functions using the functionz framework. Every function you generate must adhere to the following rules:
-
- Function Registration: All functions must be registered with the functionz framework using the @babyagi.register_function() decorator. Each function can include metadata, dependencies, imports, and key dependencies.
-
- Basic Function Registration Example:
-
- def function_name(param1, param2):
- # function logic here
- return result
-
- Metadata and Dependencies: When writing functions, you may include optional metadata (such as descriptions) and dependencies. Dependencies can be other functions or secrets (API keys, etc.).
-
- Import Handling: Manage imports by specifying them in the decorator as dictionaries with 'name' and 'lib' keys. Include these imports within the function body.
-
- Secret Management: When using API keys or authentication secrets, reference the stored key with globals()['key_name'].
-
- Error Handling: Functions should handle errors gracefully, catching exceptions if necessary.
-
- General Guidelines: Use simple, clean, and readable code. Follow the structure and syntax of the functionz framework. Ensure proper function documentation via metadata.
- """
-
- # Function to chunk text
- def chunk_text(text: str, chunk_size: int = 100000, overlap: int = 10000) -> List[str]:
- chunks = []
- start = 0
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+
+### `babyagi/dashboard/static/js/function_details.js`
+
+The `loadFunctionLogs` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+
+```js
+// Expose necessary functions to the global scope
+window.loadFunctionDetails = loadFunctionDetails;
+window.loadFunctionLogs = loadFunctionLogs;
+window.initCodeEditor = initCodeEditor;
+window.displayFunctionDetails = displayFunctionDetails;
+window.createExecutionForm = createExecutionForm;
+window.updateFunction = updateFunction;
+window.executeFunction = executeFunction;
+window.toggleVersionHistory = toggleVersionHistory;
+window.loadFunctionVersions = loadFunctionVersions;
+window.activateVersion = activateVersion;
+
+function loadFunctionDetails() {
+ fetch(getApiRoute('getFunction'))
+ .then(response => {
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+ return response.json();
+ })
+ .then(data => {
+ functionData = data;
+ console.log("functionData",functionData)
+ displayFunctionDetails();
+ createExecutionForm();
+ initCodeEditor();
+ })
+ .catch(error => {
+ console.error('Error:', error);
+ document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
+ });
+}
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-
-### `babyagi/functionz/packs/drafts/generate_function.py`
-
-The `Endpoint` class in [`babyagi/functionz/packs/drafts/generate_function.py`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/functionz/packs/drafts/generate_function.py) handles a key part of this chapter's functionality:
-
-```py
-
- # Define Pydantic models
- class Endpoint(BaseModel):
- method: Optional[str]
- url: str
- description: Optional[str] = None
-
- class APIDetails(BaseModel):
- api_name: str = Field(alias="name") # Use alias to map 'name' to 'api_name'
- purpose: str
- endpoints: Optional[List[Union[Endpoint, str]]] = Field(default_factory=list)
-
- @validator("endpoints", pre=True, each_item=True)
- def convert_to_endpoint(cls, v):
- """Convert string URLs into Endpoint objects if necessary."""
- if isinstance(v, str):
- return Endpoint(url=v) # Create an Endpoint object from a URL string
- return v
-
- class APIResponse(BaseModel):
- name: str
- purpose: str
- endpoints: List[Endpoint]
-
- # System prompt
- system_prompt = """
- [Your existing system prompt here]
- """
-
- prompt_for_apis = f"""You are an assistant analyzing function requirements.
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+
+### `babyagi/dashboard/static/js/function_details.js`
+
+The `initCodeEditor` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+
+```js
+window.loadFunctionDetails = loadFunctionDetails;
+window.loadFunctionLogs = loadFunctionLogs;
+window.initCodeEditor = initCodeEditor;
+window.displayFunctionDetails = displayFunctionDetails;
+window.createExecutionForm = createExecutionForm;
+window.updateFunction = updateFunction;
+window.executeFunction = executeFunction;
+window.toggleVersionHistory = toggleVersionHistory;
+window.loadFunctionVersions = loadFunctionVersions;
+window.activateVersion = activateVersion;
+
+function loadFunctionDetails() {
+ fetch(getApiRoute('getFunction'))
+ .then(response => {
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+ return response.json();
+ })
+ .then(data => {
+ functionData = data;
+ console.log("functionData",functionData)
+ displayFunctionDetails();
+ createExecutionForm();
+ initCodeEditor();
+ })
+ .catch(error => {
+ console.error('Error:', error);
+ document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
+ });
+}
- The user has provided the following function description: {description}.
```
-This class is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
+This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[ExtractionInfo]
- B[GeneratedFunction]
- C[Config]
- D[Endpoint]
- E[APIResponse]
+ A[getApiRoute]
+ B[loadFunctionDetails]
+ C[loadFunctionLogs]
+ D[initCodeEditor]
+ E[code]
A --> B
B --> C
C --> D
diff --git a/tutorials/babyagi-tutorial/08-production-patterns-and-research-adaptations.md b/tutorials/babyagi-tutorial/08-production-patterns-and-research-adaptations.md
index 9d5dd480..f02d3fa2 100644
--- a/tutorials/babyagi-tutorial/08-production-patterns-and-research-adaptations.md
+++ b/tutorials/babyagi-tutorial/08-production-patterns-and-research-adaptations.md
@@ -37,170 +37,168 @@ This chapter covers how to run BabyAGI reliably in production environments and h
You now have the patterns needed to run BabyAGI safely in production environments and to adapt it for research experiments with full reproducibility, cost control, and observability.
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `babyagi/dashboard/static/js/function_details.js`
+### `babyagi/dashboard/static/js/log_dashboard.js`
-The `getApiRoute` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+The `buildLogTree` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
```js
-// Helper function to get the API route
-function getApiRoute(routeName, ...args) {
- if (typeof apiRoutes[routeName] === 'function') {
- return apiRoutes[routeName](...args);
- } else {
- return apiRoutes[routeName];
+ // Build the tree structure
+ rootLogs = buildLogTree(filteredLogs);
+
+ renderLogs();
+ } catch (error) {
+ console.error('Error populating filters:', error);
+ alert('Failed to load logs for filters. Please try again later.');
}
}
-window.getApiRoute = getApiRoute;
-
-let functionData;
-let codeEditor;
-
-// Expose necessary functions to the global scope
-window.loadFunctionDetails = loadFunctionDetails;
-window.loadFunctionLogs = loadFunctionLogs;
-window.initCodeEditor = initCodeEditor;
-window.displayFunctionDetails = displayFunctionDetails;
-window.createExecutionForm = createExecutionForm;
-window.updateFunction = updateFunction;
-window.executeFunction = executeFunction;
-window.toggleVersionHistory = toggleVersionHistory;
-window.loadFunctionVersions = loadFunctionVersions;
-window.activateVersion = activateVersion;
-
-function loadFunctionDetails() {
- fetch(getApiRoute('getFunction'))
- .then(response => {
- if (!response.ok) {
- throw new Error(`HTTP error! status: ${response.status}`);
+// Build log tree based on parent_log_id
+function buildLogTree(logs) {
+ const logsById = {};
+ const rootLogs = [];
+
+ // Initialize logsById mapping and add children array to each log
+ logs.forEach(log => {
+ log.children = [];
+ logsById[log.id] = log;
+ });
+
+ // Build the tree
+ logs.forEach(log => {
+ if (log.parent_log_id !== null) {
+ const parentLog = logsById[log.parent_log_id];
+ if (parentLog) {
+ parentLog.children.push(log);
+ } else {
+ // Parent log not found, treat as root
+ rootLogs.push(log);
+ }
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/dashboard/static/js/function_details.js`
+### `babyagi/dashboard/static/js/log_dashboard.js`
-The `loadFunctionDetails` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+The `renderLogs` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
```js
+ rootLogs = buildLogTree(filteredLogs);
+
+ renderLogs();
+ } catch (error) {
+ console.error('Error populating filters:', error);
+ alert('Failed to load logs for filters. Please try again later.');
+ }
+}
-// Expose necessary functions to the global scope
-window.loadFunctionDetails = loadFunctionDetails;
-window.loadFunctionLogs = loadFunctionLogs;
-window.initCodeEditor = initCodeEditor;
-window.displayFunctionDetails = displayFunctionDetails;
-window.createExecutionForm = createExecutionForm;
-window.updateFunction = updateFunction;
-window.executeFunction = executeFunction;
-window.toggleVersionHistory = toggleVersionHistory;
-window.loadFunctionVersions = loadFunctionVersions;
-window.activateVersion = activateVersion;
-
-function loadFunctionDetails() {
- fetch(getApiRoute('getFunction'))
- .then(response => {
- if (!response.ok) {
- throw new Error(`HTTP error! status: ${response.status}`);
+// Build log tree based on parent_log_id
+function buildLogTree(logs) {
+ const logsById = {};
+ const rootLogs = [];
+
+ // Initialize logsById mapping and add children array to each log
+ logs.forEach(log => {
+ log.children = [];
+ logsById[log.id] = log;
+ });
+
+ // Build the tree
+ logs.forEach(log => {
+ if (log.parent_log_id !== null) {
+ const parentLog = logsById[log.parent_log_id];
+ if (parentLog) {
+ parentLog.children.push(log);
+ } else {
+ // Parent log not found, treat as root
+ rootLogs.push(log);
}
- return response.json();
- })
- .then(data => {
- functionData = data;
- console.log("functionData",functionData)
- displayFunctionDetails();
- createExecutionForm();
- initCodeEditor();
- })
- .catch(error => {
- console.error('Error:', error);
- document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
- });
+ } else {
+ rootLogs.push(log);
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/dashboard/static/js/function_details.js`
+### `babyagi/dashboard/static/js/log_dashboard.js`
-The `loadFunctionLogs` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+The `renderTable` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
```js
-// Expose necessary functions to the global scope
-window.loadFunctionDetails = loadFunctionDetails;
-window.loadFunctionLogs = loadFunctionLogs;
-window.initCodeEditor = initCodeEditor;
-window.displayFunctionDetails = displayFunctionDetails;
-window.createExecutionForm = createExecutionForm;
-window.updateFunction = updateFunction;
-window.executeFunction = executeFunction;
-window.toggleVersionHistory = toggleVersionHistory;
-window.loadFunctionVersions = loadFunctionVersions;
-window.activateVersion = activateVersion;
-
-function loadFunctionDetails() {
- fetch(getApiRoute('getFunction'))
- .then(response => {
- if (!response.ok) {
- throw new Error(`HTTP error! status: ${response.status}`);
- }
- return response.json();
- })
- .then(data => {
- functionData = data;
- console.log("functionData",functionData)
- displayFunctionDetails();
- createExecutionForm();
- initCodeEditor();
- })
- .catch(error => {
- console.error('Error:', error);
- document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
- });
+// Render logs in table and grid formats
+function renderLogs() {
+ renderTable();
+ renderGrid();
+}
+
+// Render Logs Table (Desktop View)
+function renderTable() {
+ const tableBody = document.querySelector('#logTable tbody');
+ tableBody.innerHTML = '';
+
+ rootLogs.forEach(log => {
+ renderLogRow(tableBody, log, 0);
+ });
}
+
+// Recursive function to render each log row and its children
+function renderLogRow(tableBody, log, depth, parentRowId) {
+ const row = document.createElement('tr');
+ const rowId = 'log-' + log.id;
+ row.id = rowId;
+
+ // If it's a child row, add a class to indicate it's a child
+ if (parentRowId) {
+ row.classList.add('child-of-log-' + parentRowId);
+ row.style.display = 'none'; // Hide child rows by default
+ }
+
+ // Check if log has children
+ const hasChildren = log.children && log.children.length > 0;
+
+ // Create expand/collapse icon
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
-### `babyagi/dashboard/static/js/function_details.js`
+### `babyagi/dashboard/static/js/log_dashboard.js`
-The `initCodeEditor` function in [`babyagi/dashboard/static/js/function_details.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/function_details.js) handles a key part of this chapter's functionality:
+The `renderLogRow` function in [`babyagi/dashboard/static/js/log_dashboard.js`](https://github.com/yoheinakajima/babyagi/blob/HEAD/babyagi/dashboard/static/js/log_dashboard.js) handles a key part of this chapter's functionality:
```js
-window.loadFunctionDetails = loadFunctionDetails;
-window.loadFunctionLogs = loadFunctionLogs;
-window.initCodeEditor = initCodeEditor;
-window.displayFunctionDetails = displayFunctionDetails;
-window.createExecutionForm = createExecutionForm;
-window.updateFunction = updateFunction;
-window.executeFunction = executeFunction;
-window.toggleVersionHistory = toggleVersionHistory;
-window.loadFunctionVersions = loadFunctionVersions;
-window.activateVersion = activateVersion;
-
-function loadFunctionDetails() {
- fetch(getApiRoute('getFunction'))
- .then(response => {
- if (!response.ok) {
- throw new Error(`HTTP error! status: ${response.status}`);
- }
- return response.json();
- })
- .then(data => {
- functionData = data;
- console.log("functionData",functionData)
- displayFunctionDetails();
- createExecutionForm();
- initCodeEditor();
- })
- .catch(error => {
- console.error('Error:', error);
- document.getElementById('functionDetails').innerHTML = `Error loading function details: ${error.message}
`;
- });
+
+ rootLogs.forEach(log => {
+ renderLogRow(tableBody, log, 0);
+ });
}
+// Recursive function to render each log row and its children
+function renderLogRow(tableBody, log, depth, parentRowId) {
+ const row = document.createElement('tr');
+ const rowId = 'log-' + log.id;
+ row.id = rowId;
+
+ // If it's a child row, add a class to indicate it's a child
+ if (parentRowId) {
+ row.classList.add('child-of-log-' + parentRowId);
+ row.style.display = 'none'; // Hide child rows by default
+ }
+
+ // Check if log has children
+ const hasChildren = log.children && log.children.length > 0;
+
+ // Create expand/collapse icon
+ let toggleIcon = '';
+ if (hasChildren) {
+ toggleIcon = ` `;
+ }
+
+ row.innerHTML = `
+ ${log.id}
+ ${log.function_name}
+ ${toggleIcon}${log.message}
+ ${new Date(log.timestamp).toLocaleString()}
```
This function is important because it defines how BabyAGI Tutorial: The Original Autonomous AI Task Agent Framework implements the patterns covered in this chapter.
@@ -210,11 +208,11 @@ This function is important because it defines how BabyAGI Tutorial: The Original
```mermaid
flowchart TD
- A[getApiRoute]
- B[loadFunctionDetails]
- C[loadFunctionLogs]
- D[initCodeEditor]
- E[code]
+ A[buildLogTree]
+ B[renderLogs]
+ C[renderTable]
+ D[renderLogRow]
+ E[toggleChildRows]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/01-getting-started.md b/tutorials/beads-tutorial/01-getting-started.md
index 4b01bfe1..fc42b5b3 100644
--- a/tutorials/beads-tutorial/01-getting-started.md
+++ b/tutorials/beads-tutorial/01-getting-started.md
@@ -31,8 +31,6 @@ You now have a working Beads baseline for structured task tracking.
Next: [Chapter 2: Architecture and Data Model](02-architecture-and-data-model.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `.golangci.yml`
@@ -76,12 +74,53 @@ The `fields` interface in [`.golangci.yml`](https://github.com/steveyegge/beads/
This interface is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
+### `website/docusaurus.config.ts`
+
+The `parseUrl` function in [`website/docusaurus.config.ts`](https://github.com/steveyegge/beads/blob/HEAD/website/docusaurus.config.ts) handles a key part of this chapter's functionality:
+
+```ts
+
+// Parse SITE_URL into origin (url) and pathname (baseUrl)
+function parseUrl(fullUrl: string): { origin: string; baseUrl: string } {
+ try {
+ const parsed = new URL(fullUrl);
+ const baseUrl = parsed.pathname === '/' ? `/${projectName}/` :
+ parsed.pathname.endsWith('/') ? parsed.pathname : `${parsed.pathname}/`;
+ return { origin: parsed.origin, baseUrl };
+ } catch {
+ return { origin: `https://${orgName}.github.io`, baseUrl: `/${projectName}/` };
+ }
+}
+
+const { origin: siteUrl, baseUrl } = parseUrl(siteUrlEnv);
+
+const config: Config = {
+ title: 'Beads Documentation',
+ tagline: 'Dolt-powered issue tracker for AI-supervised coding workflows',
+ favicon: 'img/favicon.svg',
+
+ // Enable Mermaid diagrams in markdown
+ markdown: {
+ mermaid: true,
+ },
+ themes: ['@docusaurus/theme-mermaid'],
+
+ // future: {
+ // v4: true,
+ // },
+
+ // GitHub Pages deployment (environment-configurable)
+ url: siteUrl,
+```
+
+This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
+
### `beads.go`
The `Open` function in [`beads.go`](https://github.com/steveyegge/beads/blob/HEAD/beads.go) handles a key part of this chapter's functionality:
```go
-type Transaction = beads.Transaction
+)
// Open opens a Dolt-backed beads database at the given path.
// This always opens in embedded mode. Use OpenFromConfig to respect
@@ -158,57 +197,16 @@ func FindAllDatabases() []DatabaseInfo {
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `beads.go`
-
-The `FindDatabasePath` function in [`beads.go`](https://github.com/steveyegge/beads/blob/HEAD/beads.go) handles a key part of this chapter's functionality:
-
-```go
-}
-
-// FindDatabasePath finds the beads database in the current directory tree
-func FindDatabasePath() string {
- return beads.FindDatabasePath()
-}
-
-// FindBeadsDir finds the .beads/ directory in the current directory tree.
-// Returns empty string if not found.
-func FindBeadsDir() string {
- return beads.FindBeadsDir()
-}
-
-// DatabaseInfo contains information about a beads database
-type DatabaseInfo = beads.DatabaseInfo
-
-// FindAllDatabases finds all beads databases in the system
-func FindAllDatabases() []DatabaseInfo {
- return beads.FindAllDatabases()
-}
-
-// RedirectInfo contains information about a beads directory redirect
-type RedirectInfo = beads.RedirectInfo
-
-// GetRedirectInfo checks if the current beads directory is redirected.
-// Returns RedirectInfo with IsRedirected=true if a redirect is active.
-func GetRedirectInfo() RedirectInfo {
- return beads.GetRedirectInfo()
-}
-
-// Core types from internal/types
-type (
-```
-
-This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-
## How These Components Connect
```mermaid
flowchart TD
A[fields]
- B[Open]
- C[OpenFromConfig]
- D[FindDatabasePath]
- E[FindBeadsDir]
+ B[parseUrl]
+ C[Open]
+ D[OpenFromConfig]
+ E[FindDatabasePath]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/02-architecture-and-data-model.md b/tutorials/beads-tutorial/02-architecture-and-data-model.md
index 5d095b91..20a6b639 100644
--- a/tutorials/beads-tutorial/02-architecture-and-data-model.md
+++ b/tutorials/beads-tutorial/02-architecture-and-data-model.md
@@ -37,8 +37,6 @@ You now understand how Beads persists and structures long-horizon task state.
Next: [Chapter 3: Core Workflow Commands](03-core-workflow-commands.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/generate-newsletter.py`
diff --git a/tutorials/beads-tutorial/03-core-workflow-commands.md b/tutorials/beads-tutorial/03-core-workflow-commands.md
index bc487e4d..6e475337 100644
--- a/tutorials/beads-tutorial/03-core-workflow-commands.md
+++ b/tutorials/beads-tutorial/03-core-workflow-commands.md
@@ -37,170 +37,168 @@ You now have a repeatable command workflow for day-to-day Beads operation.
Next: [Chapter 4: Dependency Graph and Hierarchy Patterns](04-dependency-graph-and-hierarchy-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cmd/bd/dolt.go`
+### `cmd/bd/main.go`
-The `extractSSHHost` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
+The `loadEnvironment` function in [`cmd/bd/main.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/main.go) handles a key part of this chapter's functionality:
```go
- if doltutil.IsSSHURL(r.URL) {
- // Test SSH connectivity by parsing host from URL
- sshHost := extractSSHHost(r.URL)
- if sshHost != "" {
- fmt.Printf(" %s (%s)... ", r.Name, r.URL)
- if testSSHConnectivity(sshHost) {
- fmt.Printf("%s\n", ui.RenderPass("✓ reachable"))
- } else {
- fmt.Printf("%s\n", ui.RenderWarn("✗ unreachable"))
- }
- }
- } else if strings.HasPrefix(r.URL, "https://") || strings.HasPrefix(r.URL, "http://") {
- fmt.Printf(" %s (%s)... ", r.Name, r.URL)
- if testHTTPConnectivity(r.URL) {
- fmt.Printf("%s\n", ui.RenderPass("✓ reachable"))
- } else {
- fmt.Printf("%s\n", ui.RenderWarn("✗ unreachable"))
- }
- } else {
- fmt.Printf(" %s (%s)... skipped (no connectivity test for this scheme)\n", r.Name, r.URL)
- }
- }
}
-// serverDialTimeout controls the TCP dial timeout for server connection tests.
-// Tests may reduce this to avoid slow unreachable-host hangs in CI.
-var serverDialTimeout = 3 * time.Second
-
-func testServerConnection(host string, port int) bool {
- addr := net.JoinHostPort(host, strconv.Itoa(port))
+// loadEnvironment runs the lightweight, always-needed environment setup that
+// must happen before the noDbCommands early return. This ensures commands like
+// "bd doctor --server" pick up per-project Dolt credentials from .beads/.env.
+//
+// This function intentionally does NOT do any store initialization, auto-migrate,
+// or telemetry setup — those belong in the store-init phase that runs after the
+// noDbCommands check.
+func loadEnvironment() {
+ // FindBeadsDir is lightweight (filesystem walk, no git subprocesses)
+ // and resolves BEADS_DIR, redirects, and worktree paths.
+ if beadsDir := beads.FindBeadsDir(); beadsDir != "" {
+ loadBeadsEnvFile(beadsDir)
+ // Non-fatal warning if .beads/ directory has overly permissive access.
+ config.CheckBeadsDirPermissions(beadsDir)
+ }
+}
- conn, err := net.DialTimeout("tcp", addr, serverDialTimeout)
+// repairSharedServerEmbeddedMismatch detects and auto-repairs the case where
+// shared-server mode is active but metadata.json still pins dolt_mode=embedded.
+// This prevents the silent fallback into embedded mode that hides server-backed
+// issue state after upgrades (GH#2949).
+func repairSharedServerEmbeddedMismatch(beadsDir string, cfg *configfile.Config) {
+ if cfg == nil {
+ return
+ }
+ if strings.ToLower(strings.TrimSpace(cfg.DoltMode)) != configfile.DoltModeEmbedded {
+ return
+ }
+ if !doltserver.IsSharedServerMode() {
+ return
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/dolt.go`
+### `cmd/bd/main.go`
-The `testSSHConnectivity` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
+The `repairSharedServerEmbeddedMismatch` function in [`cmd/bd/main.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/main.go) handles a key part of this chapter's functionality:
```go
- if sshHost != "" {
- fmt.Printf(" %s (%s)... ", r.Name, r.URL)
- if testSSHConnectivity(sshHost) {
- fmt.Printf("%s\n", ui.RenderPass("✓ reachable"))
- } else {
- fmt.Printf("%s\n", ui.RenderWarn("✗ unreachable"))
- }
- }
- } else if strings.HasPrefix(r.URL, "https://") || strings.HasPrefix(r.URL, "http://") {
- fmt.Printf(" %s (%s)... ", r.Name, r.URL)
- if testHTTPConnectivity(r.URL) {
- fmt.Printf("%s\n", ui.RenderPass("✓ reachable"))
- } else {
- fmt.Printf("%s\n", ui.RenderWarn("✗ unreachable"))
- }
- } else {
- fmt.Printf(" %s (%s)... skipped (no connectivity test for this scheme)\n", r.Name, r.URL)
- }
- }
}
-// serverDialTimeout controls the TCP dial timeout for server connection tests.
-// Tests may reduce this to avoid slow unreachable-host hangs in CI.
-var serverDialTimeout = 3 * time.Second
-
-func testServerConnection(host string, port int) bool {
- addr := net.JoinHostPort(host, strconv.Itoa(port))
-
- conn, err := net.DialTimeout("tcp", addr, serverDialTimeout)
- if err != nil {
- return false
+// repairSharedServerEmbeddedMismatch detects and auto-repairs the case where
+// shared-server mode is active but metadata.json still pins dolt_mode=embedded.
+// This prevents the silent fallback into embedded mode that hides server-backed
+// issue state after upgrades (GH#2949).
+func repairSharedServerEmbeddedMismatch(beadsDir string, cfg *configfile.Config) {
+ if cfg == nil {
+ return
}
+ if strings.ToLower(strings.TrimSpace(cfg.DoltMode)) != configfile.DoltModeEmbedded {
+ return
+ }
+ if !doltserver.IsSharedServerMode() {
+ return
+ }
+ fmt.Fprintln(os.Stderr, "Notice: shared-server is enabled but metadata.json had dolt_mode=embedded.")
+ cfg.DoltMode = configfile.DoltModeServer
+ if err := cfg.Save(beadsDir); err != nil {
+ fmt.Fprintf(os.Stderr, "Warning: failed to auto-repair metadata.json: %v\n", err)
+ fmt.Fprintln(os.Stderr, "Fix manually: set dolt_mode to \"server\" in .beads/metadata.json")
+ } else {
+ fmt.Fprintln(os.Stderr, "Auto-repaired: dolt_mode updated to \"server\" in metadata.json.")
+ }
+}
+
+// loadServerModeFromConfig loads the storage mode (embedded vs server) from
+// metadata.json so that isEmbeddedMode() returns the correct value. Called
+// for commands that skip full DB init but still need to know the mode.
+func loadServerModeFromConfig() {
+ beadsDir := beads.FindBeadsDir()
+ if beadsDir == "" {
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/dolt.go`
+### `cmd/bd/main.go`
-The `httpURLToTCPAddr` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
+The `loadServerModeFromConfig` function in [`cmd/bd/main.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/main.go) handles a key part of this chapter's functionality:
```go
}
-// httpURLToTCPAddr extracts a TCP dial address (host:port) from an HTTP(S) URL.
-// Handles IPv6 addresses correctly (e.g., https://[::1]:8080/path).
-func httpURLToTCPAddr(url string) string {
- host := url
- host = strings.TrimPrefix(host, "https://")
- host = strings.TrimPrefix(host, "http://")
- if idx := strings.Index(host, "/"); idx >= 0 {
- host = host[:idx]
- }
- defaultPort := "443"
- if strings.HasPrefix(url, "http://") {
- defaultPort = "80"
- }
- // Use net.SplitHostPort to correctly handle IPv6 addresses (which
- // contain colons that would otherwise be confused with host:port).
- if h, p, err := net.SplitHostPort(host); err == nil {
- return net.JoinHostPort(h, p)
- }
- // No port in host string. Strip IPv6 brackets if present so
- // JoinHostPort can re-add them correctly.
- h := strings.TrimPrefix(host, "[")
- h = strings.TrimSuffix(h, "]")
- return net.JoinHostPort(h, defaultPort)
+// loadServerModeFromConfig loads the storage mode (embedded vs server) from
+// metadata.json so that isEmbeddedMode() returns the correct value. Called
+// for commands that skip full DB init but still need to know the mode.
+func loadServerModeFromConfig() {
+ beadsDir := beads.FindBeadsDir()
+ if beadsDir == "" {
+ return
+ }
+ cfg, err := configfile.Load(beadsDir)
+ if err != nil || cfg == nil {
+ return
+ }
+ repairSharedServerEmbeddedMismatch(beadsDir, cfg)
+ sm := cfg.IsDoltServerMode()
+ // GH#2946: shared-server override for stale metadata.json (no-db commands)
+ if !sm && doltserver.IsSharedServerMode() {
+ sm = true
+ }
+ serverMode = sm
+ if cmdCtx != nil {
+ cmdCtx.ServerMode = sm
+ }
}
-// testHTTPConnectivity tests if an HTTP(S) URL is reachable via TCP.
-func testHTTPConnectivity(url string) bool {
- addr := httpURLToTCPAddr(url)
- conn, err := net.DialTimeout("tcp", addr, 5*time.Second)
- if err != nil {
+func preserveRedirectSourceDatabase(beadsDir string) {
+ if beadsDir == "" || os.Getenv("BEADS_DOLT_SERVER_DATABASE") != "" {
+ return
+ }
+
+ rInfo := beads.ResolveRedirect(beadsDir)
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/dolt.go`
+### `cmd/bd/main.go`
-The `testHTTPConnectivity` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
+The `preserveRedirectSourceDatabase` function in [`cmd/bd/main.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/main.go) handles a key part of this chapter's functionality:
```go
- } else if strings.HasPrefix(r.URL, "https://") || strings.HasPrefix(r.URL, "http://") {
- fmt.Printf(" %s (%s)... ", r.Name, r.URL)
- if testHTTPConnectivity(r.URL) {
- fmt.Printf("%s\n", ui.RenderPass("✓ reachable"))
- } else {
- fmt.Printf("%s\n", ui.RenderWarn("✗ unreachable"))
- }
- } else {
- fmt.Printf(" %s (%s)... skipped (no connectivity test for this scheme)\n", r.Name, r.URL)
- }
- }
}
-// serverDialTimeout controls the TCP dial timeout for server connection tests.
-// Tests may reduce this to avoid slow unreachable-host hangs in CI.
-var serverDialTimeout = 3 * time.Second
-
-func testServerConnection(host string, port int) bool {
- addr := net.JoinHostPort(host, strconv.Itoa(port))
+func preserveRedirectSourceDatabase(beadsDir string) {
+ if beadsDir == "" || os.Getenv("BEADS_DOLT_SERVER_DATABASE") != "" {
+ return
+ }
- conn, err := net.DialTimeout("tcp", addr, serverDialTimeout)
- if err != nil {
- return false
+ rInfo := beads.ResolveRedirect(beadsDir)
+ if rInfo.WasRedirected && rInfo.SourceDatabase != "" {
+ _ = os.Setenv("BEADS_DOLT_SERVER_DATABASE", rInfo.SourceDatabase)
+ if os.Getenv("BD_DEBUG_ROUTING") != "" {
+ fmt.Fprintf(os.Stderr, "[routing] Preserved source dolt_database %q across redirect\n", rInfo.SourceDatabase)
+ }
}
- _ = conn.Close() // Best effort cleanup
- return true
}
-// extractSSHHost extracts the hostname from an SSH URL for connectivity testing.
-func extractSSHHost(url string) string {
- // git+ssh://git@github.com/org/repo.git → github.com
- // ssh://git@github.com/org/repo.git → github.com
+func selectedNoDBBeadsDir() string {
+ selectedDBPath := ""
+ if rootCmd.PersistentFlags().Changed("db") && dbPath != "" {
+ selectedDBPath = dbPath
+ } else if envDB := os.Getenv("BEADS_DB"); envDB != "" {
+ selectedDBPath = envDB
+ } else if envDB := os.Getenv("BD_DB"); envDB != "" {
+ selectedDBPath = envDB
+ } else {
+ selectedDBPath = dbPath
+ }
+ if selectedDBPath != "" {
+ if selectedBeadsDir := resolveCommandBeadsDir(selectedDBPath); selectedBeadsDir != "" {
+ return selectedBeadsDir
+ }
+ }
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
@@ -210,11 +208,11 @@ This function is important because it defines how Beads Tutorial: Git-Backed Tas
```mermaid
flowchart TD
- A[extractSSHHost]
- B[testSSHConnectivity]
- C[httpURLToTCPAddr]
- D[testHTTPConnectivity]
- E[openDoltServerConnection]
+ A[loadEnvironment]
+ B[repairSharedServerEmbeddedMismatch]
+ C[loadServerModeFromConfig]
+ D[preserveRedirectSourceDatabase]
+ E[selectedNoDBBeadsDir]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/04-dependency-graph-and-hierarchy-patterns.md b/tutorials/beads-tutorial/04-dependency-graph-and-hierarchy-patterns.md
index 7c3667fd..74036eda 100644
--- a/tutorials/beads-tutorial/04-dependency-graph-and-hierarchy-patterns.md
+++ b/tutorials/beads-tutorial/04-dependency-graph-and-hierarchy-patterns.md
@@ -37,170 +37,168 @@ You now can model complex plans as clean, navigable Beads graphs.
Next: [Chapter 5: Agent Integration and AGENTS.md Patterns](05-agent-integration-and-agents-md-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `internal/types/types.go`
+### `cmd/bd/list.go`
-The `int` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `findAllDescendants` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
```go
-// Issue represents a trackable work item.
-// Fields are organized into logical groups for maintainability.
-type Issue struct {
- // ===== Core Identification =====
- ID string `json:"id"`
- ContentHash string `json:"-"` // Internal: SHA256 of canonical content
-
- // ===== Issue Content =====
- Title string `json:"title"`
- Description string `json:"description,omitempty"`
- Design string `json:"design,omitempty"`
- AcceptanceCriteria string `json:"acceptance_criteria,omitempty"`
- Notes string `json:"notes,omitempty"`
- SpecID string `json:"spec_id,omitempty"`
-
- // ===== Status & Workflow =====
- Status Status `json:"status,omitempty"`
- Priority int `json:"priority"` // No omitempty: 0 is valid (P0/critical)
- IssueType IssueType `json:"issue_type,omitempty"`
-
- // ===== Assignment =====
- Assignee string `json:"assignee,omitempty"`
- Owner string `json:"owner,omitempty"` // Human owner for CV attribution (git author email)
- EstimatedMinutes *int `json:"estimated_minutes,omitempty"`
-
- // ===== Timestamps =====
- CreatedAt time.Time `json:"created_at"`
- CreatedBy string `json:"created_by,omitempty"` // Who created this issue (GH#748)
- UpdatedAt time.Time `json:"updated_at"`
- ClosedAt *time.Time `json:"closed_at,omitempty"`
- CloseReason string `json:"close_reason,omitempty"` // Reason provided when closing
-```
-
-This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-
-### `internal/types/types.go`
-
-The `strPtr` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
-
-```go
-
- // Optional fields
- w.strPtr(i.ExternalRef)
- w.str(i.SourceSystem)
- w.flag(i.Pinned, "pinned")
- w.str(string(i.Metadata)) // Include metadata in content hash
- w.flag(i.IsTemplate, "template")
-
- // Bonded molecules
- for _, br := range i.BondedFrom {
- w.str(br.SourceID)
- w.str(br.BondType)
- w.str(br.BondPoint)
+ // Recursively find all descendants
+ err = findAllDescendants(ctx, store, dbPath, parentID, allDescendants, 0, 10) // max depth 10
+ if err != nil {
+ return nil, fmt.Errorf("error finding descendants: %v", err)
}
- // Gate fields for async coordination
- w.str(i.AwaitType)
- w.str(i.AwaitID)
- w.duration(i.Timeout)
- for _, waiter := range i.Waiters {
- w.str(waiter)
+ // Convert map to slice for display
+ treeIssues := make([]*types.Issue, 0, len(allDescendants))
+ for _, issue := range allDescendants {
+ treeIssues = append(treeIssues, issue)
}
- // Molecule type
- w.str(string(i.MolType))
+ return treeIssues, nil
+}
- // Work type
- w.str(string(i.WorkType))
+// findAllDescendants recursively finds all descendants using parent filtering
+func findAllDescendants(ctx context.Context, store storage.DoltStorage, dbPath string, parentID string, result map[string]*types.Issue, currentDepth, maxDepth int) error {
+ if currentDepth >= maxDepth {
+ return nil // Prevent infinite recursion
+ }
- // Event fields
- w.str(i.EventKind)
- w.str(i.Actor)
+ // Get direct children using the same filter logic as regular --parent
+ var children []*types.Issue
+ err := withStorage(ctx, store, dbPath, func(s storage.DoltStorage) error {
+ filter := types.IssueFilter{
+ ParentID: &parentID,
+ }
+ var err error
+ children, err = s.SearchIssues(ctx, "", filter)
+ return err
+ })
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/types/types.go`
+### `cmd/bd/list.go`
-The `duration` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `watchIssues` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
```go
- w.str(i.AwaitType)
- w.str(i.AwaitID)
- w.duration(i.Timeout)
- for _, waiter := range i.Waiters {
- w.str(waiter)
- }
-
- // Molecule type
- w.str(string(i.MolType))
-
- // Work type
- w.str(string(i.WorkType))
-
- // Event fields
- w.str(i.EventKind)
- w.str(i.Actor)
- w.str(i.Target)
- w.str(i.Payload)
-
- return fmt.Sprintf("%x", h.Sum(nil))
}
-// hashFieldWriter provides helper methods for writing fields to a hash.
-// Each method writes the value followed by a null separator for consistency.
-type hashFieldWriter struct {
- h hash.Hash
-}
-
-func (w hashFieldWriter) str(s string) {
- w.h.Write([]byte(s))
- w.h.Write([]byte{0})
-}
+// watchIssues polls for changes and re-displays (GH#654)
+// Uses polling instead of fsnotify because Dolt stores data in a server-side
+// database, not files — file watchers never fire.
+func watchIssues(ctx context.Context, store storage.DoltStorage, filter types.IssueFilter, sortBy string, reverse bool) {
+ // Initial display
+ issues, err := store.SearchIssues(ctx, "", filter)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error querying issues: %v\n", err)
+ return
+ }
+ sortIssues(issues, sortBy, reverse)
+ displayPrettyList(issues, true)
+ lastSnapshot := issueSnapshot(issues)
+
+ fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
+
+ // Handle Ctrl+C — deferred Stop prevents signal handler leak
+ sigChan := make(chan os.Signal, 1)
+ signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ defer signal.Stop(sigChan)
+
+ pollInterval := 2 * time.Second
+ ticker := time.NewTicker(pollInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-sigChan:
+ fmt.Fprintf(os.Stderr, "\nStopped watching.\n")
+ return
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/types/types.go`
+### `cmd/bd/list.go`
-The `flag` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `issueSnapshot` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
```go
- w.strPtr(i.ExternalRef)
- w.str(i.SourceSystem)
- w.flag(i.Pinned, "pinned")
- w.str(string(i.Metadata)) // Include metadata in content hash
- w.flag(i.IsTemplate, "template")
-
- // Bonded molecules
- for _, br := range i.BondedFrom {
- w.str(br.SourceID)
- w.str(br.BondType)
- w.str(br.BondPoint)
- }
+ sortIssues(issues, sortBy, reverse)
+ displayPrettyList(issues, true)
+ lastSnapshot := issueSnapshot(issues)
+
+ fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
+
+ // Handle Ctrl+C — deferred Stop prevents signal handler leak
+ sigChan := make(chan os.Signal, 1)
+ signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ defer signal.Stop(sigChan)
+
+ pollInterval := 2 * time.Second
+ ticker := time.NewTicker(pollInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-sigChan:
+ fmt.Fprintf(os.Stderr, "\nStopped watching.\n")
+ return
+ case <-ticker.C:
+ issues, err := store.SearchIssues(ctx, "", filter)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error refreshing issues: %v\n", err)
+ continue
+ }
+ sortIssues(issues, sortBy, reverse)
+ snap := issueSnapshot(issues)
+ if snap != lastSnapshot {
+ lastSnapshot = snap
+ displayPrettyList(issues, true)
+ fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
+```
- // Gate fields for async coordination
- w.str(i.AwaitType)
- w.str(i.AwaitID)
- w.duration(i.Timeout)
- for _, waiter := range i.Waiters {
- w.str(waiter)
- }
+This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
- // Molecule type
- w.str(string(i.MolType))
+### `cmd/bd/list.go`
- // Work type
- w.str(string(i.WorkType))
+The `sortIssues` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
- // Event fields
- w.str(i.EventKind)
- w.str(i.Actor)
- w.str(i.Target)
- w.str(i.Payload)
+```go
+ return
+ }
+ sortIssues(issues, sortBy, reverse)
+ displayPrettyList(issues, true)
+ lastSnapshot := issueSnapshot(issues)
+
+ fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
+
+ // Handle Ctrl+C — deferred Stop prevents signal handler leak
+ sigChan := make(chan os.Signal, 1)
+ signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
+ defer signal.Stop(sigChan)
+
+ pollInterval := 2 * time.Second
+ ticker := time.NewTicker(pollInterval)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-sigChan:
+ fmt.Fprintf(os.Stderr, "\nStopped watching.\n")
+ return
+ case <-ticker.C:
+ issues, err := store.SearchIssues(ctx, "", filter)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Error refreshing issues: %v\n", err)
+ continue
+ }
+ sortIssues(issues, sortBy, reverse)
+ snap := issueSnapshot(issues)
+ if snap != lastSnapshot {
+ lastSnapshot = snap
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
@@ -210,11 +208,11 @@ This function is important because it defines how Beads Tutorial: Git-Backed Tas
```mermaid
flowchart TD
- A[int]
- B[strPtr]
- C[duration]
- D[flag]
- E[Validate]
+ A[findAllDescendants]
+ B[watchIssues]
+ C[issueSnapshot]
+ D[sortIssues]
+ E[init]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/05-agent-integration-and-agents-md-patterns.md b/tutorials/beads-tutorial/05-agent-integration-and-agents-md-patterns.md
index 3c61ad06..6697816a 100644
--- a/tutorials/beads-tutorial/05-agent-integration-and-agents-md-patterns.md
+++ b/tutorials/beads-tutorial/05-agent-integration-and-agents-md-patterns.md
@@ -37,170 +37,168 @@ You now have an integration baseline for predictable agent behavior with Beads.
Next: [Chapter 6: Multi-Branch Collaboration and Protected Flows](06-multi-branch-collaboration-and-protected-flows.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `internal/types/types.go`
+### `cmd/bd/dolt.go`
-The `IsValid` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `isTimeoutError` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
```go
- return fmt.Errorf("priority must be between 0 and 4 (got %d)", i.Priority)
- }
- if !i.Status.IsValidWithCustom(customStatuses) {
- return fmt.Errorf("invalid status: %s", i.Status)
- }
- if !i.IssueType.IsValidWithCustom(customTypes) {
- return fmt.Errorf("invalid issue type: %s", i.IssueType)
- }
- if i.EstimatedMinutes != nil && *i.EstimatedMinutes < 0 {
- return fmt.Errorf("estimated_minutes cannot be negative")
- }
- // Enforce closed_at invariant: closed_at should be set if and only if status is closed
- if i.Status == StatusClosed && i.ClosedAt == nil {
- return fmt.Errorf("closed issues must have closed_at timestamp")
- }
- if i.Status != StatusClosed && i.ClosedAt != nil {
- return fmt.Errorf("non-closed issues cannot have closed_at timestamp")
- }
- // Validate metadata is well-formed JSON if set (GH#1406)
- if len(i.Metadata) > 0 {
- if !json.Valid(i.Metadata) {
- return fmt.Errorf("metadata must be valid JSON")
+ fmt.Fprintf(os.Stderr, " FAIL: %s: %v\n", name, err)
+ failures++
+ if isTimeoutError(err) {
+ consecutiveTimeouts++
+ }
+ } else {
+ fmt.Printf(" Dropped: %s\n", name)
+ dropped++
+ failures = 0
+ consecutiveTimeouts = 0
+ }
+
+ // Rate limiting: pause between batches to let the server breathe
+ if (i+1)%batchSize == 0 && i+1 < len(stale) {
+ fmt.Printf(" [%d/%d] pausing %s...\n", i+1, len(stale), batchPause)
+ time.Sleep(batchPause)
+ }
}
- }
- // Ephemeral and NoHistory are mutually exclusive (GH#2619)
- if i.Ephemeral && i.NoHistory {
- return fmt.Errorf("ephemeral and no_history are mutually exclusive")
- }
- return nil
+ fmt.Printf("\nDropped %d/%d stale databases.\n", dropped, len(stale))
+ },
}
-// ValidateForImport validates the issue for multi-repo import (federation trust model).
+// confirmOverwrite prompts the user to confirm overwriting an existing remote.
+// Returns true if the user confirms. Returns true without prompting if stdin is
+// not a terminal (non-interactive/CI contexts).
+func confirmOverwrite(surface, name, existingURL, newURL string) bool {
+ if !term.IsTerminal(int(os.Stdin.Fd())) {
+ return true
+ }
+ fmt.Printf(" Remote %q already exists on %s: %s\n", name, surface, existingURL)
+ fmt.Printf(" Overwrite with: %s\n", newURL)
+ fmt.Print(" Overwrite? (y/N): ")
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/types/types.go`
+### `cmd/bd/dolt.go`
-The `IsValid` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `init` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
```go
- return fmt.Errorf("priority must be between 0 and 4 (got %d)", i.Priority)
- }
- if !i.Status.IsValidWithCustom(customStatuses) {
- return fmt.Errorf("invalid status: %s", i.Status)
- }
- if !i.IssueType.IsValidWithCustom(customTypes) {
- return fmt.Errorf("invalid issue type: %s", i.IssueType)
- }
- if i.EstimatedMinutes != nil && *i.EstimatedMinutes < 0 {
- return fmt.Errorf("estimated_minutes cannot be negative")
- }
- // Enforce closed_at invariant: closed_at should be set if and only if status is closed
- if i.Status == StatusClosed && i.ClosedAt == nil {
- return fmt.Errorf("closed issues must have closed_at timestamp")
- }
- if i.Status != StatusClosed && i.ClosedAt != nil {
- return fmt.Errorf("non-closed issues cannot have closed_at timestamp")
- }
- // Validate metadata is well-formed JSON if set (GH#1406)
- if len(i.Metadata) > 0 {
- if !json.Valid(i.Metadata) {
- return fmt.Errorf("metadata must be valid JSON")
- }
- }
- // Ephemeral and NoHistory are mutually exclusive (GH#2619)
- if i.Ephemeral && i.NoHistory {
- return fmt.Errorf("ephemeral and no_history are mutually exclusive")
- }
- return nil
+// separate commit histories with no common merge base (e.g., two agents
+// bootstrapping from scratch and pushing to the same remote, or a local
+// database being re-initialized while the remote retains the old history).
+func isDivergedHistoryErr(err error) bool {
+ if err == nil {
+ return false
+ }
+ msg := strings.ToLower(err.Error())
+ return strings.Contains(msg, "no common ancestor") ||
+ strings.Contains(msg, "can't find common ancestor") ||
+ strings.Contains(msg, "cannot find common ancestor")
}
-// ValidateForImport validates the issue for multi-repo import (federation trust model).
+// printDivergedHistoryGuidance prints recovery guidance when push/pull fails
+// due to diverged local and remote histories.
+func printDivergedHistoryGuidance(operation string) {
+ fmt.Fprintln(os.Stderr, "")
+ fmt.Fprintln(os.Stderr, "Local and remote Dolt histories have diverged.")
+ fmt.Fprintln(os.Stderr, "This means the local database and the remote have independent commit")
+ fmt.Fprintln(os.Stderr, "histories with no common merge base.")
+ fmt.Fprintln(os.Stderr, "")
+ fmt.Fprintln(os.Stderr, "Recovery options:")
+ fmt.Fprintln(os.Stderr, "")
+ fmt.Fprintln(os.Stderr, " 1. Keep remote, discard local (recommended if remote is authoritative):")
+ fmt.Fprintln(os.Stderr, " bd bootstrap # re-clone from remote")
+ fmt.Fprintln(os.Stderr, "")
+ fmt.Fprintln(os.Stderr, " 2. Keep local, overwrite remote (if local is authoritative):")
+ fmt.Fprintln(os.Stderr, " bd dolt push --force # force-push local history to remote")
+ fmt.Fprintln(os.Stderr, "")
+ fmt.Fprintln(os.Stderr, " 3. Manual recovery (re-initialize local database):")
+ fmt.Fprintln(os.Stderr, " rm -rf .beads/dolt # delete local Dolt database")
+ fmt.Fprintln(os.Stderr, " bd bootstrap # re-clone from remote")
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/types/types.go`
+### `cmd/bd/dolt.go`
-The `IsWellKnown` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `selectedDoltBeadsDir` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
```go
-// IsValid checks if the dependency type value is valid.
-// Accepts any non-empty string up to 50 characters.
-// Use IsWellKnown() to check if it's a built-in type.
-func (d DependencyType) IsValid() bool {
- return len(d) > 0 && len(d) <= 50
-}
-
-// IsWellKnown checks if the dependency type is a well-known constant.
-// Returns false for custom/user-defined types (which are still valid).
-func (d DependencyType) IsWellKnown() bool {
- switch d {
- case DepBlocks, DepParentChild, DepConditionalBlocks, DepWaitsFor, DepRelated, DepDiscoveredFrom,
- DepRepliesTo, DepRelatesTo, DepDuplicates, DepSupersedes,
- DepAuthoredBy, DepAssignedTo, DepApprovedBy, DepAttests, DepTracks,
- DepUntil, DepCausedBy, DepValidates, DepDelegatedFrom:
- return true
- }
- return false
-}
+ os.Exit(1)
+ }
+ beadsDir := selectedDoltBeadsDir()
+ if beadsDir == "" {
+ fmt.Fprintf(os.Stderr, "Error: not in a beads repository (no .beads directory found)\n")
+ os.Exit(1)
+ }
+ serverDir := doltserver.ResolveServerDir(beadsDir)
+
+ state, err := doltserver.Start(serverDir)
+ if err != nil {
+ if strings.Contains(err.Error(), "already running") {
+ fmt.Println(err)
+ return
+ }
+ fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+ os.Exit(1)
+ }
-// AffectsReadyWork returns true if this dependency type blocks work.
-// Only blocking types affect the ready work calculation.
-func (d DependencyType) AffectsReadyWork() bool {
- return d == DepBlocks || d == DepParentChild || d == DepConditionalBlocks || d == DepWaitsFor
+ fmt.Printf("Dolt server started (PID %d, port %d)\n", state.PID, state.Port)
+ fmt.Printf(" Data: %s\n", state.DataDir)
+ fmt.Printf(" Logs: %s\n", doltserver.LogPath(serverDir))
+ if doltserver.IsSharedServerMode() {
+ fmt.Println(" Mode: shared server")
+ }
+ },
}
-// WaitsForMeta holds metadata for waits-for dependencies (fanout gates).
-// Stored as JSON in the Dependency.Metadata field.
-type WaitsForMeta struct {
- // Gate type: "all-children" (wait for all), "any-children" (wait for first)
- Gate string `json:"gate"`
- // SpawnerID identifies which step/issue spawns the children to wait for.
+var doltStopCmd = &cobra.Command{
+ Use: "stop",
+ Short: "Stop the Dolt SQL server for this project",
+ Long: `Stop the dolt sql-server managed by beads for the current project.
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/types/types.go`
+### `cmd/bd/dolt.go`
-The `AffectsReadyWork` function in [`internal/types/types.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/types/types.go) handles a key part of this chapter's functionality:
+The `showDoltConfig` function in [`cmd/bd/dolt.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/dolt.go) handles a key part of this chapter's functionality:
```go
+ os.Exit(1)
+ }
+ showDoltConfig(true)
+ },
}
-// AffectsReadyWork returns true if this dependency type blocks work.
-// Only blocking types affect the ready work calculation.
-func (d DependencyType) AffectsReadyWork() bool {
- return d == DepBlocks || d == DepParentChild || d == DepConditionalBlocks || d == DepWaitsFor
-}
-
-// WaitsForMeta holds metadata for waits-for dependencies (fanout gates).
-// Stored as JSON in the Dependency.Metadata field.
-type WaitsForMeta struct {
- // Gate type: "all-children" (wait for all), "any-children" (wait for first)
- Gate string `json:"gate"`
- // SpawnerID identifies which step/issue spawns the children to wait for.
- // If empty, waits for all direct children of the depends_on_id issue.
- SpawnerID string `json:"spawner_id,omitempty"`
-}
-
-// WaitsForGate constants
-const (
- WaitsForAllChildren = "all-children" // Wait for all dynamic children to complete
- WaitsForAnyChildren = "any-children" // Proceed when first child completes (future)
-)
-
-// ParseWaitsForGateMetadata extracts the waits-for gate type from dependency metadata.
-// Note: spawner identity comes from dependencies.depends_on_id in storage/query paths;
-// metadata.spawner_id is parsed for compatibility/future explicit targeting.
-// Returns WaitsForAllChildren on empty/invalid metadata for backward compatibility.
-func ParseWaitsForGateMetadata(metadata string) string {
- if strings.TrimSpace(metadata) == "" {
- return WaitsForAllChildren
- }
+var doltSetCmd = &cobra.Command{
+ Use: "set <key> <value>",
+ Short: "Set a Dolt configuration value",
+ Long: `Set a Dolt configuration value in metadata.json.
+
+Keys:
+ database Database name (default: issue prefix or "beads")
+ host Server host (default: 127.0.0.1)
+ port Server port (auto-detected; override with bd dolt set port <port>)
+ user MySQL user (default: root)
+ data-dir Custom dolt data directory (absolute path; default: .beads/dolt)
+
+Use --update-config to also write to config.yaml for team-wide defaults.
+
+Examples:
+ bd dolt set database myproject
+ bd dolt set host 192.168.1.100
+ bd dolt set port 3307 --update-config
+ bd dolt set data-dir /home/user/.beads-dolt/myproject`,
+ Args: cobra.ExactArgs(2),
+ Run: func(cmd *cobra.Command, args []string) {
+ if isEmbeddedMode() {
+ fmt.Fprintln(os.Stderr, "Error: 'bd dolt set' is not supported in embedded mode (no Dolt server)")
+ os.Exit(1)
+ }
+ key := args[0]
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
@@ -210,11 +208,11 @@ This function is important because it defines how Beads Tutorial: Git-Backed Tas
```mermaid
flowchart TD
- A[IsValid]
- B[IsValid]
- C[IsWellKnown]
- D[AffectsReadyWork]
- E[ParseWaitsForGateMetadata]
+ A[isTimeoutError]
+ B[init]
+ C[selectedDoltBeadsDir]
+ D[showDoltConfig]
+ E[setDoltConfig]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/06-multi-branch-collaboration-and-protected-flows.md b/tutorials/beads-tutorial/06-multi-branch-collaboration-and-protected-flows.md
index efac6aa6..e33b8c50 100644
--- a/tutorials/beads-tutorial/06-multi-branch-collaboration-and-protected-flows.md
+++ b/tutorials/beads-tutorial/06-multi-branch-collaboration-and-protected-flows.md
@@ -37,12 +37,51 @@ You now have safer collaboration patterns for branch-heavy Beads workflows.
Next: [Chapter 7: Troubleshooting and Operations](07-troubleshooting-and-operations.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `internal/doltserver/doltserver.go`
+The `SharedDoltDir` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
+
+```go
+}
+
+// SharedDoltDir returns the dolt data directory for the shared server.
+// Returns ~/.beads/shared-server/dolt/ (created on first use).
+func SharedDoltDir() (string, error) {
+ serverDir, err := SharedServerDir()
+ if err != nil {
+ return "", err
+ }
+ dir := filepath.Join(serverDir, "dolt")
+ if err := os.MkdirAll(dir, config.BeadsDirPerm); err != nil {
+ return "", fmt.Errorf("cannot create shared dolt directory %s: %w", dir, err)
+ }
+ return dir, nil
+}
+
+// resolveServerDir returns the canonical server directory for dolt state files.
+// In shared server mode, returns ~/.beads/shared-server/ instead of the
+// project's .beads/ directory.
+func resolveServerDir(beadsDir string) string {
+ if IsSharedServerMode() {
+ dir, err := SharedServerDir()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "Warning: shared server directory unavailable, using per-project mode: %v\n", err)
+ return beadsDir
+ }
+ return dir
+ }
+ return beadsDir
+}
+
+// ResolveServerDir is the exported version of resolveServerDir.
+```
+
+This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
+
+### `internal/doltserver/doltserver.go`
+
The `resolveServerDir` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
```go
@@ -164,57 +203,16 @@ func ResolveDoltDir(beadsDir string) string {
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/doltserver/doltserver.go`
-
-The `pidPath` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
-
-```go
-
-// file paths within .beads/
-func pidPath(beadsDir string) string { return filepath.Join(beadsDir, "dolt-server.pid") }
-func logPath(beadsDir string) string { return filepath.Join(beadsDir, "dolt-server.log") }
-func lockPath(beadsDir string) string { return filepath.Join(beadsDir, "dolt-server.lock") }
-func portPath(beadsDir string) string { return filepath.Join(beadsDir, "dolt-server.port") }
-
-// MaxDoltServers is the hard ceiling on concurrent dolt sql-server processes.
-// Allows up to 3 (e.g., multiple projects).
-func maxDoltServers() int {
- return 3
-}
-
-// allocateEphemeralPort asks the OS for a free TCP port on host.
-// It binds to port 0, reads the assigned port, and closes the listener.
-// The caller should pass the returned port to dolt sql-server promptly
-// to minimize the TOCTOU window.
-func allocateEphemeralPort(host string) (int, error) {
- ln, err := net.Listen("tcp", net.JoinHostPort(host, "0"))
- if err != nil {
- return 0, fmt.Errorf("allocating ephemeral port: %w", err)
- }
- port := ln.Addr().(*net.TCPAddr).Port
- _ = ln.Close()
- return port, nil
-}
-
-// isPortAvailable checks if a TCP port is available for binding.
-func isPortAvailable(host string, port int) bool {
- addr := net.JoinHostPort(host, strconv.Itoa(port))
- ln, err := net.Listen("tcp", addr)
- if err != nil {
-```
-
-This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-
## How These Components Connect
```mermaid
flowchart TD
- A[resolveServerDir]
- B[ResolveServerDir]
- C[ResolveDoltDir]
- D[pidPath]
- E[logPath]
+ A[SharedDoltDir]
+ B[resolveServerDir]
+ C[ResolveServerDir]
+ D[ResolveDoltDir]
+ E[pidPath]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/07-troubleshooting-and-operations.md b/tutorials/beads-tutorial/07-troubleshooting-and-operations.md
index cd96a3d6..225321ea 100644
--- a/tutorials/beads-tutorial/07-troubleshooting-and-operations.md
+++ b/tutorials/beads-tutorial/07-troubleshooting-and-operations.md
@@ -37,12 +37,51 @@ You now have an operations runbook baseline for Beads troubleshooting.
Next: [Chapter 8: Contribution Workflow and Ecosystem Extensions](08-contribution-workflow-and-ecosystem-extensions.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `internal/doltserver/doltserver.go`
+The `DefaultConfig` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
+
+```go
+}
+
+// DefaultConfig returns config with sensible defaults.
+// Priority: env var > port file > config.yaml / global config > metadata.json.
+// Returns port 0 when no source provides a port, meaning Start() should
+// allocate an ephemeral port from the OS.
+//
+// The port file (dolt-server.port) is written by Start() with the actual port
+// the server is listening on. Consulting it here ensures that commands
+// connecting to an already-running server use the correct port.
+func DefaultConfig(beadsDir string) *Config {
+ // In shared mode, use the shared server directory for port resolution
+ if IsSharedServerMode() {
+ if sharedDir, err := SharedServerDir(); err == nil {
+ beadsDir = sharedDir
+ }
+ }
+
+ cfg := &Config{
+ BeadsDir: beadsDir,
+ Host: "127.0.0.1",
+ Mode: ResolveServerMode(beadsDir),
+ }
+
+ // Check env var override first (used by tests and manual overrides)
+ if p := os.Getenv("BEADS_DOLT_SERVER_PORT"); p != "" {
+ if port, err := strconv.Atoi(p); err == nil {
+ cfg.Port = port
+ return cfg
+ }
+ }
+
+```
+
+This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
+
+### `internal/doltserver/doltserver.go`
+
The `IsRunning` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
```go
@@ -164,57 +203,16 @@ func EnsureRunningDetailed(beadsDir string) (port int, startedByUs bool, err err
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `internal/doltserver/doltserver.go`
-
-The `Start` function in [`internal/doltserver/doltserver.go`](https://github.com/steveyegge/beads/blob/HEAD/internal/doltserver/doltserver.go) handles a key part of this chapter's functionality:
-
-```go
-//
-// Port assignment uses OS-assigned ephemeral ports by default. When no explicit
-// port is configured (env var, config.yaml, metadata.json), Start() asks the OS
-// for a free port via net.Listen(":0"), passes it to dolt sql-server, and writes
-// the actual port to dolt-server.port. This eliminates the birthday-problem
-// collisions that plagued the old hash-derived port scheme (GH#2098, GH#2372).
-//
-// Users with explicit port config via BEADS_DOLT_SERVER_PORT env var or
-// config.yaml always use that port instead, with conflict detection via
-// reclaimPort.
-//
-// Server state files (PID, port, log, lock) live in the .beads/ directory.
-package doltserver
-
-import (
- "context"
- "database/sql"
- "fmt"
- "net"
- "os"
- "os/exec"
- "path/filepath"
- "strconv"
- "strings"
- "time"
-
- _ "github.com/go-sql-driver/mysql"
-
- "github.com/steveyegge/beads/internal/config"
- "github.com/steveyegge/beads/internal/configfile"
- "github.com/steveyegge/beads/internal/lockfile"
-)
-```
-
-This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-
## How These Components Connect
```mermaid
flowchart TD
- A[IsRunning]
- B[EnsureRunning]
- C[EnsureRunningDetailed]
- D[Start]
- E[FlushWorkingSet]
+ A[DefaultConfig]
+ B[IsRunning]
+ C[EnsureRunning]
+ D[EnsureRunningDetailed]
+ E[Start]
A --> B
B --> C
C --> D
diff --git a/tutorials/beads-tutorial/08-contribution-workflow-and-ecosystem-extensions.md b/tutorials/beads-tutorial/08-contribution-workflow-and-ecosystem-extensions.md
index f90a7c4e..b7c200ba 100644
--- a/tutorials/beads-tutorial/08-contribution-workflow-and-ecosystem-extensions.md
+++ b/tutorials/beads-tutorial/08-contribution-workflow-and-ecosystem-extensions.md
@@ -38,170 +38,168 @@ You now have a full Beads path from baseline usage to ecosystem contribution.
Next tutorial: [AutoAgent Tutorial](../autoagent-tutorial/)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `cmd/bd/list.go`
+### `cmd/bd/hooks.go`
-The `getHierarchicalChildren` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
+The `hookSectionEndLine` function in [`cmd/bd/hooks.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/hooks.go) handles a key part of this chapter's functionality:
```go
}
-// getHierarchicalChildren handles the --tree --parent combination logic
-func getHierarchicalChildren(ctx context.Context, store storage.DoltStorage, dbPath string, parentID string) ([]*types.Issue, error) {
- // First verify that the parent issue exists
- var parentIssue *types.Issue
- err := withStorage(ctx, store, dbPath, func(s storage.DoltStorage) error {
- var err error
- parentIssue, err = s.GetIssue(ctx, parentID)
- return err
- })
- if err != nil {
- return nil, fmt.Errorf("error checking parent issue: %v", err)
- }
- if parentIssue == nil {
- return nil, fmt.Errorf("parent issue '%s' not found", parentID)
- }
-
- // Use recursive search to find all descendants using the same logic as --parent filter
- // This works around issues with GetDependencyTree not finding all dependents properly
- allDescendants := make(map[string]*types.Issue)
-
- // Always include the parent
- allDescendants[parentID] = parentIssue
-
- // Recursively find all descendants
- err = findAllDescendants(ctx, store, dbPath, parentID, allDescendants, 0, 10) // max depth 10
- if err != nil {
- return nil, fmt.Errorf("error finding descendants: %v", err)
- }
+// hookSectionEndLine returns the full end marker line with the current version.
+func hookSectionEndLine() string {
+ return fmt.Sprintf("%s v%s ---", hookSectionEndPrefix, Version)
+}
- // Convert map to slice for display
+// hookTimeoutSeconds is the maximum time a beads hook is allowed to run before
+// being killed and allowing the git operation to proceed. A bounded timeout
+// prevents `bd hooks run` from hanging `git push` indefinitely (GH#2453).
+// The default is 300 seconds (5 minutes) to accommodate chained hooks — e.g.
+// pre-commit framework pipelines that run linters, type-checkers, and builds
+// inside `bd hooks run` via the `.old` hook chain (GH#2732).
+// The value can be overridden via the BEADS_HOOK_TIMEOUT environment variable.
+const hookTimeoutSeconds = 300
+
+// generateHookSection returns the marked section content for a given hook name.
+// The section is self-contained: it checks for bd availability, runs the hook
+// via 'bd hooks run', and propagates exit codes — without preventing any user
+// content after the section from executing on success.
+//
+// Resilience (GH#2453, GH#2449):
+// - A configurable timeout prevents hooks from hanging git operations.
+// - If the beads database is not initialized (exit code 3), the hook exits
+// successfully with a warning so that git operations are not blocked.
+func generateHookSection(hookName string) string {
+ return hookSectionBeginLine() + "\n" +
+ "# This section is managed by beads. Do not remove these markers.\n" +
+ "if command -v bd >/dev/null 2>&1; then\n" +
+ " export BD_GIT_HOOK=1\n" +
+ " _bd_timeout=${BEADS_HOOK_TIMEOUT:-" + fmt.Sprintf("%d", hookTimeoutSeconds) + "}\n" +
+ " if command -v timeout >/dev/null 2>&1; then\n" +
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/list.go`
+### `cmd/bd/hooks.go`
-The `findAllDescendants` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
+The `generateHookSection` function in [`cmd/bd/hooks.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/hooks.go) handles a key part of this chapter's functionality:
```go
- // Recursively find all descendants
- err = findAllDescendants(ctx, store, dbPath, parentID, allDescendants, 0, 10) // max depth 10
- if err != nil {
- return nil, fmt.Errorf("error finding descendants: %v", err)
- }
+// managedHookNames lists the git hooks managed by beads.
+// Hook content is generated dynamically by generateHookSection().
+var managedHookNames = []string{"pre-commit", "post-merge", "pre-push", "post-checkout", "prepare-commit-msg"}
- // Convert map to slice for display
- treeIssues := make([]*types.Issue, 0, len(allDescendants))
- for _, issue := range allDescendants {
- treeIssues = append(treeIssues, issue)
- }
+const hookVersionPrefix = "# bd-hooks-version: "
+const shimVersionPrefix = "# bd-shim "
+
+// inlineHookMarker identifies inline hooks created by bd init (GH#1120)
+// These hooks have the logic embedded directly rather than using shims
+const inlineHookMarker = "# bd (beads)"
+
+// Section markers for git hooks (GH#1380) — consistent with AGENTS.md pattern.
+// Only content between markers is managed by beads; user content outside is preserved.
+const hookSectionBeginPrefix = "# --- BEGIN BEADS INTEGRATION"
+const hookSectionEndPrefix = "# --- END BEADS INTEGRATION"
- return treeIssues, nil
+// hookSectionBeginLine returns the full begin marker line with the current version.
+func hookSectionBeginLine() string {
+ return fmt.Sprintf("%s v%s ---", hookSectionBeginPrefix, Version)
}
-// findAllDescendants recursively finds all descendants using parent filtering
-func findAllDescendants(ctx context.Context, store storage.DoltStorage, dbPath string, parentID string, result map[string]*types.Issue, currentDepth, maxDepth int) error {
- if currentDepth >= maxDepth {
- return nil // Prevent infinite recursion
- }
+// hookSectionEndLine returns the full end marker line with the current version.
+func hookSectionEndLine() string {
+ return fmt.Sprintf("%s v%s ---", hookSectionEndPrefix, Version)
+}
- // Get direct children using the same filter logic as regular --parent
- var children []*types.Issue
- err := withStorage(ctx, store, dbPath, func(s storage.DoltStorage) error {
- filter := types.IssueFilter{
- ParentID: &parentID,
- }
- var err error
- children, err = s.SearchIssues(ctx, "", filter)
- return err
- })
+// hookTimeoutSeconds is the maximum time a beads hook is allowed to run before
+// being killed and allowing the git operation to proceed. A bounded timeout
+// prevents `bd hooks run` from hanging `git push` indefinitely (GH#2453).
+// The default is 300 seconds (5 minutes) to accommodate chained hooks — e.g.
+// pre-commit framework pipelines that run linters, type-checkers, and builds
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/list.go`
+### `cmd/bd/hooks.go`
-The `watchIssues` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
+The `injectHookSection` function in [`cmd/bd/hooks.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/hooks.go) handles a key part of this chapter's functionality:
```go
}
-// watchIssues polls for changes and re-displays (GH#654)
-// Uses polling instead of fsnotify because Dolt stores data in a server-side
-// database, not files — file watchers never fire.
-func watchIssues(ctx context.Context, store storage.DoltStorage, filter types.IssueFilter, sortBy string, reverse bool) {
- // Initial display
- issues, err := store.SearchIssues(ctx, "", filter)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error querying issues: %v\n", err)
- return
+// injectHookSection merges the beads section into existing hook file content.
+// If section markers are found, only the content between them is replaced.
+// If broken markers exist (orphaned BEGIN, reversed order), the stale markers
+// are removed before injecting the new section.
+// If no markers are found, the section is appended.
+func injectHookSection(existing, section string) string {
+ return injectHookSectionWithDepth(existing, section, 0)
+}
+
+// maxInjectDepth guards against infinite recursion when cleaning broken markers.
+const maxInjectDepth = 5
+
+func injectHookSectionWithDepth(existing, section string, depth int) string {
+ if depth > maxInjectDepth {
+ // Safety: too many recursive cleanups — append as fallback
+ result := existing
+ if !strings.HasSuffix(result, "\n") {
+ result += "\n"
+ }
+ return result + "\n" + section
}
- sortIssues(issues, sortBy, reverse)
- displayPrettyList(issues, true)
- lastSnapshot := issueSnapshot(issues)
-
- fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
-
- // Handle Ctrl+C — deferred Stop prevents signal handler leak
- sigChan := make(chan os.Signal, 1)
- signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
- defer signal.Stop(sigChan)
-
- pollInterval := 2 * time.Second
- ticker := time.NewTicker(pollInterval)
- defer ticker.Stop()
-
- for {
- select {
- case <-sigChan:
- fmt.Fprintf(os.Stderr, "\nStopped watching.\n")
- return
+
+ beginIdx := strings.Index(existing, hookSectionBeginPrefix)
+ endIdx := strings.Index(existing, hookSectionEndPrefix)
+
+ if beginIdx != -1 && endIdx != -1 && beginIdx < endIdx {
+ // Case 1: valid BEGIN...END pair — replace between markers
+ lineStart := strings.LastIndex(existing[:beginIdx], "\n")
+ if lineStart == -1 {
+ lineStart = 0
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
-### `cmd/bd/list.go`
+### `cmd/bd/hooks.go`
-The `issueSnapshot` function in [`cmd/bd/list.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/list.go) handles a key part of this chapter's functionality:
+The `injectHookSectionWithDepth` function in [`cmd/bd/hooks.go`](https://github.com/steveyegge/beads/blob/HEAD/cmd/bd/hooks.go) handles a key part of this chapter's functionality:
```go
- sortIssues(issues, sortBy, reverse)
- displayPrettyList(issues, true)
- lastSnapshot := issueSnapshot(issues)
-
- fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
-
- // Handle Ctrl+C — deferred Stop prevents signal handler leak
- sigChan := make(chan os.Signal, 1)
- signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
- defer signal.Stop(sigChan)
-
- pollInterval := 2 * time.Second
- ticker := time.NewTicker(pollInterval)
- defer ticker.Stop()
-
- for {
- select {
- case <-sigChan:
- fmt.Fprintf(os.Stderr, "\nStopped watching.\n")
- return
- case <-ticker.C:
- issues, err := store.SearchIssues(ctx, "", filter)
- if err != nil {
- fmt.Fprintf(os.Stderr, "Error refreshing issues: %v\n", err)
- continue
- }
- sortIssues(issues, sortBy, reverse)
- snap := issueSnapshot(issues)
- if snap != lastSnapshot {
- lastSnapshot = snap
- displayPrettyList(issues, true)
- fmt.Fprintf(os.Stderr, "\nWatching for changes... (Press Ctrl+C to exit)\n")
+// If no markers are found, the section is appended.
+func injectHookSection(existing, section string) string {
+ return injectHookSectionWithDepth(existing, section, 0)
+}
+
+// maxInjectDepth guards against infinite recursion when cleaning broken markers.
+const maxInjectDepth = 5
+
+func injectHookSectionWithDepth(existing, section string, depth int) string {
+ if depth > maxInjectDepth {
+ // Safety: too many recursive cleanups — append as fallback
+ result := existing
+ if !strings.HasSuffix(result, "\n") {
+ result += "\n"
+ }
+ return result + "\n" + section
+ }
+
+ beginIdx := strings.Index(existing, hookSectionBeginPrefix)
+ endIdx := strings.Index(existing, hookSectionEndPrefix)
+
+ if beginIdx != -1 && endIdx != -1 && beginIdx < endIdx {
+ // Case 1: valid BEGIN...END pair — replace between markers
+ lineStart := strings.LastIndex(existing[:beginIdx], "\n")
+ if lineStart == -1 {
+ lineStart = 0
+ } else {
+ lineStart++ // skip the newline itself
+ }
+
+ // Find end of the end-marker line (including trailing newline)
+ endOfEndMarker := endIdx + len(hookSectionEndPrefix)
```
This function is important because it defines how Beads Tutorial: Git-Backed Task Graph Memory for Coding Agents implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Beads Tutorial: Git-Backed Tas
```mermaid
flowchart TD
- A[getHierarchicalChildren]
- B[findAllDescendants]
- C[watchIssues]
- D[issueSnapshot]
- E[sortIssues]
+ A[hookSectionEndLine]
+ B[generateHookSection]
+ C[injectHookSection]
+ D[injectHookSectionWithDepth]
+ E[removeOrphanedBeginBlock]
A --> B
B --> C
C --> D
diff --git a/tutorials/bentoml-tutorial/02-model-packaging.md b/tutorials/bentoml-tutorial/02-model-packaging.md
index f3dc02ac..d5f2bbfc 100644
--- a/tutorials/bentoml-tutorial/02-model-packaging.md
+++ b/tutorials/bentoml-tutorial/02-model-packaging.md
@@ -522,9 +522,28 @@ def test_error_handling():
assert data["status"] == 400
```
+## Model Packaging Architecture
+
+```mermaid
+flowchart TD
+ A[Train or load model]
+ B[Create BentoML Runner for model]
+ C[Define Service with runner dependency]
+ D[Add API endpoints to Service]
+ E[bentoml build produces Bento artifact]
+ F[Bento contains model, code, and dependencies]
+ G[Bento ready for deployment]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **Advanced Service Creation** - Multi-model services with runners
2. **Model Management** - Versioning, optimization, and metadata
diff --git a/tutorials/bentoml-tutorial/03-api-development.md b/tutorials/bentoml-tutorial/03-api-development.md
index b4c61308..54325bdd 100644
--- a/tutorials/bentoml-tutorial/03-api-development.md
+++ b/tutorials/bentoml-tutorial/03-api-development.md
@@ -559,9 +559,29 @@ class RobustAPIService:
return True
```
+## API Development Architecture
+
+```mermaid
+flowchart TD
+ A[Define Service class]
+ B[Add api endpoints with @bentoml.api decorator]
+ C[Specify input and output types]
+ D[Add auth and rate limiting middleware]
+ E[Client sends HTTP request to endpoint]
+ F[BentoML deserializes input]
+ G[Service method executes with runner]
+ H[Response serialized and returned]
+ A --> B
+ B --> C
+ C --> D
+ E --> F
+ F --> G
+ G --> H
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **Multiple API Formats** - JSON, NumPy, File, and Image APIs
2. **Authentication** - JWT-based authentication for secure APIs
diff --git a/tutorials/bentoml-tutorial/04-framework-integration.md b/tutorials/bentoml-tutorial/04-framework-integration.md
index a1fe519f..2ca89bfd 100644
--- a/tutorials/bentoml-tutorial/04-framework-integration.md
+++ b/tutorials/bentoml-tutorial/04-framework-integration.md
@@ -417,9 +417,28 @@ class VersionedService:
}
```
+## Framework Integration Flow
+
+```mermaid
+flowchart TD
+ A[ML framework model trained]
+ B[Save model with framework-native format]
+ C[Load into BentoML Runner]
+ D[Runner wraps framework inference call]
+ E[Service exposes API endpoint]
+ F[Request routed to runner]
+ G[Framework model produces prediction]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **TensorFlow Integration** - Basic and optimized TensorFlow models
2. **PyTorch Integration** - CPU and GPU-accelerated PyTorch models
diff --git a/tutorials/bentoml-tutorial/05-testing-validation.md b/tutorials/bentoml-tutorial/05-testing-validation.md
index 663e082d..7e22e6b4 100644
--- a/tutorials/bentoml-tutorial/05-testing-validation.md
+++ b/tutorials/bentoml-tutorial/05-testing-validation.md
@@ -436,9 +436,26 @@ curl -X POST "http://localhost:3000/predict" \
docker stop test-service
```
+## Testing Architecture
+
+```mermaid
+flowchart TD
+ A[Unit tests: test runners and model logic]
+ B[Integration tests: spin up service locally]
+ C[bentoml.testing.Client sends test requests]
+ D[Assert response shape and values]
+ E[Load tests with concurrent requests]
+ F[Docker integration test: build and curl]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **Unit Testing** - Testing individual components and functions
2. **Integration Testing** - Testing complete workflows and API integrations
diff --git a/tutorials/bentoml-tutorial/06-deployment-strategies.md b/tutorials/bentoml-tutorial/06-deployment-strategies.md
index f15e2bb9..f9e641cb 100644
--- a/tutorials/bentoml-tutorial/06-deployment-strategies.md
+++ b/tutorials/bentoml-tutorial/06-deployment-strategies.md
@@ -585,9 +585,30 @@ class AsyncService:
return self.model.predict(batch)
```
+## Deployment Architecture
+
+```mermaid
+flowchart TD
+ A[bentoml build creates Bento]
+ B[bentoml containerize creates Docker image]
+ C{Deployment target}
+ D[Docker: docker run with port mapping]
+ E[Kubernetes: deploy with BentoDeployment CRD]
+ F[BentoCloud: bentoml deploy command]
+ G[Service running and accepting requests]
+ A --> B
+ B --> C
+ C --> D
+ C --> E
+ C --> F
+ D --> G
+ E --> G
+ F --> G
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **Docker Deployment** - Containerizing and running BentoML services
2. **Kubernetes Orchestration** - Scaling services with K8s deployments
diff --git a/tutorials/bentoml-tutorial/07-monitoring-observability.md b/tutorials/bentoml-tutorial/07-monitoring-observability.md
index 08e13e5c..88807ddc 100644
--- a/tutorials/bentoml-tutorial/07-monitoring-observability.md
+++ b/tutorials/bentoml-tutorial/07-monitoring-observability.md
@@ -621,9 +621,28 @@ class DashboardService:
raise
```
+## Observability Architecture
+
+```mermaid
+flowchart TD
+ A[Request arrives at BentoML service]
+ B[Request and latency metrics recorded]
+ C[Structured log entry emitted]
+ D[Prometheus scrapes metrics endpoint]
+ E[Grafana dashboard visualizes metrics]
+ F[Alert fired on error rate threshold]
+ G[Traces exported to observability backend]
+ A --> B
+ A --> C
+ B --> D
+ D --> E
+ E --> F
+ A --> G
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've successfully learned:
+You've successfully learned:
1. **Metrics Collection** - Prometheus integration and custom metrics
2. **Structured Logging** - JSON logging and log aggregation
diff --git a/tutorials/bentoml-tutorial/08-production-scaling.md b/tutorials/bentoml-tutorial/08-production-scaling.md
index 784fa848..d5142c1a 100644
--- a/tutorials/bentoml-tutorial/08-production-scaling.md
+++ b/tutorials/bentoml-tutorial/08-production-scaling.md
@@ -930,9 +930,30 @@ class CachedBentoService:
return self.cache.get_stats()
```
+## Production Scaling Architecture
+
+```mermaid
+flowchart TD
+ A[Production traffic arrives]
+ B[Load balancer distributes requests]
+ C[Multiple service replicas running]
+ D[Each replica has runner pool]
+ E[Autoscaler monitors queue depth]
+ F[New replicas added on high load]
+ G[Replicas removed when idle]
+ H[Metrics exported to monitoring]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ E --> G
+ D --> H
+```
+
## What We've Accomplished
-Congratulations! 🎉 You've completed the comprehensive BentoML tutorial:
+You've completed the comprehensive BentoML tutorial:
1. **Getting Started** - Basic BentoML concepts and service creation
2. **Model Packaging** - Advanced model packaging and versioning
diff --git a/tutorials/bolt-diy-tutorial/01-getting-started.md b/tutorials/bolt-diy-tutorial/01-getting-started.md
index 35fbd444..d4a205ad 100644
--- a/tutorials/bolt-diy-tutorial/01-getting-started.md
+++ b/tutorials/bolt-diy-tutorial/01-getting-started.md
@@ -174,42 +174,8 @@ You now have a reliable bolt.diy baseline with:
Next: [Chapter 2: Architecture Overview](02-architecture-overview.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `worker-configuration.d.ts`
-
-The `Env` interface in [`worker-configuration.d.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/worker-configuration.d.ts) handles a key part of this chapter's functionality:
-
-```ts
-interface Env {
- RUNNING_IN_DOCKER: Settings;
- DEFAULT_NUM_CTX: Settings;
- ANTHROPIC_API_KEY: string;
- OPENAI_API_KEY: string;
- GROQ_API_KEY: string;
- HuggingFace_API_KEY: string;
- OPEN_ROUTER_API_KEY: string;
- OLLAMA_API_BASE_URL: string;
- OPENAI_LIKE_API_KEY: string;
- OPENAI_LIKE_API_BASE_URL: string;
- OPENAI_LIKE_API_MODELS: string;
- TOGETHER_API_KEY: string;
- TOGETHER_API_BASE_URL: string;
- DEEPSEEK_API_KEY: string;
- LMSTUDIO_API_BASE_URL: string;
- GOOGLE_GENERATIVE_AI_API_KEY: string;
- MISTRAL_API_KEY: string;
- XAI_API_KEY: string;
- PERPLEXITY_API_KEY: string;
- AWS_BEDROCK_CONFIG: string;
-}
-
-```
-
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
### `vite.config.ts`
The `chrome129IssuePlugin` function in [`vite.config.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/vite.config.ts) handles a key part of this chapter's functionality:
@@ -292,32 +258,89 @@ export default defineConfig({
This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-### `load-context.ts`
+### `worker-configuration.d.ts`
-The `AppLoadContext` interface in [`load-context.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/load-context.ts) handles a key part of this chapter's functionality:
+The `Env` interface in [`worker-configuration.d.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/worker-configuration.d.ts) handles a key part of this chapter's functionality:
```ts
-
-declare module '@remix-run/cloudflare' {
- interface AppLoadContext {
- cloudflare: Cloudflare;
- }
+interface Env {
+ RUNNING_IN_DOCKER: Settings;
+ DEFAULT_NUM_CTX: Settings;
+ ANTHROPIC_API_KEY: string;
+ OPENAI_API_KEY: string;
+ GROQ_API_KEY: string;
+ HuggingFace_API_KEY: string;
+ OPEN_ROUTER_API_KEY: string;
+ OLLAMA_API_BASE_URL: string;
+ OPENAI_LIKE_API_KEY: string;
+ OPENAI_LIKE_API_BASE_URL: string;
+ OPENAI_LIKE_API_MODELS: string;
+ TOGETHER_API_KEY: string;
+ TOGETHER_API_BASE_URL: string;
+ DEEPSEEK_API_KEY: string;
+ LMSTUDIO_API_BASE_URL: string;
+ GOOGLE_GENERATIVE_AI_API_KEY: string;
+ MISTRAL_API_KEY: string;
+ XAI_API_KEY: string;
+ PERPLEXITY_API_KEY: string;
+ AWS_BEDROCK_CONFIG: string;
}
```
This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+### `app/root.tsx`
+
+The `setTutorialKitTheme` function in [`app/root.tsx`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/root.tsx) handles a key part of this chapter's functionality:
+
+```tsx
+
+const inlineThemeCode = stripIndents`
+ setTutorialKitTheme();
+
+ function setTutorialKitTheme() {
+ let theme = localStorage.getItem('bolt_theme');
+
+ if (!theme) {
+ theme = window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
+ }
+
+ document.querySelector('html')?.setAttribute('data-theme', theme);
+ }
+`;
+
+export const Head = createHead(() => (
+ <>
+
+
+
+
+
+ </>
+));
+
+export function Layout({ children }: { children: React.ReactNode }) {
+ const theme = useStore(themeStore);
+
+ useEffect(() => {
+ document.querySelector('html')?.setAttribute('data-theme', theme);
+ }, [theme]);
+
+```
+
+This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+
## How These Components Connect
```mermaid
flowchart TD
- A[Env]
- B[chrome129IssuePlugin]
- C[generateAlphaPalette]
- D[AppLoadContext]
- E[setTutorialKitTheme]
+ A[chrome129IssuePlugin]
+ B[generateAlphaPalette]
+ C[Env]
+ D[setTutorialKitTheme]
+ E[Layout]
A --> B
B --> C
C --> D
diff --git a/tutorials/bolt-diy-tutorial/02-architecture-overview.md b/tutorials/bolt-diy-tutorial/02-architecture-overview.md
index 38ec9eb9..cd1cc636 100644
--- a/tutorials/bolt-diy-tutorial/02-architecture-overview.md
+++ b/tutorials/bolt-diy-tutorial/02-architecture-overview.md
@@ -137,53 +137,10 @@ You now have a working architecture map of bolt.diy:
Next: [Chapter 3: Providers and Model Routing](03-providers-and-routing.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `app/root.tsx`
-The `Layout` function in [`app/root.tsx`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/root.tsx) handles a key part of this chapter's functionality:
-
-```tsx
-));
-
-export function Layout({ children }: { children: React.ReactNode }) {
- const theme = useStore(themeStore);
-
- useEffect(() => {
- document.querySelector('html')?.setAttribute('data-theme', theme);
- }, [theme]);
-
- return (
- <>
- {() => {children} }
- {
- return (
-
- );
- }}
- icon={({ type }) => {
- switch (type) {
- case 'success': {
- return ;
- }
- case 'error': {
- return ;
- }
- }
-
- return undefined;
- }}
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/root.tsx`
-
The `App` function in [`app/root.tsx`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/root.tsx) handles a key part of this chapter's functionality:
```tsx
@@ -223,6 +180,22 @@ export default function App() {
This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+### `load-context.ts`
+
+The `AppLoadContext` interface in [`load-context.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/load-context.ts) handles a key part of this chapter's functionality:
+
+```ts
+
+declare module '@remix-run/cloudflare' {
+ interface AppLoadContext {
+ cloudflare: Cloudflare;
+ }
+}
+
+```
+
+This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+
### `app/entry.server.tsx`
The `handleRequest` function in [`app/entry.server.tsx`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/entry.server.tsx) handles a key part of this chapter's functionality:
@@ -310,11 +283,11 @@ This function is important because it defines how bolt.diy Tutorial: Build and O
```mermaid
flowchart TD
- A[Layout]
- B[App]
+ A[App]
+ B[AppLoadContext]
C[handleRequest]
D[read]
- E[action]
+ E[getEncoding]
A --> B
B --> C
C --> D
diff --git a/tutorials/bolt-diy-tutorial/03-providers-and-routing.md b/tutorials/bolt-diy-tutorial/03-providers-and-routing.md
index 8c561c87..d9db8a6a 100644
--- a/tutorials/bolt-diy-tutorial/03-providers-and-routing.md
+++ b/tutorials/bolt-diy-tutorial/03-providers-and-routing.md
@@ -135,140 +135,39 @@ You now have a provider-routing governance model that covers:
Next: [Chapter 4: Prompt-to-App Workflow](04-prompt-to-app-workflow.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/routes/api.chat.ts`
-
-The `parseCookies` function in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) handles a key part of this chapter's functionality:
-
-```ts
-const logger = createScopedLogger('api.chat');
-
-function parseCookies(cookieHeader: string): Record {
- const cookies: Record = {};
+### `app/lib/stores/settings.ts`
- const items = cookieHeader.split(';').map((cookie) => cookie.trim());
+The provider configuration store in [`app/lib/stores/settings.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/stores/settings.ts) is central to this chapter — it holds the active provider/model selection and persists user routing preferences across sessions.
- items.forEach((item) => {
- const [name, ...rest] = item.split('=');
+The store is built on `nanostores` and exposes atoms like `providersStore` that the UI and LLM routing layer both read. When a user selects a provider in the settings panel, the atom updates and the next chat request automatically picks up the new provider config. This is the primary place to trace if you want to understand how provider selection flows from UI to request.
- if (name && rest) {
- const decodedName = decodeURIComponent(name.trim());
- const decodedValue = decodeURIComponent(rest.join('=').trim());
- cookies[decodedName] = decodedValue;
- }
- });
+### `app/lib/hooks/useSettings.ts`
- return cookies;
-}
+The `useSettings` hook in [`app/lib/hooks/useSettings.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/hooks/useSettings.ts) is what React components use to read and mutate provider state. It wraps the nanostores atoms with React subscriptions, ensuring components re-render when provider or model selection changes.
-async function chatAction({ context, request }: ActionFunctionArgs) {
- const streamRecovery = new StreamRecoveryManager({
- timeout: 45000,
- maxRetries: 2,
- onTimeout: () => {
- logger.warn('Stream timeout - attempting recovery');
- },
- });
-
- const { messages, files, promptId, contextOptimization, supabase, chatMode, designScheme, maxLLMSteps } =
- await request.json<{
- messages: Messages;
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+For routing policy work, this hook is the integration point: you can extend it to enforce allowed-provider constraints or inject environment-driven defaults before the value reaches UI components.
### `app/routes/api.chat.ts`
-The `chatAction` function in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) handles a key part of this chapter's functionality:
-
-```ts
-
-export async function action(args: ActionFunctionArgs) {
- return chatAction(args);
-}
-
-const logger = createScopedLogger('api.chat');
-
-function parseCookies(cookieHeader: string): Record {
- const cookies: Record = {};
-
- const items = cookieHeader.split(';').map((cookie) => cookie.trim());
-
- items.forEach((item) => {
- const [name, ...rest] = item.split('=');
-
- if (name && rest) {
- const decodedName = decodeURIComponent(name.trim());
- const decodedValue = decodeURIComponent(rest.join('=').trim());
- cookies[decodedName] = decodedValue;
- }
- });
-
- return cookies;
-}
-
-async function chatAction({ context, request }: ActionFunctionArgs) {
- const streamRecovery = new StreamRecoveryManager({
- timeout: 45000,
- maxRetries: 2,
- onTimeout: () => {
- logger.warn('Stream timeout - attempting recovery');
- },
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `types/istextorbinary.d.ts`
-
-The `getEncoding` function in [`types/istextorbinary.d.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/types/istextorbinary.d.ts) handles a key part of this chapter's functionality:
-
-```ts
- }
-
- export function getEncoding(buffer: Buffer | null, opts?: EncodingOpts): 'utf8' | 'binary' | null;
-}
-
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `types/istextorbinary.d.ts`
-
-The `EncodingOpts` interface in [`types/istextorbinary.d.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/types/istextorbinary.d.ts) handles a key part of this chapter's functionality:
-
-```ts
- */
-declare module 'istextorbinary' {
- export interface EncodingOpts {
- /** Defaults to 24 */
- chunkLength?: number;
-
- /** If not provided, will check the start, beginning, and end */
- chunkBegin?: number;
- }
-
- export function getEncoding(buffer: Buffer | null, opts?: EncodingOpts): 'utf8' | 'binary' | null;
-}
-
-```
-
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The `action` export in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) is the server-side entry point where provider selection from the client is consumed. The provider and model identifiers travel from the React store through the chat request payload to this route, which then delegates to the appropriate provider client.
+Tracing from this file through the LLM stream layer shows exactly where fallback logic would need to be inserted to implement a multi-provider fallback chain.
## How These Components Connect
```mermaid
flowchart TD
- A[parseCookies]
- B[chatAction]
- C[getEncoding]
- D[EncodingOpts]
- E[CircularBuffer]
+ A[User selects provider in UI]
+ B[providersStore atom updated]
+ C[useSettings hook propagates change]
+ D[Chat request payload includes provider + model]
+ E[api.chat.ts action receives provider config]
+ F[LLM stream layer dispatches to provider client]
A --> B
B --> C
C --> D
D --> E
+ E --> F
```
diff --git a/tutorials/bolt-diy-tutorial/04-prompt-to-app-workflow.md b/tutorials/bolt-diy-tutorial/04-prompt-to-app-workflow.md
index 0fb9ff4f..a55be72c 100644
--- a/tutorials/bolt-diy-tutorial/04-prompt-to-app-workflow.md
+++ b/tutorials/bolt-diy-tutorial/04-prompt-to-app-workflow.md
@@ -159,186 +159,39 @@ You now have a deterministic prompt-to-app method:
Next: [Chapter 5: Files, Diff, and Locking](05-files-diff-locking.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/utils/debugLogger.ts`
-
-The `DebugLogger` class in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-
-// Configuration interface for debug logger
-export interface DebugLoggerConfig {
- enabled: boolean;
- maxEntries: number;
- captureConsole: boolean;
- captureNetwork: boolean;
- captureErrors: boolean;
- debounceTerminal: number; // ms
-}
-
-// Circular buffer implementation for memory efficiency
-class CircularBuffer {
- private _buffer: (T | undefined)[];
- private _head = 0;
- private _tail = 0;
- private _size = 0;
-
- constructor(private _capacity: number) {
- this._buffer = new Array(_capacity);
- }
-
- push(item: T): void {
- this._buffer[this._tail] = item;
- this._tail = (this._tail + 1) % this._capacity;
-
- if (this._size < this._capacity) {
- this._size++;
- } else {
- this._head = (this._head + 1) % this._capacity;
- }
- }
-```
-
-This class is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `downloadDebugLog` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
+### `app/routes/api.chat.ts`
-// Helper function to download debug log
-export async function downloadDebugLog(filename?: string): Promise {
- try {
- const debugData = await debugLogger.generateDebugLog();
+The `action` export in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) is the server-side handler for chat requests. Every prompt submitted through the bolt.diy UI passes through this route. It receives the conversation messages, the selected provider/model, and any constraints from the client, then delegates to the streaming LLM layer.
- // Create a formatted summary
- const summary = createDebugSummary(debugData);
- const fullContent = `${summary}\n\n=== DETAILED DEBUG DATA ===\n\n${JSON.stringify(debugData, null, 2)}`;
+Understanding this file is key to tracing how a user's prompt becomes a model request, and where you can insert logging, validation, or budget-cap logic before the model call.
- const blob = new Blob([fullContent], { type: 'text/plain' });
- const url = URL.createObjectURL(blob);
-
- const link = document.createElement('a');
- link.href = url;
- link.download = filename || `bolt-debug-${new Date().toISOString().split('T')[0]}.txt`;
- document.body.appendChild(link);
- link.click();
- document.body.removeChild(link);
-
- URL.revokeObjectURL(url);
-
- logger.info('Debug log downloaded successfully');
- } catch (error) {
- logger.error('Failed to download debug log:', error);
- }
-}
-
-// Create a human-readable summary of the debug data
-function createDebugSummary(data: DebugLogData): string {
- const summary = [
- '=== BOLT DIY DEBUG LOG SUMMARY ===',
-```
+### `app/lib/llm/stream-text.ts`
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The streaming layer in [`app/lib/llm/stream-text.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/llm/stream-text.ts) handles the actual LLM call and streams tokens back to the client. It wraps the AI SDK's `streamText` function and applies provider-specific configuration.
-### `app/utils/debugLogger.ts`
+This is where the prompt-to-response pipeline executes. For the prompt-to-app workflow, this is the boundary between "what the user asked" and "what the model generates" — the right place to add timeout controls, stream error recovery, or cost accounting.
-The `createDebugSummary` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-
- // Create a formatted summary
- const summary = createDebugSummary(debugData);
- const fullContent = `${summary}\n\n=== DETAILED DEBUG DATA ===\n\n${JSON.stringify(debugData, null, 2)}`;
-
- const blob = new Blob([fullContent], { type: 'text/plain' });
- const url = URL.createObjectURL(blob);
-
- const link = document.createElement('a');
- link.href = url;
- link.download = filename || `bolt-debug-${new Date().toISOString().split('T')[0]}.txt`;
- document.body.appendChild(link);
- link.click();
- document.body.removeChild(link);
-
- URL.revokeObjectURL(url);
-
- logger.info('Debug log downloaded successfully');
- } catch (error) {
- logger.error('Failed to download debug log:', error);
- }
-}
-
-// Create a human-readable summary of the debug data
-function createDebugSummary(data: DebugLogData): string {
- const summary = [
- '=== BOLT DIY DEBUG LOG SUMMARY ===',
- `Generated: ${new Date(data.timestamp).toLocaleString()}`,
- `Session ID: ${data.sessionId}`,
- '',
- '=== SYSTEM INFORMATION ===',
- `Platform: ${data.systemInfo.platform}`,
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `captureTerminalLog` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- }
-
- captureTerminalLog(entry: TerminalEntry): void {
- try {
- // Debounce terminal logs to prevent spam
- if (this._config.debounceTerminal > 0) {
- this._terminalLogQueue.push(entry);
-
- if (this._terminalLogTimer) {
- clearTimeout(this._terminalLogTimer);
- }
-
- this._terminalLogTimer = setTimeout(() => {
- this._flushTerminalLogs();
- }, this._config.debounceTerminal);
- } else {
- this._terminalLogs.push(entry);
- }
- } catch (error) {
- console.error('Debug logger failed to capture terminal log:', error);
- }
- }
-
- private _flushTerminalLogs(): void {
- try {
- while (this._terminalLogQueue.length > 0) {
- const entry = this._terminalLogQueue.shift();
-
- if (entry) {
- this._terminalLogs.push(entry);
- }
- }
-```
+### `app/components/chat/BaseChat.tsx`
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The `BaseChat` component in [`app/components/chat/BaseChat.tsx`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/components/chat/BaseChat.tsx) is the primary UI container for the prompt input and conversation display. It manages the message list, the input field, and sends requests to `api.chat`.
+For the prompt-to-app workflow, this component defines the user-facing contract: what the user types, how constraints are surfaced, and how the generated output is streamed back into the editor.
## How These Components Connect
```mermaid
flowchart TD
- A[DebugLogger]
- B[downloadDebugLog]
- C[createDebugSummary]
- D[captureTerminalLog]
- E[captureUserAction]
+ A[User types prompt in BaseChat]
+ B[Request sent to api.chat.ts action]
+ C[Provider and model config applied]
+ D[stream-text.ts calls LLM provider]
+ E[Tokens stream back to UI]
+ F[Generated code applied to editor]
A --> B
B --> C
C --> D
D --> E
+ E --> F
```
diff --git a/tutorials/bolt-diy-tutorial/05-files-diff-locking.md b/tutorials/bolt-diy-tutorial/05-files-diff-locking.md
index d35078f8..ab954d32 100644
--- a/tutorials/bolt-diy-tutorial/05-files-diff-locking.md
+++ b/tutorials/bolt-diy-tutorial/05-files-diff-locking.md
@@ -124,173 +124,41 @@ You now have a robust governance model for generated edits:
Next: [Chapter 6: Integrations and MCP](06-integrations-and-mcp.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/utils/debugLogger.ts`
-
-The `getDebugLogger` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-}
-
-export function getDebugLogger(): DebugLogger {
- return debugLogger;
-}
-
-// Utility function to enable debug mode on demand
-export function enableDebugMode(): void {
- debugLogger.enableDebugMode();
-}
-
-// Utility function to disable debug mode
-export function disableDebugMode(): void {
- debugLogger.disableDebugMode();
-}
-
-// Utility function to get debug logger status
-export function getDebugStatus(): { initialized: boolean; capturing: boolean; enabled: boolean } {
- return debugLogger.getStatus();
-}
-
-// Utility function to update debug configuration
-export function updateDebugConfig(config: Partial): void {
- debugLogger.updateConfig(config);
-}
-
-// Initialize debug logger when this module is imported
-if (typeof window !== 'undefined') {
- // Defer initialization to avoid blocking
- setTimeout(() => {
- debugLogger.initialize();
- }, 0);
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `enableDebugMode` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
+### `app/lib/runtime/message-parser.ts`
- // Public method to enable debug logging on demand
- enableDebugMode(): void {
- this._config.enabled = true;
+The message parser in [`app/lib/runtime/message-parser.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/runtime/message-parser.ts) processes the streamed LLM output and extracts file operations (create, update, delete) from the structured XML-like action tags bolt.diy uses in its system prompt.
- if (!this._isInitialized) {
- this.initialize();
- } else if (!this._isCapturing) {
- this.startCapture();
- }
- }
+This file is the core of the diff/files layer: it converts raw model text into typed `FileAction` objects that the runtime then applies. For the locking and diff governance patterns in this chapter, this is where you intercept generated changes before they reach disk — for example, to check whether an action targets a protected file path.
- // Public method to disable debug logging
- disableDebugMode(): void {
- this.stopCapture();
- }
-
- // Get current status
- getStatus(): { initialized: boolean; capturing: boolean; enabled: boolean } {
- return {
- initialized: this._isInitialized,
- capturing: this._isCapturing,
- enabled: this._config.enabled,
- };
- }
-
- // Update configuration
- updateConfig(newConfig: Partial): void {
- const wasCapturing = this._isCapturing;
-
- if (wasCapturing) {
- this.stopCapture();
-```
+### `app/lib/stores/files.ts`
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The file store in [`app/lib/stores/files.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/stores/files.ts) is the in-memory representation of the virtual filesystem managed by bolt.diy's WebContainer runtime. It maps file paths to content and tracks dirty/modified state.
-### `app/utils/debugLogger.ts`
+For diff and locking controls, this store is where you read the pre-edit content to construct a diff and where file-lock checks would be applied: if a file path is in the protected list, the store update should be blocked and surfaced to the user for review.
-The `disableDebugMode` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-
- // Public method to disable debug logging
- disableDebugMode(): void {
- this.stopCapture();
- }
-
- // Get current status
- getStatus(): { initialized: boolean; capturing: boolean; enabled: boolean } {
- return {
- initialized: this._isInitialized,
- capturing: this._isCapturing,
- enabled: this._config.enabled,
- };
- }
-
- // Update configuration
- updateConfig(newConfig: Partial): void {
- const wasCapturing = this._isCapturing;
-
- if (wasCapturing) {
- this.stopCapture();
- }
-
- this._config = { ...this._config, ...newConfig };
-
- // Recreate buffers if maxEntries changed
- if (newConfig.maxEntries && newConfig.maxEntries !== this._config.maxEntries) {
- const oldLogs = this._logs.toArray();
- const oldErrors = this._errors.toArray();
- const oldNetworkRequests = this._networkRequests.toArray();
- const oldUserActions = this._userActions.toArray();
- const oldTerminalLogs = this._terminalLogs.toArray();
-```
-
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `getDebugStatus` function in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-
-// Utility function to get debug logger status
-export function getDebugStatus(): { initialized: boolean; capturing: boolean; enabled: boolean } {
- return debugLogger.getStatus();
-}
-
-// Utility function to update debug configuration
-export function updateDebugConfig(config: Partial): void {
- debugLogger.updateConfig(config);
-}
-
-// Initialize debug logger when this module is imported
-if (typeof window !== 'undefined') {
- // Defer initialization to avoid blocking
- setTimeout(() => {
- debugLogger.initialize();
- }, 0);
-}
-
-```
+### `app/lib/runtime/action-runner.ts`
-This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The action runner in [`app/lib/runtime/action-runner.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/runtime/action-runner.ts) is responsible for executing parsed file and shell actions against the WebContainer. It reads from the message parser output and writes to the file store and terminal.
+This is the last enforcement point before a change lands. Adding a pre-execution check here — comparing the target path against a deny list or requiring explicit approval — is the most direct way to implement the high-risk file protection patterns described in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[getDebugLogger]
- B[enableDebugMode]
- C[disableDebugMode]
- D[getDebugStatus]
- E[updateDebugConfig]
+ A[LLM streams action tags]
+ B[message-parser.ts extracts FileActions]
+ C[action-runner.ts queues actions]
+ D{Protected file check}
+ E[files.ts store updated]
+ F[Diff shown to user]
+ G[Change blocked or flagged]
A --> B
B --> C
C --> D
- D --> E
+ D -- allowed --> E
+ E --> F
+ D -- protected --> G
```
diff --git a/tutorials/bolt-diy-tutorial/06-integrations-and-mcp.md b/tutorials/bolt-diy-tutorial/06-integrations-and-mcp.md
index 594c5657..e9d7fede 100644
--- a/tutorials/bolt-diy-tutorial/06-integrations-and-mcp.md
+++ b/tutorials/bolt-diy-tutorial/06-integrations-and-mcp.md
@@ -126,186 +126,41 @@ You now have a practical integration strategy for bolt.diy:
Next: [Chapter 7: Deployment and Distribution](07-deployment-distribution.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/utils/debugLogger.ts`
-
-The `for` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-};
-
-// Configuration interface for debug logger
-export interface DebugLoggerConfig {
- enabled: boolean;
- maxEntries: number;
- captureConsole: boolean;
- captureNetwork: boolean;
- captureErrors: boolean;
- debounceTerminal: number; // ms
-}
-
-// Circular buffer implementation for memory efficiency
-class CircularBuffer {
- private _buffer: (T | undefined)[];
- private _head = 0;
- private _tail = 0;
- private _size = 0;
-
- constructor(private _capacity: number) {
- this._buffer = new Array(_capacity);
- }
-
- push(item: T): void {
- this._buffer[this._tail] = item;
- this._tail = (this._tail + 1) % this._capacity;
-
- if (this._size < this._capacity) {
- this._size++;
- } else {
- this._head = (this._head + 1) % this._capacity;
- }
-```
+### `app/lib/hooks/useMCPServers.ts`
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `DebugLoggerConfig` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-
-// Configuration interface for debug logger
-export interface DebugLoggerConfig {
- enabled: boolean;
- maxEntries: number;
- captureConsole: boolean;
- captureNetwork: boolean;
- captureErrors: boolean;
- debounceTerminal: number; // ms
-}
-
-// Circular buffer implementation for memory efficiency
-class CircularBuffer {
- private _buffer: (T | undefined)[];
- private _head = 0;
- private _tail = 0;
- private _size = 0;
-
- constructor(private _capacity: number) {
- this._buffer = new Array(_capacity);
- }
-
- push(item: T): void {
- this._buffer[this._tail] = item;
- this._tail = (this._tail + 1) % this._capacity;
-
- if (this._size < this._capacity) {
- this._size++;
- } else {
- this._head = (this._head + 1) % this._capacity;
- }
- }
-```
+The `useMCPServers` hook in [`app/lib/hooks/useMCPServers.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/hooks/useMCPServers.ts) manages the lifecycle of connected MCP servers: loading configured server definitions, connecting, and surfacing available tools to the chat layer.
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `DebugLogData` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
-}
-
-export interface DebugLogData {
- timestamp: string;
- sessionId: string;
- systemInfo: SystemInfo;
- appInfo: AppInfo;
- logs: LogEntry[];
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
-```
+This file is the primary entry point for understanding how bolt.diy discovers and registers external tools via MCP. When adding a new integration, you configure a server entry and this hook handles connection and tool enumeration.
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `SystemInfo` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- timestamp: string;
- sessionId: string;
- systemInfo: SystemInfo;
- appInfo: AppInfo;
- logs: LogEntry[];
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
-```
+### `app/routes/api.mcp.ts`
+
+The MCP API route in [`app/routes/api.mcp.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.mcp.ts) is the server-side handler for MCP operations. It proxies tool calls from the bolt.diy runtime to external MCP server processes, handling serialization and error propagation.
+
+For integration governance, this route is the enforcement boundary: MCP tool calls pass through here, making it the right place to add logging, rate limiting, or approval gates before an external system is mutated.
+
+### `app/lib/stores/mcp.ts`
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+The MCP store in [`app/lib/stores/mcp.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/lib/stores/mcp.ts) holds the in-memory state of MCP connections: which servers are registered, their connection status, and the tool manifests they expose.
+For operational visibility — a requirement called out in this chapter's integration readiness checklist — this store is where you read current tool availability and emit connection health metrics.
## How These Components Connect
```mermaid
flowchart TD
- A[for]
- B[DebugLoggerConfig]
- C[DebugLogData]
- D[SystemInfo]
- E[AppInfo]
+ A[MCP server config defined]
+ B[useMCPServers hook connects to server]
+ C[mcp.ts store holds tool manifests]
+ D[Model selects MCP tool in response]
+ E[api.mcp.ts proxies tool call]
+ F[External service executes action]
+ G[Result returned to model context]
A --> B
B --> C
C --> D
D --> E
+ E --> F
+ F --> G
```
diff --git a/tutorials/bolt-diy-tutorial/07-deployment-distribution.md b/tutorials/bolt-diy-tutorial/07-deployment-distribution.md
index 1b22ac3c..d5e91303 100644
--- a/tutorials/bolt-diy-tutorial/07-deployment-distribution.md
+++ b/tutorials/bolt-diy-tutorial/07-deployment-distribution.md
@@ -116,186 +116,38 @@ You now have a deployment framework that aligns target choice with:
Next: [Chapter 8: Production Operations](08-production-operations.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/utils/debugLogger.ts`
-
-The `LogEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- systemInfo: SystemInfo;
- appInfo: AppInfo;
- logs: LogEntry[];
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
-```
+### `Dockerfile`
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `ErrorEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- appInfo: AppInfo;
- logs: LogEntry[];
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
- unsavedFiles: number;
-```
+The [`Dockerfile`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/Dockerfile) defines the container build for self-hosted deployment. It captures the Node.js build step, copies the compiled output, and sets the runtime entrypoint. Reviewing this file reveals the assumed environment variables (provider API keys, port settings) that must be supplied via secret manager in production container deployments.
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `NetworkEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- logs: LogEntry[];
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
- unsavedFiles: number;
- workbenchState?: {
-```
+### `package.json` (build and deploy scripts)
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-
-### `app/utils/debugLogger.ts`
-
-The `PerformanceEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
-
-```ts
- errors: ErrorEntry[];
- networkRequests: NetworkEntry[];
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
-
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
-}
-
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
- unsavedFiles: number;
- workbenchState?: {
- currentView: string;
-```
+The `scripts` section in [`package.json`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/package.json) defines the build targets: `build`, `start`, and the Electron-related `electron:build` script. These scripts are the canonical entry point for CI/CD pipelines — knowing which script corresponds to which deployment target is essential for wiring up automated release pipelines.
+
+The `build` output goes to a `build/` directory that is then served by the production Node server or packaged into the Electron app. For web hosting targets (Vercel, Netlify), the same `build/` directory is what the hosting provider deploys.
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+### `.env.example`
+The [`.env.example`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/.env.example) file enumerates every environment variable the application supports. For deployment, this is the authoritative checklist of secrets and configuration values that must be injected — API keys per provider, optional feature flags, and runtime tuning variables. Auditing this file against your secret manager before each deployment prevents missing-config outages.
## How These Components Connect
```mermaid
flowchart TD
- A[LogEntry]
- B[ErrorEntry]
- C[NetworkEntry]
- D[PerformanceEntry]
- E[StateEntry]
+ A[Source code and assets]
+ B[npm run build produces build/ directory]
+ C{Deployment target}
+ D[Web host deploys build/]
+ E[Docker image built from Dockerfile]
+ F[Electron package wraps build/]
+ G[Secrets injected from .env.example checklist]
A --> B
B --> C
C --> D
- D --> E
+ C --> E
+ C --> F
+ G --> D
+ G --> E
```
diff --git a/tutorials/bolt-diy-tutorial/08-production-operations.md b/tutorials/bolt-diy-tutorial/08-production-operations.md
index a26198cc..f498b8a2 100644
--- a/tutorials/bolt-diy-tutorial/08-production-operations.md
+++ b/tutorials/bolt-diy-tutorial/08-production-operations.md
@@ -133,132 +133,130 @@ Related tracks:
- [Roo Code Tutorial](../roo-code-tutorial/)
- [OpenHands Tutorial](../openhands-tutorial/)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `app/utils/debugLogger.ts`
+### `app/routes/api.chat.ts`
-The `UserActionEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
+The `action` function in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) handles a key part of this chapter's functionality:
```ts
- performance: PerformanceEntry;
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
+import { StreamRecoveryManager } from '~/lib/.server/llm/stream-recovery';
+
+export async function action(args: ActionFunctionArgs) {
+ return chatAction(args);
}
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
+const logger = createScopedLogger('api.chat');
+
+function parseCookies(cookieHeader: string): Record<string, string> {
+ const cookies: Record<string, string> = {};
+
+ const items = cookieHeader.split(';').map((cookie) => cookie.trim());
+
+ items.forEach((item) => {
+ const [name, ...rest] = item.split('=');
+
+ if (name && rest) {
+ const decodedName = decodeURIComponent(name.trim());
+ const decodedValue = decodeURIComponent(rest.join('=').trim());
+ cookies[decodedName] = decodedValue;
+ }
+ });
+
+ return cookies;
}
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
- unsavedFiles: number;
- workbenchState?: {
- currentView: string;
- showWorkbench: boolean;
- showTerminal: boolean;
+async function chatAction({ context, request }: ActionFunctionArgs) {
+ const streamRecovery = new StreamRecoveryManager({
+ timeout: 45000,
+ maxRetries: 2,
+ onTimeout: () => {
+ logger.warn('Stream timeout - attempting recovery');
```
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-### `app/utils/debugLogger.ts`
+### `app/routes/api.chat.ts`
-The `TerminalEntry` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
+The `parseCookies` function in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) handles a key part of this chapter's functionality:
```ts
- state: StateEntry;
- userActions: UserActionEntry[];
- terminalLogs: TerminalEntry[];
-}
+const logger = createScopedLogger('api.chat');
+
+function parseCookies(cookieHeader: string): Record<string, string> {
+ const cookies: Record<string, string> = {};
-export interface SystemInfo {
- platform: string;
- userAgent: string;
- screenResolution: string;
- viewportSize: string;
- isMobile: boolean;
- timezone: string;
- language: string;
- cookiesEnabled: boolean;
- localStorageEnabled: boolean;
- sessionStorageEnabled: boolean;
+ const items = cookieHeader.split(';').map((cookie) => cookie.trim());
+
+ items.forEach((item) => {
+ const [name, ...rest] = item.split('=');
+
+ if (name && rest) {
+ const decodedName = decodeURIComponent(name.trim());
+ const decodedValue = decodeURIComponent(rest.join('=').trim());
+ cookies[decodedName] = decodedValue;
+ }
+ });
+
+ return cookies;
}
-export interface AppInfo {
- version: string;
- buildTime: string;
- currentModel: string;
- currentProvider: string;
- projectType: string;
- workbenchView: string;
- hasActivePreview: boolean;
- unsavedFiles: number;
- workbenchState?: {
- currentView: string;
- showWorkbench: boolean;
- showTerminal: boolean;
- artifactsCount: number;
+async function chatAction({ context, request }: ActionFunctionArgs) {
+ const streamRecovery = new StreamRecoveryManager({
+ timeout: 45000,
+ maxRetries: 2,
+ onTimeout: () => {
+ logger.warn('Stream timeout - attempting recovery');
+ },
+ });
+
+ const { messages, files, promptId, contextOptimization, supabase, chatMode, designScheme, maxLLMSteps } =
+ await request.json<{
+ messages: Messages;
```
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
-### `app/utils/debugLogger.ts`
+### `app/routes/api.chat.ts`
-The `const` interface in [`app/utils/debugLogger.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/utils/debugLogger.ts) handles a key part of this chapter's functionality:
+The `chatAction` function in [`app/routes/api.chat.ts`](https://github.com/stackblitz-labs/bolt.diy/blob/HEAD/app/routes/api.chat.ts) handles a key part of this chapter's functionality:
```ts
-import { isMac, isWindows, isLinux } from './os';
-import { isMobile } from './mobile';
-import { PROVIDER_LIST, DEFAULT_MODEL } from './constants';
-import { logger } from './logger';
-
-// Lazy import to avoid circular dependencies
-let logStore: any = null;
-const getLogStore = () => {
- if (!logStore && typeof window !== 'undefined') {
- try {
- // Import and set the logStore on first access
- import('~/lib/stores/logs')
- .then(({ logStore: store }) => {
- logStore = store;
- })
- .catch(() => {
- // Ignore import errors
- });
- } catch {
- // Ignore errors
+
+export async function action(args: ActionFunctionArgs) {
+ return chatAction(args);
+}
+
+const logger = createScopedLogger('api.chat');
+
+function parseCookies(cookieHeader: string): Record<string, string> {
+ const cookies: Record<string, string> = {};
+
+ const items = cookieHeader.split(';').map((cookie) => cookie.trim());
+
+ items.forEach((item) => {
+ const [name, ...rest] = item.split('=');
+
+ if (name && rest) {
+ const decodedName = decodeURIComponent(name.trim());
+ const decodedValue = decodeURIComponent(rest.join('=').trim());
+ cookies[decodedName] = decodedValue;
}
- }
+ });
- return logStore;
-};
+ return cookies;
+}
-// Configuration interface for debug logger
-export interface DebugLoggerConfig {
- enabled: boolean;
- maxEntries: number;
- captureConsole: boolean;
- captureNetwork: boolean;
+async function chatAction({ context, request }: ActionFunctionArgs) {
+ const streamRecovery = new StreamRecoveryManager({
+ timeout: 45000,
+ maxRetries: 2,
+ onTimeout: () => {
+ logger.warn('Stream timeout - attempting recovery');
+ },
```
-This interface is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
+This function is important because it defines how bolt.diy Tutorial: Build and Operate an Open Source AI App Builder implements the patterns covered in this chapter.
### `app/routes/api.vercel-deploy.ts`
@@ -306,9 +304,9 @@ This function is important because it defines how bolt.diy Tutorial: Build and O
```mermaid
flowchart TD
- A[UserActionEntry]
- B[TerminalEntry]
- C[const]
+ A[action]
+ B[parseCookies]
+ C[chatAction]
D[loader]
E[action]
A --> B
diff --git a/tutorials/browser-use-tutorial/01-getting-started.md b/tutorials/browser-use-tutorial/01-getting-started.md
index 78ee708e..cd1e2f3b 100644
--- a/tutorials/browser-use-tutorial/01-getting-started.md
+++ b/tutorials/browser-use-tutorial/01-getting-started.md
@@ -6,6 +6,7 @@ has_children: false
parent: Browser Use Tutorial
---
+
# Chapter 1: Getting Started with Browser Use
Welcome to **Chapter 1: Getting Started with Browser Use**. In this part of **Browser Use Tutorial: AI-Powered Web Automation Agents**, you will build an intuitive mental model first, then move into concrete implementation details and practical production tradeoffs.
@@ -511,151 +512,182 @@ Now that you can run basic browser agents, let's explore **browser control basic
## Depth Expansion Playbook
-
+## Source Code Walkthrough
+
+### `browser_use/config.py`
+
+The `OldConfig` class in [`browser_use/config.py`](https://github.com/browser-use/browser-use/blob/HEAD/browser_use/config.py) handles a key part of this chapter's functionality:
+
+```py
+
+
+class OldConfig:
+ """Original lazy-loading configuration class for environment variables."""
+
+ # Cache for directory creation tracking
+ _dirs_created = False
+
+ @property
+ def BROWSER_USE_LOGGING_LEVEL(self) -> str:
+ return os.getenv('BROWSER_USE_LOGGING_LEVEL', 'info').lower()
+
+ @property
+ def ANONYMIZED_TELEMETRY(self) -> bool:
+ return os.getenv('ANONYMIZED_TELEMETRY', 'true').lower()[:1] in 'ty1'
-This chapter is expanded to v1-style depth for production-grade learning and implementation quality.
+ @property
+ def BROWSER_USE_CLOUD_SYNC(self) -> bool:
+ return os.getenv('BROWSER_USE_CLOUD_SYNC', str(self.ANONYMIZED_TELEMETRY)).lower()[:1] in 'ty1'
-### Strategic Context
+ @property
+ def BROWSER_USE_CLOUD_API_URL(self) -> str:
+ url = os.getenv('BROWSER_USE_CLOUD_API_URL', 'https://api.browser-use.com')
+ assert '://' in url, 'BROWSER_USE_CLOUD_API_URL must be a valid URL'
+ return url
-- tutorial: **Browser Use Tutorial: AI-Powered Web Automation Agents**
-- tutorial slug: **browser-use-tutorial**
-- chapter focus: **Chapter 1: Getting Started with Browser Use**
-- system context: **Browser Use Tutorial**
-- objective: move from surface-level usage to repeatable engineering operation
+ @property
+ def BROWSER_USE_CLOUD_UI_URL(self) -> str:
+ url = os.getenv('BROWSER_USE_CLOUD_UI_URL', '')
+ # Allow empty string as default, only validate if set
+ if url and '://' not in url:
+ raise AssertionError('BROWSER_USE_CLOUD_UI_URL must be a valid URL if set')
+```
-### Architecture Decomposition
+This class is important because it defines how Browser Use Tutorial: AI-Powered Web Automation Agents implements the patterns covered in this chapter.
-1. Define the runtime boundary for `Chapter 1: Getting Started with Browser Use`.
-2. Separate control-plane decisions from data-plane execution.
-3. Capture input contracts, transformation points, and output contracts.
-4. Trace state transitions across request lifecycle stages.
-5. Identify extension hooks and policy interception points.
-6. Map ownership boundaries for team and automation workflows.
-7. Specify rollback and recovery paths for unsafe changes.
-8. Track observability signals for correctness, latency, and cost.
+### `browser_use/config.py`
-### Operator Decision Matrix
+The `for` class in [`browser_use/config.py`](https://github.com/browser-use/browser-use/blob/HEAD/browser_use/config.py) handles a key part of this chapter's functionality:
-| Decision Area | Low-Risk Path | High-Control Path | Tradeoff |
-|:--------------|:--------------|:------------------|:---------|
-| Runtime mode | managed defaults | explicit policy config | speed vs control |
-| State handling | local ephemeral | durable persisted state | simplicity vs auditability |
-| Tool integration | direct API use | mediated adapter layer | velocity vs governance |
-| Rollout method | manual change | staged + canary rollout | effort vs safety |
-| Incident response | best effort logs | runbooks + SLO alerts | cost vs reliability |
+```py
+"""Configuration system for browser-use with automatic migration support."""
-### Failure Modes and Countermeasures
+import json
+import logging
+import os
+from datetime import datetime
+from functools import cache
+from pathlib import Path
+from typing import Any
+from uuid import uuid4
+
+import psutil
+from pydantic import BaseModel, ConfigDict, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+logger = logging.getLogger(__name__)
+
+
+@cache
+def is_running_in_docker() -> bool:
+ """Detect if we are running in a docker container, for the purpose of optimizing chrome launch flags (dev shm usage, gpu settings, etc.)"""
+ try:
+ if Path('/.dockerenv').exists() or 'docker' in Path('/proc/1/cgroup').read_text().lower():
+ return True
+ except Exception:
+ pass
+
+ try:
+ # if init proc (PID 1) looks like uvicorn/python/uv/etc. then we're in Docker
+ # if init proc (PID 1) looks like bash/systemd/init/etc. then we're probably NOT in Docker
+```
-| Failure Mode | Early Signal | Root Cause Pattern | Countermeasure |
-|:-------------|:-------------|:-------------------|:---------------|
-| stale context | inconsistent outputs | missing refresh window | enforce context TTL and refresh hooks |
-| policy drift | unexpected execution | ad hoc overrides | centralize policy profiles |
-| auth mismatch | 401/403 bursts | credential sprawl | rotation schedule + scope minimization |
-| schema breakage | parser/validation errors | unmanaged upstream changes | contract tests per release |
-| retry storms | queue congestion | no backoff controls | jittered backoff + circuit breakers |
-| silent regressions | quality drop without alerts | weak baseline metrics | eval harness with thresholds |
+This module-level setup code is important because it defines how Browser Use Tutorial: AI-Powered Web Automation Agents implements the patterns covered in this chapter.
-### Implementation Runbook
+### `browser_use/config.py`
-1. Establish a reproducible baseline environment.
-2. Capture chapter-specific success criteria before changes.
-3. Implement minimal viable path with explicit interfaces.
-4. Add observability before expanding feature scope.
-5. Run deterministic tests for happy-path behavior.
-6. Inject failure scenarios for negative-path validation.
-7. Compare output quality against baseline snapshots.
-8. Promote through staged environments with rollback gates.
-9. Record operational lessons in release notes.
+The `FlatEnvConfig` class in [`browser_use/config.py`](https://github.com/browser-use/browser-use/blob/HEAD/browser_use/config.py) handles a key part of this chapter's functionality:
-### Quality Gate Checklist
+```py
-- [ ] chapter-level assumptions are explicit and testable
-- [ ] API/tool boundaries are documented with input/output examples
-- [ ] failure handling includes retry, timeout, and fallback policy
-- [ ] security controls include auth scopes and secret rotation plans
-- [ ] observability includes logs, metrics, traces, and alert thresholds
-- [ ] deployment guidance includes canary and rollback paths
-- [ ] docs include links to upstream sources and related tracks
-- [ ] post-release verification confirms expected behavior under load
-### Source Alignment
+class FlatEnvConfig(BaseSettings):
+ """All environment variables in a flat namespace."""
-- [Browser Use Repository](https://github.com/browser-use/browser-use)
-- [Browser Use Releases](https://github.com/browser-use/browser-use/releases)
-- [Browser Use Docs](https://docs.browser-use.com/)
-- [Browser Use Cloud](https://cloud.browser-use.com/)
+ model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8', case_sensitive=True, extra='allow')
-### Cross-Tutorial Connection Map
+ # Logging and telemetry
+ BROWSER_USE_LOGGING_LEVEL: str = Field(default='info')
+ CDP_LOGGING_LEVEL: str = Field(default='WARNING')
+ BROWSER_USE_DEBUG_LOG_FILE: str | None = Field(default=None)
+ BROWSER_USE_INFO_LOG_FILE: str | None = Field(default=None)
+ ANONYMIZED_TELEMETRY: bool = Field(default=True)
+ BROWSER_USE_CLOUD_SYNC: bool | None = Field(default=None)
+ BROWSER_USE_CLOUD_API_URL: str = Field(default='https://api.browser-use.com')
+ BROWSER_USE_CLOUD_UI_URL: str = Field(default='')
+ BROWSER_USE_MODEL_PRICING_URL: str = Field(default='')
-- [OpenHands Tutorial](../openhands-tutorial/)
-- [Cline Tutorial](../cline-tutorial/)
-- [Roo Code Tutorial](../roo-code-tutorial/)
-- [Claude Code Tutorial](../claude-code-tutorial/)
-- [Chapter 1: Getting Started](01-getting-started.md)
+ # Path configuration
+ XDG_CACHE_HOME: str = Field(default='~/.cache')
+ XDG_CONFIG_HOME: str = Field(default='~/.config')
+ BROWSER_USE_CONFIG_DIR: str | None = Field(default=None)
-### Advanced Practice Exercises
+ # LLM API keys
+ OPENAI_API_KEY: str = Field(default='')
+ ANTHROPIC_API_KEY: str = Field(default='')
+ GOOGLE_API_KEY: str = Field(default='')
+ DEEPSEEK_API_KEY: str = Field(default='')
+ GROK_API_KEY: str = Field(default='')
+ NOVITA_API_KEY: str = Field(default='')
+ AZURE_OPENAI_ENDPOINT: str = Field(default='')
+ AZURE_OPENAI_KEY: str = Field(default='')
+```
-1. Build a minimal end-to-end implementation for `Chapter 1: Getting Started with Browser Use`.
-2. Add instrumentation and measure baseline latency and error rate.
-3. Introduce one controlled failure and confirm graceful recovery.
-4. Add policy constraints and verify they are enforced consistently.
-5. Run a staged rollout and document rollback decision criteria.
+This class is important because it defines how Browser Use Tutorial: AI-Powered Web Automation Agents implements the patterns covered in this chapter.
-### Review Questions
+### `browser_use/config.py`
-1. Which execution boundary matters most for this chapter and why?
-2. What signal detects regressions earliest in your environment?
-3. What tradeoff did you make between delivery speed and governance?
-4. How would you recover from the highest-impact failure mode?
-5. What must be automated before scaling to team-wide adoption?
+The `DBStyleEntry` class in [`browser_use/config.py`](https://github.com/browser-use/browser-use/blob/HEAD/browser_use/config.py) handles a key part of this chapter's functionality:
-## What Problem Does This Solve?
+```py
-Most teams struggle here because the hard part is not writing more code, but deciding clear boundaries for `result`, `agent`, `print` so behavior stays predictable as complexity grows.
-In practical terms, this chapter helps you avoid three common failures:
+class DBStyleEntry(BaseModel):
+ """Database-style entry with UUID and metadata."""
-- coupling core logic too tightly to one implementation path
-- missing the handoff boundaries between setup, execution, and validation
-- shipping changes without clear rollback or observability strategy
-
-After working through this chapter, you should be able to reason about `Chapter 1: Getting Started with Browser Use` as an operating subsystem inside **Browser Use Tutorial: AI-Powered Web Automation Agents**, with explicit contracts for inputs, state transitions, and outputs.
+ id: str = Field(default_factory=lambda: str(uuid4()))
+ default: bool = Field(default=False)
+ created_at: str = Field(default_factory=lambda: datetime.utcnow().isoformat())
-Use the implementation notes around `Agent`, `browser`, `ChatOpenAI` as your checklist when adapting these patterns to your own repository.
-## How it Works Under the Hood
+class BrowserProfileEntry(DBStyleEntry):
+ """Browser profile configuration entry - accepts any BrowserProfile fields."""
-Under the hood, `Chapter 1: Getting Started with Browser Use` usually follows a repeatable control path:
-
-1. **Context bootstrap**: initialize runtime config and prerequisites for `result`.
-2. **Input normalization**: shape incoming data so `agent` receives stable contracts.
-3. **Core execution**: run the main logic branch and propagate intermediate state through `print`.
-4. **Policy and safety checks**: enforce limits, auth scopes, and failure boundaries.
-5. **Output composition**: return canonical result payloads for downstream consumers.
-6. **Operational telemetry**: emit logs/metrics needed for debugging and performance tuning.
+ model_config = ConfigDict(extra='allow')
-When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
+ # Common browser profile fields for reference
+ headless: bool | None = None
+ user_data_dir: str | None = None
+ allowed_domains: list[str] | None = None
+ downloads_path: str | None = None
-## Source Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+class LLMEntry(DBStyleEntry):
+ """LLM configuration entry."""
-- [Browser Use Repository](https://github.com/browser-use/browser-use)
- Why it matters: authoritative reference on `Browser Use Repository` (github.com).
-- [Browser Use Releases](https://github.com/browser-use/browser-use/releases)
- Why it matters: authoritative reference on `Browser Use Releases` (github.com).
-- [Browser Use Docs](https://docs.browser-use.com/)
- Why it matters: authoritative reference on `Browser Use Docs` (docs.browser-use.com).
-- [Browser Use Cloud](https://cloud.browser-use.com/)
- Why it matters: authoritative reference on `Browser Use Cloud` (cloud.browser-use.com).
+ api_key: str | None = None
+ model: str | None = None
+ temperature: float | None = None
+ max_tokens: int | None = None
-Suggested trace strategy:
-- search upstream code for `result` and `agent` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
-## Chapter Connections
+class AgentEntry(DBStyleEntry):
+```
-- [Tutorial Index](README.md)
-- [Next Chapter: Chapter 2: Browser Control Basics](02-browser-control.md)
-- [Main Catalog](../../README.md#-tutorial-catalog)
-- [A-Z Tutorial Directory](../../discoverability/tutorial-directory.md)
+This class is important because it defines how Browser Use Tutorial: AI-Powered Web Automation Agents implements the patterns covered in this chapter.
+
+
+## How These Components Connect
+
+```mermaid
+flowchart TD
+ A[OldConfig]
+ B[Module-level setup and environment detection]
+ C[FlatEnvConfig]
+ D[DBStyleEntry]
+ E[BrowserProfileEntry]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+```
diff --git a/tutorials/browser-use-tutorial/02-browser-control.md b/tutorials/browser-use-tutorial/02-browser-control.md
index 25af5f4a..2dc1f825 100644
--- a/tutorials/browser-use-tutorial/02-browser-control.md
+++ b/tutorials/browser-use-tutorial/02-browser-control.md
@@ -613,6 +613,28 @@ if __name__ == "__main__":
asyncio.run(performance_optimization())
```
+## Browser Control Flow
+
+```mermaid
+flowchart TD
+ A[Agent instantiated with LLM]
+ B[Browser launched via Playwright]
+ C[Agent receives task: navigate to URL]
+ D[go_to_url action executed]
+ E[Page DOM and screenshot captured]
+ F[Agent analyzes page state]
+ G[Next action proposed: click or type]
+ H[Action applied to browser]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+ G --> H
+ H --> E
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/03-element-selection.md b/tutorials/browser-use-tutorial/03-element-selection.md
index 343f1072..ca972b5f 100644
--- a/tutorials/browser-use-tutorial/03-element-selection.md
+++ b/tutorials/browser-use-tutorial/03-element-selection.md
@@ -566,6 +566,26 @@ if __name__ == "__main__":
asyncio.run(javascript_element_manipulation())
```
+## Element Selection Flow
+
+```mermaid
+flowchart TD
+ A[Page state captured as DOM and screenshot]
+ B{Selection strategy}
+ C[Vision: LLM analyzes screenshot to find element]
+ D[DOM: parse element tree for selectors]
+ E[Element index or selector identified]
+ F[click or input_text action with element reference]
+ G[Playwright executes on identified element]
+ A --> B
+ B -- vision mode --> C
+ B -- dom mode --> D
+ C --> E
+ D --> E
+ E --> F
+ F --> G
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/04-form-automation.md b/tutorials/browser-use-tutorial/04-form-automation.md
index 9283933c..c811dcc2 100644
--- a/tutorials/browser-use-tutorial/04-form-automation.md
+++ b/tutorials/browser-use-tutorial/04-form-automation.md
@@ -638,6 +638,31 @@ if __name__ == "__main__":
asyncio.run(compliance_form_automation())
```
+## Form Automation Flow
+
+```mermaid
+flowchart TD
+ A[Agent navigates to form page]
+ B[DOM analysis identifies form fields]
+ C[Field types detected: text select checkbox radio]
+ D[Agent maps input data to fields]
+ E[input_text actions fill text fields]
+ F[select_option actions choose dropdowns]
+ G[click actions select checkboxes]
+ H[Submit button clicked]
+ I[Success or error response validated]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ D --> F
+ D --> G
+ E --> H
+ F --> H
+ G --> H
+ H --> I
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/05-data-extraction.md b/tutorials/browser-use-tutorial/05-data-extraction.md
index b9386f28..830be913 100644
--- a/tutorials/browser-use-tutorial/05-data-extraction.md
+++ b/tutorials/browser-use-tutorial/05-data-extraction.md
@@ -627,6 +627,28 @@ if __name__ == "__main__":
asyncio.run(api_data_integration())
```
+## Data Extraction Flow
+
+```mermaid
+flowchart TD
+ A[Agent navigates to target page]
+ B[Page content captured as DOM and text]
+ C[Agent identifies data patterns]
+ D[extract_content action with schema]
+ E[LLM parses structured data from page text]
+ F[Data validated against expected schema]
+ G[Extracted data returned as structured output]
+ H[Pagination: navigate to next page and repeat]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+ G --> H
+ H --> B
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/06-multi-tab.md b/tutorials/browser-use-tutorial/06-multi-tab.md
index e11ea941..65ebdbc1 100644
--- a/tutorials/browser-use-tutorial/06-multi-tab.md
+++ b/tutorials/browser-use-tutorial/06-multi-tab.md
@@ -560,6 +560,26 @@ if __name__ == "__main__":
asyncio.run(tab_lifecycle_management())
```
+## Multi-Tab Workflow
+
+```mermaid
+flowchart TD
+ A[Agent starts with initial tab]
+ B[open_tab action creates new tab]
+ C[switch_tab action focuses target tab]
+ D[Operations performed in active tab]
+ E[Data from Tab A used in Tab B]
+ F[close_tab when tab no longer needed]
+ G[Final results aggregated across tabs]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> C
+ D --> F
+ F --> G
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/07-custom-actions.md b/tutorials/browser-use-tutorial/07-custom-actions.md
index 45014cd0..d65b5368 100644
--- a/tutorials/browser-use-tutorial/07-custom-actions.md
+++ b/tutorials/browser-use-tutorial/07-custom-actions.md
@@ -872,6 +872,27 @@ Key takeaways from the research and analysis.
return {"step": step, "success": False, "message": f"Step failed: {str(e)}"}
```
+## Custom Actions Architecture
+
+```mermaid
+flowchart TD
+ A[Define custom action function]
+ B[Decorate with @controller.action]
+ C[Action registered in Controller]
+ D[Controller passed to Agent]
+ E[Agent sees custom action in tool list]
+ F[LLM calls custom action by name]
+ G[Custom function executes with browser context]
+ H[Result returned to agent loop]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+ G --> H
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/browser-use-tutorial/08-production.md b/tutorials/browser-use-tutorial/08-production.md
index 3c00b0ac..9ec9246d 100644
--- a/tutorials/browser-use-tutorial/08-production.md
+++ b/tutorials/browser-use-tutorial/08-production.md
@@ -970,6 +970,27 @@ curl -f http://localhost:8000/health || echo "Health check failed"
echo "Recovery completed!"
```
+## Production Architecture
+
+```mermaid
+flowchart TD
+ A[Task request received]
+ B[Browser pool allocates instance]
+ C[Agent runs with headless Chromium]
+ D[Action executed with retry on failure]
+ E[Screenshot and logs emitted]
+ F[Result returned to caller]
+ G[Browser instance returned to pool]
+ H[Circuit breaker opens on repeated failures]
+ A --> B
+ B --> C
+ C --> D
+ D --> E
+ E --> F
+ F --> G
+ D -- failure --> H
+```
+
## Summary
In this chapter, we've covered:
diff --git a/tutorials/chatbox-tutorial/01-getting-started.md b/tutorials/chatbox-tutorial/01-getting-started.md
index 38cccc64..ee9039ac 100644
--- a/tutorials/chatbox-tutorial/01-getting-started.md
+++ b/tutorials/chatbox-tutorial/01-getting-started.md
@@ -526,16 +526,24 @@ Under the hood, `Chapter 1: Getting Started with Chatbox` usually follows a repe
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+### `src/shared/types.ts`
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+The `createMessage` factory in [`src/shared/types.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types.ts) is the entry point for every chat interaction in Chatbox:
-Suggested trace strategy:
-- search upstream code for `input` and `message` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+```ts
+export function createMessage(role: MessageRole = MessageRoleEnum.User, content: string = ''): Message {
+ return {
+ id: uuidv4(),
+ contentParts: content ? [{ type: 'text', text: content }] : [],
+ role: role,
+ timestamp: Date.now(),
+ }
+}
+```
+
+The `isChatSession` and `isPictureSession` helpers distinguish between the two session modes — text chat (default) and image generation. The `ExportChatFormat` type (`'Markdown' | 'TXT' | 'HTML'`) controls how conversations can be exported for archival.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/02-ui-architecture.md b/tutorials/chatbox-tutorial/02-ui-architecture.md
index 25f3994d..8294a15c 100644
--- a/tutorials/chatbox-tutorial/02-ui-architecture.md
+++ b/tutorials/chatbox-tutorial/02-ui-architecture.md
@@ -12,6 +12,22 @@ Welcome to **Chapter 2: UI Architecture & Components**. In this part of **Chatbo
This chapter explores the user interface architecture and component design patterns used in modern AI chat applications like Chatbox.
+## UI Component Architecture
+
+```mermaid
+graph TD
+ App["Chatbox App"] --> Sidebar["Sidebar\n(ConversationList)"]
+ App --> Main["Main Panel"]
+ Main --> Header["Chat Header\n(title + controls)"]
+ Main --> Messages["Messages Area\n(VirtualizedList)"]
+ Main --> Input["MessageInput\n(textarea + send)"]
+ Messages --> Bubble["MessageBubble\n(user / assistant)"]
+ Bubble --> Content["MessageContent\n(markdown render)"]
+ Bubble --> Actions["MessageActions\n(edit / delete)"]
+ Sidebar --> Search["SearchInput"]
+ Sidebar --> Item["ConversationItem"]
+```
+
## 🎨 UI Architecture Overview
### Component Hierarchy
@@ -741,16 +757,24 @@ Under the hood, `Chapter 2: UI Architecture & Components` usually follows a repe
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `src/shared/types.ts`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `createMessage` function in [`src/shared/types.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types.ts) is the canonical factory for all chat messages in the UI layer:
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```ts
+export function createMessage(role: MessageRole = MessageRoleEnum.User, content: string = ''): Message {
+ return {
+ id: uuidv4(),
+ contentParts: content ? [{ type: 'text', text: content }] : [],
+ role: role,
+ timestamp: Date.now(),
+ }
+}
+```
-Suggested trace strategy:
-- search upstream code for `message` and `className` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+This shows that every message carries `contentParts` (supporting multi-modal content), a UUID, and a role enum. The `SettingWindowTab` type (`'ai' | 'display' | 'chat' | 'advanced' | 'extension' | 'mcp'`) maps directly to the settings panel tabs visible in the UI.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/03-ai-providers.md b/tutorials/chatbox-tutorial/03-ai-providers.md
index 1aa25a25..9fcfd92c 100644
--- a/tutorials/chatbox-tutorial/03-ai-providers.md
+++ b/tutorials/chatbox-tutorial/03-ai-providers.md
@@ -12,6 +12,18 @@ Welcome to **Chapter 3: AI Provider Integration**. In this part of **Chatbox Tut
This chapter covers integrating multiple AI providers and managing different language models in chat applications.
+## Provider Registration Flow
+
+```mermaid
+graph LR
+ Def["defineProvider(input)"] --> Registry["providerRegistry\n(Map)"]
+ Registry --> Get["getProviderDefinition(id)"]
+ Registry --> List["getAllProviders()"]
+ List --> UI["Provider Selection UI"]
+ Get --> Model["createModel(config)"]
+ Model --> API["AI API Call\n(OpenAI / Anthropic / Gemini...)"]
+```
+
## 🤖 AI Provider Architecture
### Provider Management System
@@ -571,16 +583,33 @@ Under the hood, `Chapter 3: AI Provider Integration` usually follows a repeatabl
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `src/shared/providers/registry.ts`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `defineProvider` / `getProviderDefinition` functions in [`src/shared/providers/registry.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/providers/registry.ts) form the core of Chatbox's provider system:
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```ts
+const providerRegistry = new Map()
+
+export function defineProvider(definition: ProviderDefinitionInput): ProviderDefinition {
+ if (providerRegistry.has(definition.id)) {
+ console.warn(`Provider "${definition.id}" is already registered. Overwriting.`)
+ }
+ providerRegistry.set(definition.id, definition)
+ return definition
+}
+
+export function getProviderDefinition(id: string): ProviderDefinition | undefined {
+ return providerRegistry.get(id)
+}
+
+export function getAllProviders(): ProviderDefinition[] {
+ return Array.from(providerRegistry.values())
+}
+```
-Suggested trace strategy:
-- search upstream code for `provider` and `model` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Each AI backend (OpenAI, Anthropic, Gemini, Ollama, etc.) calls `defineProvider` at import time, registering into this central Map. `src/shared/providers/index.ts` imports all definitions in order, which controls the display order in the UI provider list.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/04-conversation-management.md b/tutorials/chatbox-tutorial/04-conversation-management.md
index 8a9e76eb..29d46c3f 100644
--- a/tutorials/chatbox-tutorial/04-conversation-management.md
+++ b/tutorials/chatbox-tutorial/04-conversation-management.md
@@ -12,6 +12,20 @@ Welcome to **Chapter 4: Conversation Management**. In this part of **Chatbox Tut
This chapter covers managing chat conversations, including history, context, and multi-conversation workflows.
+## Conversation Lifecycle
+
+```mermaid
+stateDiagram-v2
+ [*] --> Created: createMessage()
+ Created --> Active: user sends message
+ Active --> Waiting: AI request in-flight
+ Waiting --> Active: AI response received
+ Active --> Threaded: split into thread
+ Active --> Archived: user archives
+ Archived --> Active: restore
+ Active --> [*]: delete
+```
+
## 💬 Conversation Architecture
### Conversation Data Structure
@@ -637,16 +651,21 @@ Under the hood, `Chapter 4: Conversation Management` usually follows a repeatabl
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `src/shared/types/session.ts`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `Session` and `Message` schemas in [`src/shared/types/session.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types/session.ts) define how conversations are persisted. Key fields include `contentParts` (supporting multimodal messages), `TokenCountMap` for tracking per-tokenizer usage, and `SessionThread` for branching conversations:
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```ts
+export const TokenCacheKeySchema = z.enum(['default', 'deepseek', 'default_preview', 'deepseek_preview'])
+export type TokenCacheKey = z.infer<typeof TokenCacheKeySchema>
+
+export const TokenCountMapSchema = z.record(z.string(), z.number())
+export type TokenCountMap = z.infer<typeof TokenCountMapSchema>
+```
-Suggested trace strategy:
-- search upstream code for `conversation` and `messages` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Each `Message` stores `tokenCountMap` to enable accurate context-window management across different LLM backends. The `isChatSession` / `isPictureSession` helpers in `src/shared/types.ts` distinguish between text chat and image-generation sessions.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/05-message-processing.md b/tutorials/chatbox-tutorial/05-message-processing.md
index 7479e67c..dc97df01 100644
--- a/tutorials/chatbox-tutorial/05-message-processing.md
+++ b/tutorials/chatbox-tutorial/05-message-processing.md
@@ -12,6 +12,20 @@ Welcome to **Chapter 5: Message Processing Pipeline**. In this part of **Chatbox
This chapter covers the message processing pipeline, including text processing, formatting, and content enhancement.
+## Message Processing Pipeline
+
+```mermaid
+flowchart LR
+ Raw["Raw User Input"] --> Validate["Input Validation\n(length, type)"]
+ Validate --> Tokenize["Token Count\n(per-model tokenizer)"]
+ Tokenize --> Build["Build contentParts\n(text / image / file)"]
+ Build --> Context["Context Window\nTrimming"]
+ Context --> Send["AI Provider API"]
+ Send --> Stream["Streaming Response"]
+ Stream --> Render["Markdown Render\n+ Syntax Highlight"]
+ Render --> Store["Persist to Storage\n(tokenCountMap updated)"]
+```
+
## 🔄 Message Processing Architecture
### Processing Pipeline
@@ -618,16 +632,26 @@ Under the hood, `Chapter 5: Message Processing Pipeline` usually follows a repea
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `src/shared/types/session.ts`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `SearchResultItemSchema` in [`src/shared/types/session.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types/session.ts) defines the structured output for web-search tool results injected into messages:
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```ts
+export const SearchResultItemSchema = z.object({
+ title: z.string(),
+ link: z.string(),
+ snippet: z.string(),
+ rawContent: z.string().nullable().optional(),
+})
+
+export const SearchResultSchema = z.object({
+ items: z.array(SearchResultItemSchema),
+})
+```
-Suggested trace strategy:
-- search upstream code for `text` and `message` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Message `contentParts` use Zod schemas throughout, which means invalid AI responses are caught at parse time before reaching the UI renderer. The `TokenCountMapSchema` (`z.record(z.string(), z.number())`) stores per-tokenizer counts alongside each message for accurate context-window management.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/06-theme-system.md b/tutorials/chatbox-tutorial/06-theme-system.md
index 999ade3a..ea045b30 100644
--- a/tutorials/chatbox-tutorial/06-theme-system.md
+++ b/tutorials/chatbox-tutorial/06-theme-system.md
@@ -12,6 +12,19 @@ Welcome to **Chapter 6: Theme & Customization System**. In this part of **Chatbo
This chapter covers building a comprehensive theming system and customization options for chat applications.
+## Theme and Settings Architecture
+
+```mermaid
+graph TD
+ Settings["Settings Schema\n(ProviderSettingsSchema)"] --> Display["Display Settings\n(theme / fontSize)"]
+ Settings --> Chat["Chat Settings\n(maxHistory / autoSave)"]
+ Settings --> Provider["Provider Settings\n(apiKey / model)"]
+ Display --> CSS["CSS Custom Properties\n(--color-*, --font-size-*)"]
+ Display --> LocalStore["localStorage\npreferred-theme"]
+ CSS --> UI["Runtime UI Render"]
+ Chat --> Session["Session Config\n(temperature / maxTokens)"]
+```
+
## 🎨 Theme Architecture
### Theme System Design
@@ -655,16 +668,23 @@ Under the hood, `Chapter 6: Theme & Customization System` usually follows a repe
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+### `src/shared/types/settings.ts`
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+The `ProviderSettingsSchema` in [`src/shared/types/settings.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types/settings.ts) shows how Chatbox stores per-provider configuration alongside display preferences:
+
+```ts
+export const ProviderSettingsSchema = z.object({
+ apiKey: z.string().optional().catch(undefined),
+ apiHost: z.string().optional().catch(undefined),
+ apiPath: z.string().optional().catch(undefined),
+ models: z.array(ProviderModelInfoSchema).optional().catch(undefined),
+ excludedModels: z.array(z.string()).optional().catch(undefined),
+})
+```
-Suggested trace strategy:
-- search upstream code for `theme` and `colors` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+The `DocumentParserType` enum (`'none' | 'local' | 'chatbox-ai' | 'mineru'`) illustrates how Chatbox uses the settings system for feature toggling across platforms — desktop uses `'local'`, mobile defaults to `'none'`, and cloud users can opt into `'chatbox-ai'`.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/07-plugin-system.md b/tutorials/chatbox-tutorial/07-plugin-system.md
index b1e18ae2..5df3587f 100644
--- a/tutorials/chatbox-tutorial/07-plugin-system.md
+++ b/tutorials/chatbox-tutorial/07-plugin-system.md
@@ -12,6 +12,19 @@ Welcome to **Chapter 7: Plugin Architecture**. In this part of **Chatbox Tutoria
This chapter covers building an extensible plugin system for chat applications, enabling third-party integrations and custom functionality.
+## MCP and Extension Architecture
+
+```mermaid
+graph TD
+ MCP["MCP Module\nsrc/main/mcp/"] --> Tools["Tool Definitions\n(registered functions)"]
+ MCP --> Conn["Transport\n(stdio / SSE)"]
+ Tools --> ChatEngine["Chat Engine\n(tool_use capability)"]
+ ChatEngine --> Execute["Tool Execution\n(approval flow)"]
+ Execute --> Result["Tool Result\n(injected into context)"]
+ SettingTab["SettingWindowTab\n'extension' | 'mcp'"] --> MCP
+ SettingTab --> Skills["Skills System\n(SkillSettingsSchema)"]
+```
+
## 🔌 Plugin System Architecture
### Plugin Interface
@@ -650,16 +663,17 @@ Under the hood, `Chapter 7: Plugin Architecture` usually follows a repeatable co
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+### `src/shared/types.ts`
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+The `SettingWindowTab` type in [`src/shared/types.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/shared/types.ts) reveals Chatbox's extension surface — the `'extension'` and `'mcp'` tabs expose the plugin and MCP configuration UIs:
+
+```ts
+export type SettingWindowTab = 'ai' | 'display' | 'chat' | 'advanced' | 'extension' | 'mcp'
+```
-Suggested trace strategy:
-- search upstream code for `plugin` and `pluginName` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+The `src/main/mcp/` directory implements the MCP host — Chatbox can connect to any MCP-compatible tool server via stdio or SSE transport. `SkillSettingsSchema` in `src/shared/types/skills.ts` handles the configuration persistence layer for each registered tool or skill.
## Chapter Connections
diff --git a/tutorials/chatbox-tutorial/08-production-deployment.md b/tutorials/chatbox-tutorial/08-production-deployment.md
index e941a663..32174b7d 100644
--- a/tutorials/chatbox-tutorial/08-production-deployment.md
+++ b/tutorials/chatbox-tutorial/08-production-deployment.md
@@ -12,6 +12,21 @@ Welcome to **Chapter 8: Production Deployment**. In this part of **Chatbox Tutor
This final chapter covers deploying Chatbox applications to production environments with proper scaling, security, and operational practices.
+## Electron Build and Release Pipeline
+
+```mermaid
+graph LR
+ Source["Source\n(src/)"] --> Build["electron-builder\n+ vite"]
+ Build --> macOS["macOS .dmg\n(arm64 / x64)"]
+ Build --> Windows["Windows .exe\n(installer)"]
+ Build --> Linux["Linux AppImage\n/ deb"]
+ macOS --> Release["GitHub Releases"]
+ Windows --> Release
+ Linux --> Release
+ Release --> AutoUpdate["Auto-Updater\n(app-updater.ts)"]
+ AutoUpdate --> Client["Running Client\n(background check)"]
+```
+
## 🚀 Production Architecture
### Scalable Deployment
@@ -817,16 +832,11 @@ Under the hood, `Chapter 8: Production Deployment` usually follows a repeatable
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
-
-Use the following upstream sources to verify implementation details while reading this chapter:
+## Source Code Walkthrough
-- [View Repo](https://github.com/Bin-Huang/chatbox)
- Why it matters: authoritative reference on `View Repo` (github.com).
+### `src/main/app-updater.ts`
-Suggested trace strategy:
-- search upstream code for `error` and `Promise` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+The `app-updater.ts` module in [`src/main/app-updater.ts`](https://github.com/Bin-Huang/chatbox/blob/main/src/main/app-updater.ts) implements auto-update logic for the Electron desktop app. It integrates with `electron-builder`'s update mechanism to check GitHub Releases, download updates in the background, and prompt users to restart. The `electron-builder.yml` at the repo root configures multi-platform targets (macOS universal, Windows NSIS, Linux AppImage/deb) and code-signing. Chatbox's `release/` scripts automate the version-bump and publishing workflow.
## Chapter Connections
diff --git a/tutorials/cherry-studio-tutorial/01-getting-started.md b/tutorials/cherry-studio-tutorial/01-getting-started.md
index ae293229..61d2cce9 100644
--- a/tutorials/cherry-studio-tutorial/01-getting-started.md
+++ b/tutorials/cherry-studio-tutorial/01-getting-started.md
@@ -38,170 +38,168 @@ You now have Cherry Studio installed and ready for daily AI workflows.
Next: [Chapter 2: Core Architecture and Product Model](02-core-architecture-and-product-model.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `generateSignature` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `HardcodedStringDetector` class in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
- * @returns Base64 encoded signature
- */
-function generateSignature(secret: string, timestamp: number): string {
- const stringToSign = `${timestamp}\n${secret}`
- const hmac = crypto.createHmac('sha256', stringToSign)
- return hmac.digest('base64')
}
-/**
- * Send message to Feishu webhook
- * @param webhookUrl - Feishu webhook URL
- * @param secret - Feishu webhook secret
- * @param content - Feishu card message content
- * @returns Resolves when message is sent successfully
- * @throws When Feishu API returns non-2xx status code or network error occurs
- */
-function sendToFeishu(webhookUrl: string, secret: string, content: FeishuCard): Promise {
- return new Promise((resolve, reject) => {
- const timestamp = Math.floor(Date.now() / 1000)
- const sign = generateSignature(secret, timestamp)
-
- const payload: FeishuPayload = {
- timestamp: timestamp.toString(),
- sign,
- msg_type: 'interactive',
- card: content
+class HardcodedStringDetector {
+ private project: Project
+
+ constructor() {
+ this.project = new Project({
+ skipAddingFilesFromTsConfig: true,
+ skipFileDependencyResolution: true
+ })
+ }
+
+ scanFile(filePath: string, source: 'renderer' | 'main'): Finding[] {
+ const findings: Finding[] = []
+
+ try {
+ const sourceFile = this.project.addSourceFileAtPath(filePath)
+ sourceFile.forEachDescendant((node) => {
+ this.checkNode(node, sourceFile, source, findings)
+ })
+ this.project.removeSourceFile(sourceFile)
+ } catch (error) {
+ console.error(`Error parsing ${filePath}:`, error)
}
- const payloadStr = JSON.stringify(payload)
- const url = new URL(webhookUrl)
+ return findings
+ }
- const options: https.RequestOptions = {
+ private checkNode(node: Node, sourceFile: SourceFile, source: 'renderer' | 'main', findings: Finding[]): void {
+ if (shouldSkipNode(node)) return
+
+ if (Node.isJsxText(node)) {
```
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This class is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `sendToFeishu` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `hasCJK` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
- * @throws When Feishu API returns non-2xx status code or network error occurs
- */
-function sendToFeishu(webhookUrl: string, secret: string, content: FeishuCard): Promise {
- return new Promise((resolve, reject) => {
- const timestamp = Math.floor(Date.now() / 1000)
- const sign = generateSignature(secret, timestamp)
-
- const payload: FeishuPayload = {
- timestamp: timestamp.toString(),
- sign,
- msg_type: 'interactive',
- card: content
- }
+].join('')
- const payloadStr = JSON.stringify(payload)
- const url = new URL(webhookUrl)
-
- const options: https.RequestOptions = {
- hostname: url.hostname,
- path: url.pathname + url.search,
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- 'Content-Length': Buffer.byteLength(payloadStr)
- }
- }
+function hasCJK(text: string): boolean {
+ return new RegExp(`[${CJK_RANGES}]`).test(text)
+}
+
+function hasEnglishUIText(text: string): boolean {
+ const words = text.trim().split(/\s+/)
+ if (words.length < 2 || words.length > 6) return false
+ return /^[A-Z][a-z]+(\s+[A-Za-z]+){1,5}$/.test(text.trim())
+}
+
+function createFinding(
+ node: Node,
+ sourceFile: SourceFile,
+ type: 'chinese' | 'english',
+ source: 'renderer' | 'main',
+ nodeType: string
+): Finding {
+ return {
+ file: sourceFile.getFilePath(),
+ line: sourceFile.getLineAndColumnAtPos(node.getStart()).line,
+ content: node.getText().slice(0, 100),
+ type,
+ source,
+ nodeType
+ }
+}
+
+function shouldSkipNode(node: Node): boolean {
+ let current: Node | undefined = node
- const req = https.request(options, (res) => {
- let data = ''
- res.on('data', (chunk: Buffer) => {
- data += chunk.toString()
- })
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `createIssueCard` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `hasEnglishUIText` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
- * @returns Feishu card content
- */
-function createIssueCard(issueData: IssueData): FeishuCard {
- const { issueUrl, issueNumber, issueTitle, issueSummary, issueAuthor, labels } = issueData
-
- const elements: FeishuCardElement[] = [
- {
- tag: 'div',
- text: {
- tag: 'lark_md',
- content: `**Author:** ${issueAuthor}`
- }
- }
- ]
-
- if (labels.length > 0) {
- elements.push({
- tag: 'div',
- text: {
- tag: 'lark_md',
- content: `**Labels:** ${labels.join(', ')}`
- }
- })
+}
+
+function hasEnglishUIText(text: string): boolean {
+ const words = text.trim().split(/\s+/)
+ if (words.length < 2 || words.length > 6) return false
+ return /^[A-Z][a-z]+(\s+[A-Za-z]+){1,5}$/.test(text.trim())
+}
+
+function createFinding(
+ node: Node,
+ sourceFile: SourceFile,
+ type: 'chinese' | 'english',
+ source: 'renderer' | 'main',
+ nodeType: string
+): Finding {
+ return {
+ file: sourceFile.getFilePath(),
+ line: sourceFile.getLineAndColumnAtPos(node.getStart()).line,
+ content: node.getText().slice(0, 100),
+ type,
+ source,
+ nodeType
}
+}
+
+function shouldSkipNode(node: Node): boolean {
+ let current: Node | undefined = node
+
+ while (current) {
+ const parent = current.getParent()
+ if (!parent) break
- elements.push(
- { tag: 'hr' },
- {
- tag: 'div',
- text: {
- tag: 'lark_md',
- content: `**Summary:**\n${issueSummary}`
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `createSimpleCard` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `createFinding` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
- * @returns Feishu card content
- */
-function createSimpleCard(title: string, description: string, color: FeishuHeaderTemplate = 'turquoise'): FeishuCard {
+}
+
+function createFinding(
+ node: Node,
+ sourceFile: SourceFile,
+ type: 'chinese' | 'english',
+ source: 'renderer' | 'main',
+ nodeType: string
+): Finding {
return {
- elements: [
- {
- tag: 'div',
- text: {
- tag: 'lark_md',
- content: description
- }
- }
- ],
- header: {
- template: color,
- title: {
- tag: 'plain_text',
- content: title
- }
- }
+ file: sourceFile.getFilePath(),
+ line: sourceFile.getLineAndColumnAtPos(node.getStart()).line,
+ content: node.getText().slice(0, 100),
+ type,
+ source,
+ nodeType
}
}
-/**
- * Get Feishu credentials from environment variables
- */
-function getCredentials(): { webhookUrl: string; secret: string } {
- const webhookUrl = process.env.FEISHU_WEBHOOK_URL
- const secret = process.env.FEISHU_WEBHOOK_SECRET
+function shouldSkipNode(node: Node): boolean {
+ let current: Node | undefined = node
+
+ while (current) {
+ const parent = current.getParent()
+ if (!parent) break
+
+ if (Node.isImportDeclaration(parent) || Node.isExportDeclaration(parent)) {
+ return true
+ }
- if (!webhookUrl) {
- console.error('Error: FEISHU_WEBHOOK_URL environment variable is required')
+ if (Node.isCallExpression(parent)) {
+ const callText = parent.getExpression().getText()
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Cherry Studio Tutorial: Multi-
```mermaid
flowchart TD
- A[generateSignature]
- B[sendToFeishu]
- C[createIssueCard]
- D[createSimpleCard]
- E[getCredentials]
+ A[HardcodedStringDetector]
+ B[hasCJK]
+ C[hasEnglishUIText]
+ D[createFinding]
+ E[shouldSkipNode]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/02-core-architecture-and-product-model.md b/tutorials/cherry-studio-tutorial/02-core-architecture-and-product-model.md
index 65d0d3c0..9fa0c39c 100644
--- a/tutorials/cherry-studio-tutorial/02-core-architecture-and-product-model.md
+++ b/tutorials/cherry-studio-tutorial/02-core-architecture-and-product-model.md
@@ -41,170 +41,168 @@ You now have a system-level model for how Cherry Studio organizes AI productivit
Next: [Chapter 3: Provider Configuration and Routing](03-provider-configuration-and-routing.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `IssueOptions` interface in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `main` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
-/** Issue subcommand options */
-interface IssueOptions {
- url: string
- number: string
- title: string
- summary: string
- author?: string
- labels?: string
-}
-
-/** Send subcommand options */
-interface SendOptions {
- title: string
- description: string
- color?: string
+const RENDERER_DIR = path.join(__dirname, '../src/renderer/src')
+const MAIN_DIR = path.join(__dirname, '../src/main')
+const EXTENSIONS = ['.tsx', '.ts']
+const IGNORED_DIRS = ['__tests__', 'node_modules', 'i18n', 'locales', 'types', 'assets']
+const IGNORED_FILES = ['*.test.ts', '*.test.tsx', '*.d.ts', '*prompts*.ts']
+
+// 'content' is handled specially - only checked for specific components
+const UI_ATTRIBUTES = [
+ 'placeholder',
+ 'title',
+ 'label',
+ 'message',
+ 'description',
+ 'tooltip',
+ 'buttonLabel',
+ 'name',
+ 'detail',
+ 'body'
+]
+
+const CONTEXT_SENSITIVE_ATTRIBUTES: Record = {
+ content: ['Tooltip', 'Popover', 'Modal', 'Popconfirm', 'Alert', 'Notification', 'Message']
}
-/**
- * Generate Feishu webhook signature using HMAC-SHA256
- * @param secret - Feishu webhook secret
- * @param timestamp - Unix timestamp in seconds
- * @returns Base64 encoded signature
- */
-function generateSignature(secret: string, timestamp: number): string {
- const stringToSign = `${timestamp}\n${secret}`
- const hmac = crypto.createHmac('sha256', stringToSign)
- return hmac.digest('base64')
-}
+const UI_PROPERTIES = ['message', 'text', 'title', 'label', 'placeholder', 'description', 'detail']
-/**
- * Send message to Feishu webhook
+interface Finding {
+ file: string
+ line: number
+ content: string
+ type: 'chinese' | 'english'
```
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/feishu-notify.ts`
+### `scripts/check-hardcoded-strings.ts`
-The `SendOptions` interface in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+The `Finding` interface in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
```ts
+const UI_PROPERTIES = ['message', 'text', 'title', 'label', 'placeholder', 'description', 'detail']
+
+interface Finding {
+ file: string
+ line: number
+ content: string
+ type: 'chinese' | 'english'
+ source: 'renderer' | 'main'
+ nodeType: string
+}
+
+const CJK_RANGES = [
+ '\u3000-\u303f', // CJK Symbols and Punctuation
+ '\u3040-\u309f', // Hiragana
+ '\u30a0-\u30ff', // Katakana
+ '\u3100-\u312f', // Bopomofo
+ '\u3400-\u4dbf', // CJK Unified Ideographs Extension A
+ '\u4e00-\u9fff', // CJK Unified Ideographs
+ '\uac00-\ud7af', // Hangul Syllables
+ '\uf900-\ufaff' // CJK Compatibility Ideographs
+].join('')
-/** Send subcommand options */
-interface SendOptions {
- title: string
- description: string
- color?: string
+function hasCJK(text: string): boolean {
+ return new RegExp(`[${CJK_RANGES}]`).test(text)
}
-/**
- * Generate Feishu webhook signature using HMAC-SHA256
- * @param secret - Feishu webhook secret
- * @param timestamp - Unix timestamp in seconds
- * @returns Base64 encoded signature
- */
-function generateSignature(secret: string, timestamp: number): string {
- const stringToSign = `${timestamp}\n${secret}`
- const hmac = crypto.createHmac('sha256', stringToSign)
- return hmac.digest('base64')
+function hasEnglishUIText(text: string): boolean {
+ const words = text.trim().split(/\s+/)
+ if (words.length < 2 || words.length > 6) return false
+ return /^[A-Z][a-z]+(\s+[A-Za-z]+){1,5}$/.test(text.trim())
}
-/**
- * Send message to Feishu webhook
- * @param webhookUrl - Feishu webhook URL
- * @param secret - Feishu webhook secret
- * @param content - Feishu card message content
- * @returns Resolves when message is sent successfully
- * @throws When Feishu API returns non-2xx status code or network error occurs
- */
-function sendToFeishu(webhookUrl: string, secret: string, content: FeishuCard): Promise {
- return new Promise((resolve, reject) => {
- const timestamp = Math.floor(Date.now() / 1000)
- const sign = generateSignature(secret, timestamp)
```
This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/check-hardcoded-strings.ts`
+### `scripts/cloudflare-worker.js`
-The `HardcodedStringDetector` class in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
+The `addLog` function in [`scripts/cloudflare-worker.js`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/cloudflare-worker.js) handles a key part of this chapter's functionality:
-```ts
-}
+```js
+ * 添加日志记录函数
+ */
+async function addLog(env, type, event, details = null) {
+ try {
+ const logFile = await env.R2_BUCKET.get(config.LOG_FILE)
+ let logs = { logs: [] }
-class HardcodedStringDetector {
- private project: Project
+ if (logFile) {
+ logs = JSON.parse(await logFile.text())
+ }
- constructor() {
- this.project = new Project({
- skipAddingFilesFromTsConfig: true,
- skipFileDependencyResolution: true
+ logs.logs.unshift({
+ timestamp: new Date().toISOString(),
+ type,
+ event,
+ details
})
- }
- scanFile(filePath: string, source: 'renderer' | 'main'): Finding[] {
- const findings: Finding[] = []
-
- try {
- const sourceFile = this.project.addSourceFileAtPath(filePath)
- sourceFile.forEachDescendant((node) => {
- this.checkNode(node, sourceFile, source, findings)
- })
- this.project.removeSourceFile(sourceFile)
- } catch (error) {
- console.error(`Error parsing ${filePath}:`, error)
+ // 保持日志数量在限制内
+ if (logs.logs.length > config.MAX_LOGS) {
+ logs.logs = logs.logs.slice(0, config.MAX_LOGS)
}
- return findings
+ await env.R2_BUCKET.put(config.LOG_FILE, JSON.stringify(logs, null, 2))
+ } catch (error) {
+ console.error('写入日志失败:', error)
}
+}
- private checkNode(node: Node, sourceFile: SourceFile, source: 'renderer' | 'main', findings: Finding[]): void {
- if (shouldSkipNode(node)) return
-
- if (Node.isJsxText(node)) {
+/**
+ * 获取最新版本信息
+ */
```
-This class is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-
-### `scripts/check-hardcoded-strings.ts`
-
-The `hasCJK` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
-
-```ts
-].join('')
-
-function hasCJK(text: string): boolean {
- return new RegExp(`[${CJK_RANGES}]`).test(text)
-}
-
-function hasEnglishUIText(text: string): boolean {
- const words = text.trim().split(/\s+/)
- if (words.length < 2 || words.length > 6) return false
- return /^[A-Z][a-z]+(\s+[A-Za-z]+){1,5}$/.test(text.trim())
-}
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-function createFinding(
- node: Node,
- sourceFile: SourceFile,
- type: 'chinese' | 'english',
- source: 'renderer' | 'main',
- nodeType: string
-): Finding {
- return {
- file: sourceFile.getFilePath(),
- line: sourceFile.getLineAndColumnAtPos(node.getStart()).line,
- content: node.getText().slice(0, 100),
- type,
- source,
- nodeType
- }
-}
+### `scripts/cloudflare-worker.js`
-function shouldSkipNode(node: Node): boolean {
- let current: Node | undefined = node
+The `getLatestRelease` function in [`scripts/cloudflare-worker.js`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/cloudflare-worker.js) handles a key part of this chapter's functionality:
+```js
+ * 获取最新版本信息
+ */
+async function getLatestRelease(env) {
+ try {
+ const cached = await env.R2_BUCKET.get(config.CACHE_KEY)
+ if (!cached) {
+ // 如果缓存不存在,先检查版本数据库
+ const versionDB = await env.R2_BUCKET.get(config.VERSION_DB)
+ if (versionDB) {
+ const versions = JSON.parse(await versionDB.text())
+ if (versions.latestVersion) {
+ // 从版本数据库重建缓存
+ const latestVersion = versions.versions[versions.latestVersion]
+ const cacheData = {
+ version: latestVersion.version,
+ publishedAt: latestVersion.publishedAt,
+ changelog: latestVersion.changelog,
+ downloads: latestVersion.files
+ .filter((file) => file.uploaded)
+ .map((file) => ({
+ name: file.name,
+ url: `https://${config.R2_CUSTOM_DOMAIN}/${file.name}`,
+ size: formatFileSize(file.size)
+ }))
+ }
+ // 更新缓存
+ await env.R2_BUCKET.put(config.CACHE_KEY, JSON.stringify(cacheData))
+ return new Response(JSON.stringify(cacheData), {
+ headers: {
+ 'Content-Type': 'application/json',
+ 'Access-Control-Allow-Origin': '*'
+ }
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
@@ -214,11 +212,11 @@ This function is important because it defines how Cherry Studio Tutorial: Multi-
```mermaid
flowchart TD
- A[IssueOptions]
- B[SendOptions]
- C[HardcodedStringDetector]
- D[hasCJK]
- E[hasEnglishUIText]
+ A[main]
+ B[Finding]
+ C[addLog]
+ D[getLatestRelease]
+ E[handleDownload]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/03-provider-configuration-and-routing.md b/tutorials/cherry-studio-tutorial/03-provider-configuration-and-routing.md
index a041cecb..97827774 100644
--- a/tutorials/cherry-studio-tutorial/03-provider-configuration-and-routing.md
+++ b/tutorials/cherry-studio-tutorial/03-provider-configuration-and-routing.md
@@ -45,184 +45,182 @@ You now can configure provider routing in Cherry Studio with better reliability
Next: [Chapter 4: Assistants, Topics, and Workflow Design](04-assistants-topics-and-workflow-design.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/check-hardcoded-strings.ts`
+### `scripts/update-app-upgrade-config.ts`
-The `formatFindings` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
+The `main` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-function formatFindings(findings: Finding[]): string {
- if (findings.length === 0) {
- return '✅ No hardcoded strings found!'
+const DEFAULT_SEGMENTS_PATH = path.join(ROOT_DIR, 'config/app-upgrade-segments.json')
+
+async function main() {
+ const options = parseArgs()
+ const releaseTag = resolveTag(options)
+ const normalizedVersion = normalizeVersion(releaseTag)
+ const releaseChannel = detectChannel(normalizedVersion)
+ if (!releaseChannel) {
+ console.warn(`[update-app-upgrade-config] Tag ${normalizedVersion} does not map to beta/rc/latest. Skipping.`)
+ return
}
- const rendererFindings = findings.filter((f) => f.source === 'renderer')
- const mainFindings = findings.filter((f) => f.source === 'main')
- const chineseFindings = findings.filter((f) => f.type === 'chinese')
- const englishFindings = findings.filter((f) => f.type === 'english')
-
- let output = ''
+ // Validate version format matches prerelease status
+ if (options.isPrerelease !== undefined) {
+ const hasPrereleaseSuffix = releaseChannel === 'beta' || releaseChannel === 'rc'
- if (rendererFindings.length > 0) {
- output += '\n📦 Renderer Process:\n'
- output += '-'.repeat(50) + '\n'
-
- const rendererChinese = rendererFindings.filter((f) => f.type === 'chinese')
- const rendererEnglish = rendererFindings.filter((f) => f.type === 'english')
+ if (options.isPrerelease && !hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as prerelease but version ${normalizedVersion} has no beta/rc suffix. Skipping.`
+ )
+ return
+ }
- if (rendererChinese.length > 0) {
- output += '\n⚠️ Hardcoded Chinese strings:\n'
- rendererChinese.forEach((f) => {
- const relativePath = path.relative(RENDERER_DIR, f.file)
- output += `\n📍 ${relativePath}:${f.line} [${f.nodeType}]\n`
- output += ` ${f.content}\n`
- })
+ if (!options.isPrerelease && hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as latest but version ${normalizedVersion} has prerelease suffix (${releaseChannel}). Skipping.`
+ )
+ return
}
+ }
- if (rendererEnglish.length > 0) {
- output += '\n⚠️ Potential hardcoded English strings:\n'
+ const [config, segmentFile] = await Promise.all([
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/check-hardcoded-strings.ts`
+### `scripts/update-app-upgrade-config.ts`
-The `main` function in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
+The `parseArgs` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
-const RENDERER_DIR = path.join(__dirname, '../src/renderer/src')
-const MAIN_DIR = path.join(__dirname, '../src/main')
-const EXTENSIONS = ['.tsx', '.ts']
-const IGNORED_DIRS = ['__tests__', 'node_modules', 'i18n', 'locales', 'types', 'assets']
-const IGNORED_FILES = ['*.test.ts', '*.test.tsx', '*.d.ts', '*prompts*.ts']
-
-// 'content' is handled specially - only checked for specific components
-const UI_ATTRIBUTES = [
- 'placeholder',
- 'title',
- 'label',
- 'message',
- 'description',
- 'tooltip',
- 'buttonLabel',
- 'name',
- 'detail',
- 'body'
-]
-
-const CONTEXT_SENSITIVE_ATTRIBUTES: Record = {
- content: ['Tooltip', 'Popover', 'Modal', 'Popconfirm', 'Alert', 'Notification', 'Message']
-}
+async function main() {
+ const options = parseArgs()
+ const releaseTag = resolveTag(options)
+ const normalizedVersion = normalizeVersion(releaseTag)
+ const releaseChannel = detectChannel(normalizedVersion)
+ if (!releaseChannel) {
+ console.warn(`[update-app-upgrade-config] Tag ${normalizedVersion} does not map to beta/rc/latest. Skipping.`)
+ return
+ }
+
+ // Validate version format matches prerelease status
+ if (options.isPrerelease !== undefined) {
+ const hasPrereleaseSuffix = releaseChannel === 'beta' || releaseChannel === 'rc'
-const UI_PROPERTIES = ['message', 'text', 'title', 'label', 'placeholder', 'description', 'detail']
+ if (options.isPrerelease && !hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as prerelease but version ${normalizedVersion} has no beta/rc suffix. Skipping.`
+ )
+ return
+ }
+
+ if (!options.isPrerelease && hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as latest but version ${normalizedVersion} has prerelease suffix (${releaseChannel}). Skipping.`
+ )
+ return
+ }
+ }
-interface Finding {
- file: string
- line: number
- content: string
- type: 'chinese' | 'english'
+ const [config, segmentFile] = await Promise.all([
+ readJson(options.configPath ?? DEFAULT_CONFIG_PATH),
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/check-hardcoded-strings.ts`
+### `scripts/update-app-upgrade-config.ts`
-The `Finding` interface in [`scripts/check-hardcoded-strings.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-hardcoded-strings.ts) handles a key part of this chapter's functionality:
+The `printHelp` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
-const UI_PROPERTIES = ['message', 'text', 'title', 'label', 'placeholder', 'description', 'detail']
-
-interface Finding {
- file: string
- line: number
- content: string
- type: 'chinese' | 'english'
- source: 'renderer' | 'main'
- nodeType: string
-}
+ i += 1
+ } else if (arg === '--help') {
+ printHelp()
+ process.exit(0)
+ } else {
+ console.warn(`Ignoring unknown argument "${arg}"`)
+ }
+ }
-const CJK_RANGES = [
- '\u3000-\u303f', // CJK Symbols and Punctuation
- '\u3040-\u309f', // Hiragana
- '\u30a0-\u30ff', // Katakana
- '\u3100-\u312f', // Bopomofo
- '\u3400-\u4dbf', // CJK Unified Ideographs Extension A
- '\u4e00-\u9fff', // CJK Unified Ideographs
- '\uac00-\ud7af', // Hangul Syllables
- '\uf900-\ufaff' // CJK Compatibility Ideographs
-].join('')
-
-function hasCJK(text: string): boolean {
- return new RegExp(`[${CJK_RANGES}]`).test(text)
+ if (options.skipReleaseChecks && !options.dryRun) {
+ throw new Error('--skip-release-checks can only be used together with --dry-run')
+ }
+
+ return options
}
-function hasEnglishUIText(text: string): boolean {
- const words = text.trim().split(/\s+/)
- if (words.length < 2 || words.length > 6) return false
- return /^[A-Z][a-z]+(\s+[A-Za-z]+){1,5}$/.test(text.trim())
+function printHelp() {
+ console.log(`Usage: tsx scripts/update-app-upgrade-config.ts [options]
+
+Options:
+ --tag Release tag (e.g. v2.1.6). Falls back to GITHUB_REF_NAME/RELEASE_TAG.
+ --config Path to app-upgrade-config.json.
+ --segments Path to app-upgrade-segments.json.
+ --is-prerelease Whether this is a prerelease (validates version format).
+ --dry-run Print the result without writing to disk.
+ --skip-release-checks Skip release page availability checks (only valid with --dry-run).
+ --help Show this help message.`)
}
+function resolveTag(options: CliOptions): string {
+ const envTag = process.env.RELEASE_TAG ?? process.env.GITHUB_REF_NAME ?? process.env.TAG_NAME
+ const tag = options.tag ?? envTag
```
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/auto-translate-i18n.ts`
+### `scripts/update-app-upgrade-config.ts`
-The `ConcurrencyController` class in [`scripts/auto-translate-i18n.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/auto-translate-i18n.ts) handles a key part of this chapter's functionality:
+The `resolveTag` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
+async function main() {
+ const options = parseArgs()
+ const releaseTag = resolveTag(options)
+ const normalizedVersion = normalizeVersion(releaseTag)
+ const releaseChannel = detectChannel(normalizedVersion)
+ if (!releaseChannel) {
+ console.warn(`[update-app-upgrade-config] Tag ${normalizedVersion} does not map to beta/rc/latest. Skipping.`)
+ return
+ }
+
+ // Validate version format matches prerelease status
+ if (options.isPrerelease !== undefined) {
+ const hasPrereleaseSuffix = releaseChannel === 'beta' || releaseChannel === 'rc'
+
+ if (options.isPrerelease && !hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as prerelease but version ${normalizedVersion} has no beta/rc suffix. Skipping.`
+ )
+ return
+ }
-// Concurrency Control with ES6+ features
-class ConcurrencyController {
- private running = 0
- private queue: Array<() => Promise> = []
-
- constructor(private maxConcurrent: number) {}
-
- async add(task: () => Promise): Promise {
- return new Promise((resolve, reject) => {
- const execute = async () => {
- this.running++
- try {
- const result = await task()
- resolve(result)
- } catch (error) {
- reject(error)
- } finally {
- this.running--
- this.processQueue()
- }
- }
-
- if (this.running < this.maxConcurrent) {
- execute()
- } else {
- this.queue.push(execute)
- }
- })
+ if (!options.isPrerelease && hasPrereleaseSuffix) {
+ console.warn(
+ `[update-app-upgrade-config] ⚠️ Release marked as latest but version ${normalizedVersion} has prerelease suffix (${releaseChannel}). Skipping.`
+ )
+ return
+ }
}
- private processQueue() {
+ const [config, segmentFile] = await Promise.all([
+ readJson(options.configPath ?? DEFAULT_CONFIG_PATH),
+ readJson(options.segmentsPath ?? DEFAULT_SEGMENTS_PATH)
```
-This class is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[formatFindings]
- B[main]
- C[Finding]
- D[ConcurrencyController]
- E[addLog]
+ A[main]
+ B[parseArgs]
+ C[printHelp]
+ D[resolveTag]
+ E[normalizeVersion]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/04-assistants-topics-and-workflow-design.md b/tutorials/cherry-studio-tutorial/04-assistants-topics-and-workflow-design.md
index a9bd21e0..5d9ab2b2 100644
--- a/tutorials/cherry-studio-tutorial/04-assistants-topics-and-workflow-design.md
+++ b/tutorials/cherry-studio-tutorial/04-assistants-topics-and-workflow-design.md
@@ -38,170 +38,168 @@ You now have a practical structure for assistant- and topic-driven workflows in
Next: [Chapter 5: Documents, MCP, and Tool Integrations](05-documents-mcp-and-tool-integrations.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/cloudflare-worker.js`
-
-The `listAllFiles` function in [`scripts/cloudflare-worker.js`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/cloudflare-worker.js) handles a key part of this chapter's functionality:
-
-```js
-
- // 先获取 R2 桶中的所有文件列表
- const allFiles = await listAllFiles(env)
-
- // 获取需要保留的文件名列表
- const keepFiles = new Set()
- for (const keepVersion of keepVersions) {
- const versionFiles = versions.versions[keepVersion].files
- versionFiles.forEach((file) => keepFiles.add(file.name))
- }
-
- // 删除所有旧版本文件
- for (const oldVersion of oldVersions) {
- const oldFiles = versions.versions[oldVersion].files
- for (const file of oldFiles) {
- try {
- if (file.uploaded) {
- await env.R2_BUCKET.delete(file.name)
- await addLog(env, 'INFO', `删除旧文件: ${file.name}`)
- }
- } catch (error) {
- await addLog(env, 'ERROR', `删除旧文件失败: ${file.name}`, error.message)
- }
- }
- delete versions.versions[oldVersion]
- }
-
- // 清理可能遗留的旧文件
- for (const file of allFiles) {
- if (!keepFiles.has(file.name)) {
- try {
- await env.R2_BUCKET.delete(file.name)
+### `scripts/update-app-upgrade-config.ts`
+
+The `getBaseVersion` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+
+```ts
+ }
+
+ const baseVersion = getBaseVersion(releaseInfo.version)
+ return baseVersion ?? releaseInfo.version
+}
+
+function getBaseVersion(version: string): string | null {
+ const parsed = semver.parse(version, { loose: true })
+ if (!parsed) {
+ return null
+ }
+ return `${parsed.major}.${parsed.minor}.${parsed.patch}`
+}
+
+function createEmptyVersionEntry(): VersionEntry {
+ return {
+ minCompatibleVersion: '',
+ description: '',
+ channels: {
+ latest: null,
+ rc: null,
+ beta: null
+ }
+ }
+}
+
+function ensureChannelSlots(
+ channels: Record
+): Record {
+ return CHANNELS.reduce(
+ (acc, channel) => {
+ acc[channel] = channels[channel] ?? null
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `main` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `createEmptyVersionEntry` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
-const DEFAULT_SEGMENTS_PATH = path.join(ROOT_DIR, 'config/app-upgrade-segments.json')
-
-async function main() {
- const options = parseArgs()
- const releaseTag = resolveTag(options)
- const normalizedVersion = normalizeVersion(releaseTag)
- const releaseChannel = detectChannel(normalizedVersion)
- if (!releaseChannel) {
- console.warn(`[update-app-upgrade-config] Tag ${normalizedVersion} does not map to beta/rc/latest. Skipping.`)
- return
- }
-
- // Validate version format matches prerelease status
- if (options.isPrerelease !== undefined) {
- const hasPrereleaseSuffix = releaseChannel === 'beta' || releaseChannel === 'rc'
-
- if (options.isPrerelease && !hasPrereleaseSuffix) {
- console.warn(
- `[update-app-upgrade-config] ⚠️ Release marked as prerelease but version ${normalizedVersion} has no beta/rc suffix. Skipping.`
- )
- return
- }
+ entry = { ...versionsCopy[existingKey], channels: { ...versionsCopy[existingKey].channels } }
+ } else {
+ entry = createEmptyVersionEntry()
+ }
- if (!options.isPrerelease && hasPrereleaseSuffix) {
- console.warn(
- `[update-app-upgrade-config] ⚠️ Release marked as latest but version ${normalizedVersion} has prerelease suffix (${releaseChannel}). Skipping.`
- )
- return
- }
+ entry.channels = ensureChannelSlots(entry.channels)
+
+ const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
+ if (!channelUpdated) {
+ return { versions, updated: false }
}
- const [config, segmentFile] = await Promise.all([
+ if (shouldRename && existingKey) {
+ delete versionsCopy[existingKey]
+ }
+
+ entry.metadata = {
+ segmentId: segment.id,
+ segmentType: segment.type
+ }
+ entry.minCompatibleVersion = segment.minCompatibleVersion
+ entry.description = segment.description
+
+ versionsCopy[targetKey] = entry
+ return {
+ versions: sortVersionMap(versionsCopy),
+ updated: true
+ }
+}
+
+function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
+ for (const [key, value] of Object.entries(versions)) {
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `parseArgs` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `ensureChannelSlots` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
+ }
-async function main() {
- const options = parseArgs()
- const releaseTag = resolveTag(options)
- const normalizedVersion = normalizeVersion(releaseTag)
- const releaseChannel = detectChannel(normalizedVersion)
- if (!releaseChannel) {
- console.warn(`[update-app-upgrade-config] Tag ${normalizedVersion} does not map to beta/rc/latest. Skipping.`)
- return
- }
-
- // Validate version format matches prerelease status
- if (options.isPrerelease !== undefined) {
- const hasPrereleaseSuffix = releaseChannel === 'beta' || releaseChannel === 'rc'
-
- if (options.isPrerelease && !hasPrereleaseSuffix) {
- console.warn(
- `[update-app-upgrade-config] ⚠️ Release marked as prerelease but version ${normalizedVersion} has no beta/rc suffix. Skipping.`
- )
- return
- }
+ entry.channels = ensureChannelSlots(entry.channels)
- if (!options.isPrerelease && hasPrereleaseSuffix) {
- console.warn(
- `[update-app-upgrade-config] ⚠️ Release marked as latest but version ${normalizedVersion} has prerelease suffix (${releaseChannel}). Skipping.`
- )
- return
- }
+ const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
+ if (!channelUpdated) {
+ return { versions, updated: false }
+ }
+
+ if (shouldRename && existingKey) {
+ delete versionsCopy[existingKey]
+ }
+
+ entry.metadata = {
+ segmentId: segment.id,
+ segmentType: segment.type
}
+ entry.minCompatibleVersion = segment.minCompatibleVersion
+ entry.description = segment.description
- const [config, segmentFile] = await Promise.all([
- readJson(options.configPath ?? DEFAULT_CONFIG_PATH),
+ versionsCopy[targetKey] = entry
+ return {
+ versions: sortVersionMap(versionsCopy),
+ updated: true
+ }
+}
+
+function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
+ for (const [key, value] of Object.entries(versions)) {
+ if (value.metadata?.segmentId === segmentId) {
+ return key
+ }
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `printHelp` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `applyChannelUpdate` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
- i += 1
- } else if (arg === '--help') {
- printHelp()
- process.exit(0)
- } else {
- console.warn(`Ignoring unknown argument "${arg}"`)
- }
+ entry.channels = ensureChannelSlots(entry.channels)
+
+ const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
+ if (!channelUpdated) {
+ return { versions, updated: false }
}
- if (options.skipReleaseChecks && !options.dryRun) {
- throw new Error('--skip-release-checks can only be used together with --dry-run')
+ if (shouldRename && existingKey) {
+ delete versionsCopy[existingKey]
}
- return options
-}
+ entry.metadata = {
+ segmentId: segment.id,
+ segmentType: segment.type
+ }
+ entry.minCompatibleVersion = segment.minCompatibleVersion
+ entry.description = segment.description
-function printHelp() {
- console.log(`Usage: tsx scripts/update-app-upgrade-config.ts [options]
-
-Options:
- --tag Release tag (e.g. v2.1.6). Falls back to GITHUB_REF_NAME/RELEASE_TAG.
- --config Path to app-upgrade-config.json.
- --segments Path to app-upgrade-segments.json.
- --is-prerelease Whether this is a prerelease (validates version format).
- --dry-run Print the result without writing to disk.
- --skip-release-checks Skip release page availability checks (only valid with --dry-run).
- --help Show this help message.`)
+ versionsCopy[targetKey] = entry
+ return {
+ versions: sortVersionMap(versionsCopy),
+ updated: true
+ }
}
-function resolveTag(options: CliOptions): string {
- const envTag = process.env.RELEASE_TAG ?? process.env.GITHUB_REF_NAME ?? process.env.TAG_NAME
- const tag = options.tag ?? envTag
+function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
+ for (const [key, value] of Object.entries(versions)) {
+ if (value.metadata?.segmentId === segmentId) {
+ return key
+ }
+ }
+ return null
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Cherry Studio Tutorial: Multi-
```mermaid
flowchart TD
- A[listAllFiles]
- B[main]
- C[parseArgs]
- D[printHelp]
- E[resolveTag]
+ A[getBaseVersion]
+ B[createEmptyVersionEntry]
+ C[ensureChannelSlots]
+ D[applyChannelUpdate]
+ E[buildFeedUrls]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/05-documents-mcp-and-tool-integrations.md b/tutorials/cherry-studio-tutorial/05-documents-mcp-and-tool-integrations.md
index 6d575e74..368b38aa 100644
--- a/tutorials/cherry-studio-tutorial/05-documents-mcp-and-tool-integrations.md
+++ b/tutorials/cherry-studio-tutorial/05-documents-mcp-and-tool-integrations.md
@@ -39,184 +39,182 @@ You now know how to combine documents and MCP tooling in Cherry Studio workflows
Next: [Chapter 6: Team Adoption and Enterprise Capabilities](06-team-adoption-and-enterprise-capabilities.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/update-app-upgrade-config.ts`
-The `getBaseVersion` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `SegmentMatchRule` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
- }
+}
- const baseVersion = getBaseVersion(releaseInfo.version)
- return baseVersion ?? releaseInfo.version
+interface SegmentMatchRule {
+ range?: string
+ exact?: string[]
+ excludeExact?: string[]
}
-function getBaseVersion(version: string): string | null {
- const parsed = semver.parse(version, { loose: true })
- if (!parsed) {
- return null
- }
- return `${parsed.major}.${parsed.minor}.${parsed.patch}`
+interface SegmentDefinition {
+ id: string
+ type: 'legacy' | 'breaking' | 'latest'
+ match: SegmentMatchRule
+ lockedVersion?: string
+ minCompatibleVersion: string
+ description: string
+  channelTemplates?: Partial<Record<UpgradeChannel, string>>
}
-function createEmptyVersionEntry(): VersionEntry {
- return {
- minCompatibleVersion: '',
- description: '',
- channels: {
- latest: null,
- rc: null,
- beta: null
- }
- }
+interface SegmentMetadataFile {
+ segments: SegmentDefinition[]
+}
+
+interface ChannelConfig {
+ version: string
+  feedUrls: Record<string, string>
+}
+
+interface VersionMetadata {
+ segmentId: string
+ segmentType?: string
}
-function ensureChannelSlots(
- channels: Record
-): Record {
- return CHANNELS.reduce(
- (acc, channel) => {
- acc[channel] = channels[channel] ?? null
```
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `createEmptyVersionEntry` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `SegmentDefinition` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
- entry = { ...versionsCopy[existingKey], channels: { ...versionsCopy[existingKey].channels } }
- } else {
- entry = createEmptyVersionEntry()
- }
-
- entry.channels = ensureChannelSlots(entry.channels)
-
- const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
- if (!channelUpdated) {
- return { versions, updated: false }
- }
-
- if (shouldRename && existingKey) {
- delete versionsCopy[existingKey]
- }
-
- entry.metadata = {
- segmentId: segment.id,
- segmentType: segment.type
- }
- entry.minCompatibleVersion = segment.minCompatibleVersion
- entry.description = segment.description
-
- versionsCopy[targetKey] = entry
- return {
- versions: sortVersionMap(versionsCopy),
- updated: true
- }
-}
-
-function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
- for (const [key, value] of Object.entries(versions)) {
+}
+
+interface SegmentDefinition {
+ id: string
+ type: 'legacy' | 'breaking' | 'latest'
+ match: SegmentMatchRule
+ lockedVersion?: string
+ minCompatibleVersion: string
+ description: string
+  channelTemplates?: Partial<Record<UpgradeChannel, string>>
+}
+
+interface SegmentMetadataFile {
+ segments: SegmentDefinition[]
+}
+
+interface ChannelConfig {
+ version: string
+  feedUrls: Record<string, string>
+}
+
+interface VersionMetadata {
+ segmentId: string
+ segmentType?: string
+}
+
+interface VersionEntry {
+ metadata?: VersionMetadata
+ minCompatibleVersion: string
+ description: string
+  channels: Record<UpgradeChannel, ChannelConfig | null>
+}
```
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `ensureChannelSlots` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `SegmentMetadataFile` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
- }
-
- entry.channels = ensureChannelSlots(entry.channels)
-
- const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
- if (!channelUpdated) {
- return { versions, updated: false }
- }
-
- if (shouldRename && existingKey) {
- delete versionsCopy[existingKey]
- }
-
- entry.metadata = {
- segmentId: segment.id,
- segmentType: segment.type
- }
- entry.minCompatibleVersion = segment.minCompatibleVersion
- entry.description = segment.description
-
- versionsCopy[targetKey] = entry
- return {
- versions: sortVersionMap(versionsCopy),
- updated: true
- }
-}
-
-function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
- for (const [key, value] of Object.entries(versions)) {
- if (value.metadata?.segmentId === segmentId) {
- return key
- }
+}
+
+interface SegmentMetadataFile {
+ segments: SegmentDefinition[]
+}
+
+interface ChannelConfig {
+ version: string
+  feedUrls: Record<string, string>
+}
+
+interface VersionMetadata {
+ segmentId: string
+ segmentType?: string
+}
+
+interface VersionEntry {
+ metadata?: VersionMetadata
+ minCompatibleVersion: string
+ description: string
+  channels: Record<UpgradeChannel, ChannelConfig | null>
+}
+
+interface UpgradeConfigFile {
+ lastUpdated: string
+  versions: Record<string, VersionEntry>
+}
+
+interface ReleaseInfo {
+ tag: string
+ version: string
+ channel: UpgradeChannel
```
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/update-app-upgrade-config.ts`
-The `applyChannelUpdate` function in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `ChannelConfig` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
```ts
- entry.channels = ensureChannelSlots(entry.channels)
-
- const channelUpdated = await applyChannelUpdate(entry, segment, releaseInfo, skipReleaseValidation)
- if (!channelUpdated) {
- return { versions, updated: false }
- }
-
- if (shouldRename && existingKey) {
- delete versionsCopy[existingKey]
- }
-
- entry.metadata = {
- segmentId: segment.id,
- segmentType: segment.type
- }
- entry.minCompatibleVersion = segment.minCompatibleVersion
- entry.description = segment.description
-
- versionsCopy[targetKey] = entry
- return {
- versions: sortVersionMap(versionsCopy),
- updated: true
- }
-}
-
-function findVersionKeyBySegment(versions: Record, segmentId: string): string | null {
- for (const [key, value] of Object.entries(versions)) {
- if (value.metadata?.segmentId === segmentId) {
- return key
- }
- }
- return null
+}
+
+interface ChannelConfig {
+ version: string
+  feedUrls: Record<string, string>
+}
+
+interface VersionMetadata {
+ segmentId: string
+ segmentType?: string
+}
+
+interface VersionEntry {
+ metadata?: VersionMetadata
+ minCompatibleVersion: string
+ description: string
+  channels: Record<UpgradeChannel, ChannelConfig | null>
+}
+
+interface UpgradeConfigFile {
+ lastUpdated: string
+  versions: Record<string, VersionEntry>
+}
+
+interface ReleaseInfo {
+ tag: string
+ version: string
+ channel: UpgradeChannel
+}
+
+interface UpdateVersionsResult {
+  versions: Record<string, VersionEntry>
```
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[getBaseVersion]
- B[createEmptyVersionEntry]
- C[ensureChannelSlots]
- D[applyChannelUpdate]
- E[buildFeedUrls]
+ A[SegmentMatchRule]
+ B[SegmentDefinition]
+ C[SegmentMetadataFile]
+ D[ChannelConfig]
+ E[VersionMetadata]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/06-team-adoption-and-enterprise-capabilities.md b/tutorials/cherry-studio-tutorial/06-team-adoption-and-enterprise-capabilities.md
index 6748038b..398b77c8 100644
--- a/tutorials/cherry-studio-tutorial/06-team-adoption-and-enterprise-capabilities.md
+++ b/tutorials/cherry-studio-tutorial/06-team-adoption-and-enterprise-capabilities.md
@@ -40,184 +40,182 @@ You now have a rollout model for scaling Cherry Studio from individual use to te
Next: [Chapter 7: Development and Contribution Workflow](07-development-and-contribution-workflow.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/update-app-upgrade-config.ts`
+### `scripts/feishu-notify.ts`
-The `SegmentDefinition` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `createIssueCard` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-interface SegmentDefinition {
- id: string
- type: 'legacy' | 'breaking' | 'latest'
- match: SegmentMatchRule
- lockedVersion?: string
- minCompatibleVersion: string
- description: string
- channelTemplates?: Partial>
-}
-
-interface SegmentMetadataFile {
- segments: SegmentDefinition[]
-}
-
-interface ChannelConfig {
- version: string
- feedUrls: Record
-}
-
-interface VersionMetadata {
- segmentId: string
- segmentType?: string
-}
-
-interface VersionEntry {
- metadata?: VersionMetadata
- minCompatibleVersion: string
- description: string
- channels: Record
-}
+ * @returns Feishu card content
+ */
+function createIssueCard(issueData: IssueData): FeishuCard {
+ const { issueUrl, issueNumber, issueTitle, issueSummary, issueAuthor, labels } = issueData
+
+ const elements: FeishuCardElement[] = [
+ {
+ tag: 'div',
+ text: {
+ tag: 'lark_md',
+ content: `**Author:** ${issueAuthor}`
+ }
+ }
+ ]
+
+ if (labels.length > 0) {
+ elements.push({
+ tag: 'div',
+ text: {
+ tag: 'lark_md',
+ content: `**Labels:** ${labels.join(', ')}`
+ }
+ })
+ }
+
+ elements.push(
+ { tag: 'hr' },
+ {
+ tag: 'div',
+ text: {
+ tag: 'lark_md',
+ content: `**Summary:**\n${issueSummary}`
```
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/update-app-upgrade-config.ts`
+### `scripts/feishu-notify.ts`
-The `SegmentMetadataFile` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `createSimpleCard` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-interface SegmentMetadataFile {
- segments: SegmentDefinition[]
-}
-
-interface ChannelConfig {
- version: string
- feedUrls: Record
-}
-
-interface VersionMetadata {
- segmentId: string
- segmentType?: string
-}
-
-interface VersionEntry {
- metadata?: VersionMetadata
- minCompatibleVersion: string
- description: string
- channels: Record
-}
-
-interface UpgradeConfigFile {
- lastUpdated: string
- versions: Record
-}
-
-interface ReleaseInfo {
- tag: string
- version: string
- channel: UpgradeChannel
+ * @returns Feishu card content
+ */
+function createSimpleCard(title: string, description: string, color: FeishuHeaderTemplate = 'turquoise'): FeishuCard {
+ return {
+ elements: [
+ {
+ tag: 'div',
+ text: {
+ tag: 'lark_md',
+ content: description
+ }
+ }
+ ],
+ header: {
+ template: color,
+ title: {
+ tag: 'plain_text',
+ content: title
+ }
+ }
+ }
+}
+
+/**
+ * Get Feishu credentials from environment variables
+ */
+function getCredentials(): { webhookUrl: string; secret: string } {
+ const webhookUrl = process.env.FEISHU_WEBHOOK_URL
+ const secret = process.env.FEISHU_WEBHOOK_SECRET
+
+ if (!webhookUrl) {
+ console.error('Error: FEISHU_WEBHOOK_URL environment variable is required')
```
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/update-app-upgrade-config.ts`
+### `scripts/feishu-notify.ts`
-The `ChannelConfig` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+The `getCredentials` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-interface ChannelConfig {
- version: string
- feedUrls: Record
-}
+ * Get Feishu credentials from environment variables
+ */
+function getCredentials(): { webhookUrl: string; secret: string } {
+ const webhookUrl = process.env.FEISHU_WEBHOOK_URL
+ const secret = process.env.FEISHU_WEBHOOK_SECRET
+
+ if (!webhookUrl) {
+ console.error('Error: FEISHU_WEBHOOK_URL environment variable is required')
+ process.exit(1)
+ }
+ if (!secret) {
+ console.error('Error: FEISHU_WEBHOOK_SECRET environment variable is required')
+ process.exit(1)
+ }
+
+ return { webhookUrl, secret }
+}
+
+/**
+ * Handle send subcommand
+ */
+async function handleSendCommand(options: SendOptions): Promise<void> {
+ const { webhookUrl, secret } = getCredentials()
+
+ const { title, description, color = 'turquoise' } = options
+
+ // Validate color parameter
+ const colorValidation = FeishuHeaderTemplateSchema.safeParse(color)
+ if (!colorValidation.success) {
+ console.error(`Error: Invalid color "${color}". Valid colors: ${FeishuHeaderTemplateSchema.options.join(', ')}`)
+ process.exit(1)
+ }
+```
-interface VersionMetadata {
- segmentId: string
- segmentType?: string
-}
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-interface VersionEntry {
- metadata?: VersionMetadata
- minCompatibleVersion: string
- description: string
- channels: Record
-}
+### `scripts/feishu-notify.ts`
-interface UpgradeConfigFile {
- lastUpdated: string
- versions: Record
-}
+The `handleSendCommand` function in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
-interface ReleaseInfo {
- tag: string
- version: string
- channel: UpgradeChannel
-}
-
-interface UpdateVersionsResult {
- versions: Record
-```
-
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+```ts
+ * Handle send subcommand
+ */
+async function handleSendCommand(options: SendOptions): Promise<void> {
+ const { webhookUrl, secret } = getCredentials()
-### `scripts/update-app-upgrade-config.ts`
+ const { title, description, color = 'turquoise' } = options
-The `VersionMetadata` interface in [`scripts/update-app-upgrade-config.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-app-upgrade-config.ts) handles a key part of this chapter's functionality:
+ // Validate color parameter
+ const colorValidation = FeishuHeaderTemplateSchema.safeParse(color)
+ if (!colorValidation.success) {
+ console.error(`Error: Invalid color "${color}". Valid colors: ${FeishuHeaderTemplateSchema.options.join(', ')}`)
+ process.exit(1)
+ }
-```ts
-}
+ const card = createSimpleCard(title, description, colorValidation.data)
-interface VersionMetadata {
- segmentId: string
- segmentType?: string
-}
+ console.log('Sending notification to Feishu...')
+ console.log(`Title: ${title}`)
-interface VersionEntry {
- metadata?: VersionMetadata
- minCompatibleVersion: string
- description: string
- channels: Record
-}
+ await sendToFeishu(webhookUrl, secret, card)
-interface UpgradeConfigFile {
- lastUpdated: string
- versions: Record
+ console.log('Notification sent successfully!')
}
-interface ReleaseInfo {
- tag: string
- version: string
- channel: UpgradeChannel
-}
+/**
+ * Handle issue subcommand
+ */
+async function handleIssueCommand(options: IssueOptions): Promise<void> {
+ const { webhookUrl, secret } = getCredentials()
-interface UpdateVersionsResult {
- versions: Record
- updated: boolean
-}
+ const { url, number, title, summary, author = 'Unknown', labels: labelsStr = '' } = options
-const ROOT_DIR = path.resolve(__dirname, '..')
-const DEFAULT_CONFIG_PATH = path.join(ROOT_DIR, 'app-upgrade-config.json')
```
-This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[SegmentDefinition]
- B[SegmentMetadataFile]
- C[ChannelConfig]
- D[VersionMetadata]
- E[VersionEntry]
+ A[createIssueCard]
+ B[createSimpleCard]
+ C[getCredentials]
+ D[handleSendCommand]
+ E[handleIssueCommand]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/07-development-and-contribution-workflow.md b/tutorials/cherry-studio-tutorial/07-development-and-contribution-workflow.md
index 474716df..e0b3245a 100644
--- a/tutorials/cherry-studio-tutorial/07-development-and-contribution-workflow.md
+++ b/tutorials/cherry-studio-tutorial/07-development-and-contribution-workflow.md
@@ -49,15 +49,98 @@ You now have a contributor-ready workflow for building and submitting Cherry Stu
Next: [Chapter 8: Production Operations and Governance](08-production-operations-and-governance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
+### `scripts/feishu-notify.ts`
+
+The `SendOptions` interface in [`scripts/feishu-notify.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/feishu-notify.ts) handles a key part of this chapter's functionality:
+
+```ts
+
+/** Send subcommand options */
+interface SendOptions {
+ title: string
+ description: string
+ color?: string
+}
+
+/**
+ * Generate Feishu webhook signature using HMAC-SHA256
+ * @param secret - Feishu webhook secret
+ * @param timestamp - Unix timestamp in seconds
+ * @returns Base64 encoded signature
+ */
+function generateSignature(secret: string, timestamp: number): string {
+ const stringToSign = `${timestamp}\n${secret}`
+ const hmac = crypto.createHmac('sha256', stringToSign)
+ return hmac.digest('base64')
+}
+
+/**
+ * Send message to Feishu webhook
+ * @param webhookUrl - Feishu webhook URL
+ * @param secret - Feishu webhook secret
+ * @param content - Feishu card message content
+ * @returns Resolves when message is sent successfully
+ * @throws When Feishu API returns non-2xx status code or network error occurs
+ */
+function sendToFeishu(webhookUrl: string, secret: string, content: FeishuCard): Promise<void> {
+ return new Promise((resolve, reject) => {
+ const timestamp = Math.floor(Date.now() / 1000)
+ const sign = generateSignature(secret, timestamp)
+```
+
+This interface is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+
+### `scripts/update-i18n.ts`
+
+The `translate` function in [`scripts/update-i18n.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/update-i18n.ts) handles a key part of this chapter's functionality:
+
+```ts
+/**
+ * 使用 OpenAI 兼容的模型生成 i18n 文本,并更新到 translate 目录
+ *
+ * API_KEY=sk-xxxx BASE_URL=xxxx MODEL=xxxx ts-node scripts/update-i18n.ts
+ */
+
+import OpenAI from '@cherrystudio/openai'
+import cliProgress from 'cli-progress'
+import fs from 'fs'
+
+type I18NValue = string | { [key: string]: I18NValue }
+type I18N = { [key: string]: I18NValue }
+
+const API_KEY = process.env.API_KEY
+const BASE_URL = process.env.BASE_URL || 'https://dashscope.aliyuncs.com/compatible-mode/v1/'
+const MODEL = process.env.MODEL || 'qwen-plus-latest'
+
+const INDEX = [
+ // 语言的名称代码用来翻译的模型
+ { name: 'France', code: 'fr-fr', model: MODEL },
+ { name: 'Spanish', code: 'es-es', model: MODEL },
+ { name: 'Portuguese', code: 'pt-pt', model: MODEL },
+ { name: 'Greek', code: 'el-gr', model: MODEL }
+]
+
+const zh = JSON.parse(fs.readFileSync('src/renderer/src/i18n/locales/zh-cn.json', 'utf8')) as I18N
+
+const openai = new OpenAI({
+ apiKey: API_KEY,
+ baseURL: BASE_URL
+})
+```
+
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+
### `scripts/skills-check.ts`
-The `isClaudeReadmeFile` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
+The `isAgentsReadmeFile` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
```ts
+} from './skills-common'
+
+function isAgentsReadmeFile(file: string): boolean {
+ return /^\.agents\/skills\/README(?:\.[a-z0-9-]+)?\.md$/i.test(file)
}
function isClaudeReadmeFile(file: string): boolean {
@@ -86,21 +169,21 @@ function checkClaudeSkillSymlink(skillName: string, errors: string[]) {
let stat: fs.Stats
try {
stat = fs.lstatSync(claudeSkillDir)
- } catch {
- errors.push(`.claude/skills/${skillName} is missing (run pnpm skills:sync)`)
- return
- }
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
### `scripts/skills-check.ts`
-The `checkGitignore` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
+The `isClaudeReadmeFile` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
```ts
}
+function isClaudeReadmeFile(file: string): boolean {
+ return /^\.claude\/skills\/README(?:\.[a-z0-9-]+)?\.md$/i.test(file)
+}
+
function checkGitignore(filePath: string, expected: string, displayPath: string, errors: string[]) {
const actual = readFileSafe(filePath)
if (actual === null) {
@@ -127,92 +210,6 @@ function checkClaudeSkillSymlink(skillName: string, errors: string[]) {
errors.push(`.claude/skills/${skillName} is missing (run pnpm skills:sync)`)
return
}
-
- if (!stat.isSymbolicLink()) {
- errors.push(
- `.claude/skills/${skillName} must be a symlink, not a ${stat.isDirectory() ? 'directory' : 'file'} (run pnpm skills:sync)`
-```
-
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-
-### `scripts/skills-check.ts`
-
-The `checkClaudeSkillSymlink` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
-
-```ts
- * `../../.agents/skills/`.
- */
-function checkClaudeSkillSymlink(skillName: string, errors: string[]) {
- const claudeSkillDir = path.join(CLAUDE_SKILLS_DIR, skillName)
- const expectedTarget = path.join('..', '..', '.agents', 'skills', skillName)
-
- let stat: fs.Stats
- try {
- stat = fs.lstatSync(claudeSkillDir)
- } catch {
- errors.push(`.claude/skills/${skillName} is missing (run pnpm skills:sync)`)
- return
- }
-
- if (!stat.isSymbolicLink()) {
- errors.push(
- `.claude/skills/${skillName} must be a symlink, not a ${stat.isDirectory() ? 'directory' : 'file'} (run pnpm skills:sync)`
- )
- return
- }
-
- const actualTarget = fs.readlinkSync(claudeSkillDir)
- if (actualTarget !== expectedTarget) {
- errors.push(`.claude/skills/${skillName} symlink points to '${actualTarget}', expected '${expectedTarget}'`)
- }
-}
-
-function checkTrackedFilesAgainstWhitelist(skillNames: string[], errors: string[]) {
- const sharedAgentsFiles = new Set(['.agents/skills/.gitignore', '.agents/skills/public-skills.txt'])
- const sharedClaudeFiles = new Set(['.claude/skills/.gitignore'])
- const allowedAgentsPrefixes = skillNames.map((skillName) => `.agents/skills/${skillName}/`)
- const allowedClaudeSymlinks = new Set(skillNames.map((skillName) => `.claude/skills/${skillName}`))
-```
-
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-
-### `scripts/skills-check.ts`
-
-The `checkTrackedFilesAgainstWhitelist` function in [`scripts/skills-check.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/skills-check.ts) handles a key part of this chapter's functionality:
-
-```ts
-}
-
-function checkTrackedFilesAgainstWhitelist(skillNames: string[], errors: string[]) {
- const sharedAgentsFiles = new Set(['.agents/skills/.gitignore', '.agents/skills/public-skills.txt'])
- const sharedClaudeFiles = new Set(['.claude/skills/.gitignore'])
- const allowedAgentsPrefixes = skillNames.map((skillName) => `.agents/skills/${skillName}/`)
- const allowedClaudeSymlinks = new Set(skillNames.map((skillName) => `.claude/skills/${skillName}`))
- const allowedClaudePrefixes = skillNames.map((skillName) => `.claude/skills/${skillName}/`)
-
- let trackedFiles: string[]
- try {
- const output = execSync('git ls-files -- .agents/skills .claude/skills', {
- cwd: ROOT_DIR,
- encoding: 'utf-8'
- })
- trackedFiles = output
- .split('\n')
- .map((line) => line.trim())
- .filter((line) => line.length > 0)
- } catch (error) {
- const message = error instanceof Error ? error.message : String(error)
- errors.push(`failed to read tracked skill files via git ls-files: ${message}`)
- return
- }
-
- for (const file of trackedFiles) {
- if (file.startsWith('.agents/skills/')) {
- if (sharedAgentsFiles.has(file) || isAgentsReadmeFile(file)) {
- continue
- }
- if (allowedAgentsPrefixes.some((prefix) => file.startsWith(prefix))) {
- continue
```
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
@@ -222,11 +219,11 @@ This function is important because it defines how Cherry Studio Tutorial: Multi-
```mermaid
flowchart TD
- A[isClaudeReadmeFile]
- B[checkGitignore]
- C[checkClaudeSkillSymlink]
- D[checkTrackedFilesAgainstWhitelist]
- E[main]
+ A[SendOptions]
+ B[translate]
+ C[isAgentsReadmeFile]
+ D[isClaudeReadmeFile]
+ E[checkGitignore]
A --> B
B --> C
C --> D
diff --git a/tutorials/cherry-studio-tutorial/08-production-operations-and-governance.md b/tutorials/cherry-studio-tutorial/08-production-operations-and-governance.md
index b09cbce9..6c962662 100644
--- a/tutorials/cherry-studio-tutorial/08-production-operations-and-governance.md
+++ b/tutorials/cherry-studio-tutorial/08-production-operations-and-governance.md
@@ -41,12 +41,51 @@ You now have a full production governance model for using Cherry Studio in serio
Continue with the [Context7 Tutorial](../context7-tutorial/).
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/check-i18n.ts`
+The `isSortedI18N` function in [`scripts/check-i18n.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-i18n.ts) handles a key part of this chapter's functionality:
+
+```ts
+}
+
+function isSortedI18N(obj: I18N): boolean {
+ // fs.writeFileSync('./test_origin.json', JSON.stringify(obj))
+ // fs.writeFileSync('./test_sorted.json', JSON.stringify(sortedObjectByKeys(obj)))
+ return JSON.stringify(obj) === JSON.stringify(sortedObjectByKeys(obj))
+}
+
+/**
+ * 检查 JSON 对象中是否存在重复键,并收集所有重复键
+ * @param obj 要检查的对象
+ * @returns 返回重复键的数组(若无重复则返回空数组)
+ */
+function checkDuplicateKeys(obj: I18N): string[] {
+  const keys = new Set<string>()
+ const duplicateKeys: string[] = []
+
+ const checkObject = (obj: I18N, path: string = '') => {
+ for (const key in obj) {
+ const fullPath = path ? `${path}.${key}` : key
+
+ if (keys.has(fullPath)) {
+ // 发现重复键时,添加到数组中(避免重复添加)
+ if (!duplicateKeys.includes(fullPath)) {
+ duplicateKeys.push(fullPath)
+ }
+ } else {
+ keys.add(fullPath)
+ }
+
+ // 递归检查子对象
+ if (typeof obj[key] === 'object' && obj[key] !== null) {
+```
+
+This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
+
+### `scripts/check-i18n.ts`
+
The `checkDuplicateKeys` function in [`scripts/check-i18n.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/check-i18n.ts) handles a key part of this chapter's functionality:
```ts
@@ -150,57 +189,16 @@ main()
This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-### `scripts/patch-claude-agent-sdk.ts`
-
-The `patchSpawnImport` function in [`scripts/patch-claude-agent-sdk.ts`](https://github.com/CherryHQ/cherry-studio/blob/HEAD/scripts/patch-claude-agent-sdk.ts) handles a key part of this chapter's functionality:
-
-```ts
-
-// 1. Replace `import{spawn as X}from"child_process"` with `import{fork as X}from"child_process"`
-export function patchSpawnImport(content: string): PatchResult {
- let matched = false
- const result = content.replace(/import\{spawn as ([\w$]+)\}from"child_process"/, (_, alias) => {
- matched = true
- return `import{fork as ${alias}}from"child_process"`
- })
- return { result, matched }
-}
-
-// 2. Remove `command:X,` from spawnLocalProcess destructuring
-// Before: spawnLocalProcess(Q){let{command:X,args:Y,cwd:$,env:W,signal:J}=Q
-// After: spawnLocalProcess(Q){let{args:Y,cwd:$,env:W,signal:J}=Q
-export function patchRemoveCommand(content: string): PatchResult {
- let matched = false
- const result = content.replace(
- /spawnLocalProcess\(([\w$]+)\)\{let\{command:([\w$]+),args:([\w$]+)/,
- (_, fnArg, _cmd, args) => {
- matched = true
- return `spawnLocalProcess(${fnArg}){let{args:${args}`
- }
- )
- return { result, matched }
-}
-
-// 3. Rewrite the spawn/fork call:
-// Before: =Sq(X,Y,{cwd:$,stdio:["pipe","pipe",G],signal:J,env:W,windowsHide:!0})
-// After: =Sq(Y[0],Y.slice(1),{cwd:$,stdio:G==="pipe"?["pipe","pipe","pipe","ipc"]:["pipe","pipe","ignore","ipc"],signal:J,env:W})
-export function patchSpawnCall(content: string): PatchResult {
- let matched = false
- const result = content.replace(
-```
-
-This function is important because it defines how Cherry Studio Tutorial: Multi-Provider AI Desktop Workspace for Teams implements the patterns covered in this chapter.
-
## How These Components Connect
```mermaid
flowchart TD
- A[checkDuplicateKeys]
- B[checkTranslations]
- C[main]
- D[patchSpawnImport]
- E[patchRemoveCommand]
+ A[isSortedI18N]
+ B[checkDuplicateKeys]
+ C[checkTranslations]
+ D[main]
+ E[extractAllLanguageData]
A --> B
B --> C
C --> D
diff --git a/tutorials/chroma-tutorial/01-getting-started.md b/tutorials/chroma-tutorial/01-getting-started.md
index 2bcfd227..64c9f337 100644
--- a/tutorials/chroma-tutorial/01-getting-started.md
+++ b/tutorials/chroma-tutorial/01-getting-started.md
@@ -411,16 +411,30 @@ Under the hood, `Chapter 1: Getting Started with Chroma` usually follows a repea
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+### `chromadb/api/client.py`
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+The `Client` class in [`chromadb/api/client.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/client.py) is the main entrypoint for interacting with Chroma. It extends `SharedSystemClient` and `ClientAPI`, maintaining `tenant` and `database` as first-class attributes:
-Suggested trace strategy:
-- search upstream code for `collection` and `documents` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+```python
+class Client(SharedSystemClient, ClientAPI):
+ """A client for Chroma. This is the main entrypoint for interacting with Chroma.
+ A client internally stores its tenant and database and proxies calls to a
+ Server API instance of Chroma. It treats the Server API and corresponding System
+ as a singleton, so multiple clients connecting to the same resource will share the
+ same API instance.
+ """
+
+ tenant: str = DEFAULT_TENANT
+ database: str = DEFAULT_DATABASE
+
+ _server: ServerAPI
+ _admin_client: AdminAPI
+ _closed: bool = False
+```
+
+Chroma uses a `Settings` + `System` dependency injection pattern — the `Client` holds a `ServerAPI` reference (which may be an in-process segment API or an HTTP proxy), meaning the same API surface works for both embedded and client-server mode.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/02-collections-documents.md b/tutorials/chroma-tutorial/02-collections-documents.md
index 4dc38c98..bbe10fd6 100644
--- a/tutorials/chroma-tutorial/02-collections-documents.md
+++ b/tutorials/chroma-tutorial/02-collections-documents.md
@@ -12,6 +12,19 @@ Welcome to **Chapter 2: Collections & Documents**. In this part of **ChromaDB Tu
Welcome back! Now that you understand Chroma's basics, let's dive deeper into managing collections and documents. Collections are the core organizational unit in Chroma, and understanding how to work with them effectively is crucial for building robust AI applications.
+## Collection Data Model
+
+```mermaid
+graph TD
+ Client["chromadb.Client\n(tenant + database)"] --> Col["Collection\n(name + metadata + EF)"]
+ Col --> Doc["Documents\n(text strings)"]
+ Col --> Emb["Embeddings\n(float vectors)"]
+ Col --> Meta["Metadatas\n(dict per item)"]
+ Col --> IDs["IDs\n(unique strings)"]
+ EF["EmbeddingFunction\n(default: all-MiniLM)"] --> Emb
+ Col --> HNSW["HNSW Index\n(similarity search)"]
+```
+
## Collection Architecture
### Understanding Collections
@@ -557,16 +570,29 @@ Under the hood, `Chapter 2: Collections & Documents` usually follows a repeatabl
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/api/types.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `EmbeddingFunction` protocol and `QueryResult` type in [`chromadb/api/types.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/types.py) define the interface contract for collections. The `Include` type controls which fields are returned in query results:
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.api.types import (
+ CollectionMetadata,
+ Documents,
+ Embeddings,
+ EmbeddingFunction,
+ GetResult,
+ IDs,
+ Include,
+ Metadatas,
+ QueryResult,
+ IncludeMetadataDocuments,
+ IncludeMetadataDocumentsDistances,
+)
+```
-Suggested trace strategy:
-- search upstream code for `collection` and `documents` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Collection operations (`add`, `get`, `query`, `update`, `upsert`, `delete`) are defined in `chromadb/api/__init__.py` as abstract methods on `ServerAPI`, then implemented in `chromadb/api/segment.py` for the embedded backend.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/03-embeddings-indexing.md b/tutorials/chroma-tutorial/03-embeddings-indexing.md
index e7cd3fb0..ef876aa8 100644
--- a/tutorials/chroma-tutorial/03-embeddings-indexing.md
+++ b/tutorials/chroma-tutorial/03-embeddings-indexing.md
@@ -9,6 +9,18 @@ nav_order: 3
Welcome to the heart of Chroma's power! This chapter explores how embeddings work, how Chroma indexes them for fast retrieval, and how to optimize similarity search performance.
+## Embedding and Indexing Pipeline
+
+```mermaid
+flowchart LR
+ Text["Raw Text\n(documents)"] --> EF["EmbeddingFunction\n(e.g. all-MiniLM-L6-v2)"]
+ EF --> Vec["Float Vectors\n(384 or 1536 dims)"]
+ Vec --> HNSW["HNSW Index\n(chromadb/db/)"]
+ HNSW --> ANN["Approximate Nearest\nNeighbour Search"]
+ ANN --> TopK["Top-K Results\n(ids + distances)"]
+ Custom["Custom EF\n(OpenAI / Cohere)"] --> Vec
+```
+
## Understanding Embeddings
### What Are Embeddings?
@@ -451,16 +463,27 @@ Under the hood, `Chapter 3: Embeddings & Indexing` usually follows a repeatable
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/api/types.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `EmbeddingFunction` protocol and `DefaultEmbeddingFunction` in [`chromadb/api/types.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/types.py) define how Chroma transforms documents into vectors. The `Embeddings` type is `List[Vector]` where `Vector = List[float]`, and `SparseVector` supports sparse retrieval:
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.base_types import (
+ Vector,
+ PyVector,
+ LiteralValue,
+ LogicalOperator,
+ WhereOperator,
+ OperatorExpression,
+ Where,
+ WhereDocument,
+ SparseVector,
+)
+```
-Suggested trace strategy:
-- search upstream code for `embeddings` and `collection` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+When you pass `documents` to `collection.add()` without explicit embeddings, Chroma calls the collection's `EmbeddingFunction.__call__` to generate them. The default is `all-MiniLM-L6-v2` via `chromadb.utils.embedding_functions`.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/04-querying-retrieval.md b/tutorials/chroma-tutorial/04-querying-retrieval.md
index 983c2cb1..73c4e474 100644
--- a/tutorials/chroma-tutorial/04-querying-retrieval.md
+++ b/tutorials/chroma-tutorial/04-querying-retrieval.md
@@ -12,6 +12,20 @@ Welcome to **Chapter 4: Querying & Retrieval**. In this part of **ChromaDB Tutor
Master the art of querying in Chroma! This chapter covers advanced querying techniques, metadata filtering, and retrieval strategies for building powerful search applications.
+## Query Execution Flow
+
+```mermaid
+flowchart TD
+ QueryText["query_texts\n(list of strings)"] --> EF["EmbeddingFunction\n(auto-embed)"]
+ QueryEmbeddings["query_embeddings\n(pre-computed)"] --> KNN["KNN Operator\n(execution plan)"]
+ EF --> KNN
+ Where["where filter\n({'category': 'doc'})"] --> Filter["Filter Operator"]
+ WhereDoc["where_document filter\n({'$contains': 'text'})"] --> Filter
+ Filter --> KNN
+ KNN --> Limit["Limit / n_results"]
+ Limit --> Result["QueryResult\n(ids, distances, documents, metadatas)"]
+```
+
## Advanced Query Patterns
### Metadata Filtering
@@ -214,16 +228,18 @@ Under the hood, `Chapter 4: Querying & Retrieval` usually follows a repeatable c
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/api/segment.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `SegmentAPI` class in [`chromadb/api/segment.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/segment.py) implements the core query path for embedded Chroma. It uses a structured execution plan (`KNNPlan`, `GetPlan`, `CountPlan`) built from `Scan`, `Filter`, `Limit`, `KNN`, and `Projection` operators:
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.execution.expression.operator import Scan, Filter, Limit, KNN, Projection
+from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
+```
-Suggested trace strategy:
-- search upstream code for `query` and `results` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+The `tenacity`-based retry decorators on `SegmentAPI` methods handle transient Rust-layer failures gracefully. The `QuotaEnforcer` and `RateLimitEnforcer` integrate with the execution path for cloud deployments.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/05-metadata-filtering.md b/tutorials/chroma-tutorial/05-metadata-filtering.md
index b1643d3c..1787f409 100644
--- a/tutorials/chroma-tutorial/05-metadata-filtering.md
+++ b/tutorials/chroma-tutorial/05-metadata-filtering.md
@@ -12,6 +12,19 @@ Welcome to **Chapter 5: Metadata & Filtering**. In this part of **ChromaDB Tutor
Master metadata management and advanced filtering in Chroma! This chapter covers sophisticated metadata strategies and complex filtering patterns for building powerful, precise search applications.
+## Metadata Filter Operators
+
+```mermaid
+graph TD
+ Where["where dict"] --> Logical["Logical Ops\n($and / $or)"]
+ Where --> Compare["Comparison Ops\n($eq / $ne / $gt / $lt / $in / $nin)"]
+ WhereDoc["where_document dict"] --> DocOps["Document Ops\n($contains / $not_contains)"]
+ Logical --> Compare
+ Compare --> Segment["Segment Filter\n(applied before KNN)"]
+ DocOps --> Segment
+ Segment --> Result["Filtered candidates\npassed to HNSW"]
+```
+
## Advanced Metadata Strategies
### Hierarchical Metadata Design
@@ -224,16 +237,27 @@ Under the hood, `Chapter 5: Metadata & Filtering` usually follows a repeatable c
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/base_types.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `Where`, `WhereDocument`, `LogicalOperator`, and `WhereOperator` types in [`chromadb/base_types.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/base_types.py) define the complete filter grammar. Logical operators (`$and`, `$or`) compose comparison expressions (`$eq`, `$ne`, `$gt`, `$gte`, `$lt`, `$lte`, `$in`, `$nin`):
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.base_types import (
+ Metadata,
+ UpdateMetadata,
+ LiteralValue,
+ LogicalOperator,
+ WhereOperator,
+ OperatorExpression,
+ Where,
+ WhereDocumentOperator,
+ WhereDocument,
+)
+```
-Suggested trace strategy:
-- search upstream code for `tags` and `metadata` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Validation of these filter trees happens in `chromadb/api/types.py` via `validate_where` and `validate_where_document` before the query reaches the segment layer.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/06-integration-patterns.md b/tutorials/chroma-tutorial/06-integration-patterns.md
index 2cd47920..37c48e5a 100644
--- a/tutorials/chroma-tutorial/06-integration-patterns.md
+++ b/tutorials/chroma-tutorial/06-integration-patterns.md
@@ -1020,16 +1020,20 @@ Under the hood, `Chapter 6: Integration Patterns` usually follows a repeatable c
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
-Use the following upstream sources to verify implementation details while reading this chapter:
+### `chromadb/api/client.py`
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+The `Client.__init__` factory pattern in [`chromadb/api/client.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/client.py) is the standard integration entrypoint. The class uses `maybe_set_tenant_and_database` to ensure tenant/database context is resolved before any collection operation, which is critical for multi-tenant LangChain / LlamaIndex integrations:
-Suggested trace strategy:
-- search upstream code for `documents` and `Chroma` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+```python
+from chromadb.auth.utils import maybe_set_tenant_and_database
+from chromadb.config import Settings, System
+from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE
+from chromadb.api.models.Collection import Collection
+```
+
+LangChain and LlamaIndex integrations call `chromadb.HttpClient()` or `chromadb.EphemeralClient()` to get a `Client` instance, then pass it directly to their vector store wrappers. The `DataLoader` and `URIs` types in `chromadb/api/types.py` support multimodal (image, audio) document stores.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/07-production-deployment.md b/tutorials/chroma-tutorial/07-production-deployment.md
index 17e8ef71..49525e5d 100644
--- a/tutorials/chroma-tutorial/07-production-deployment.md
+++ b/tutorials/chroma-tutorial/07-production-deployment.md
@@ -12,6 +12,20 @@ Welcome to **Chapter 7: Production Deployment**. In this part of **ChromaDB Tuto
Scale Chroma for production workloads! This chapter covers deployment strategies, scaling, monitoring, and operational best practices for production Chroma deployments.
+## Production Deployment Modes
+
+```mermaid
+graph TD
+ Dev["Development\nEphemeralClient()"] --> PersistLocal["Persistent Local\nPersistentClient(path=)"]
+ PersistLocal --> Server["Server Mode\nchroma run --path /data"]
+ Server --> HTTPClient["HttpClient\n(host, port, auth)"]
+ HTTPClient --> LB["Load Balancer\n(multiple replicas)"]
+ LB --> Auth["Auth Layer\n(chromadb/auth/)"]
+ Auth --> API["FastAPI Server\n(chromadb/api/fastapi.py)"]
+ API --> Seg["SegmentAPI\n(storage backend)"]
+ Seg --> Rust["Rust HNSW\n(chromadb bindings)"]
+```
+
## Production Architecture
### Scalable Deployment
@@ -314,16 +328,20 @@ Under the hood, `Chapter 7: Production Deployment` usually follows a repeatable
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/api/fastapi.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `FastAPI` class in [`chromadb/api/fastapi.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/fastapi.py) implements the HTTP server layer. The `chromadb/auth/` directory provides pluggable authentication (token-based, basic auth) via the `UserIdentity` abstraction:
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.auth import UserIdentity
+from chromadb.auth.utils import maybe_set_tenant_and_database
+from chromadb.config import Settings, System
+from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE
+```
-Suggested trace strategy:
-- search upstream code for `self` and `client` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+For production deployments, `Settings` controls persistence path, anonymized telemetry, auth providers, and log level. The `Tiltfile` at the repo root enables local Kubernetes development via Tilt, and the `Dockerfile` provides the official container image used in production deployments.
## Chapter Connections
diff --git a/tutorials/chroma-tutorial/08-performance-optimization.md b/tutorials/chroma-tutorial/08-performance-optimization.md
index b932245c..7e11e064 100644
--- a/tutorials/chroma-tutorial/08-performance-optimization.md
+++ b/tutorials/chroma-tutorial/08-performance-optimization.md
@@ -12,6 +12,18 @@ Welcome to **Chapter 8: Performance Optimization**. In this part of **ChromaDB T
Master Chroma performance tuning! This final chapter covers advanced optimization techniques, benchmarking, and performance best practices for maximum efficiency.
+## Performance Tuning Knobs
+
+```mermaid
+graph TD
+ BatchSize["Batch Size\n(add / query)"] --> Throughput["Higher Throughput\n(amortize Python overhead)"]
+ HNSW_M["HNSW ef_construction\n+ M parameter"] --> Recall["Index Quality\nvs Build Time"]
+ EFSearch["HNSW ef\n(query-time)"] --> Latency["Search Latency\nvs Recall"]
+ EmbCache["Embedding Cache\n(lru_cache)"] --> EmbTime["Embedding Time\n(skip re-embed)"]
+ ReadLevel["ReadLevel\n(eventual / sync)"] --> Consistency["Consistency\nvs Throughput"]
+ Workers["async_io + workers"] --> Parallel["Parallel Queries"]
+```
+
## Performance Profiling
### Query Performance Analysis
@@ -504,16 +516,29 @@ Under the hood, `Chapter 8: Performance Optimization` usually follows a repeatab
When debugging, walk this sequence in order and confirm each stage has explicit success/failure conditions.
-## Source Walkthrough
+## Source Code Walkthrough
+
+### `chromadb/api/types.py`
-Use the following upstream sources to verify implementation details while reading this chapter:
+The `ReadLevel` enum and `validate_batch` function in [`chromadb/api/types.py`](https://github.com/chroma-core/chroma/blob/main/chromadb/api/types.py) are the primary performance-relevant API surface. `validate_batch` enforces that IDs, embeddings, documents, and metadatas are equal-length lists — catching size mismatches before expensive operations:
-- [View Repo](https://github.com/chroma-core/chroma)
- Why it matters: authoritative reference on `View Repo` (github.com).
+```python
+from chromadb.api.types import (
+ ReadLevel,
+ GetResult,
+ QueryResult,
+ SearchResult,
+ validate_metadata,
+ validate_update_metadata,
+ validate_where,
+ validate_where_document,
+ validate_batch,
+ IncludeMetadataDocuments,
+ IncludeMetadataDocumentsDistances,
+)
+```
-Suggested trace strategy:
-- search upstream code for `self` and `collection` to map concrete implementation paths
-- compare docs claims against actual runtime/config code before reusing patterns in production
+Using `include=["embeddings"]` in queries returns raw vectors and should be avoided in production unless needed, as it adds significant serialization cost. The `lru_cache` decorator in `chromadb/api/types.py` caches embedding function introspection for performance.
## Chapter Connections
diff --git a/tutorials/chrome-devtools-mcp-tutorial/01-getting-started.md b/tutorials/chrome-devtools-mcp-tutorial/01-getting-started.md
index f88ff3b1..90b50de7 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/01-getting-started.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/01-getting-started.md
@@ -44,170 +44,168 @@ You now have a working Chrome DevTools MCP baseline in your coding client.
Next: [Chapter 2: Architecture and Design Principles](02-architecture-and-design-principles.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/McpContext.ts`
+### `src/McpResponse.ts`
-The `McpContext` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
+The `McpResponse` class in [`src/McpResponse.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpResponse.ts) handles a key part of this chapter's functionality:
```ts
-import {WaitForHelper} from './WaitForHelper.js';
-
-interface McpContextOptions {
- // Whether the DevTools windows are exposed as pages for debugging of DevTools.
- experimentalDevToolsDebugging: boolean;
- // Whether all page-like targets are exposed as pages.
- experimentalIncludeAllPages?: boolean;
- // Whether CrUX data should be fetched.
- performanceCrux: boolean;
-}
-
-const DEFAULT_TIMEOUT = 5_000;
-const NAVIGATION_TIMEOUT = 10_000;
-
-function getNetworkMultiplierFromString(condition: string | null): number {
- const puppeteerCondition =
- condition as keyof typeof PredefinedNetworkConditions;
-
- switch (puppeteerCondition) {
- case 'Fast 4G':
- return 1;
- case 'Slow 4G':
- return 2.5;
- case 'Fast 3G':
- return 5;
- case 'Slow 3G':
- return 10;
- }
- return 1;
}
-export class McpContext implements Context {
+export class McpResponse implements Response {
+ #includePages = false;
+ #includeExtensionServiceWorkers = false;
+ #includeExtensionPages = false;
+ #snapshotParams?: SnapshotParams;
+ #attachedNetworkRequestId?: number;
+ #attachedNetworkRequestOptions?: {
+ requestFilePath?: string;
+ responseFilePath?: string;
+ };
+ #attachedConsoleMessageId?: number;
+ #attachedTraceSummary?: TraceResult;
+ #attachedTraceInsight?: TraceInsightData;
+ #attachedLighthouseResult?: LighthouseData;
+ #textResponseLines: string[] = [];
+ #images: ImageContentData[] = [];
+ #networkRequestsOptions?: {
+ include: boolean;
+ pagination?: PaginationOptions;
+ resourceTypes?: ResourceType[];
+ includePreservedRequests?: boolean;
+ networkRequestIdInDevToolsUI?: number;
+ };
+ #consoleDataOptions?: {
+ include: boolean;
+ pagination?: PaginationOptions;
+ types?: string[];
+ includePreservedMessages?: boolean;
+ };
+ #listExtensions?: boolean;
```
This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/McpContext.ts`
+### `src/McpResponse.ts`
-The `to` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
+The `replaceHtmlElementsWithUids` function in [`src/McpResponse.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpResponse.ts) handles a key part of this chapter's functionality:
```ts
-import path from 'node:path';
-
-import type {TargetUniverse} from './DevtoolsUtils.js';
-import {UniverseManager} from './DevtoolsUtils.js';
-import {McpPage} from './McpPage.js';
-import {
- NetworkCollector,
- ConsoleCollector,
- type ListenerMap,
- type UncaughtError,
-} from './PageCollector.js';
-import type {DevTools} from './third_party/index.js';
-import type {
- Browser,
- BrowserContext,
- ConsoleMessage,
- Debugger,
- HTTPRequest,
- Page,
- ScreenRecorder,
- SerializedAXNode,
- Viewport,
- Target,
-} from './third_party/index.js';
-import {Locator} from './third_party/index.js';
-import {PredefinedNetworkConditions} from './third_party/index.js';
-import {listPages} from './tools/pages.js';
-import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
-import type {Context, DevToolsData} from './tools/ToolDefinition.js';
-import type {TraceResult} from './trace-processing/parse.js';
-import type {
- EmulationSettings,
-```
-
-This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+}
-### `src/McpContext.ts`
+export function replaceHtmlElementsWithUids(schema: JSONSchema7Definition) {
+ if (typeof schema === 'boolean') {
+ return;
+ }
-The `instances` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
+ let isHtmlElement = false;
+ for (const [key, value] of Object.entries(schema)) {
+ if (key === 'x-mcp-type' && value === 'HTMLElement') {
+ isHtmlElement = true;
+ break;
+ }
+ }
-```ts
- logger: Debugger,
- opts: McpContextOptions,
- /* Let tests use unbundled Locator class to avoid overly strict checks within puppeteer that fail when mixing bundled and unbundled class instances */
- locatorClass: typeof Locator = Locator,
- ) {
- const context = new McpContext(browser, logger, opts, locatorClass);
- await context.#init();
- return context;
+ if (isHtmlElement) {
+ schema.properties = {uid: {type: 'string'}};
+ schema.required = ['uid'];
}
- resolveCdpRequestId(page: McpPage, cdpRequestId: string): number | undefined {
- if (!cdpRequestId) {
- this.logger('no network request');
- return;
- }
- const request = this.#networkCollector.find(page.pptrPage, request => {
- // @ts-expect-error id is internal.
- return request.id === cdpRequestId;
- });
- if (!request) {
- this.logger('no network request for ' + cdpRequestId);
- return;
+ if (schema.properties) {
+ for (const key of Object.keys(schema.properties)) {
+ replaceHtmlElementsWithUids(schema.properties[key]);
}
- return this.#networkCollector.getIdForResource(request);
}
- resolveCdpElementId(
- page: McpPage,
- cdpBackendNodeId: number,
- ): string | undefined {
- if (!cdpBackendNodeId) {
- this.logger('no cdpBackendNodeId');
+ if (schema.items) {
+ if (Array.isArray(schema.items)) {
+ for (const item of schema.items) {
+ replaceHtmlElementsWithUids(item);
+ }
+ } else {
```
-This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP implements the patterns covered in this chapter.
-### `src/McpContext.ts`
+### `src/McpResponse.ts`
-The `getNetworkMultiplierFromString` function in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
+The `getToolGroup` function in [`src/McpResponse.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpResponse.ts) handles a key part of this chapter's functionality:
```ts
-const NAVIGATION_TIMEOUT = 10_000;
-
-function getNetworkMultiplierFromString(condition: string | null): number {
- const puppeteerCondition =
- condition as keyof typeof PredefinedNetworkConditions;
-
- switch (puppeteerCondition) {
- case 'Fast 4G':
- return 1;
- case 'Slow 4G':
- return 2.5;
- case 'Fast 3G':
- return 5;
- case 'Slow 3G':
- return 10;
- }
- return 1;
}
-export class McpContext implements Context {
- browser: Browser;
- logger: Debugger;
+async function getToolGroup(
+ page: McpPage,
+): Promise<ToolGroup | undefined> {
+ // Check if there is a `devtoolstooldiscovery` event listener
+ const windowHandle = await page.pptrPage.evaluateHandle(() => window);
+ // @ts-expect-error internal API
+ const client = page.pptrPage._client();
+ const {listeners}: {listeners: Protocol.DOMDebugger.EventListener[]} =
+ await client.send('DOMDebugger.getEventListeners', {
+ objectId: windowHandle.remoteObject().objectId,
+ });
+ if (listeners.find(l => l.type === 'devtoolstooldiscovery') === undefined) {
+ return;
+ }
+
+ const toolGroup = await page.pptrPage.evaluate(() => {
+ return new Promise<ToolGroup | undefined>(resolve => {
+ const event = new CustomEvent('devtoolstooldiscovery');
+ // @ts-expect-error Adding custom property
+ event.respondWith = (toolGroup: ToolGroup) => {
+ if (!window.__dtmcp) {
+ window.__dtmcp = {};
+ }
+ window.__dtmcp.toolGroup = toolGroup;
+
+ // When receiving a toolGroup for the first time, expose a simple execution helper
+ if (!window.__dtmcp.executeTool) {
+ window.__dtmcp.executeTool = async (toolName, args) => {
+ if (!window.__dtmcp?.toolGroup) {
+ throw new Error('No tools found on the page');
+```
- // Maps LLM-provided isolatedContext name → Puppeteer BrowserContext.
- #isolatedContexts = new Map();
- // Auto-generated name counter for when no name is provided.
- #nextIsolatedContextId = 1;
+This function is important because it defines how Chrome DevTools MCP implements the patterns covered in this chapter.
+
+### `src/McpResponse.ts`
+
+The `createStructuredPage` function in [`src/McpResponse.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpResponse.ts) handles a key part of this chapter's functionality:
+
+```ts
+ `${context.getPageId(page)}: ${page.url()}${context.isPageSelected(page) ? ' [selected]' : ''}${contextLabel}`,
+ );
+ structuredPages.push(createStructuredPage(page, context));
+ }
+ response.push(...parts);
+ structuredContent.pages = structuredPages;
+ }
+
+ if (this.#includeExtensionPages) {
+ if (extensionPages.length) {
+ response.push(`## Extension Pages`);
+ const structuredExtensionPages = [];
+ for (const page of extensionPages) {
+ const isolatedContextName = context.getIsolatedContextName(page);
+ const contextLabel = isolatedContextName
+ ? ` isolatedContext=${isolatedContextName}`
+ : '';
+ response.push(
+ `${context.getPageId(page)}: ${page.url()}${context.isPageSelected(page) ? ' [selected]' : ''}${contextLabel}`,
+ );
+ structuredExtensionPages.push(createStructuredPage(page, context));
+ }
+ structuredContent.extensionPages = structuredExtensionPages;
+ }
+ }
+ }
- #pages: Page[] = [];
- #extensionServiceWorkers: ExtensionServiceWorker[] = [];
+ if (this.#includeExtensionServiceWorkers) {
+ if (context.getExtensionServiceWorkers().length) {
+ response.push(`## Extension Service Workers`);
+ }
- #mcpPages = new Map();
```
This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
@@ -217,11 +215,11 @@ This function is important because it defines how Chrome DevTools MCP Tutorial:
```mermaid
flowchart TD
- A[McpContext]
- B[to]
- C[instances]
- D[getNetworkMultiplierFromString]
- E[McpContextOptions]
+ A[McpResponse]
+ B[replaceHtmlElementsWithUids]
+ C[getToolGroup]
+ D[createStructuredPage]
+ E[TraceInsightData]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/02-architecture-and-design-principles.md b/tutorials/chrome-devtools-mcp-tutorial/02-architecture-and-design-principles.md
index 772b995d..b3b60212 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/02-architecture-and-design-principles.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/02-architecture-and-design-principles.md
@@ -38,170 +38,168 @@ You now understand how design principles translate into reliable tool interactio
Next: [Chapter 3: Client Integrations and Setup Patterns](03-client-integrations-and-setup-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/index.ts`
+### `src/DevtoolsUtils.ts`
-The `registerTool` function in [`src/index.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/index.ts) handles a key part of this chapter's functionality:
+The `waitForScript` function in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
```ts
- const toolMutex = new Mutex();
-
- function registerTool(tool: ToolDefinition | DefinedPageTool): void {
- if (
- tool.annotations.category === ToolCategory.EMULATION &&
- serverArgs.categoryEmulation === false
- ) {
- return;
- }
- if (
- tool.annotations.category === ToolCategory.PERFORMANCE &&
- serverArgs.categoryPerformance === false
- ) {
- return;
- }
- if (
- tool.annotations.category === ToolCategory.NETWORK &&
- serverArgs.categoryNetwork === false
- ) {
- return;
- }
- if (
- tool.annotations.category === ToolCategory.EXTENSIONS &&
- !serverArgs.categoryExtensions
- ) {
- return;
+ await Promise.all(
+ [...scriptIds].map(id =>
+ waitForScript(model, id, signal)
+ .then(script =>
+ model.sourceMapManager().sourceMapForClientPromise(script),
+ )
+ .catch(),
+ ),
+ );
+
+ const binding = devTools.universe.context.get(
+ DevTools.DebuggerWorkspaceBinding,
+ );
+ // DevTools uses branded types for ScriptId and others. Casting the puppeteer protocol type to the DevTools protocol type is safe.
+ return binding.createStackTraceFromProtocolRuntime(
+ rawStackTrace as Parameters<
+ DevTools.DebuggerWorkspaceBinding['createStackTraceFromProtocolRuntime']
+ >[0],
+ target,
+ );
+}
+
+// Waits indefinitely for the script so pair it with Promise.race.
+async function waitForScript(
+ model: DevTools.DebuggerModel,
+ scriptId: Protocol.Runtime.ScriptId,
+ signal: AbortSignal,
+) {
+ while (true) {
+ if (signal.aborted) {
+ throw signal.reason;
}
- if (
- tool.annotations.conditions?.includes('computerVision') &&
- !serverArgs.experimentalVision
- ) {
- return;
```
This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-cli.ts`
+### `src/DevtoolsUtils.ts`
-The `fetchTools` function in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
+The `TargetUniverse` interface in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
```ts
-);
-
-async function fetchTools() {
- console.log('Connecting to chrome-devtools-mcp to fetch tools...');
- // Use the local build of the server
- const serverPath = path.join(
- import.meta.dirname,
- '../build/src/bin/chrome-devtools-mcp.js',
- );
-
- const transport = new StdioClientTransport({
- command: 'node',
- args: [serverPath],
- env: {...process.env, CHROME_DEVTOOLS_MCP_NO_USAGE_STATISTICS: 'true'},
- });
+});
- const client = new Client(
- {
- name: 'chrome-devtools-cli-generator',
- version: '0.1.0',
- },
- {
- capabilities: {},
- },
- );
+export interface TargetUniverse {
+ /** The DevTools target corresponding to the puppeteer Page */
+ target: DevTools.Target;
+ universe: DevTools.Foundation.Universe.Universe;
+}
+export type TargetUniverseFactoryFn = (page: Page) => Promise<TargetUniverse>;
+
+export class UniverseManager {
+ readonly #browser: Browser;
+ readonly #createUniverseFor: TargetUniverseFactoryFn;
+ readonly #universes = new WeakMap();
+
+ /** Guard access to #universes so we don't create unnecessary universes */
+ readonly #mutex = new Mutex();
+
+ constructor(
+ browser: Browser,
+ factory: TargetUniverseFactoryFn = DEFAULT_FACTORY,
+ ) {
+ this.#browser = browser;
+ this.#createUniverseFor = factory;
+ }
- await client.connect(transport);
- try {
- const toolsResponse = await client.listTools();
- if (!toolsResponse.tools?.length) {
- throw new Error(`No tools were fetched`);
- }
+ async init(pages: Page[]) {
+ try {
+ await this.#mutex.acquire();
+ const promises = [];
+ for (const page of pages) {
+ promises.push(
+ this.#createUniverseFor(page).then(targetUniverse =>
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-cli.ts`
+### `src/DevtoolsUtils.ts`
-The `schemaToCLIOptions` function in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
+The `from` interface in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-function schemaToCLIOptions(schema: JsonSchema): CliOption[] {
- if (!schema || !schema.properties) {
+ */
+
+import {PuppeteerDevToolsConnection} from './DevToolsConnectionAdapter.js';
+import {Mutex} from './Mutex.js';
+import {DevTools} from './third_party/index.js';
+import type {
+ Browser,
+ ConsoleMessage,
+ Page,
+ Protocol,
+ Target as PuppeteerTarget,
+} from './third_party/index.js';
+
+/**
+ * A mock implementation of an issues manager that only implements the methods
+ * that are actually used by the IssuesAggregator
+ */
+export class FakeIssuesManager extends DevTools.Common.ObjectWrapper
+ .ObjectWrapper {
+ issues(): DevTools.Issue[] {
return [];
}
- const required = schema.required || [];
- const properties = schema.properties;
- return Object.entries(properties).map(([name, prop]) => {
- const isRequired = required.includes(name);
- const description = prop.description || '';
- if (typeof prop.type !== 'string') {
- throw new Error(
- `Property ${name} has a complex type not supported by CLI.`,
- );
- }
- return {
- name,
- type: prop.type,
- description,
- required: isRequired,
- default: prop.default,
- enum: prop.enum,
- };
- });
}
-async function generateCli() {
- const tools = await fetchTools();
+// DevTools CDP errors can get noisy.
+DevTools.ProtocolClient.InspectorBackend.test.suppressRequestErrors = true;
- // Sort tools by name
- const sortedTools = tools
+DevTools.I18n.DevToolsLocale.DevToolsLocale.instance({
+ create: true,
+ data: {
+ navigatorLanguage: 'en-US',
+ settingLanguage: 'en-US',
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-cli.ts`
+### `scripts/generate-docs.ts`
-The `generateCli` function in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
+The `measureServer` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
```ts
-}
+const README_PATH = './README.md';
-async function generateCli() {
- const tools = await fetchTools();
-
- // Sort tools by name
- const sortedTools = tools
- .sort((a, b) => a.name.localeCompare(b.name))
- .filter(tool => {
- // Skipping fill_form because it is not relevant in shell scripts
- // and CLI does not handle array/JSON args well.
- if (tool.name === 'fill_form') {
- return false;
- }
- // Skipping wait_for because CLI does not handle array/JSON args well
- // and shell scripts have many mechanisms for waiting.
- if (tool.name === 'wait_for') {
- return false;
- }
- return true;
- });
-
- const staticTools = createTools(parseArguments());
- const toolNameToCategory = new Map();
- for (const tool of staticTools) {
- toolNameToCategory.set(
- tool.name,
- labels[tool.annotations.category as keyof typeof labels],
- );
- }
+async function measureServer(args: string[]) {
+ // 1. Connect to your actual MCP server
+ const transport = new StdioClientTransport({
+ command: 'node',
+ args: ['./build/src/bin/chrome-devtools-mcp.js', ...args], // Point to your built MCP server
+ });
+
+ const client = new Client(
+ {name: 'measurer', version: '1.0.0'},
+ {capabilities: {}},
+ );
+ await client.connect(transport);
+
+ // 2. Fetch all tools
+ const toolsList = await client.listTools();
+
+ // 3. Serialize exactly how an LLM would see it (JSON)
+ const jsonString = JSON.stringify(toolsList.tools, null, 2);
+
+ // 4. Count tokens (using cl100k_base which is standard for GPT-4/Claude-3.5 approximation)
+ const enc = get_encoding('cl100k_base');
+ const tokenCount = enc.encode(jsonString).length;
+
+ console.log(`--- Measurement Results ---`);
+ console.log(`Total Tools: ${toolsList.tools.length}`);
+ console.log(`JSON Character Count: ${jsonString.length}`);
+ console.log(`Estimated Token Count: ~${tokenCount}`);
- const commands: Record<
+ // Clean up
+ enc.free();
```
This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
@@ -211,11 +209,11 @@ This function is important because it defines how Chrome DevTools MCP Tutorial:
```mermaid
flowchart TD
- A[registerTool]
- B[fetchTools]
- C[schemaToCLIOptions]
- D[generateCli]
- E[CliOption]
+ A[waitForScript]
+ B[TargetUniverse]
+ C[from]
+ D[measureServer]
+ E[escapeHtmlTags]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/03-client-integrations-and-setup-patterns.md b/tutorials/chrome-devtools-mcp-tutorial/03-client-integrations-and-setup-patterns.md
index 9e731601..be71fd12 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/03-client-integrations-and-setup-patterns.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/03-client-integrations-and-setup-patterns.md
@@ -38,184 +38,182 @@ You now have stable setup patterns for multi-client Chrome DevTools MCP usage.
Next: [Chapter 4: Automation Tooling: Input and Navigation](04-automation-tooling-input-and-navigation.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/eval_gemini.ts`
+### `scripts/generate-docs.ts`
-The `CapturedFunctionCall` interface in [`scripts/eval_gemini.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/eval_gemini.ts) handles a key part of this chapter's functionality:
+The `getZodTypeInfo` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
```ts
-// Define schema for our test scenarios
-export interface CapturedFunctionCall {
- name: string;
- args: Record;
-}
-
-export interface TestScenario {
- prompt: string;
- maxTurns: number;
- expectations: (calls: CapturedFunctionCall[]) => void;
- htmlRoute?: {
- path: string;
- htmlContent: string;
- };
- /** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */
- serverArgs?: string[];
-}
-
-async function loadScenario(scenarioPath: string): Promise {
- const module = await import(pathToFileURL(scenarioPath).href);
- if (!module.scenario) {
- throw new Error(
- `Scenario file ${scenarioPath} does not export a 'scenario' object.`,
- );
+// Helper to convert Zod schema to JSON schema-like object for docs
+function getZodTypeInfo(schema: ZodSchema): TypeInfo {
+ let description = schema.description;
+ let def = schema._def;
+ let defaultValue: unknown;
+
+ // Unwrap optional/default/effects
+ while (
+ def.typeName === 'ZodOptional' ||
+ def.typeName === 'ZodDefault' ||
+ def.typeName === 'ZodEffects'
+ ) {
+ if (def.typeName === 'ZodDefault' && def.defaultValue) {
+ defaultValue = def.defaultValue();
+ }
+ const next = def.innerType || def.schema;
+ if (!next) {
+ break;
+ }
+ schema = next;
+ def = schema._def;
+ if (!description && schema.description) {
+ description = schema.description;
+ }
}
- return module.scenario;
-}
-async function runSingleScenario(
- scenarioPath: string,
- apiKey: string,
+ const result: TypeInfo = {type: 'unknown'};
+ if (description) {
+ result.description = description;
+ }
+ if (defaultValue !== undefined) {
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/eval_gemini.ts`
+### `scripts/generate-docs.ts`
-The `TestScenario` interface in [`scripts/eval_gemini.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/eval_gemini.ts) handles a key part of this chapter's functionality:
+The `isRequired` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
```ts
}
-export interface TestScenario {
- prompt: string;
- maxTurns: number;
- expectations: (calls: CapturedFunctionCall[]) => void;
- htmlRoute?: {
- path: string;
- htmlContent: string;
- };
- /** Extra CLI flags passed to the MCP server (e.g. '--experimental-page-id-routing'). */
- serverArgs?: string[];
-}
-
-async function loadScenario(scenarioPath: string): Promise {
- const module = await import(pathToFileURL(scenarioPath).href);
- if (!module.scenario) {
- throw new Error(
- `Scenario file ${scenarioPath} does not export a 'scenario' object.`,
- );
+function isRequired(schema: ZodSchema): boolean {
+ let def = schema._def;
+ while (def.typeName === 'ZodEffects') {
+ if (!def.schema) {
+ break;
+ }
+ schema = def.schema;
+ def = schema._def;
}
- return module.scenario;
+ return def.typeName !== 'ZodOptional' && def.typeName !== 'ZodDefault';
}
-async function runSingleScenario(
- scenarioPath: string,
- apiKey: string,
- server: TestServer,
- modelId: string,
- debug: boolean,
- includeSkill: boolean,
-): Promise {
-```
+async function generateReference(
+ title: string,
+ outputPath: string,
+ toolsWithAnnotations: ToolWithAnnotations[],
+ categories: Record,
+ sortedCategories: string[],
+ serverArgs: string[],
+) {
+ console.log(`Found ${toolsWithAnnotations.length} tools`);
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+ // Generate markdown documentation
+ let markdown = `
-### `src/PageCollector.ts`
+# ${title} (~${(await measureServer(serverArgs)).tokenCount} cl100k_base tokens)
-The `UncaughtError` class in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
+`;
+ // Generate table of contents
+ for (const category of sortedCategories) {
+```
-```ts
-} from './third_party/index.js';
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-export class UncaughtError {
- readonly details: Protocol.Runtime.ExceptionDetails;
- readonly targetId: string;
+### `scripts/generate-docs.ts`
- constructor(details: Protocol.Runtime.ExceptionDetails, targetId: string) {
- this.details = details;
- this.targetId = targetId;
- }
-}
+The `generateReference` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
-interface PageEvents extends PuppeteerPageEvents {
- issue: DevTools.AggregatedIssue;
- uncaughtError: UncaughtError;
+```ts
}
-export type ListenerMap = {
- [K in keyof EventMap]?: (event: EventMap[K]) => void;
-};
-
-function createIdGenerator() {
- let i = 1;
- return () => {
- if (i === Number.MAX_SAFE_INTEGER) {
- i = 0;
+async function generateReference(
+ title: string,
+ outputPath: string,
+ toolsWithAnnotations: ToolWithAnnotations[],
+ categories: Record,
+ sortedCategories: string[],
+ serverArgs: string[],
+) {
+ console.log(`Found ${toolsWithAnnotations.length} tools`);
+
+ // Generate markdown documentation
+ let markdown = `
+
+# ${title} (~${(await measureServer(serverArgs)).tokenCount} cl100k_base tokens)
+
+`;
+ // Generate table of contents
+ for (const category of sortedCategories) {
+ const categoryTools = categories[category];
+ const categoryName = labels[category];
+ const anchorName = categoryName.toLowerCase().replace(/\s+/g, '-');
+ markdown += `- **[${categoryName}](#${anchorName})** (${categoryTools.length} tools)\n`;
+
+ // Sort tools within category for TOC
+ categoryTools.sort((a: Tool, b: Tool) => a.name.localeCompare(b.name));
+ for (const tool of categoryTools) {
+ // Generate proper markdown anchor link: backticks are removed, keep underscores, lowercase
+ const anchorLink = tool.name.toLowerCase();
+ markdown += ` - [\`${tool.name}\`](#${anchorLink})\n`;
}
- return i++;
- };
-}
-
-export const stableIdSymbol = Symbol('stableIdSymbol');
```
-This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/PageCollector.ts`
+### `scripts/generate-docs.ts`
-The `PageCollector` class in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
+The `getToolsAndCategories` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
```ts
-};
-
-export class PageCollector {
- #browser: Browser;
- #listenersInitializer: (
- collector: (item: T) => void,
- ) => ListenerMap;
- #listeners = new WeakMap();
- protected maxNavigationSaved = 3;
-
- /**
- * This maps a Page to a list of navigations with a sub-list
- * of all collected resources.
- * The newer navigations come first.
- */
- protected storage = new WeakMap>>>();
-
- constructor(
- browser: Browser,
- listeners: (collector: (item: T) => void) => ListenerMap,
- ) {
- this.#browser = browser;
- this.#listenersInitializer = listeners;
- }
-
- async init(pages: Page[]) {
- for (const page of pages) {
- this.addPage(page);
- }
- this.#browser.on('targetcreated', this.#onTargetCreated);
- this.#browser.on('targetdestroyed', this.#onTargetDestroyed);
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function getToolsAndCategories(tools: any) {
+ // Convert ToolDefinitions to ToolWithAnnotations
+ const toolsWithAnnotations: ToolWithAnnotations[] = tools
+ .filter(tool => {
+ if (!tool.annotations.conditions) {
+ return true;
+ }
+
+ // Only include unconditional tools.
+ return tool.annotations.conditions.length === 0;
+ })
+ .map(tool => {
+ const properties: Record<string, TypeInfo> = {};
+ const required: string[] = [];
+
+ for (const [key, schema] of Object.entries(
+ tool.schema as unknown as Record<string, ZodSchema>,
+ )) {
+ const info = getZodTypeInfo(schema);
+ properties[key] = info;
+ if (isRequired(schema)) {
+ required.push(key);
+ }
+ }
+
+ return {
+ name: tool.name,
+ description: tool.description,
+ inputSchema: {
+ type: 'object',
```
-This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[CapturedFunctionCall]
- B[TestScenario]
- C[UncaughtError]
- D[PageCollector]
- E[ConsoleCollector]
+ A[getZodTypeInfo]
+ B[isRequired]
+ C[generateReference]
+ D[getToolsAndCategories]
+ E[generateToolDocumentation]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/04-automation-tooling-input-and-navigation.md b/tutorials/chrome-devtools-mcp-tutorial/04-automation-tooling-input-and-navigation.md
index a62b5032..8e37b281 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/04-automation-tooling-input-and-navigation.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/04-automation-tooling-input-and-navigation.md
@@ -37,184 +37,182 @@ You now have a repeatable automation pattern for browser interactions.
Next: [Chapter 5: Performance and Debugging Workflows](05-performance-and-debugging-workflows.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/browser.ts`
+### `scripts/generate-docs.ts`
-The `targetFilter` function in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
+The `order` interface in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
```ts
- }
-
- return function targetFilter(target: Target): boolean {
- if (target.url() === 'chrome://newtab/') {
- return true;
+ });
+
+ // Sort categories using the enum order
+ const categoryOrder = Object.values(ToolCategory);
+ const sortedCategories = Object.keys(categories).sort((a, b) => {
+ const aIndex = categoryOrder.indexOf(a);
+ const bIndex = categoryOrder.indexOf(b);
+ // Put known categories first, unknown categories last
+ if (aIndex === -1 && bIndex === -1) {
+ return a.localeCompare(b);
}
- // Could be the only page opened in the browser.
- if (target.url().startsWith('chrome://inspect')) {
- return true;
+ if (aIndex === -1) {
+ return 1;
}
- for (const prefix of ignoredPrefixes) {
- if (target.url().startsWith(prefix)) {
- return false;
- }
+ if (bIndex === -1) {
+ return -1;
}
- return true;
- };
+ return aIndex - bIndex;
+ });
+ return {toolsWithAnnotations, categories, sortedCategories};
}
-export async function ensureBrowserConnected(options: {
- browserURL?: string;
- wsEndpoint?: string;
- wsHeaders?: Record;
- devtools: boolean;
- channel?: Channel;
- userDataDir?: string;
- enableExtensions?: boolean;
-}) {
- const {channel, enableExtensions} = options;
- if (browser?.connected) {
- return browser;
- }
+async function generateToolDocumentation(): Promise<void> {
+ try {
+ console.log('Generating tool documentation from definitions...');
+
+ {
+ const {toolsWithAnnotations, categories, sortedCategories} =
+ getToolsAndCategories(createTools({slim: false} as ParsedArguments));
+ await generateReference(
+ 'Chrome DevTools MCP Tool Reference',
+ OUTPUT_PATH,
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/browser.ts`
+### `src/McpContext.ts`
-The `ensureBrowserConnected` function in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
+The `McpContext` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
```ts
+import {getNetworkMultiplierFromString} from './WaitForHelper.js';
+
+interface McpContextOptions {
+ // Whether the DevTools windows are exposed as pages for debugging of DevTools.
+ experimentalDevToolsDebugging: boolean;
+ // Whether all page-like targets are exposed as pages.
+ experimentalIncludeAllPages?: boolean;
+ // Whether CrUX data should be fetched.
+ performanceCrux: boolean;
}
-export async function ensureBrowserConnected(options: {
- browserURL?: string;
- wsEndpoint?: string;
- wsHeaders?: Record;
- devtools: boolean;
- channel?: Channel;
- userDataDir?: string;
- enableExtensions?: boolean;
-}) {
- const {channel, enableExtensions} = options;
- if (browser?.connected) {
- return browser;
- }
+const DEFAULT_TIMEOUT = 5_000;
+const NAVIGATION_TIMEOUT = 10_000;
- const connectOptions: Parameters[0] = {
- targetFilter: makeTargetFilter(enableExtensions),
- defaultViewport: null,
- handleDevToolsAsPage: true,
- };
-
- let autoConnect = false;
- if (options.wsEndpoint) {
- connectOptions.browserWSEndpoint = options.wsEndpoint;
- if (options.wsHeaders) {
- connectOptions.headers = options.wsHeaders;
- }
- } else if (options.browserURL) {
- connectOptions.browserURL = options.browserURL;
- } else if (channel || options.userDataDir) {
- const userDataDir = options.userDataDir;
-```
+export class McpContext implements Context {
+ browser: Browser;
+ logger: Debugger;
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+ // Maps LLM-provided isolatedContext name → Puppeteer BrowserContext.
+ #isolatedContexts = new Map();
+ // Auto-generated name counter for when no name is provided.
+ #nextIsolatedContextId = 1;
-### `src/browser.ts`
+ #pages: Page[] = [];
+ #extensionServiceWorkers: ExtensionServiceWorker[] = [];
-The `detectDisplay` function in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
+ #mcpPages = new Map();
+ #selectedPage?: McpPage;
+ #networkCollector: NetworkCollector;
+ #consoleCollector: ConsoleCollector;
+ #devtoolsUniverseManager: UniverseManager;
+ #extensionRegistry = new ExtensionRegistry();
+```
-```ts
-}
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-export function detectDisplay(): void {
- // Only detect display on Linux/UNIX.
- if (os.platform() === 'win32' || os.platform() === 'darwin') {
- return;
- }
- if (!process.env['DISPLAY']) {
- try {
- const result = execSync(
- `ps -u $(id -u) -o pid= | xargs -I{} cat /proc/{}/environ 2>/dev/null | tr '\\0' '\\n' | grep -m1 '^DISPLAY=' | cut -d= -f2`,
- );
- const display = result.toString('utf8').trim();
- process.env['DISPLAY'] = display;
- } catch {
- // no-op
- }
- }
-}
+### `src/McpContext.ts`
-export async function launch(options: McpLaunchOptions): Promise {
- const {channel, executablePath, headless, isolated} = options;
- const profileDirName =
- channel && channel !== 'stable'
- ? `chrome-profile-${channel}`
- : 'chrome-profile';
-
- let userDataDir = options.userDataDir;
- if (!isolated && !userDataDir) {
- userDataDir = path.join(
- os.homedir(),
- '.cache',
+The `to` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
+
+```ts
+import path from 'node:path';
+
+import type {TargetUniverse} from './DevtoolsUtils.js';
+import {UniverseManager} from './DevtoolsUtils.js';
+import {McpPage} from './McpPage.js';
+import {
+ NetworkCollector,
+ ConsoleCollector,
+ type ListenerMap,
+ type UncaughtError,
+} from './PageCollector.js';
+import type {DevTools} from './third_party/index.js';
+import type {
+ Browser,
+ BrowserContext,
+ ConsoleMessage,
+ Debugger,
+ HTTPRequest,
+ Page,
+ ScreenRecorder,
+ SerializedAXNode,
+ Viewport,
+ Target,
+} from './third_party/index.js';
+import {Locator} from './third_party/index.js';
+import {PredefinedNetworkConditions} from './third_party/index.js';
+import {listPages} from './tools/pages.js';
+import {CLOSE_PAGE_ERROR} from './tools/ToolDefinition.js';
+import type {Context, DevToolsData} from './tools/ToolDefinition.js';
+import type {TraceResult} from './trace-processing/parse.js';
+import type {
+ EmulationSettings,
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/browser.ts`
+### `src/McpContext.ts`
-The `launch` function in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
+The `instances` class in [`src/McpContext.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpContext.ts) handles a key part of this chapter's functionality:
```ts
-}
+ logger: Debugger,
+ opts: McpContextOptions,
+ /* Let tests use unbundled Locator class to avoid overly strict checks within puppeteer that fail when mixing bundled and unbundled class instances */
+ locatorClass: typeof Locator = Locator,
+ ) {
+ const context = new McpContext(browser, logger, opts, locatorClass);
+ await context.#init();
+ return context;
+ }
-export async function launch(options: McpLaunchOptions): Promise {
- const {channel, executablePath, headless, isolated} = options;
- const profileDirName =
- channel && channel !== 'stable'
- ? `chrome-profile-${channel}`
- : 'chrome-profile';
-
- let userDataDir = options.userDataDir;
- if (!isolated && !userDataDir) {
- userDataDir = path.join(
- os.homedir(),
- '.cache',
- options.viaCli ? 'chrome-devtools-mcp-cli' : 'chrome-devtools-mcp',
- profileDirName,
- );
- await fs.promises.mkdir(userDataDir, {
- recursive: true,
+ resolveCdpRequestId(page: McpPage, cdpRequestId: string): number | undefined {
+ if (!cdpRequestId) {
+ this.logger('no network request');
+ return;
+ }
+ const request = this.#networkCollector.find(page.pptrPage, request => {
+ // @ts-expect-error id is internal.
+ return request.id === cdpRequestId;
});
+ if (!request) {
+ this.logger('no network request for ' + cdpRequestId);
+ return;
+ }
+ return this.#networkCollector.getIdForResource(request);
}
- const args: LaunchOptions['args'] = [
- ...(options.chromeArgs ?? []),
- '--hide-crash-restore-bubble',
- ];
- const ignoreDefaultArgs: LaunchOptions['ignoreDefaultArgs'] =
- options.ignoreDefaultChromeArgs ?? false;
-
- if (headless) {
- args.push('--screen-info={3840x2160}');
- }
+ resolveCdpElementId(
+ page: McpPage,
+ cdpBackendNodeId: number,
+ ): string | undefined {
+ if (!cdpBackendNodeId) {
+ this.logger('no cdpBackendNodeId');
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[targetFilter]
- B[ensureBrowserConnected]
- C[detectDisplay]
- D[launch]
- E[ensureBrowserLaunched]
+ A[order]
+ B[McpContext]
+ C[to]
+ D[instances]
+ E[McpContextOptions]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/05-performance-and-debugging-workflows.md b/tutorials/chrome-devtools-mcp-tutorial/05-performance-and-debugging-workflows.md
index 477afb14..722edb68 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/05-performance-and-debugging-workflows.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/05-performance-and-debugging-workflows.md
@@ -39,184 +39,182 @@ You now have an end-to-end debugging and performance analysis workflow.
Next: [Chapter 6: Troubleshooting and Reliability Hardening](06-troubleshooting-and-reliability-hardening.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/DevtoolsUtils.ts`
+### `src/PageCollector.ts`
-The `createStackTrace` function in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
+The `ConsoleCollector` class in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
```ts
- } else if (opts.details.stackTrace) {
- try {
- stackTrace = await createStackTrace(
- opts.devTools,
- opts.details.stackTrace,
- opts.targetId,
- );
- } catch {
- // ignore
- }
+}
+
+export class ConsoleCollector extends PageCollector<
+ ConsoleMessage | Error | DevTools.AggregatedIssue | UncaughtError
+> {
+ #subscribedPages = new WeakMap();
+
+ override addPage(page: Page): void {
+ super.addPage(page);
+ if (!this.#subscribedPages.has(page)) {
+ const subscriber = new PageEventSubscriber(page);
+ this.#subscribedPages.set(page, subscriber);
+ void subscriber.subscribe();
}
+ }
+
+ protected override cleanupPageDestroyed(page: Page): void {
+ super.cleanupPageDestroyed(page);
+ this.#subscribedPages.get(page)?.unsubscribe();
+ this.#subscribedPages.delete(page);
+ }
+}
+
+class PageEventSubscriber {
+ #issueManager = new FakeIssuesManager();
+ #issueAggregator = new DevTools.IssueAggregator(this.#issueManager);
+ #seenKeys = new Set();
+ #seenIssues = new Set();
+ #page: Page;
+ #session: CDPSession;
+ #targetId: string;
- // TODO: Turn opts.details.exception into a JSHandle and retrieve the 'cause' property.
- // If its an Error, recursively create a SymbolizedError.
- let cause: SymbolizedError | undefined;
- if (opts.resolvedCauseForTesting) {
- cause = opts.resolvedCauseForTesting;
- } else if (opts.details.exception) {
- try {
- const causeRemoteObj = await SymbolizedError.#lookupCause(
- opts.devTools,
- opts.details.exception,
- opts.targetId,
- );
- if (causeRemoteObj) {
- cause = await SymbolizedError.fromError({
- devTools: opts.devTools,
- error: causeRemoteObj,
- targetId: opts.targetId,
- });
- }
- } catch {
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/DevtoolsUtils.ts`
+### `src/PageCollector.ts`
-The `waitForScript` function in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
+The `PageEventSubscriber` class in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
```ts
- await Promise.all(
- [...scriptIds].map(id =>
- waitForScript(model, id, signal)
- .then(script =>
- model.sourceMapManager().sourceMapForClientPromise(script),
- )
- .catch(),
- ),
- );
-
- const binding = devTools.universe.context.get(
- DevTools.DebuggerWorkspaceBinding,
- );
- // DevTools uses branded types for ScriptId and others. Casting the puppeteer protocol type to the DevTools protocol type is safe.
- return binding.createStackTraceFromProtocolRuntime(
- rawStackTrace as Parameters<
- DevTools.DebuggerWorkspaceBinding['createStackTraceFromProtocolRuntime']
- >[0],
- target,
- );
+ ConsoleMessage | Error | DevTools.AggregatedIssue | UncaughtError
+> {
+ #subscribedPages = new WeakMap();
+
+ override addPage(page: Page): void {
+ super.addPage(page);
+ if (!this.#subscribedPages.has(page)) {
+ const subscriber = new PageEventSubscriber(page);
+ this.#subscribedPages.set(page, subscriber);
+ void subscriber.subscribe();
+ }
+ }
+
+ protected override cleanupPageDestroyed(page: Page): void {
+ super.cleanupPageDestroyed(page);
+ this.#subscribedPages.get(page)?.unsubscribe();
+ this.#subscribedPages.delete(page);
+ }
}
-// Waits indefinitely for the script so pair it with Promise.race.
-async function waitForScript(
- model: DevTools.DebuggerModel,
- scriptId: Protocol.Runtime.ScriptId,
- signal: AbortSignal,
-) {
- while (true) {
- if (signal.aborted) {
- throw signal.reason;
- }
+class PageEventSubscriber {
+ #issueManager = new FakeIssuesManager();
+ #issueAggregator = new DevTools.IssueAggregator(this.#issueManager);
+ #seenKeys = new Set();
+ #seenIssues = new Set();
+ #page: Page;
+ #session: CDPSession;
+ #targetId: string;
+
+ constructor(page: Page) {
+ this.#page = page;
+ // @ts-expect-error use existing CDP client (internal Puppeteer API).
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/DevtoolsUtils.ts`
+### `src/PageCollector.ts`
-The `TargetUniverse` interface in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
+The `NetworkCollector` class in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
```ts
-});
-
-export interface TargetUniverse {
- /** The DevTools target corresponding to the puppeteer Page */
- target: DevTools.Target;
- universe: DevTools.Foundation.Universe.Universe;
}
-export type TargetUniverseFactoryFn = (page: Page) => Promise;
-
-export class UniverseManager {
- readonly #browser: Browser;
- readonly #createUniverseFor: TargetUniverseFactoryFn;
- readonly #universes = new WeakMap();
-
- /** Guard access to #universes so we don't create unnecessary universes */
- readonly #mutex = new Mutex();
+export class NetworkCollector extends PageCollector<HTTPRequest> {
constructor(
browser: Browser,
- factory: TargetUniverseFactoryFn = DEFAULT_FACTORY,
+ listeners: (
+ collector: (item: HTTPRequest) => void,
+ ) => ListenerMap = collect => {
+ return {
+ request: req => {
+ collect(req);
+ },
+ } as ListenerMap;
+ },
) {
- this.#browser = browser;
- this.#createUniverseFor = factory;
+ super(browser, listeners);
}
+ override splitAfterNavigation(page: Page) {
+ const navigations = this.storage.get(page) ?? [];
+ if (!navigations) {
+ return;
+ }
- async init(pages: Page[]) {
- try {
- await this.#mutex.acquire();
- const promises = [];
- for (const page of pages) {
- promises.push(
- this.#createUniverseFor(page).then(targetUniverse =>
+ const requests = navigations[0];
+
+ const lastRequestIdx = requests.findLastIndex(request => {
+ return request.frame() === page.mainFrame()
+ ? request.isNavigationRequest()
+ : false;
+ });
+
+ // Keep all requests since the last navigation request including that
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/DevtoolsUtils.ts`
+### `src/PageCollector.ts`
-The `from` interface in [`src/DevtoolsUtils.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/DevtoolsUtils.ts) handles a key part of this chapter's functionality:
+The `createIdGenerator` function in [`src/PageCollector.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/PageCollector.ts) handles a key part of this chapter's functionality:
```ts
- */
-
-import {PuppeteerDevToolsConnection} from './DevToolsConnectionAdapter.js';
-import {Mutex} from './Mutex.js';
-import {DevTools} from './third_party/index.js';
-import type {
- Browser,
- ConsoleMessage,
- Page,
- Protocol,
- Target as PuppeteerTarget,
-} from './third_party/index.js';
-
-/**
- * A mock implementation of an issues manager that only implements the methods
- * that are actually used by the IssuesAggregator
- */
-export class FakeIssuesManager extends DevTools.Common.ObjectWrapper
- .ObjectWrapper {
- issues(): DevTools.Issue[] {
- return [];
- }
+};
+
+function createIdGenerator() {
+ let i = 1;
+ return () => {
+ if (i === Number.MAX_SAFE_INTEGER) {
+ i = 0;
+ }
+ return i++;
+ };
}
-// DevTools CDP errors can get noisy.
-DevTools.ProtocolClient.InspectorBackend.test.suppressRequestErrors = true;
+export const stableIdSymbol = Symbol('stableIdSymbol');
+type WithSymbolId<T> = T & {
+ [stableIdSymbol]?: number;
+};
+
+export class PageCollector<T> {
+ #browser: Browser;
+ #listenersInitializer: (
+ collector: (item: T) => void,
+ ) => ListenerMap;
+ #listeners = new WeakMap();
+ protected maxNavigationSaved = 3;
+
+ /**
+ * This maps a Page to a list of navigations with a sub-list
+ * of all collected resources.
+ * The newer navigations come first.
+ */
+ protected storage = new WeakMap<Page, Array<Array<WithSymbolId<T>>>>();
-DevTools.I18n.DevToolsLocale.DevToolsLocale.instance({
- create: true,
- data: {
- navigatorLanguage: 'en-US',
- settingLanguage: 'en-US',
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[createStackTrace]
- B[waitForScript]
- C[TargetUniverse]
- D[from]
- E[measureServer]
+ A[ConsoleCollector]
+ B[PageEventSubscriber]
+ C[NetworkCollector]
+ D[createIdGenerator]
+ E[PageEvents]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/06-troubleshooting-and-reliability-hardening.md b/tutorials/chrome-devtools-mcp-tutorial/06-troubleshooting-and-reliability-hardening.md
index 13f0feb8..19ea4d84 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/06-troubleshooting-and-reliability-hardening.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/06-troubleshooting-and-reliability-hardening.md
@@ -38,184 +38,164 @@ You now have a practical reliability playbook for Chrome DevTools MCP operations
Next: [Chapter 7: Development, Evaluation, and Contribution](07-development-evaluation-and-contribution.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/generate-docs.ts`
+### `src/browser.ts`
-The `updateReadmeWithOptionsMarkdown` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `ensureBrowserLaunched` function in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
```ts
}
-function updateReadmeWithOptionsMarkdown(optionsMarkdown: string): void {
- const readmeContent = fs.readFileSync(README_PATH, 'utf8');
-
- const beginMarker = '';
- const endMarker = '';
-
- const beginIndex = readmeContent.indexOf(beginMarker);
- const endIndex = readmeContent.indexOf(endMarker);
-
- if (beginIndex === -1 || endIndex === -1) {
- console.warn('Could not find auto-generated options markers in README.md');
- return;
+export async function ensureBrowserLaunched(
+ options: McpLaunchOptions,
+): Promise<Browser> {
+ if (browser?.connected) {
+ return browser;
}
-
- const before = readmeContent.substring(0, beginIndex + beginMarker.length);
- const after = readmeContent.substring(endIndex);
-
- const updatedContent = before + '\n\n' + optionsMarkdown + '\n\n' + after;
-
- fs.writeFileSync(README_PATH, updatedContent);
- console.log('Updated README.md with options markdown');
+ browser = await launch(options);
+ return browser;
}
-// Helper to convert Zod schema to JSON schema-like object for docs
-function getZodTypeInfo(schema: ZodSchema): TypeInfo {
- let description = schema.description;
- let def = schema._def;
- let defaultValue: unknown;
+export type Channel = 'stable' | 'canary' | 'beta' | 'dev';
- // Unwrap optional/default/effects
```
This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-docs.ts`
+### `src/browser.ts`
-The `getZodTypeInfo` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `McpLaunchOptions` interface in [`src/browser.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/browser.ts) handles a key part of this chapter's functionality:
```ts
+}
-// Helper to convert Zod schema to JSON schema-like object for docs
-function getZodTypeInfo(schema: ZodSchema): TypeInfo {
- let description = schema.description;
- let def = schema._def;
- let defaultValue: unknown;
-
- // Unwrap optional/default/effects
- while (
- def.typeName === 'ZodOptional' ||
- def.typeName === 'ZodDefault' ||
- def.typeName === 'ZodEffects'
- ) {
- if (def.typeName === 'ZodDefault' && def.defaultValue) {
- defaultValue = def.defaultValue();
- }
- const next = def.innerType || def.schema;
- if (!next) {
- break;
- }
- schema = next;
- def = schema._def;
- if (!description && schema.description) {
- description = schema.description;
- }
- }
+interface McpLaunchOptions {
+ acceptInsecureCerts?: boolean;
+ executablePath?: string;
+ channel?: Channel;
+ userDataDir?: string;
+ headless: boolean;
+ isolated: boolean;
+ logFile?: fs.WriteStream;
+ viewport?: {
+ width: number;
+ height: number;
+ };
+ chromeArgs?: string[];
+ ignoreDefaultChromeArgs?: string[];
+ devtools: boolean;
+ enableExtensions?: boolean;
+ viaCli?: boolean;
+}
- const result: TypeInfo = {type: 'unknown'};
- if (description) {
- result.description = description;
+export function detectDisplay(): void {
+ // Only detect display on Linux/UNIX.
+ if (os.platform() === 'win32' || os.platform() === 'darwin') {
+ return;
}
- if (defaultValue !== undefined) {
+ if (!process.env['DISPLAY']) {
+ try {
+ const result = execSync(
+ `ps -u $(id -u) -o pid= | xargs -I{} cat /proc/{}/environ 2>/dev/null | tr '\\0' '\\n' | grep -m1 '^DISPLAY=' | cut -d= -f2`,
+ );
+ const display = result.toString('utf8').trim();
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-docs.ts`
+### `src/McpPage.ts`
-The `isRequired` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `McpPage` class in [`src/McpPage.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpPage.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-function isRequired(schema: ZodSchema): boolean {
- let def = schema._def;
- while (def.typeName === 'ZodEffects') {
- if (!def.schema) {
- break;
- }
- schema = def.schema;
- def = schema._def;
- }
- return def.typeName !== 'ZodOptional' && def.typeName !== 'ZodDefault';
-}
-
-async function generateReference(
- title: string,
- outputPath: string,
- toolsWithAnnotations: ToolWithAnnotations[],
- categories: Record,
- sortedCategories: string[],
- serverArgs: string[],
-) {
- console.log(`Found ${toolsWithAnnotations.length} tools`);
-
- // Generate markdown documentation
- let markdown = `
-
-# ${title} (~${(await measureServer(serverArgs)).tokenCount} cl100k_base tokens)
-
-`;
- // Generate table of contents
- for (const category of sortedCategories) {
+ * and metadata that were previously scattered across Maps in McpContext.
+ *
+ * Internal class consumed only by McpContext. Fields are public for direct
+ * read/write access. The dialog field is private because it requires an
+ * event listener lifecycle managed by the constructor/dispose pair.
+ */
+export class McpPage implements ContextPage {
+ readonly pptrPage: Page;
+ readonly id: number;
+
+ // Snapshot
+ textSnapshot: TextSnapshot | null = null;
+ uniqueBackendNodeIdToMcpId = new Map();
+
+ // Emulation
+ emulationSettings: EmulationSettings = {};
+
+ // Metadata
+ isolatedContextName?: string;
+ devToolsPage?: Page;
+
+ // Dialog
+ #dialog?: Dialog;
+ #dialogHandler: (dialog: Dialog) => void;
+
+ inPageTools: ToolGroup | undefined;
+
+ constructor(page: Page, id: number) {
+ this.pptrPage = page;
+ this.id = id;
+ this.#dialogHandler = (dialog: Dialog): void => {
+ this.#dialog = dialog;
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-docs.ts`
+### `src/McpPage.ts`
-The `generateReference` function in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `McpPage` class in [`src/McpPage.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/McpPage.ts) handles a key part of this chapter's functionality:
```ts
-}
+ * event listener lifecycle managed by the constructor/dispose pair.
+ */
+export class McpPage implements ContextPage {
+ readonly pptrPage: Page;
+ readonly id: number;
+
+ // Snapshot
+ textSnapshot: TextSnapshot | null = null;
+ uniqueBackendNodeIdToMcpId = new Map();
+
+ // Emulation
+ emulationSettings: EmulationSettings = {};
+
+ // Metadata
+ isolatedContextName?: string;
+ devToolsPage?: Page;
+
+ // Dialog
+ #dialog?: Dialog;
+ #dialogHandler: (dialog: Dialog) => void;
+
+ inPageTools: ToolGroup | undefined;
+
+ constructor(page: Page, id: number) {
+ this.pptrPage = page;
+ this.id = id;
+ this.#dialogHandler = (dialog: Dialog): void => {
+ this.#dialog = dialog;
+ };
+ page.on('dialog', this.#dialogHandler);
+ }
-async function generateReference(
- title: string,
- outputPath: string,
- toolsWithAnnotations: ToolWithAnnotations[],
- categories: Record,
- sortedCategories: string[],
- serverArgs: string[],
-) {
- console.log(`Found ${toolsWithAnnotations.length} tools`);
-
- // Generate markdown documentation
- let markdown = `
-
-# ${title} (~${(await measureServer(serverArgs)).tokenCount} cl100k_base tokens)
-
-`;
- // Generate table of contents
- for (const category of sortedCategories) {
- const categoryTools = categories[category];
- const categoryName = labels[category];
- const anchorName = categoryName.toLowerCase().replace(/\s+/g, '-');
- markdown += `- **[${categoryName}](#${anchorName})** (${categoryTools.length} tools)\n`;
-
- // Sort tools within category for TOC
- categoryTools.sort((a: Tool, b: Tool) => a.name.localeCompare(b.name));
- for (const tool of categoryTools) {
- // Generate proper markdown anchor link: backticks are removed, keep underscores, lowercase
- const anchorLink = tool.name.toLowerCase();
- markdown += ` - [\`${tool.name}\`](#${anchorLink})\n`;
- }
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[updateReadmeWithOptionsMarkdown]
- B[getZodTypeInfo]
- C[isRequired]
- D[generateReference]
- E[getToolsAndCategories]
+ A[ensureBrowserLaunched]
+ B[McpLaunchOptions]
+ C[consumed]
+ D[McpPage]
+ E[loadScenario]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/07-development-evaluation-and-contribution.md b/tutorials/chrome-devtools-mcp-tutorial/07-development-evaluation-and-contribution.md
index 59e41929..894d5af9 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/07-development-evaluation-and-contribution.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/07-development-evaluation-and-contribution.md
@@ -39,184 +39,182 @@ You now have a clean contributor path for this MCP server ecosystem.
Next: [Chapter 8: Production Operations and Privacy Governance](08-production-operations-and-privacy-governance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `scripts/generate-docs.ts`
+### `scripts/generate-cli.ts`
-The `TypeInfo` interface in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `schemaToCLIOptions` function in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
```ts
}
-interface TypeInfo {
- type: string;
- enum?: string[];
- items?: TypeInfo;
- description?: string;
- default?: unknown;
+function schemaToCLIOptions(schema: JsonSchema): CliOption[] {
+ if (!schema || !schema.properties) {
+ return [];
+ }
+ const required = schema.required || [];
+ const properties = schema.properties;
+ return Object.entries(properties).map(([name, prop]) => {
+ const isRequired = required.includes(name);
+ const description = prop.description || '';
+ if (typeof prop.type !== 'string') {
+ throw new Error(
+ `Property ${name} has a complex type not supported by CLI.`,
+ );
+ }
+ return {
+ name,
+ type: prop.type,
+ description,
+ required: isRequired,
+ default: prop.default,
+ enum: prop.enum,
+ };
+ });
}
-function escapeHtmlTags(text: string): string {
- return text
- .replace(/&(?![a-zA-Z]+;)/g, '&')
- .replace(/<([a-zA-Z][^>]*)>/g, '<$1>');
-}
+async function generateCli() {
+ const tools = await fetchTools();
-function addCrossLinks(text: string, tools: ToolWithAnnotations[]): string {
- let result = text;
+ // Sort tools by name
+ const sortedTools = tools
+```
- // Create a set of all tool names for efficient lookup
- const toolNames = new Set(tools.map(tool => tool.name));
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
- // Sort tool names by length (descending) to match longer names first
- const sortedToolNames = Array.from(toolNames).sort(
- (a, b) => b.length - a.length,
- );
+### `scripts/generate-cli.ts`
- for (const toolName of sortedToolNames) {
- // Create regex to match tool name (case insensitive, word boundaries)
- const regex = new RegExp(`\\b${toolName}\\b`, 'gi');
+The `generateCli` function in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
- result = result.replace(regex, match => {
+```ts
+}
+
+async function generateCli() {
+ const tools = await fetchTools();
+
+ // Sort tools by name
+ const sortedTools = tools
+ .sort((a, b) => a.name.localeCompare(b.name))
+ .filter(tool => {
+ // Skipping fill_form because it is not relevant in shell scripts
+ // and CLI does not handle array/JSON args well.
+ if (tool.name === 'fill_form') {
+ return false;
+ }
+ // Skipping wait_for because CLI does not handle array/JSON args well
+ // and shell scripts have many mechanisms for waiting.
+ if (tool.name === 'wait_for') {
+ return false;
+ }
+ return true;
+ });
+
+ const staticTools = createTools(parseArguments());
+ const toolNameToCategory = new Map();
+ for (const tool of staticTools) {
+ toolNameToCategory.set(
+ tool.name,
+ labels[tool.annotations.category as keyof typeof labels],
+ );
+ }
+
+ const commands: Record<
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/generate-docs.ts`
+### `scripts/generate-cli.ts`
-The `order` interface in [`scripts/generate-docs.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-docs.ts) handles a key part of this chapter's functionality:
+The `CliOption` interface in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
```ts
- });
+}
- // Sort categories using the enum order
- const categoryOrder = Object.values(ToolCategory);
- const sortedCategories = Object.keys(categories).sort((a, b) => {
- const aIndex = categoryOrder.indexOf(a);
- const bIndex = categoryOrder.indexOf(b);
- // Put known categories first, unknown categories last
- if (aIndex === -1 && bIndex === -1) {
- return a.localeCompare(b);
- }
- if (aIndex === -1) {
- return 1;
- }
- if (bIndex === -1) {
- return -1;
- }
- return aIndex - bIndex;
- });
- return {toolsWithAnnotations, categories, sortedCategories};
+interface CliOption {
+ name: string;
+ type: string;
+ description: string;
+ required: boolean;
+ default?: unknown;
+ enum?: unknown[];
}
-async function generateToolDocumentation(): Promise {
- try {
- console.log('Generating tool documentation from definitions...');
+interface JsonSchema {
+ type?: string | string[];
+ description?: string;
+ properties?: Record<string, JsonSchema>;
+ required?: string[];
+ default?: unknown;
+ enum?: unknown[];
+}
- {
- const {toolsWithAnnotations, categories, sortedCategories} =
- getToolsAndCategories(createTools({slim: false} as ParsedArguments));
- await generateReference(
- 'Chrome DevTools MCP Tool Reference',
- OUTPUT_PATH,
+function schemaToCLIOptions(schema: JsonSchema): CliOption[] {
+ if (!schema || !schema.properties) {
+ return [];
+ }
+ const required = schema.required || [];
+ const properties = schema.properties;
+ return Object.entries(properties).map(([name, prop]) => {
+ const isRequired = required.includes(name);
+ const description = prop.description || '';
+ if (typeof prop.type !== 'string') {
+ throw new Error(
+ `Property ${name} has a complex type not supported by CLI.`,
```
This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `src/WaitForHelper.ts`
+### `scripts/generate-cli.ts`
-The `WaitForHelper` class in [`src/WaitForHelper.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/WaitForHelper.ts) handles a key part of this chapter's functionality:
+The `JsonSchema` interface in [`scripts/generate-cli.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/generate-cli.ts) handles a key part of this chapter's functionality:
```ts
-import type {Page, Protocol, CdpPage} from './third_party/index.js';
-
-export class WaitForHelper {
- #abortController = new AbortController();
- #page: CdpPage;
- #stableDomTimeout: number;
- #stableDomFor: number;
- #expectNavigationIn: number;
- #navigationTimeout: number;
-
- constructor(
- page: Page,
- cpuTimeoutMultiplier: number,
- networkTimeoutMultiplier: number,
- ) {
- this.#stableDomTimeout = 3000 * cpuTimeoutMultiplier;
- this.#stableDomFor = 100 * cpuTimeoutMultiplier;
- this.#expectNavigationIn = 100 * cpuTimeoutMultiplier;
- this.#navigationTimeout = 3000 * networkTimeoutMultiplier;
- this.#page = page as unknown as CdpPage;
- }
-
- /**
- * A wrapper that executes a action and waits for
- * a potential navigation, after which it waits
- * for the DOM to be stable before returning.
- */
- async waitForStableDom(): Promise {
- const stableDomObserver = await this.#page.evaluateHandle(timeout => {
- let timeoutId: ReturnType;
- function callback() {
- clearTimeout(timeoutId);
-```
-
-This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-
-### `src/WaitForHelper.ts`
+}
-The `callback` function in [`src/WaitForHelper.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/WaitForHelper.ts) handles a key part of this chapter's functionality:
+interface JsonSchema {
+ type?: string | string[];
+ description?: string;
+ properties?: Record<string, JsonSchema>;
+ required?: string[];
+ default?: unknown;
+ enum?: unknown[];
+}
-```ts
- const stableDomObserver = await this.#page.evaluateHandle(timeout => {
- let timeoutId: ReturnType;
- function callback() {
- clearTimeout(timeoutId);
- timeoutId = setTimeout(() => {
- domObserver.resolver.resolve();
- domObserver.observer.disconnect();
- }, timeout);
- }
- const domObserver = {
- resolver: Promise.withResolvers(),
- observer: new MutationObserver(callback),
- };
- // It's possible that the DOM is not gonna change so we
- // need to start the timeout initially.
- callback();
-
- domObserver.observer.observe(document.body, {
- childList: true,
- subtree: true,
- attributes: true,
- });
-
- return domObserver;
- }, this.#stableDomFor);
-
- this.#abortController.signal.addEventListener('abort', async () => {
- try {
- await stableDomObserver.evaluate(observer => {
- observer.observer.disconnect();
- observer.resolver.resolve();
- });
+function schemaToCLIOptions(schema: JsonSchema): CliOption[] {
+ if (!schema || !schema.properties) {
+ return [];
+ }
+ const required = schema.required || [];
+ const properties = schema.properties;
+ return Object.entries(properties).map(([name, prop]) => {
+ const isRequired = required.includes(name);
+ const description = prop.description || '';
+ if (typeof prop.type !== 'string') {
+ throw new Error(
+ `Property ${name} has a complex type not supported by CLI.`,
+ );
+ }
+ return {
+ name,
+ type: prop.type,
+ description,
+ required: isRequired,
+ default: prop.default,
+ enum: prop.enum,
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[TypeInfo]
- B[order]
- C[WaitForHelper]
- D[callback]
- E[loadIssueDescriptions]
+ A[schemaToCLIOptions]
+ B[generateCli]
+ C[CliOption]
+ D[JsonSchema]
+ E[ArgDef]
A --> B
B --> C
C --> D
diff --git a/tutorials/chrome-devtools-mcp-tutorial/08-production-operations-and-privacy-governance.md b/tutorials/chrome-devtools-mcp-tutorial/08-production-operations-and-privacy-governance.md
index 738dd75f..8f4aeb15 100644
--- a/tutorials/chrome-devtools-mcp-tutorial/08-production-operations-and-privacy-governance.md
+++ b/tutorials/chrome-devtools-mcp-tutorial/08-production-operations-and-privacy-governance.md
@@ -39,15 +39,18 @@ You now have a full Chrome DevTools MCP learning path from setup to governed pro
Next tutorial: [Codex CLI Tutorial](../codex-cli-tutorial/)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
### `scripts/prepare.ts`
-The `removeConflictingGlobalDeclaration` function in [`scripts/prepare.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/prepare.ts) handles a key part of this chapter's functionality:
+The `HTMLElementEventMap` interface in [`scripts/prepare.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/prepare.ts) handles a key part of this chapter's functionality:
```ts
+
+/**
+ * Removes the conflicting global HTMLElementEventMap declaration from
+ * @paulirish/trace_engine/models/trace/ModelImpl.d.ts to avoid TS2717 error
+ * when both chrome-devtools-frontend and @paulirish/trace_engine declare
* the same property.
*/
function removeConflictingGlobalDeclaration(): void {
@@ -75,137 +78,132 @@ async function main() {
const fullPath = resolve(projectRoot, file);
console.log(`Removing: ${file}`);
try {
- await rm(fullPath, {recursive: true, force: true});
- } catch (error) {
- console.error(`Failed to remove ${file}:`, error);
- process.exit(1);
- }
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/prepare.ts`
+### `src/WaitForHelper.ts`
-The `main` function in [`scripts/prepare.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/prepare.ts) handles a key part of this chapter's functionality:
+The `WaitForHelper` class in [`src/WaitForHelper.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/WaitForHelper.ts) handles a key part of this chapter's functionality:
```ts
-}
-
-async function main() {
- console.log('Running prepare script to clean up chrome-devtools-frontend...');
- for (const file of filesToRemove) {
- const fullPath = resolve(projectRoot, file);
- console.log(`Removing: ${file}`);
- try {
- await rm(fullPath, {recursive: true, force: true});
- } catch (error) {
- console.error(`Failed to remove ${file}:`, error);
- process.exit(1);
- }
+import type {PredefinedNetworkConditions} from './third_party/index.js';
+
+export class WaitForHelper {
+ #abortController = new AbortController();
+ #page: CdpPage;
+ #stableDomTimeout: number;
+ #stableDomFor: number;
+ #expectNavigationIn: number;
+ #navigationTimeout: number;
+
+ constructor(
+ page: Page,
+ cpuTimeoutMultiplier: number,
+ networkTimeoutMultiplier: number,
+ ) {
+ this.#stableDomTimeout = 3000 * cpuTimeoutMultiplier;
+ this.#stableDomFor = 100 * cpuTimeoutMultiplier;
+ this.#expectNavigationIn = 100 * cpuTimeoutMultiplier;
+ this.#navigationTimeout = 3000 * networkTimeoutMultiplier;
+ this.#page = page as unknown as CdpPage;
}
- console.log('Clean up of chrome-devtools-frontend complete.');
-
- removeConflictingGlobalDeclaration();
-}
-
-void main();
+ /**
+ * A wrapper that executes a action and waits for
+ * a potential navigation, after which it waits
+ * for the DOM to be stable before returning.
+ */
+  async waitForStableDom(): Promise<void> {
+    const stableDomObserver = await this.#page.evaluateHandle(timeout => {
+      let timeoutId: ReturnType<typeof setTimeout>;
+ function callback() {
+ clearTimeout(timeoutId);
```
-This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/prepare.ts`
+### `src/WaitForHelper.ts`
-The `HTMLElementEventMap` interface in [`scripts/prepare.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/prepare.ts) handles a key part of this chapter's functionality:
+The `callback` function in [`src/WaitForHelper.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/WaitForHelper.ts) handles a key part of this chapter's functionality:
```ts
-
-/**
- * Removes the conflicting global HTMLElementEventMap declaration from
- * @paulirish/trace_engine/models/trace/ModelImpl.d.ts to avoid TS2717 error
- * when both chrome-devtools-frontend and @paulirish/trace_engine declare
- * the same property.
- */
-function removeConflictingGlobalDeclaration(): void {
- const filePath = resolve(
- projectRoot,
- 'node_modules/@paulirish/trace_engine/models/trace/ModelImpl.d.ts',
- );
- console.log(
- 'Removing conflicting global declaration from @paulirish/trace_engine...',
- );
- const content = readFileSync(filePath, 'utf-8');
- // Remove the declare global block using regex
- // Matches: declare global { ... interface HTMLElementEventMap { ... } ... }
- const newContent = content.replace(
- /declare global\s*\{\s*interface HTMLElementEventMap\s*\{[^}]*\[ModelUpdateEvent\.eventName\]:\s*ModelUpdateEvent;\s*\}\s*\}/s,
- '',
- );
- writeFileSync(filePath, newContent, 'utf-8');
- console.log('Successfully removed conflicting global declaration.');
-}
-
-async function main() {
- console.log('Running prepare script to clean up chrome-devtools-frontend...');
- for (const file of filesToRemove) {
- const fullPath = resolve(projectRoot, file);
- console.log(`Removing: ${file}`);
- try {
+    const stableDomObserver = await this.#page.evaluateHandle(timeout => {
+      let timeoutId: ReturnType<typeof setTimeout>;
+ function callback() {
+ clearTimeout(timeoutId);
+ timeoutId = setTimeout(() => {
+ domObserver.resolver.resolve();
+ domObserver.observer.disconnect();
+ }, timeout);
+ }
+ const domObserver = {
+ resolver: Promise.withResolvers(),
+ observer: new MutationObserver(callback),
+ };
+ // It's possible that the DOM is not gonna change so we
+ // need to start the timeout initially.
+ callback();
+
+ domObserver.observer.observe(document.body, {
+ childList: true,
+ subtree: true,
+ attributes: true,
+ });
+
+ return domObserver;
+ }, this.#stableDomFor);
+
+ this.#abortController.signal.addEventListener('abort', async () => {
+ try {
+ await stableDomObserver.evaluate(observer => {
+ observer.observer.disconnect();
+ observer.resolver.resolve();
+ });
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
-### `scripts/prepare.ts`
+### `src/WaitForHelper.ts`
-The `HTMLElementEventMap` interface in [`scripts/prepare.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/scripts/prepare.ts) handles a key part of this chapter's functionality:
+The `getNetworkMultiplierFromString` function in [`src/WaitForHelper.ts`](https://github.com/ChromeDevTools/chrome-devtools-mcp/blob/HEAD/src/WaitForHelper.ts) handles a key part of this chapter's functionality:
```ts
+}
-/**
- * Removes the conflicting global HTMLElementEventMap declaration from
- * @paulirish/trace_engine/models/trace/ModelImpl.d.ts to avoid TS2717 error
- * when both chrome-devtools-frontend and @paulirish/trace_engine declare
- * the same property.
- */
-function removeConflictingGlobalDeclaration(): void {
- const filePath = resolve(
- projectRoot,
- 'node_modules/@paulirish/trace_engine/models/trace/ModelImpl.d.ts',
- );
- console.log(
- 'Removing conflicting global declaration from @paulirish/trace_engine...',
- );
- const content = readFileSync(filePath, 'utf-8');
- // Remove the declare global block using regex
- // Matches: declare global { ... interface HTMLElementEventMap { ... } ... }
- const newContent = content.replace(
- /declare global\s*\{\s*interface HTMLElementEventMap\s*\{[^}]*\[ModelUpdateEvent\.eventName\]:\s*ModelUpdateEvent;\s*\}\s*\}/s,
- '',
- );
- writeFileSync(filePath, newContent, 'utf-8');
- console.log('Successfully removed conflicting global declaration.');
+export function getNetworkMultiplierFromString(
+ condition: string | null,
+): number {
+ const puppeteerCondition =
+ condition as keyof typeof PredefinedNetworkConditions;
+
+ switch (puppeteerCondition) {
+ case 'Fast 4G':
+ return 1;
+ case 'Slow 4G':
+ return 2.5;
+ case 'Fast 3G':
+ return 5;
+ case 'Slow 3G':
+ return 10;
+ }
+ return 1;
}
-async function main() {
- console.log('Running prepare script to clean up chrome-devtools-frontend...');
- for (const file of filesToRemove) {
- const fullPath = resolve(projectRoot, file);
- console.log(`Removing: ${file}`);
- try {
```
-This interface is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Chrome DevTools MCP Tutorial: Browser Automation and Debugging for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[removeConflictingGlobalDeclaration]
- B[main]
- C[HTMLElementEventMap]
- D[HTMLElementEventMap]
- E[saveLogsToFile]
+ A[HTMLElementEventMap]
+ B[WaitForHelper]
+ C[callback]
+ D[getNetworkMultiplierFromString]
+ E[HaveUniqueNames]
A --> B
B --> C
C --> D
diff --git a/tutorials/cipher-tutorial/01-getting-started.md b/tutorials/cipher-tutorial/01-getting-started.md
index 2b75ddd1..24876bb7 100644
--- a/tutorials/cipher-tutorial/01-getting-started.md
+++ b/tutorials/cipher-tutorial/01-getting-started.md
@@ -41,184 +41,161 @@ You now have Cipher running with a baseline local session.
Next: [Chapter 2: Core Modes and Session Workflow](02-core-modes-and-session-workflow.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/app/index.ts`
+### `bin/kill-daemon.js`
+
+The `sleep` function in [`bin/kill-daemon.js`](https://github.com/campfirein/cipher/blob/HEAD/bin/kill-daemon.js) handles a key part of this chapter's functionality:
-The `resolveEnvPath` function in [`src/app/index.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/index.ts) handles a key part of this chapter's functionality:
+```js
+} from '@campfirein/brv-transport-client'
-```ts
+function sleep(ms) {
+ return new Promise((resolve) => {
+ setTimeout(resolve, ms)
+ })
+}
-// Helper function to resolve .env file path
-function resolveEnvPath(): string {
- // Try current working directory first
- if (existsSync('.env')) {
- return '.env';
- }
+async function waitForProcessExit(pid, deadlineMs, pollMs) {
+ const deadline = Date.now() + deadlineMs
+ while (Date.now() < deadline) {
+ if (!isProcessAlive(pid)) {
+ return true
+ }
- // Try relative to project root (where package.json is located)
- const currentFileUrl = import.meta.url;
- const currentFilePath = fileURLToPath(currentFileUrl);
- const projectRoot = path.resolve(path.dirname(currentFilePath), '../..');
- const envPath = path.resolve(projectRoot, '.env');
+ // eslint-disable-next-line no-await-in-loop
+ await sleep(pollMs)
+ }
- return envPath;
+ return false
}
-// ===== EARLY MCP MODE DETECTION AND LOG REDIRECTION =====
-// Following Cipher's best practices to prevent stdio interference
-// This must happen BEFORE any logging operations
-const detectAndRedirectMcpLogs = () => {
- const args = process.argv;
- const isMcpMode = args.includes('--mode') && args[args.indexOf('--mode') + 1] === 'mcp';
-
- if (isMcpMode) {
- // Redirect logs immediately to prevent stdout contamination
- const logFile = process.env.CIPHER_MCP_LOG_FILE || path.join(os.tmpdir(), 'cipher-mcp.log');
- logger.redirectToFile(logFile);
-
- // Use stderr for critical startup messages only
- process.stderr.write(`[CIPHER-MCP] Log redirection activated: ${logFile}\n`);
- }
+const status = discoverDaemon()
+
+// Extract PID from any discovery result that has one
+const pid = status.running
+ ? status.pid
+ : 'pid' in status
+ ? status.pid
+ : undefined
+
+if (pid === undefined || !isProcessAlive(pid)) {
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/index.ts`
-
-The `startApiMode` function in [`src/app/index.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/index.ts) handles a key part of this chapter's functionality:
-
-```ts
- * Start the API server mode
- */
- async function startApiMode(agent: MemAgent, options: any): Promise {
- const port = parseInt(options.port) || 3001;
- const host = options.host || 'localhost';
- const mcpTransportType = options.mcpTransportType || undefined; // Pass through from CLI options
- const mcpPort = options.mcpPort ? parseInt(options.mcpPort, 10) : undefined; // Pass through from CLI options
- // Handle API prefix from environment variable or CLI option
- const apiPrefix =
- process.env.CIPHER_API_PREFIX !== undefined
- ? process.env.CIPHER_API_PREFIX === '""'
- ? ''
- : process.env.CIPHER_API_PREFIX
- : options.apiPrefix;
-
- logger.info(`Starting API server on ${host}:${port}`, null, 'green');
-
- const apiServer = new ApiServer(agent, {
- port,
- host,
- corsOrigins: ['http://localhost:3000', 'http://localhost:3001'], // Default CORS origins
- rateLimitWindowMs: 15 * 60 * 1000, // 15 minutes
- rateLimitMaxRequests: 100, // 100 requests per window
- // Enable WebSocket by default for API mode
- enableWebSocket: true,
- webSocketConfig: {
- path: '/ws',
- maxConnections: 1000,
- connectionTimeout: 300000, // 5 minutes
- heartbeatInterval: 30000, // 30 seconds
- enableCompression: true,
- },
+### `bin/kill-daemon.js`
+
+The `waitForProcessExit` function in [`bin/kill-daemon.js`](https://github.com/campfirein/cipher/blob/HEAD/bin/kill-daemon.js) handles a key part of this chapter's functionality:
+
+```js
+}
+
+async function waitForProcessExit(pid, deadlineMs, pollMs) {
+ const deadline = Date.now() + deadlineMs
+ while (Date.now() < deadline) {
+ if (!isProcessAlive(pid)) {
+ return true
+ }
+
+ // eslint-disable-next-line no-await-in-loop
+ await sleep(pollMs)
+ }
+
+ return false
+}
+
+const status = discoverDaemon()
+
+// Extract PID from any discovery result that has one
+const pid = status.running
+ ? status.pid
+ : 'pid' in status
+ ? status.pid
+ : undefined
+
+if (pid === undefined || !isProcessAlive(pid)) {
+ console.log('[kill-daemon] No running daemon found')
+} else {
+ console.log(`[kill-daemon] Stopping daemon (PID ${pid})...`)
+
+ let stopped = false
+
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/index.ts`
-
-The `startUiMode` function in [`src/app/index.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/index.ts) handles a key part of this chapter's functionality:
-
-```ts
- * Start the UI mode with both API server and Web UI
- */
- async function startUiMode(agent: MemAgent, options: any): Promise {
- const apiPort = parseInt(options.port) || 3001;
- const uiPort = parseInt(options.uiPort) || 3000;
- const host = options.host || 'localhost';
- const mcpTransportType = options.mcpTransportType || undefined;
- const mcpPort = options.mcpPort ? parseInt(options.mcpPort, 10) : undefined;
- // Handle API prefix from environment variable or CLI option
- const apiPrefix =
- process.env.CIPHER_API_PREFIX !== undefined
- ? process.env.CIPHER_API_PREFIX === '""'
- ? ''
- : process.env.CIPHER_API_PREFIX
- : options.apiPrefix;
-
- logger.info(
- `Starting UI mode - API server on ${host}:${apiPort}, UI server on ${host}:${uiPort}`,
- null,
- 'green'
- );
-
- // Start API server first
- const apiServer = new ApiServer(agent, {
- port: apiPort,
- host,
- corsOrigins: [`http://${host}:${uiPort}`, `http://localhost:${uiPort}`], // Allow UI to connect
- rateLimitWindowMs: 15 * 60 * 1000, // 15 minutes
- rateLimitMaxRequests: 100, // 100 requests per window
- // Enable WebSocket by default for UI mode
- enableWebSocket: true,
- webSocketConfig: {
+### `src/tui/repl-startup.tsx`
+
+The `startRepl` function in [`src/tui/repl-startup.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/repl-startup.tsx) handles a key part of this chapter's functionality:
+
+```tsx
+ * Start the ByteRover REPL
+ */
+export async function startRepl(options: ReplOptions): Promise<void> {
+ const {version} = options
+
+ // Set version in store before rendering
+ useTransportStore.getState().setVersion(version)
+
+ const {waitUntilExit} = render(
+
+
+ ,
+ )
+
+ await waitUntilExit()
+}
+
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/utils/service-initializer.ts`
+### `src/tui/repl-startup.tsx`
-The `createEmbeddingFromLLMProvider` function in [`src/core/utils/service-initializer.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/utils/service-initializer.ts) handles a key part of this chapter's functionality:
+The `ReplOptions` interface in [`src/tui/repl-startup.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/repl-startup.tsx) handles a key part of this chapter's functionality:
-```ts
- * Create embedding configuration from LLM provider settings
+```tsx
+ * - TransportInitializer connects to daemon via connectToDaemon()
*/
-async function createEmbeddingFromLLMProvider(
- embeddingManager: EmbeddingManager,
- llmConfig: any
-): Promise<{ embedder: any; info: any } | null> {
- const provider = llmConfig.provider?.toLowerCase();
-
- try {
- switch (provider) {
- case 'openai': {
- const apiKey = llmConfig.apiKey || process.env.OPENAI_API_KEY;
- if (!apiKey || apiKey.trim() === '') {
- logger.debug(
- 'No OpenAI API key available for embedding fallback - switching to chat-only mode'
- );
- return null;
- }
- const embeddingConfig = {
- type: 'openai' as const,
- apiKey,
- model: 'text-embedding-3-small' as const,
- baseUrl: llmConfig.baseUrl,
- organization: llmConfig.organization,
- timeout: 30000,
- maxRetries: 3,
- };
- logger.debug('Using OpenAI embedding fallback: text-embedding-3-small');
- return await embeddingManager.createEmbedderFromConfig(embeddingConfig, 'default');
- }
-
- case 'ollama': {
+export interface ReplOptions {
+ version: string
+}
+
+/**
+ * Start the ByteRover REPL
+ */
+export async function startRepl(options: ReplOptions): Promise<void> {
+ const {version} = options
+
+ // Set version in store before rendering
+ useTransportStore.getState().setVersion(version)
+
+ const {waitUntilExit} = render(
+
+
+ ,
+ )
+
+ await waitUntilExit()
+}
+
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[resolveEnvPath]
- B[startApiMode]
- C[startUiMode]
- D[createEmbeddingFromLLMProvider]
+ A[sleep]
+ B[waitForProcessExit]
+ C[startRepl]
+ D[ReplOptions]
+ E[fuzzyMatch]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/02-core-modes-and-session-workflow.md b/tutorials/cipher-tutorial/02-core-modes-and-session-workflow.md
index 546ec3e7..3a767607 100644
--- a/tutorials/cipher-tutorial/02-core-modes-and-session-workflow.md
+++ b/tutorials/cipher-tutorial/02-core-modes-and-session-workflow.md
@@ -32,170 +32,168 @@ You now understand which Cipher mode to run for each workflow type.
Next: [Chapter 3: Memory Architecture and Data Model](03-memory-architecture-and-data-model.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/core/utils/service-initializer.ts`
+### `src/tui/components/selectable-list.tsx`
-The `createAgentServices` function in [`src/core/utils/service-initializer.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/utils/service-initializer.ts) handles a key part of this chapter's functionality:
+The `SelectableListProps` interface in [`src/tui/components/selectable-list.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/selectable-list.tsx) handles a key part of this chapter's functionality:
-```ts
-};
-
-export async function createAgentServices(
- agentConfig: AgentConfig,
- appMode?: 'cli' | 'mcp' | 'api'
-): Promise {
- let contextManager: ContextManager | undefined = undefined;
- // 1. Initialize agent config
- const config = agentConfig;
-
- // 1.1. Initialize event manager first (other services will use it)
- logger.debug('Initializing event manager...');
-
- // Use eventPersistence config if present, with environment variable overrides
- const eventPersistenceConfig = {
- ...config.eventPersistence,
- // Support EVENT_PERSISTENCE_ENABLED env variable
- enabled:
- process.env.EVENT_PERSISTENCE_ENABLED === 'true' ||
- (config.eventPersistence?.enabled ?? false),
- // Support EVENT_PERSISTENCE_PATH env variable
- filePath: process.env.EVENT_PERSISTENCE_PATH || config.eventPersistence?.filePath,
- };
-
- // Support EVENT_FILTERING_ENABLED env variable
- const enableFiltering = process.env.EVENT_FILTERING_ENABLED === 'true';
-
- // Support EVENT_FILTERED_TYPES env variable (comma-separated)
- const filteredTypes = (process.env.EVENT_FILTERED_TYPES || '')
- .split(',')
- .map(s => s.trim())
- .filter(Boolean);
+```tsx
+ * Props for SelectableList component.
+ */
+export interface SelectableListProps {
+ /** Available height in lines */
+ availableHeight?: number
+ /** Current/selected item (shows ● indicator) */
+ currentItem?: T
+ /** Keys to use for filtering (searched with fuzzy match) */
+ filterKeys: (item: T) => string[]
+ /** Function to get item key for comparison with currentItem */
+ getCurrentKey?: (item: T) => string
+ /** Optional grouping function */
+ groupBy?: (item: T) => string
+ /** Hide the Cancel keybind hint and disable Esc to cancel */
+ hideCancelButton?: boolean
+ /** Initial search value */
+ initialSearch?: string
+ /** Whether keyboard input is active */
+ isActive?: boolean
+ /** Array of items to display */
+ items: T[]
+ /** Custom keybinds */
+ keybinds?: Array<{
+ action: (item: T) => void
+ key: string
+ label: string
+ }>
+ /** Function to get unique key for each item */
+ keyExtractor: (item: T) => string
+ /** Callback when selection is cancelled (Esc) */
+ onCancel?: () => void
+ /** Callback when an item is selected */
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/commands/restart.ts`
-The `createVectorStore` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `Restart` class in [`src/oclif/commands/restart.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/commands/restart.ts) handles a key part of this chapter's functionality:
```ts
- * ```typescript
- * // Basic usage with Qdrant
- * const { manager, store } = await createVectorStore({
- * type: 'qdrant',
- * host: 'localhost',
- * port: 6333,
- * collectionName: 'documents',
- * dimension: 1536
- * });
- *
- * // Use the vector store
- * await store.insert([vector], ['doc1'], [{ title: 'Document' }]);
- * const results = await store.search(queryVector, 5);
- *
- * // Cleanup when done
- * await manager.disconnect();
- * ```
- *
- * @example
- * ```typescript
- * // Development configuration with in-memory
- * const { manager, store } = await createVectorStore({
- * type: 'in-memory',
- * collectionName: 'test',
- * dimension: 1536,
- * maxVectors: 1000
- * });
- * ```
- */
-export async function createVectorStore(config: VectorStoreConfig): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
+const SIGTERM_BUDGET_MS = 8000
+
+export default class Restart extends Command {
+ static description = `Restart ByteRover — stop everything and start fresh.
+
+Run this when ByteRover is unresponsive, stuck, or after installing an update.
+All open sessions and background processes are stopped.
+The daemon will restart automatically on the next brv command.`
+ static examples = ['<%= config.bin %> <%= command.id %>']
+ /** Commands whose processes must not be killed (e.g. `brv update` calls `brv restart`). */
+ private static readonly PROTECTED_COMMANDS = ['update']
+ /** Server/agent patterns — cannot match CLI processes, no self-kill risk. */
+ private static readonly SERVER_AGENT_PATTERNS = ['brv-server.js', 'agent-process.js']
+
+ /**
+ * Builds the list of CLI script patterns used to identify brv client processes.
+ *
+ * All patterns are absolute paths or specific filenames to avoid false-positive matches
+ * against other oclif CLIs (which also use bin/run.js and bin/dev.js conventions).
+ *
+ * CLI script patterns (covers all installations):
+ * dev mode (bin/dev.js): join(brvBinDir, 'dev.js') — absolute path, same installation only
+ * build/dev (bin/run.js): join(brvBinDir, 'run.js')
+ * global install (npm / tgz): byterover-cli/bin/run.js — package name in node_modules is fixed
+ * bundled binary (oclif pack): join('bin', 'brv') + argv1
+ * nvm / system global: cmdline = node .../bin/brv ← caught by 'bin/brv' substring
+ * curl install (/.brv-cli/): join(brvBinDir, 'run') — entry point named 'run' without .js
+ *
+ * Set deduplicates when paths overlap (e.g. process.argv[1] is already run.js).
+ */
+ static buildCliPatterns(): string[] {
+ const argv1 = resolve(process.argv[1])
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/init.tsx`
-The `createDefaultVectorStore` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `countOutputLines` function in [`src/tui/components/init.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/init.tsx) handles a key part of this chapter's functionality:
-```ts
- * @example
- * ```typescript
- * const { manager, store } = await createDefaultVectorStore();
- * // Uses in-memory backend with default settings
- *
- * const { manager, store } = await createDefaultVectorStore('my_collection', 768);
- * // Uses in-memory backend with custom collection and dimension
- * ```
+```tsx
+ * @returns Total number of lines across all messages
*/
-export async function createDefaultVectorStore(
- collectionName: string = 'knowledge_memory',
- dimension: number = 1536
-): Promise {
- return createVectorStore({
- type: 'in-memory',
- collectionName,
- dimension,
- maxVectors: 10000,
- });
+function countOutputLines(messages: StreamingMessage[]): number {
+ let total = 0
+ for (const msg of messages) {
+ total += msg.content.split('\n').length
+ }
+
+ return total
}
/**
- * Creates vector storage from environment variables
+ * Get messages from the end that fit within maxLines, truncating from the beginning
*
- * Reads vector storage configuration from environment variables and creates
- * the vector storage system. Falls back to in-memory if not configured.
- *
- * Environment variables:
- * - VECTOR_STORE_TYPE: Backend type (qdrant, in-memory)
- * - VECTOR_STORE_HOST: Qdrant host (if using Qdrant)
- * - VECTOR_STORE_PORT: Qdrant port (if using Qdrant)
- * - VECTOR_STORE_URL: Qdrant URL (if using Qdrant)
+ * @param messages - Array of streaming messages
+ * @param maxLines - Maximum number of lines to display
+ * @returns Object containing display messages, skipped lines count, and total lines
+ */
+function getMessagesFromEnd(
+ messages: StreamingMessage[],
+ maxLines: number,
+): {displayMessages: StreamingMessage[]; skippedLines: number; totalLines: number} {
+ const totalLines = countOutputLines(messages)
+
+ if (totalLines <= maxLines) {
+ return {displayMessages: messages, skippedLines: 0, totalLines}
+ }
+
+ const displayMessages: StreamingMessage[] = []
+ let lineCount = 0
+
+ // Iterate from the end (newest messages first)
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/init.tsx`
-The `createVectorStoreFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `getMessagesFromEnd` function in [`src/tui/components/init.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/init.tsx) handles a key part of this chapter's functionality:
-```ts
- * process.env.VECTOR_STORE_COLLECTION = 'documents';
- *
- * const { manager, store } = await createVectorStoreFromEnv();
- * ```
+```tsx
+ * @returns Object containing display messages, skipped lines count, and total lines
*/
-export async function createVectorStoreFromEnv(agentConfig?: any): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
- // Get configuration from environment variables
- const config = getVectorStoreConfigFromEnv(agentConfig);
- // console.log('config', config);
- logger.info(`${LOG_PREFIXES.FACTORY} Creating vector storage from environment`, {
- type: config.type,
- collection: config.collectionName,
- dimension: config.dimension,
- });
-
- return createVectorStore(config);
-}
-
-/**
- * Creates dual collection vector storage from environment variables
- *
- * Creates a dual collection manager that handles both knowledge and reflection
- * memory collections. Reflection collection is only created if REFLECTION_VECTOR_STORE_COLLECTION
- * is set and the model supports reasoning.
- *
- * @param agentConfig - Optional agent configuration to override dimension from embedding config
- * @returns Promise resolving to dual collection manager and stores
- *
- * @example
- * ```typescript
+function getMessagesFromEnd(
+ messages: StreamingMessage[],
+ maxLines: number,
+): {displayMessages: StreamingMessage[]; skippedLines: number; totalLines: number} {
+ const totalLines = countOutputLines(messages)
+
+ if (totalLines <= maxLines) {
+ return {displayMessages: messages, skippedLines: 0, totalLines}
+ }
+
+ const displayMessages: StreamingMessage[] = []
+ let lineCount = 0
+
+ // Iterate from the end (newest messages first)
+ for (let i = messages.length - 1; i >= 0; i--) {
+ const msg = messages[i]
+ const msgLineArray = msg.content.split('\n')
+ const msgLineCount = msgLineArray.length
+
+ if (lineCount + msgLineCount <= maxLines) {
+ displayMessages.unshift(msg)
+ lineCount += msgLineCount
+ } else {
+ const remainingSpace = maxLines - lineCount
+ if (remainingSpace > 0) {
+ const truncatedContent = msgLineArray.slice(-remainingSpace).join('\n')
+ displayMessages.unshift({
+ ...msg,
+ content: truncatedContent,
+ })
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
@@ -205,11 +203,13 @@ This function is important because it defines how Cipher Tutorial: Shared Memory
```mermaid
flowchart TD
- A[createAgentServices]
- B[createVectorStore]
- C[createDefaultVectorStore]
- D[createVectorStoreFromEnv]
+ A[SelectableListProps]
+ B[Restart]
+ C[countOutputLines]
+ D[getMessagesFromEnd]
+ E[processMessagesForActions]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/03-memory-architecture-and-data-model.md b/tutorials/cipher-tutorial/03-memory-architecture-and-data-model.md
index 6c2f7c47..b723fab2 100644
--- a/tutorials/cipher-tutorial/03-memory-architecture-and-data-model.md
+++ b/tutorials/cipher-tutorial/03-memory-architecture-and-data-model.md
@@ -32,170 +32,168 @@ You now understand the high-level memory model that powers Cipher across agent i
Next: [Chapter 4: Configuration, Providers, and Embeddings](04-configuration-providers-and-embeddings.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/init.tsx`
-The `createDualCollectionVectorStoreFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `ProcessedMessage` interface in [`src/tui/components/init.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/init.tsx) handles a key part of this chapter's functionality:
-```ts
- * process.env.REFLECTION_VECTOR_STORE_COLLECTION = 'reflection_memory';
+```tsx
+ * Includes action state for spinner display
+ */
+export interface ProcessedMessage extends StreamingMessage {
+ /** For action_start: whether the action is still running (no matching action_stop) */
+ isActionRunning?: boolean
+ /** For action_start: the completion message from action_stop */
+ stopMessage?: string
+}
+
+/**
+ * Count the total number of lines in streaming messages (simple newline count)
*
- * const { manager, knowledgeStore, reflectionStore } = await createDualCollectionVectorStoreFromEnv();
- * ```
+ * @param messages - Array of streaming messages
+ * @returns Total number of lines across all messages
*/
-export async function createDualCollectionVectorStoreFromEnv(
- agentConfig?: any
-): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
- // Get base configuration from environment variables
- const config = getVectorStoreConfigFromEnv(agentConfig);
- // console.log('createDualCollectionVectorStoreFromEnv config', config)
- // Use ServiceCache to prevent duplicate dual collection vector store creation
- const serviceCache = getServiceCache();
- const cacheKey = createServiceKey('dualCollectionVectorStore', {
- type: config.type,
- collection: config.collectionName,
- reflectionCollection: env.REFLECTION_VECTOR_STORE_COLLECTION || '',
- // Include dimension for proper cache key differentiation
- dimension: config.dimension,
- });
-
- return await serviceCache.getOrCreate(cacheKey, async () => {
- logger.debug('Creating new dual collection vector store instance');
- return await createDualCollectionVectorStoreInternal(config, logger);
- });
+function countOutputLines(messages: StreamingMessage[]): number {
+ let total = 0
+ for (const msg of messages) {
+ total += msg.content.split('\n').length
+ }
+
+ return total
}
-async function createDualCollectionVectorStoreInternal(
- config: VectorStoreConfig,
- logger: any
+/**
+ * Get messages from the end that fit within maxLines, truncating from the beginning
+ *
+ * @param messages - Array of streaming messages
+ * @param maxLines - Maximum number of lines to display
+ * @returns Object containing display messages, skipped lines count, and total lines
+ */
+function getMessagesFromEnd(
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/init.tsx`
-The `createDualCollectionVectorStoreInternal` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `InitProps` interface in [`src/tui/components/init.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/init.tsx) handles a key part of this chapter's functionality:
-```ts
- return await serviceCache.getOrCreate(cacheKey, async () => {
- logger.debug('Creating new dual collection vector store instance');
- return await createDualCollectionVectorStoreInternal(config, logger);
- });
+```tsx
+const INLINE_SEARCH_OVERHEAD = 3
+
+export interface InitProps {
+ /** Whether the component should be interactive (for EnterPrompt activation) */
+ active?: boolean
+
+ /** Auto-start init without waiting for Enter key in idle state */
+ autoStart?: boolean
+
+ /** Custom idle state message (optional) */
+ idleMessage?: string
+
+ /** Maximum lines available for streaming output */
+ maxOutputLines: number
+
+ /** Optional callback when init completes successfully */
+ onInitComplete?: () => void
+
+ /** Show idle state message? (default: true for InitView, false for OnboardingFlow) */
+ showIdleMessage?: boolean
}
-async function createDualCollectionVectorStoreInternal(
- config: VectorStoreConfig,
- logger: any
-): Promise {
- // If reflection collection is not set or is empty/whitespace, treat as disabled
- const reflectionCollection = (env.REFLECTION_VECTOR_STORE_COLLECTION || '').trim();
- if (!reflectionCollection) {
- logger.info(
- `${LOG_PREFIXES.FACTORY} Reflection collection not set, creating single collection manager only`,
- {
- type: config.type,
- knowledgeCollection: config.collectionName,
- }
- );
- const manager = new DualCollectionVectorManager(config);
-
- try {
- await manager.connect();
- const knowledgeStore = manager.getStore('knowledge');
- if (!knowledgeStore) {
- throw new Error('Failed to get knowledge store from dual collection manager');
- }
- return {
- manager,
- knowledgeStore,
- reflectionStore: null,
+export const Init: React.FC = ({
+ active = true,
+ autoStart = false,
+ idleMessage = 'Your project needs initializing.',
+ maxOutputLines,
+ onInitComplete,
+ showIdleMessage = true,
+}) => {
+ const {
+ theme: {colors},
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/commands/debug.ts`
-The `getVectorStoreConfigFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `Debug` class in [`src/oclif/commands/debug.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/commands/debug.ts) handles a key part of this chapter's functionality:
```ts
-
- // Get configuration from environment variables
- const config = getVectorStoreConfigFromEnv(agentConfig);
- // console.log('config', config);
- logger.info(`${LOG_PREFIXES.FACTORY} Creating vector storage from environment`, {
- type: config.type,
- collection: config.collectionName,
- dimension: config.dimension,
- });
-
- return createVectorStore(config);
}
-/**
- * Creates dual collection vector storage from environment variables
- *
- * Creates a dual collection manager that handles both knowledge and reflection
- * memory collections. Reflection collection is only created if REFLECTION_VECTOR_STORE_COLLECTION
- * is set and the model supports reasoning.
- *
- * @param agentConfig - Optional agent configuration to override dimension from embedding config
- * @returns Promise resolving to dual collection manager and stores
- *
- * @example
- * ```typescript
- * // Set environment variables for reasoning model with dual collections
- * process.env.VECTOR_STORE_TYPE = 'in-memory';
- * process.env.VECTOR_STORE_COLLECTION = 'knowledge';
- * process.env.REFLECTION_VECTOR_STORE_COLLECTION = 'reflection_memory';
- *
- * const { manager, knowledgeStore, reflectionStore } = await createDualCollectionVectorStoreFromEnv();
- * ```
+export default class Debug extends Command {
+ public static description = 'Live monitor for daemon internal state (development only)'
+ public static examples = [
+ '<%= config.bin %> <%= command.id %>',
+ '<%= config.bin %> <%= command.id %> --format json',
+ '<%= config.bin %> <%= command.id %> --once',
+ ]
+ public static flags = {
+ force: Flags.boolean({
+ default: false,
+ description: 'Kill existing daemon and start fresh',
+ }),
+ format: Flags.string({
+ char: 'f',
+ default: 'tree',
+ description: 'Output format',
+ options: ['tree', 'json'],
+ }),
+ once: Flags.boolean({
+ default: false,
+ description: 'Print once and exit (no live monitoring)',
+ }),
+ }
+ public static hidden = !isDevelopment()
+
+ protected clearScreen(): void {
+ if (process.stdout.isTTY) {
+ process.stdout.write('\u001B[2J\u001B[H')
+ }
+ }
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/lib/task-client.ts`
-The `getWorkspaceVectorStoreConfigFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `formatToolDisplay` function in [`src/oclif/lib/task-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/task-client.ts) handles a key part of this chapter's functionality:
```ts
- * @example
- * ```typescript
- * const config = getWorkspaceVectorStoreConfigFromEnv();
- * console.log('Workspace vector store configuration:', config);
- *
- * // Then use the config to create workspace store
- * const { manager, store } = await createVectorStore(config);
- * ```
+
+/**
+ * Format tool call for CLI display (simplified version of TUI formatToolDisplay).
*/
-export function getWorkspaceVectorStoreConfigFromEnv(agentConfig?: any): VectorStoreConfig {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
- // Get workspace-specific configuration with fallbacks to default vector store config
- const storeType = env.WORKSPACE_VECTOR_STORE_TYPE || env.VECTOR_STORE_TYPE;
- const collectionName = env.WORKSPACE_VECTOR_STORE_COLLECTION || 'workspace_memory';
- let dimension =
- env.WORKSPACE_VECTOR_STORE_DIMENSION !== undefined &&
- !Number.isNaN(env.WORKSPACE_VECTOR_STORE_DIMENSION)
- ? env.WORKSPACE_VECTOR_STORE_DIMENSION
- : env.VECTOR_STORE_DIMENSION !== undefined && !Number.isNaN(env.VECTOR_STORE_DIMENSION)
- ? env.VECTOR_STORE_DIMENSION
- : 1536;
- const maxVectors =
- env.WORKSPACE_VECTOR_STORE_MAX_VECTORS !== undefined &&
- !Number.isNaN(env.WORKSPACE_VECTOR_STORE_MAX_VECTORS)
- ? env.WORKSPACE_VECTOR_STORE_MAX_VECTORS
- : env.VECTOR_STORE_MAX_VECTORS !== undefined && !Number.isNaN(env.VECTOR_STORE_MAX_VECTORS)
- ? env.VECTOR_STORE_MAX_VECTORS
- : 10000;
-
- // Override dimension from agent config if embedding configuration is present
- if (
+export function formatToolDisplay(toolName: string, args: Record): string {
+ switch (toolName.toLowerCase()) {
+ case 'bash': {
+ const cmd = args.command ? String(args.command) : ''
+ return `Bash ${cmd.length > 60 ? `$ ${cmd.slice(0, 57)}...` : `$ ${cmd}`}`
+ }
+
+ case 'code_exec': {
+ return 'CodeExec'
+ }
+
+ case 'edit': {
+ const filePath = args.file_path ?? args.filePath
+ return filePath ? `Edit ${filePath}` : 'Edit'
+ }
+
+ case 'glob': {
+ const {path, pattern} = args
+ return pattern ? `Glob "${pattern}"${path ? ` in ${path}` : ''}` : 'Glob'
+ }
+
+ case 'grep': {
+ const {path, pattern} = args
+ return pattern ? `Grep "${pattern}"${path ? ` in ${path}` : ''}` : 'Grep'
+ }
+
+ case 'read': {
+ const filePath = args.file_path ?? args.filePath
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
@@ -205,11 +203,13 @@ This function is important because it defines how Cipher Tutorial: Shared Memory
```mermaid
flowchart TD
- A[createDualCollectionVectorStoreFromEnv]
- B[createDualCollectionVectorStoreInternal]
- C[getVectorStoreConfigFromEnv]
- D[getWorkspaceVectorStoreConfigFromEnv]
+ A[ProcessedMessage]
+ B[InitProps]
+ C[Debug]
+ D[formatToolDisplay]
+ E[waitForTaskCompletion]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/04-configuration-providers-and-embeddings.md b/tutorials/cipher-tutorial/04-configuration-providers-and-embeddings.md
index 2cb12d25..0e32d544 100644
--- a/tutorials/cipher-tutorial/04-configuration-providers-and-embeddings.md
+++ b/tutorials/cipher-tutorial/04-configuration-providers-and-embeddings.md
@@ -32,175 +32,184 @@ You now have a configuration strategy for deterministic Cipher behavior across e
Next: [Chapter 5: Vector Stores and Workspace Memory](05-vector-stores-and-workspace-memory.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/lib/task-client.ts`
-The `createMultiCollectionVectorStoreFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `ToolCallRecord` interface in [`src/oclif/lib/task-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/task-client.ts) handles a key part of this chapter's functionality:
```ts
- * @returns Promise resolving to multi collection manager and stores
- */
-export async function createMultiCollectionVectorStoreFromEnv(
- agentConfig?: any
-): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
- // Import MultiCollectionVectorManager dynamically to avoid circular dependencies
- // const { MultiCollectionVectorManager } = await import('./multi-collection-manager.js'); // Not used in this scope
-
- // Get base configuration from environment
- const config = getVectorStoreConfigFromEnv(agentConfig);
-
- // Use ServiceCache to prevent duplicate multi collection vector store creation
- const serviceCache = getServiceCache();
- const cacheKey = createServiceKey('multiCollectionVectorStore', {
- type: config.type,
- collection: config.collectionName,
- reflectionCollection: env.REFLECTION_VECTOR_STORE_COLLECTION || '',
- workspaceCollection: env.WORKSPACE_VECTOR_STORE_COLLECTION || 'workspace_memory',
- workspaceEnabled: !!env.USE_WORKSPACE_MEMORY,
- // Include dimension for proper cache key differentiation
- dimension: config.dimension,
- });
-
- return await serviceCache.getOrCreate(cacheKey, async () => {
- logger.debug('Creating new multi collection vector store instance');
- return await createMultiCollectionVectorStoreInternal(config, logger);
- });
+
+/** Collected tool call with result (mirrors TUI ToolCallEvent) */
+export interface ToolCallRecord {
+ args: Record
+ callId?: string
+ error?: string
+ result?: unknown
+ status: 'completed' | 'error' | 'running'
+ success?: boolean
+ toolName: string
}
-async function createMultiCollectionVectorStoreInternal(
+/** Completion result passed to onCompleted callback */
+export interface TaskCompletionResult {
+ logId?: string
+ result?: string
+ taskId: string
+ toolCalls: ToolCallRecord[]
+}
+
+/** Error result passed to onError callback */
+export interface TaskErrorResult {
+ error: {code?: string; message: string}
+ logId?: string
+ taskId: string
+ toolCalls: ToolCallRecord[]
+}
+
+/** Options for waitForTaskCompletion */
+export interface WaitForTaskOptions {
+ /** Client to subscribe events on */
+ client: ITransportClient
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/lib/task-client.ts`
-The `createMultiCollectionVectorStoreInternal` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `TaskCompletionResult` interface in [`src/oclif/lib/task-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/task-client.ts) handles a key part of this chapter's functionality:
```ts
- return await serviceCache.getOrCreate(cacheKey, async () => {
- logger.debug('Creating new multi collection vector store instance');
- return await createMultiCollectionVectorStoreInternal(config, logger);
- });
+
+/** Completion result passed to onCompleted callback */
+export interface TaskCompletionResult {
+ logId?: string
+ result?: string
+ taskId: string
+ toolCalls: ToolCallRecord[]
}
-async function createMultiCollectionVectorStoreInternal(
- config: VectorStoreConfig,
- logger: any
-): Promise {
- // Import MultiCollectionVectorManager dynamically
- const { MultiCollectionVectorManager } = await import('./multi-collection-manager.js');
-
- logger.info(`${LOG_PREFIXES.FACTORY} Creating multi collection vector storage from environment`, {
- type: config.type,
- knowledgeCollection: config.collectionName,
- reflectionCollection: env.REFLECTION_VECTOR_STORE_COLLECTION || 'disabled',
- workspaceCollection: env.USE_WORKSPACE_MEMORY
- ? env.WORKSPACE_VECTOR_STORE_COLLECTION || 'workspace_memory'
- : 'disabled',
- workspaceEnabled: !!env.USE_WORKSPACE_MEMORY,
- });
-
- // Create multi collection manager
- const manager = new MultiCollectionVectorManager(config);
-
- try {
- const connected = await manager.connect();
-
- if (!connected) {
- throw new Error('Failed to connect multi collection vector manager');
- }
+/** Error result passed to onError callback */
+export interface TaskErrorResult {
+ error: {code?: string; message: string}
+ logId?: string
+ taskId: string
+ toolCalls: ToolCallRecord[]
+}
+
+/** Options for waitForTaskCompletion */
+export interface WaitForTaskOptions {
+ /** Client to subscribe events on */
+ client: ITransportClient
+ /** Command name for JSON output */
+ command: string
+ /** Output format */
+ format: 'json' | 'text'
+ /** Called on task:completed */
+ onCompleted: (result: TaskCompletionResult) => void
+ /** Called on task:error */
+ onError: (result: TaskErrorResult) => void
+ /** Called on llmservice:response (optional, used by query to display final answer) */
+ onResponse?: (content: string, taskId: string) => void
+ /** Task ID to wait for */
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/lib/task-client.ts`
-The `createWorkspaceVectorStoreFromEnv` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `TaskErrorResult` interface in [`src/oclif/lib/task-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/task-client.ts) handles a key part of this chapter's functionality:
```ts
- * process.env.WORKSPACE_VECTOR_STORE_COLLECTION = 'team_workspace';
- *
- * const { manager, store } = await createWorkspaceVectorStoreFromEnv();
- * ```
- */
-export async function createWorkspaceVectorStoreFromEnv(
- agentConfig?: any
-): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
-
- // Get workspace-specific configuration from environment variables
- const config = getWorkspaceVectorStoreConfigFromEnv(agentConfig);
-
- logger.info(`${LOG_PREFIXES.FACTORY} Creating workspace memory vector storage from environment`, {
- type: config.type,
- collection: config.collectionName,
- dimension: config.dimension,
- workspaceSpecific: config.collectionName !== env.VECTOR_STORE_COLLECTION,
- });
-
- return createVectorStore(config);
+
+/** Error result passed to onError callback */
+export interface TaskErrorResult {
+ error: {code?: string; message: string}
+ logId?: string
+ taskId: string
+ toolCalls: ToolCallRecord[]
}
-/**
- * Type guard to check if an object is a VectorStoreFactory
- *
- * @param obj - Object to check
- * @returns true if the object has manager and store properties
- */
-export function isVectorStoreFactory(obj: unknown): obj is VectorStoreFactory {
- return (
- typeof obj === 'object' &&
+/** Options for waitForTaskCompletion */
+export interface WaitForTaskOptions {
+ /** Client to subscribe events on */
+ client: ITransportClient
+ /** Command name for JSON output */
+ command: string
+ /** Output format */
+ format: 'json' | 'text'
+ /** Called on task:completed */
+ onCompleted: (result: TaskCompletionResult) => void
+ /** Called on task:error */
+ onError: (result: TaskErrorResult) => void
+ /** Called on llmservice:response (optional, used by query to display final answer) */
+ onResponse?: (content: string, taskId: string) => void
+ /** Task ID to wait for */
+ taskId: string
+ /** Timeout in ms (default: 5 minutes) */
+ timeoutMs?: number
+}
+
+/** Grace period before treating 'reconnecting' as daemon death (ms) */
+const DISCONNECT_GRACE_MS = 10_000
+/** Default timeout for task completion (ms) */
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/lib/task-client.ts`
-The `isVectorStoreFactory` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `WaitForTaskOptions` interface in [`src/oclif/lib/task-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/task-client.ts) handles a key part of this chapter's functionality:
```ts
- * @returns true if the object has manager and store properties
- */
-export function isVectorStoreFactory(obj: unknown): obj is VectorStoreFactory {
- return (
- typeof obj === 'object' &&
- obj !== null &&
- 'manager' in obj &&
- 'store' in obj &&
- obj.manager instanceof VectorStoreManager
- );
+
+/** Options for waitForTaskCompletion */
+export interface WaitForTaskOptions {
+ /** Client to subscribe events on */
+ client: ITransportClient
+ /** Command name for JSON output */
+ command: string
+ /** Output format */
+ format: 'json' | 'text'
+ /** Called on task:completed */
+ onCompleted: (result: TaskCompletionResult) => void
+ /** Called on task:error */
+ onError: (result: TaskErrorResult) => void
+ /** Called on llmservice:response (optional, used by query to display final answer) */
+ onResponse?: (content: string, taskId: string) => void
+ /** Task ID to wait for */
+ taskId: string
+ /** Timeout in ms (default: 5 minutes) */
+ timeoutMs?: number
}
+/** Grace period before treating 'reconnecting' as daemon death (ms) */
+const DISCONNECT_GRACE_MS = 10_000
+/** Default timeout for task completion (ms) */
+const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000
+
/**
- * Check if Qdrant configuration is available in environment
+ * Format tool call for CLI display (simplified version of TUI formatToolDisplay).
*/
-export function isQdrantConfigAvailable(): boolean {
- return !!(
- process.env.VECTOR_STORE_URL ||
- process.env.VECTOR_STORE_HOST ||
- process.env.VECTOR_STORE_PORT
- );
-}
-
+export function formatToolDisplay(toolName: string, args: Record): string {
+ switch (toolName.toLowerCase()) {
+ case 'bash': {
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[createMultiCollectionVectorStoreFromEnv]
- B[createMultiCollectionVectorStoreInternal]
- C[createWorkspaceVectorStoreFromEnv]
- D[isVectorStoreFactory]
+ A[ToolCallRecord]
+ B[TaskCompletionResult]
+ C[TaskErrorResult]
+ D[WaitForTaskOptions]
+ E[ScrollableListProps]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/05-vector-stores-and-workspace-memory.md b/tutorials/cipher-tutorial/05-vector-stores-and-workspace-memory.md
index fd039bd8..420cea4f 100644
--- a/tutorials/cipher-tutorial/05-vector-stores-and-workspace-memory.md
+++ b/tutorials/cipher-tutorial/05-vector-stores-and-workspace-memory.md
@@ -33,148 +33,168 @@ You now know how to choose and operate Cipher storage backends for single-user a
Next: [Chapter 6: MCP Integration Patterns](06-mcp-integration-patterns.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/suggestions.tsx`
-The `isQdrantConfigAvailable` function in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `SuggestionsProps` interface in [`src/tui/components/suggestions.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/suggestions.tsx) handles a key part of this chapter's functionality:
-```ts
- * Check if Qdrant configuration is available in environment
- */
-export function isQdrantConfigAvailable(): boolean {
- return !!(
- process.env.VECTOR_STORE_URL ||
- process.env.VECTOR_STORE_HOST ||
- process.env.VECTOR_STORE_PORT
- );
+```tsx
+const MAX_VISIBLE_ITEMS = 7
+
+interface SuggestionsProps {
+ input: string
+ onInsert?: (value: string) => void
+ onSelect?: (value: string) => void
}
+export const Suggestions: React.FC = ({input, onInsert, onSelect}) => {
+ const {
+ theme: {colors},
+ } = useTheme()
+ const {mode, setMode} = useMode()
+ const {
+ activeIndex,
+ clearSuggestions,
+ hasMatchedCommand,
+ isCommandAttempt,
+ nextSuggestion,
+ prevSuggestion,
+ selectSuggestion,
+ suggestions,
+ } = useSlashCompletion(input)
+
+ // Track if user dismissed suggestions with Escape
+ const isDismissedRef = useRef(false)
+ const prevInputRef = useRef(input)
+
+ // Reset dismissed state when input changes
+ useEffect(() => {
+ if (input !== prevInputRef.current) {
+ isDismissedRef.current = false
```
-This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/oclif/commands/query.ts`
-The `VectorStoreFactory` interface in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `Query` class in [`src/oclif/commands/query.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/commands/query.ts) handles a key part of this chapter's functionality:
```ts
- * Factory result containing both the manager and vector store
- */
-export interface VectorStoreFactory {
- /** The vector store manager instance for lifecycle control */
- manager: VectorStoreManager;
- /** The connected vector store ready for use */
- store: VectorStore;
-}
-/**
- * Dual collection factory result containing dual manager and stores
- */
-export interface DualCollectionVectorFactory {
- /** The dual collection manager instance for lifecycle control */
- manager: DualCollectionVectorManager;
- /** The knowledge vector store ready for use */
- knowledgeStore: VectorStore;
- /** The reflection vector store ready for use (null if disabled) */
- reflectionStore: VectorStore | null;
+/** Parsed flags type */
+type QueryFlags = {
+ format?: 'json' | 'text'
}
-/**
- * Creates and connects vector storage backend
- *
- * This is the primary factory function for initializing the vector storage system.
- * It creates a VectorStoreManager, connects to the configured backend, and
- * returns both the manager and the connected vector store.
- *
- * @param config - Vector storage configuration
- * @returns Promise resolving to manager and connected vector store
- * @throws {VectorStoreConnectionError} If connection fails and no fallback is available
- *
+export default class Query extends Command {
+ public static args = {
+ query: Args.string({
+ description: 'Natural language question about your codebase or project knowledge',
+ required: true,
+ }),
+ }
+ public static description = `Query and retrieve information from the context tree
+
+Good:
+- "How is user authentication implemented?"
+- "What are the API rate limits and where are they enforced?"
+Bad:
+- "auth" or "authentication" (too vague, not a question)
+- "show me code" (not specific about what information is needed)`
+ public static examples = [
+ '# Ask questions about patterns, decisions, or implementation details',
+ '<%= config.bin %> <%= command.id %> What are the coding standards?',
+ '<%= config.bin %> <%= command.id %> How is authentication implemented?',
+ '',
+ '# JSON output (for automation)',
+ '<%= config.bin %> <%= command.id %> "How does auth work?" --format json',
+ ]
+ public static flags = {
+ format: Flags.string({
+ default: 'text',
```
-This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This class is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/markdown.tsx`
-The `DualCollectionVectorFactory` interface in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `MarkdownProps` interface in [`src/tui/components/markdown.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/markdown.tsx) handles a key part of this chapter's functionality:
-```ts
- * Dual collection factory result containing dual manager and stores
- */
-export interface DualCollectionVectorFactory {
- /** The dual collection manager instance for lifecycle control */
- manager: DualCollectionVectorManager;
- /** The knowledge vector store ready for use */
- knowledgeStore: VectorStore;
- /** The reflection vector store ready for use (null if disabled) */
- reflectionStore: VectorStore | null;
+```tsx
+import {useTheme} from '../hooks/index.js'
+
+interface MarkdownProps {
+ children: string
+}
+
+interface ListContext {
+ index: number
+ ordered: boolean
}
-/**
- * Creates and connects vector storage backend
- *
- * This is the primary factory function for initializing the vector storage system.
- * It creates a VectorStoreManager, connects to the configured backend, and
- * returns both the manager and the connected vector store.
- *
- * @param config - Vector storage configuration
- * @returns Promise resolving to manager and connected vector store
- * @throws {VectorStoreConnectionError} If connection fails and no fallback is available
- *
- * @example
- * ```typescript
- * // Basic usage with Qdrant
- * const { manager, store } = await createVectorStore({
- * type: 'qdrant',
- * host: 'localhost',
- * port: 6333,
- * collectionName: 'documents',
- * dimension: 1536
- * });
+const renderPhrasingContent = (nodes: PhrasingContent[], theme: Theme): React.ReactNode => nodes.map((node, index) => {
+ switch (node.type) {
+ case 'break': {
+ return {'\n'}
+ }
+
+ case 'emphasis': {
+ return (
+
+ {renderPhrasingContent((node as Emphasis).children, theme)}
+
+ )
+ }
+
+ case 'inlineCode': {
+ return (
+
+ {(node as InlineCode).value}
+
+ )
+ }
```
This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/core/vector_storage/factory.ts`
+### `src/tui/components/markdown.tsx`
-The `for` interface in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `ListContext` interface in [`src/tui/components/markdown.tsx`](https://github.com/campfirein/cipher/blob/HEAD/src/tui/components/markdown.tsx) handles a key part of this chapter's functionality:
-```ts
- * Vector Storage Factory
- *
- * Factory functions for creating and initializing the vector storage system.
- * Provides a simplified API for common vector storage setup patterns.
- *
- * @module vector_storage/factory
- */
-
-import { VectorStoreManager } from './manager.js';
-import { DualCollectionVectorManager } from './dual-collection-manager.js';
-import type { VectorStoreConfig } from './types.js';
-import { VectorStore } from './backend/vector-store.js';
-import { createLogger } from '../logger/index.js';
-import { LOG_PREFIXES } from './constants.js';
-import { env } from '../env.js';
-import { getServiceCache, createServiceKey } from '../brain/memory/service-cache.js';
-
-/**
- * Factory result containing both the manager and vector store
- */
-export interface VectorStoreFactory {
- /** The vector store manager instance for lifecycle control */
- manager: VectorStoreManager;
- /** The connected vector store ready for use */
- store: VectorStore;
+```tsx
+}
+
+interface ListContext {
+ index: number
+ ordered: boolean
}
-/**
- * Dual collection factory result containing dual manager and stores
- */
-export interface DualCollectionVectorFactory {
- /** The dual collection manager instance for lifecycle control */
+const renderPhrasingContent = (nodes: PhrasingContent[], theme: Theme): React.ReactNode => nodes.map((node, index) => {
+ switch (node.type) {
+ case 'break': {
+ return {'\n'}
+ }
+
+ case 'emphasis': {
+ return (
+
+ {renderPhrasingContent((node as Emphasis).children, theme)}
+
+ )
+ }
+
+ case 'inlineCode': {
+ return (
+
+ {(node as InlineCode).value}
+
+ )
+ }
+
+ case 'link': {
+ return (
+
```
This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
@@ -184,11 +204,13 @@ This interface is important because it defines how Cipher Tutorial: Shared Memor
```mermaid
flowchart TD
- A[isQdrantConfigAvailable]
- B[VectorStoreFactory]
- C[DualCollectionVectorFactory]
- D[for]
+ A[SuggestionsProps]
+ B[Query]
+ C[MarkdownProps]
+ D[ListContext]
+ E[FileContentReader]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/06-mcp-integration-patterns.md b/tutorials/cipher-tutorial/06-mcp-integration-patterns.md
index 29b4472d..2b328541 100644
--- a/tutorials/cipher-tutorial/06-mcp-integration-patterns.md
+++ b/tutorials/cipher-tutorial/06-mcp-integration-patterns.md
@@ -30,184 +30,158 @@ You now have a practical map for integrating Cipher with MCP clients under diffe
Next: [Chapter 7: Deployment and Operations Modes](07-deployment-and-operations-modes.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/core/vector_storage/factory.ts`
+### `src/server/utils/file-content-reader.ts`
-The `MultiCollectionVectorFactory` interface in [`src/core/vector_storage/factory.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/core/vector_storage/factory.ts) handles a key part of this chapter's functionality:
+The `createFileContentReader` function in [`src/server/utils/file-content-reader.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/server/utils/file-content-reader.ts) handles a key part of this chapter's functionality:
```ts
- * Multi Collection Vector Factory interface for workspace memory support
+ * Factory function to create a FileContentReader instance.
*/
-export interface MultiCollectionVectorFactory {
- /** The multi collection manager instance */
- manager: any; // MultiCollectionVectorManager
- /** The knowledge vector store ready for use */
- knowledgeStore: VectorStore;
- /** The reflection vector store ready for use (null if disabled) */
- reflectionStore: VectorStore | null;
- /** The workspace vector store ready for use (null if disabled) */
- workspaceStore: VectorStore | null;
+export function createFileContentReader(documentParser?: IDocumentParserService): FileContentReader {
+ return new FileContentReader(documentParser)
}
-/**
- * Creates multi-collection vector storage from environment variables
- *
- * Creates a multi-collection manager that handles knowledge, reflection, and workspace
- * memory collections. This replaces DualCollectionVectorManager when workspace memory is enabled.
- *
- * @param agentConfig - Optional agent configuration to override dimension from embedding config
- * @returns Promise resolving to multi collection manager and stores
+```
+
+This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+
+### `src/server/utils/file-content-reader.ts`
+
+The `FileReadResult` interface in [`src/server/utils/file-content-reader.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/server/utils/file-content-reader.ts) handles a key part of this chapter's functionality:
+
+```ts
+ * Result of reading a file's content.
*/
-export async function createMultiCollectionVectorStoreFromEnv(
- agentConfig?: any
-): Promise {
- const logger = createLogger({ level: env.CIPHER_LOG_LEVEL });
+export interface FileReadResult {
+ /** Extracted content from the file */
+ content: string
- // Import MultiCollectionVectorManager dynamically to avoid circular dependencies
- // const { MultiCollectionVectorManager } = await import('./multi-collection-manager.js'); // Not used in this scope
+ /** Error message if reading failed */
+ error?: string
- // Get base configuration from environment
- const config = getVectorStoreConfigFromEnv(agentConfig);
-```
+ /** Original file path */
+ filePath: string
-This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+ /** Detected file type */
+ fileType: 'binary' | 'image' | 'office' | 'pdf' | 'text'
-### `src/app/api/server.ts`
+ /** Additional metadata about the file */
+ metadata?: {
+ /** Number of lines (for text files) */
+ lineCount?: number
-The `ApiServer` class in [`src/app/api/server.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/api/server.ts) handles a key part of this chapter's functionality:
+ /** Number of pages (for PDFs) */
+ pageCount?: number
-```ts
-import { createWebhookRoutes } from './routes/webhook.js';
-
-export interface ApiServerConfig {
- port: number;
- host?: string;
- corsOrigins?: string[];
- rateLimitWindowMs?: number;
- rateLimitMaxRequests?: number;
- mcpTransportType?: 'stdio' | 'sse' | 'http';
- mcpPort?: number;
- // WebSocket configuration
- enableWebSocket?: boolean;
- webSocketConfig?: WebSocketConfig;
- // API prefix configuration
- apiPrefix?: string;
+ /** Whether content was truncated */
+ truncated?: boolean
+ }
+
+ /** Whether the read was successful */
+ success: boolean
}
-export class ApiServer {
- private app: Application;
- private agent: MemAgent;
- private config: ApiServerConfig;
- private apiPrefix: string;
- private mcpServer?: McpServer;
- private activeMcpSseTransports: Map = new Map();
-
- // WebSocket components
- private httpServer?: http.Server;
- private wss?: WebSocketServer;
- private wsConnectionManager?: WebSocketConnectionManager;
- private wsMessageRouter?: WebSocketMessageRouter;
- private wsEventSubscriber?: WebSocketEventSubscriber;
- private heartbeatInterval?: NodeJS.Timeout;
+/**
```
-This class is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/api/server.ts`
+### `src/server/utils/file-content-reader.ts`
-The `ApiServerConfig` interface in [`src/app/api/server.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/api/server.ts) handles a key part of this chapter's functionality:
+The `FileContentReaderConfig` interface in [`src/server/utils/file-content-reader.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/server/utils/file-content-reader.ts) handles a key part of this chapter's functionality:
```ts
-import { createWebhookRoutes } from './routes/webhook.js';
-
-export interface ApiServerConfig {
- port: number;
- host?: string;
- corsOrigins?: string[];
- rateLimitWindowMs?: number;
- rateLimitMaxRequests?: number;
- mcpTransportType?: 'stdio' | 'sse' | 'http';
- mcpPort?: number;
- // WebSocket configuration
- enableWebSocket?: boolean;
- webSocketConfig?: WebSocketConfig;
- // API prefix configuration
- apiPrefix?: string;
+ * Configuration options for file reading.
+ */
+interface FileContentReaderConfig {
+ /** Maximum content length per file in characters (default: 40000) */
+ maxContentLength?: number
+
+ /** Maximum lines to read for text files (default: 2000) */
+ maxLinesPerFile?: number
+
+ /** Maximum pages to extract for PDFs (default: 50) */
+ maxPdfPages?: number
}
-export class ApiServer {
- private app: Application;
- private agent: MemAgent;
- private config: ApiServerConfig;
- private apiPrefix: string;
- private mcpServer?: McpServer;
- private activeMcpSseTransports: Map = new Map();
-
- // WebSocket components
- private httpServer?: http.Server;
- private wss?: WebSocketServer;
- private wsConnectionManager?: WebSocketConnectionManager;
- private wsMessageRouter?: WebSocketMessageRouter;
- private wsEventSubscriber?: WebSocketEventSubscriber;
- private heartbeatInterval?: NodeJS.Timeout;
+const DEFAULT_MAX_CONTENT_LENGTH = 40_000
+const DEFAULT_MAX_LINES_PER_FILE = 2000
+const DEFAULT_MAX_PDF_PAGES = 50
+const SAMPLE_BUFFER_SIZE = 4096
+
+/**
+ * Service for reading file contents with support for various file types.
+ *
+ * Supports:
+ * - Text/code files: Read directly with truncation
+ * - Office documents (.docx, .pptx, .xlsx, etc.): Parse using DocumentParserService
+ * - PDFs: Extract text using PdfExtractor
+ * - Images/Binaries: Skip with appropriate error message
+ */
+export class FileContentReader {
+ private readonly documentParser: IDocumentParserService
+
+ constructor(documentParser?: IDocumentParserService) {
+ this.documentParser = documentParser ?? createDocumentParserService()
```
This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/api/server.ts`
+### `src/oclif/lib/daemon-client.ts`
-The `API` interface in [`src/app/api/server.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/api/server.ts) handles a key part of this chapter's functionality:
+The `connectToDaemonClient` function in [`src/oclif/lib/daemon-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/daemon-client.ts) handles a key part of this chapter's functionality:
```ts
- enableWebSocket?: boolean;
- webSocketConfig?: WebSocketConfig;
- // API prefix configuration
- apiPrefix?: string;
+ * Connects to the daemon, auto-starting it if needed.
+ */
+export async function connectToDaemonClient(
+ options?: Pick<DaemonClientOptions, 'transportConnector'>,
+): Promise<ITransportClient> {
+ const connector = options?.transportConnector ?? createDaemonAwareConnector()
+ return connector()
}
-export class ApiServer {
- private app: Application;
- private agent: MemAgent;
- private config: ApiServerConfig;
- private apiPrefix: string;
- private mcpServer?: McpServer;
- private activeMcpSseTransports: Map = new Map();
-
- // WebSocket components
- private httpServer?: http.Server;
- private wss?: WebSocketServer;
- private wsConnectionManager?: WebSocketConnectionManager;
- private wsMessageRouter?: WebSocketMessageRouter;
- private wsEventSubscriber?: WebSocketEventSubscriber;
- private heartbeatInterval?: NodeJS.Timeout;
-
- constructor(agent: MemAgent, config: ApiServerConfig) {
- this.agent = agent;
- this.config = config;
-
- // Validate and set API prefix
- this.apiPrefix = this.validateAndNormalizeApiPrefix(config.apiPrefix);
-
- this.app = express();
- this.setupMiddleware();
- this.setupRoutes();
+/**
+ * Executes an operation against the daemon with retry logic.
+ *
+ * Retries on infrastructure failures (daemon spawn timeout, connection dropped,
+ * agent disconnected). Does NOT retry on business errors (auth, validation, etc.).
+ */
+export async function withDaemonRetry<T>(
+ fn: (client: ITransportClient, projectRoot?: string) => Promise<T>,
+ options?: DaemonClientOptions & {
+ /** Called before each retry with attempt number (1-indexed) */
+ onRetry?: (attempt: number, maxRetries: number) => void
+ },
+): Promise<T> {
+ const maxRetries = options?.maxRetries ?? MAX_RETRIES
+ const retryDelayMs = options?.retryDelayMs ?? DEFAULT_RETRY_DELAY_MS
+ const connector = options?.transportConnector ?? createDaemonAwareConnector()
+
+ let lastError: unknown
+
+ /* eslint-disable no-await-in-loop -- intentional sequential retry loop */
+ for (let attempt = 1; attempt <= maxRetries; attempt++) {
+ let client: ITransportClient | undefined
+
```
-This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
## How These Components Connect
```mermaid
flowchart TD
- A[MultiCollectionVectorFactory]
- B[ApiServer]
- C[ApiServerConfig]
- D[API]
+ A[createFileContentReader]
+ B[FileReadResult]
+ C[FileContentReaderConfig]
+ D[connectToDaemonClient]
+ E[isRetryableError]
A --> B
B --> C
C --> D
+ D --> E
```
diff --git a/tutorials/cipher-tutorial/07-deployment-and-operations-modes.md b/tutorials/cipher-tutorial/07-deployment-and-operations-modes.md
index 28ffcc35..c4f4a0bf 100644
--- a/tutorials/cipher-tutorial/07-deployment-and-operations-modes.md
+++ b/tutorials/cipher-tutorial/07-deployment-and-operations-modes.md
@@ -36,184 +36,184 @@ You now have deployment and operations patterns for running Cipher in developer
Next: [Chapter 8: Security and Team Governance](08-security-and-team-governance.md)
-## Depth Expansion Playbook
-
## Source Code Walkthrough
-### `src/app/api/server.ts`
+### `src/oclif/lib/daemon-client.ts`
-The `full` interface in [`src/app/api/server.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/api/server.ts) handles a key part of this chapter's functionality:
+The `hasLeakedHandles` function in [`src/oclif/lib/daemon-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/daemon-client.ts) handles a key part of this chapter's functionality:
```ts
+ if (error instanceof DaemonSpawnError || error instanceof ConnectionFailedError) return true
+ if (error instanceof TransportRequestTimeoutError) return true
+ return hasLeakedHandles(error)
+}
- /**
- * Helper method to construct full path including proxy context path
- * Used for SSE transport endpoint configuration when behind reverse proxy
- */
- private buildFullPath(req: Request, path: string): string {
- const contextPath = (req as any).contextPath || '';
- const fullPath = contextPath + this.buildApiRoute(path);
-
- logger.debug('[API Server] Built full path', {
- path,
- contextPath,
- apiPrefix: this.apiPrefix,
- fullPath,
- });
-
- return fullPath;
- }
-
- private async setupMcpServer(
- transportType: 'stdio' | 'sse' | 'http',
- _port?: number
- ): Promise {
- logger.info(`[API Server] Setting up MCP server with transport type: ${transportType}`);
- try {
- // Initialize agent card data
- const agentCard = this.agent.getEffectiveConfig().agentCard;
- const agentCardInput = agentCard
- ? Object.fromEntries(Object.entries(agentCard).filter(([, value]) => value !== undefined))
- : {};
- const agentCardData = initializeAgentCardResource(agentCardInput);
+/**
+ * Checks if an error left leaked Socket.IO handles that prevent Node.js from exiting.
+ */
+export function hasLeakedHandles(error: unknown): boolean {
+ if (!(error instanceof Error)) return false
+ if (!('code' in error)) return false
+ return error.code === TaskErrorCode.AGENT_DISCONNECTED || error.code === TaskErrorCode.AGENT_NOT_AVAILABLE
+}
+
+/**
+ * Builds a user-friendly message when provider credentials are missing from storage.
+ */
+export function providerMissingMessage(activeProvider: string, authMethod?: 'api-key' | 'oauth'): string {
+ return authMethod === 'oauth'
+ ? `${activeProvider} authentication has expired.\nPlease reconnect: brv providers connect ${activeProvider} --oauth`
+ : `${activeProvider} API key is missing from storage.\nPlease reconnect: brv providers connect ${activeProvider} --api-key <key>`
+}
+export interface ProviderErrorContext {
+ activeModel?: string
+ activeProvider?: string
+}
+
+/**
+ * Formats a connection error into a user-friendly message.
+ */
+export function formatConnectionError(error: unknown, providerContext?: ProviderErrorContext): string {
```
-This interface is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
+This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/mcp/mcp_handler.ts`
+### `src/oclif/lib/daemon-client.ts`
-The `initializeMcpServer` function in [`src/app/mcp/mcp_handler.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/mcp/mcp_handler.ts) handles a key part of this chapter's functionality:
+The `providerMissingMessage` function in [`src/oclif/lib/daemon-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/daemon-client.ts) handles a key part of this chapter's functionality:
```ts
- * @param aggregatorConfig - Configuration for aggregator mode (optional)
+ * Builds a user-friendly message when provider credentials are missing from storage.
*/
-export async function initializeMcpServer(
- agent: MemAgent,
- agentCard: AgentCard,
- mode: 'default' | 'aggregator' = 'default',
- aggregatorConfig?: AggregatorConfig
-): Promise {
- logger.info(`[MCP Handler] Initializing MCP server with agent capabilities (mode: ${mode})`);
-
- // Remove or update the call to agent.promptManager.load
- // if (mode === 'default') {
- // agent.promptManager.load(
- // `When running as an MCP server, Cipher should focus solely on EITHER storage OR retrieval using its own tools. For each interaction, perform ONLY ONE operation: either retrieval OR storage. For storage tasks, do NOT use retrieval tools. For retrieval tasks, use search tools as needed. This behavior is only expected in MCP server mode.`
- // );
- // }
-
- // Create MCP server instance
- const server = new Server(
- {
- name: agentCard.name || 'cipher',
- version: agentCard.version || '1.0.0',
- },
- {
- capabilities: {
- tools: {},
- resources: {},
- prompts: {},
- },
- }
- );
+export function providerMissingMessage(activeProvider: string, authMethod?: 'api-key' | 'oauth'): string {
+ return authMethod === 'oauth'
+ ? `${activeProvider} authentication has expired.\nPlease reconnect: brv providers connect ${activeProvider} --oauth`
+ : `${activeProvider} API key is missing from storage.\nPlease reconnect: brv providers connect ${activeProvider} --api-key <key>`
+}
+export interface ProviderErrorContext {
+ activeModel?: string
+ activeProvider?: string
+}
+
+/**
+ * Formats a connection error into a user-friendly message.
+ */
+export function formatConnectionError(error: unknown, providerContext?: ProviderErrorContext): string {
+ if (error instanceof NoInstanceRunningError) {
+ if (isSandboxEnvironment()) {
+ const sandboxName = getSandboxEnvironmentName()
+ return (
+ `Daemon failed to start automatically.\n` +
+ `⚠️ Sandbox environment detected (${sandboxName}).\n\n` +
+ `Run 'brv' in a terminal outside the sandbox, then allow network access so this sandbox can connect.`
+ )
+ }
+
+ return 'Daemon failed to start automatically.\n\nRestart your terminal and retry the command.'
+ }
+
+ if (error instanceof InstanceCrashedError) {
+ return "Daemon crashed unexpectedly.\n\nRun 'brv restart' to force a clean restart."
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/mcp/mcp_handler.ts`
+### `src/oclif/lib/daemon-client.ts`
-The `registerAgentTools` function in [`src/app/mcp/mcp_handler.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/mcp/mcp_handler.ts) handles a key part of this chapter's functionality:
+The `formatConnectionError` function in [`src/oclif/lib/daemon-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/daemon-client.ts) handles a key part of this chapter's functionality:
```ts
- await registerAggregatedTools(server, agent, aggregatorConfig);
- } else {
- await registerAgentTools(server, agent);
- }
- await registerAgentResources(server, agent, agentCard);
- await registerAgentPrompts(server, agent);
-
- logger.info(`[MCP Handler] MCP server initialized successfully (mode: ${mode})`);
- logger.info('[MCP Handler] Agent is now available as MCP server for external clients');
-
- return server;
-}
-
-/**
- * Register agent tools as MCP tools (default mode - ask_cipher only)
+ * Formats a connection error into a user-friendly message.
*/
-async function registerAgentTools(server: Server, agent: MemAgent): Promise {
- logger.debug('[MCP Handler] Registering agent tools (default mode - ask_cipher only)');
-
- // Default mode: Only expose ask_cipher tool (simplified)
- const mcpTools = [
- {
- name: 'ask_cipher',
- description:
- 'Use this tool to store new information or search existing information. When you encounter information not yet seen in the current conversation, call ask_cipher to store it. For questions outside the current context, use ask_cipher to search relevant memory. Users may not explicitly request it, but ask_cipher should be your first choice in these cases.',
- inputSchema: {
- type: 'object',
- properties: {
- message: {
- type: 'string',
- description: 'The message or question to send to the Cipher agent',
- },
+export function formatConnectionError(error: unknown, providerContext?: ProviderErrorContext): string {
+ if (error instanceof NoInstanceRunningError) {
+ if (isSandboxEnvironment()) {
+ const sandboxName = getSandboxEnvironmentName()
+ return (
+ `Daemon failed to start automatically.\n` +
+ `⚠️ Sandbox environment detected (${sandboxName}).\n\n` +
+ `Run 'brv' in a terminal outside the sandbox, then allow network access so this sandbox can connect.`
+ )
+ }
+
+ return 'Daemon failed to start automatically.\n\nRestart your terminal and retry the command.'
+ }
+
+ if (error instanceof InstanceCrashedError) {
+ return "Daemon crashed unexpectedly.\n\nRun 'brv restart' to force a clean restart."
+ }
+
+ if (error instanceof ConnectionFailedError) {
+ const isSandboxError = isSandboxNetworkError(error.originalError ?? error)
+
+ if (isSandboxError) {
+ const sandboxName = getSandboxEnvironmentName()
+ return (
+ `Failed to connect to the daemon.\n` +
+ `Port: ${error.port ?? 'unknown'}\n` +
+ `⚠️ Sandbox network restriction detected (${sandboxName}).\n\n` +
+ `Please allow network access in the sandbox and retry the command.`
+ )
+ }
```
This function is important because it defines how Cipher Tutorial: Shared Memory Layer for Coding Agents implements the patterns covered in this chapter.
-### `src/app/mcp/mcp_handler.ts`
+### `src/oclif/lib/daemon-client.ts`
-The `registerAggregatedTools` function in [`src/app/mcp/mcp_handler.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/app/mcp/mcp_handler.ts) handles a key part of this chapter's functionality:
+The `DaemonClientOptions` interface in [`src/oclif/lib/daemon-client.ts`](https://github.com/campfirein/cipher/blob/HEAD/src/oclif/lib/daemon-client.ts) handles a key part of this chapter's functionality:
```ts
- // Register agent capabilities as MCP tools, resources, and prompts
- if (mode === 'aggregator') {
- await registerAggregatedTools(server, agent, aggregatorConfig);
- } else {
- await registerAgentTools(server, agent);
- }
- await registerAgentResources(server, agent, agentCard);
- await registerAgentPrompts(server, agent);
-
- logger.info(`[MCP Handler] MCP server initialized successfully (mode: ${mode})`);
- logger.info('[MCP Handler] Agent is now available as MCP server for external clients');
-
- return server;
+}
+
+export interface DaemonClientOptions {
+ /** Max retry attempts. Default: 3 */
+ maxRetries?: number
+ /** Delay between retries in ms. Default: 2000. Set to 0 in tests. */
+ retryDelayMs?: number
+ /** Optional transport connector for DI/testing */
+ transportConnector?: TransportConnector
+}
+
+/**
+ * Connects to the daemon, auto-starting it if needed.
+ */
+export async function connectToDaemonClient(
+ options?: Pick<DaemonClientOptions, 'transportConnector'>,
+): Promise<ITransportClient> {
+ const connector = options?.transportConnector ?? createDaemonAwareConnector()
+ return connector()
}
/**
- * Register agent tools as MCP tools (default mode - ask_cipher only)
+ * Executes an operation against the daemon with retry logic.
+ *
+ * Retries on infrastructure failures (daemon spawn timeout, connection dropped,
+ * agent disconnected). Does NOT retry on business errors (auth, validation, etc.).
*/
-async function registerAgentTools(server: Server, agent: MemAgent): Promise