diff --git a/.cursor/plans/tres-2279f097.plan.md b/.cursor/plans/tres-2279f097.plan.md new file mode 100644 index 0000000..4198a31 --- /dev/null +++ b/.cursor/plans/tres-2279f097.plan.md @@ -0,0 +1,59 @@ + +# Claude Code Tresor Enhancement Roadmap + +## Foundations & Memory Bank + +- Stand up mandatory memory bank (`projectbrief.md`, `productContext.md`, `activeContext.md`, etc.) to persist architecture, tech stack, and progress context. +- Capture current taxonomy of skills, agents, commands, and scripts so all follow-on decisions share the same source of truth. + +## Metadata & Packaging Alignment + +- Reconcile installer expectations with actual asset formats by either generating `agent.json`/`command.json` files or teaching `scripts/install.sh` to consume the Markdown frontmatter so agents deploy correctly. +```130:147:scripts/install.sh + # Copy all agent directories + find "$agents_src" -mindepth 1 -maxdepth 1 -type d | while read -r agent_dir; do + local agent_name=$(basename "$agent_dir") + + # Skip README-only directories + if [ -f "$agent_dir/agent.json" ]; then + log "Installing agent: $agent_name" + cp -r "$agent_dir" "$agents_dest/$agent_name" + fi + done +``` + +- Audit documentation vs. real inventory so directories like `commands/README.md` stop advertising non-existent commands (e.g. `/refactor`, `/deploy`). +```29:47:commands/README.md +- **`/refactor`** - Automated code refactoring with best practices +- **`/optimize`** - Performance analysis and optimization suggestions +... +- **`/deploy`** - Deployment automation and verification +``` + +- Generate machine-readable indexes (YAML/JSON) for skills, agents, and subagents to power search, validation, and future tooling. + +## Ecosystem & Automation Expansion + +- Design a metadata-driven CLI (e.g. `tresor`) that can list, preview, install, and update skills/agents/commands/subagents from the repo and the `sources/` library. 
+- Introduce reusable command/agent templates so contributors can scaffold new utilities with validation hooks. +- Build orchestration blueprints showing how autonomous skills feed subagents and commands for key workflows (code review, release, incident response). + +## Documentation & Adoption Experience + +- Produce a role-based navigation hub that maps typical Claude Code personas (solo dev, team lead, agency) to specific utilities and workflows. +- Collapse duplicated docs (e.g. multiple READMEs per feature) into concise playbooks with deep links to authoritative references. +- Add outcome-focused quick-starts and video-ready scripts that demonstrate end-to-end flows using bundled assets. + +## Quality Assurance & Release Management + +- Add automated tests/linters for installer scripts and metadata consistency; validate every PR via CI. +- Capture usage analytics & feedback loops (changelogs, release notes, telemetry opt-in) to guide prioritization. +- Formalize versioning and release cadence with signed tags, update scripts, and migration notes for breaking changes. 
+ +## Implementation Todos + +- baseline: Establish memory bank & repository audit +- packaging: Align installer, metadata, and docs with actual assets +- automation: Build CLI tooling and workflow orchestration patterns +- docs: Redesign documentation & onboarding experience +- quality: Add automated validation, testing, and release process \ No newline at end of file diff --git a/.gitignore b/.gitignore index 5096f32..71f3c43 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,11 @@ scripts/publish-gist.sh documentation/archive/* # docs/ - NOW TRACKED: Technical documentation for agent system (v2.5.0+) +__pycache__/ + +# TÂCHES workflow data files (per-project data, not commands - v2.6.5+) +# These are gitignored in USER projects, not in this repository +# Pattern: anchored with a leading slash, so only root-level files (and .prompts/ one directory down) are ignored; same-named command files deeper in commands/ stay tracked +/*/.prompts/ +/TO-DOS.md +/whats-next.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..f3c052c --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,429 @@ +# CLAUDE.md - Development Guide + +> **For Claude Code instances working with Claude Code Tresor** + +## 🎯 Repository Purpose + +Claude Code Tresor is a comprehensive collection of professional-grade utilities for Claude Code: +- **8 Autonomous Skills**: Automatic background helpers (NEW in v2.0!) +- **8 Core Agents**: Production-ready expert sub-agents for deep analysis +- **137+ Extended Agents**: Specialized agents organized by team and function (NEW in v2.5!) +- **4 Essential Slash Commands**: Workflow automation and orchestration +- **20+ Prompt Templates**: Production-ready prompts for common development scenarios +- **Development Standards**: Style guides, Git workflows, and team collaboration guidelines + +**Author**: Alireza Rezvani | **License**: MIT | **Created**: September 16, 2025 | **Updated**: November 15, 2025 (v2.5.0) + +## 🏗️ Architecture + +``` +claude-code-tresor/ +├── skills/ # 8 Autonomous Skills (NEW v2.0!) 
+β”‚ β”œβ”€β”€ development/ # code-reviewer, test-generator, git-commit-helper +β”‚ β”œβ”€β”€ security/ # security-auditor, secret-scanner, dependency-auditor +β”‚ └── documentation/ # api-documenter, readme-updater +β”œβ”€β”€ agents/ # 8 Core Production Agents (.md + README.md) +β”‚ β”œβ”€β”€ config-safety-reviewer/ # Configuration safety & production reliability +β”‚ β”œβ”€β”€ test-engineer/ # Testing specialist +β”‚ β”œβ”€β”€ docs-writer/ # Documentation expert +β”‚ β”œβ”€β”€ systems-architect/ # System design & technical strategy +β”‚ β”œβ”€β”€ root-cause-analyzer/ # Comprehensive RCA & debugging +β”‚ β”œβ”€β”€ security-auditor/ # Security expert & OWASP compliance +β”‚ β”œβ”€β”€ performance-tuner/ # Performance optimization +β”‚ └── refactor-expert/ # Code refactoring & clean architecture +β”œβ”€β”€ subagents/ # 137+ Extended Agents (NEW v2.5!) +β”‚ β”œβ”€β”€ engineering/ # 60+ engineering specialists +β”‚ β”œβ”€β”€ design/ # 10 design specialists +β”‚ β”œβ”€β”€ marketing/ # 15+ marketing specialists +β”‚ β”œβ”€β”€ product/ # 10+ product specialists +β”‚ β”œβ”€β”€ leadership/ # 15+ leadership & strategy +β”‚ β”œβ”€β”€ operations/ # 10+ operations specialists +β”‚ β”œβ”€β”€ research/ # 10+ research specialists +β”‚ β”œβ”€β”€ ai-automation/ # 10+ AI/ML & automation +β”‚ └── account-customer-success/ # 8+ account & CS specialists +β”œβ”€β”€ commands/ # 4 Slash Commands (.md + README.md) +β”‚ β”œβ”€β”€ development/scaffold/ # Project/component scaffolding +β”‚ β”œβ”€β”€ workflow/review/ # Code review automation +β”‚ β”œβ”€β”€ testing/test-gen/ # Test generation +β”‚ └── documentation/docs-gen/ # Documentation generation +β”œβ”€β”€ prompts/ # 20+ Prompt templates +β”œβ”€β”€ standards/ # Development standards +β”œβ”€β”€ examples/ # Real-world workflows +β”œβ”€β”€ sources/ # Extended library (200+ components) +└── scripts/ # Installation utilities +``` + +## πŸ› οΈ Common Development Commands + +### Building & Testing +```bash +# No build process - this is a 
utilities collection +# Test installation scripts +./scripts/install.sh --check +``` + +### Installation & Setup +```bash +# Full installation (recommended) - installs skills + agents + commands +./scripts/install.sh + +# Selective installation +./scripts/install.sh --skills # 8 autonomous skills only +./scripts/install.sh --agents # 8 expert agents only +./scripts/install.sh --commands # 4 workflow commands only +./scripts/install.sh --resources-only + +# Updates +./scripts/update.sh +``` + +### Repository Management +```bash +# Standard Git workflow with conventional commits +git add . +git commit -m "feat: add new utility" +git push origin main + +# View repository structure +find . -name "*.json" -path "*/commands/*" -o -name "*.json" -path "*/agents/*" +``` + +## πŸ“‹ Command & Agent Structure + +### Slash Command Structure +Each command in `commands/` contains: +- `command.json` - Configuration and metadata +- `README.md` - Comprehensive documentation with examples +- Commands follow pattern: `/command-name --options` + +### Agent Structure +Each agent in `agents/` contains: +- `agent.json` - Agent configuration and capabilities +- `README.md` - Detailed usage guide and examples +- Agents follow pattern: `@agent-name task description` + +### Skill Structure (NEW v2.0!) +Each skill in `skills/` contains: +- `SKILL.md` - Skill configuration with YAML frontmatter + comprehensive docs +- `README.md` - Quick reference guide with examples +- Skills activate automatically based on trigger keywords + +## ✨ Skills Layer (NEW v2.0!) + +### What Are Skills? 
+**Skills** are autonomous background helpers that work continuously without manual invocation: +- βœ… **Automatic activation** - Triggered by code changes, file saves, commits +- βœ… **Lightweight** - Limited tool access for safety (Read, Write, Edit, Grep, Glob) +- βœ… **Proactive** - Detect issues and opportunities in real-time +- βœ… **Non-blocking** - Provide suggestions without interrupting workflow + +### 8 Core Skills + +**Development Skills (3):** +1. **code-reviewer** - Real-time code quality checks +2. **test-generator** - Auto-suggest missing tests +3. **git-commit-helper** - Generate conventional commit messages + +**Security Skills (3):** +4. **security-auditor** - OWASP Top 10 vulnerability scanning +5. **secret-scanner** - Detect exposed API keys/secrets +6. **dependency-auditor** - CVE checking for dependencies + +**Documentation Skills (2):** +7. **api-documenter** - Auto-generate OpenAPI specs +8. **readme-updater** - Keep README current with changes + +### Skills vs Agents vs Commands + +| Feature | Skills | Agents | Commands | +|---------|--------|--------|----------| +| **Invocation** | Automatic | Manual (`@agent`) | Manual (`/command`) | +| **Tools** | Limited (safe) | Full access | Orchestrates | +| **Context** | Shared | Separate | Coordinates | +| **Best For** | Quick checks | Deep analysis | Workflows | + +**Typical Workflow:** +1. **Skill detects** issue automatically β†’ suggests improvement +2. **Developer invokes Agent** β†’ `@config-safety-reviewer` comprehensive analysis +3. **Developer runs Command** β†’ `/review --scope staged` full workflow + +### Sandboxing (Optional) +All skills work **WITHOUT sandboxing by default**. Sandboxing is optional for additional security isolation. 
+ +**See:** [Skills Guide](skills/README.md) | [Getting Started](GETTING-STARTED.md) | [Architecture](ARCHITECTURE.md) + +## πŸ”§ Key Implementation Details + +### Configuration Safety (Critical) +The `/review` command emphasizes **configuration safety** to prevent outages: +- Detects risky configuration changes (database connections, API endpoints) +- Validates environment-specific settings +- Checks for magic numbers and hardcoded values +- Reviews deployment configurations + +### Multi-Agent Orchestration +Commands can invoke agents using the Task tool: +```bash +# Example from /review command +Task tool -> @config-safety-reviewer for configuration safety analysis +Task tool -> @performance-tuner for optimization +Task tool -> @security-auditor for vulnerability scan +Task tool -> @systems-architect for architecture review +``` + +### Test Harness Generation +The `/test-gen` command supports multiple frameworks: +- **Python**: pytest, unittest, property-based testing +- **JavaScript/TypeScript**: Jest, Vitest, Playwright +- **Java**: JUnit, TestNG, Mockito +- **Load Testing**: Locust, Artillery + +### Documentation Automation +The `/docs-gen` command generates: +- API documentation with OpenAPI specs +- Architecture diagrams with Mermaid +- Interactive documentation with Docusaurus +- CI/CD pipeline for automated docs + +## 🎨 Prompt Template Categories + +Located in `prompts/` directory: +- **Frontend Development**: React, NextJS, ReactJS, Vue, Angular patterns +- **Backend Development**: APIs, databases, microservices +- **Debugging & Analysis**: Error analysis, performance troubleshooting +- **Best Practices**: Clean code, security, refactoring strategies + +## πŸ“ Development Standards + +Located in `standards/` directory: +- **JavaScript/TypeScript**: ESLint/Prettier configurations +- **Git Workflows**: Conventional commits, branch strategies +- **Code Review**: Checklists and PR templates +- **Team Collaboration**: Guidelines and best practices + +## πŸš€ 
Usage Examples + +### Project Scaffolding +```bash +/scaffold react-component UserProfile --hooks --tests --typescript +/scaffold express-api user-service --auth --database --tests +``` + +### Code Review Automation +```bash +/review --scope staged --checks security,performance,configuration +@config-safety-reviewer Review database connection pool configuration +@security-auditor Analyze this component for React best practices and security +``` + +### Test Generation +```bash +/test-gen --file utils.js --framework jest --coverage 90 +@test-engineer Create comprehensive tests with edge cases +``` + +### Documentation Generation +```bash +/docs-gen api --format openapi --include-examples +@docs-writer Create user guide with setup and troubleshooting +``` + +### System Architecture & Debugging +```bash +@systems-architect Design scalable e-commerce system for 100k concurrent users +@root-cause-analyzer Production API timing out - perform comprehensive RCA +@performance-tuner Profile and optimize database query performance +``` + +### Agent Discovery +```bash +# Core agents (8) - Production-ready in /agents/ +@systems-architect, @config-safety-reviewer, @root-cause-analyzer +@security-auditor, @test-engineer, @performance-tuner +@refactor-expert, @docs-writer + +# Extended agents (133) - Organized in /subagents/ by team +# See subagents/README.md for complete catalog +``` + +## TΓ‚CHES Workflow Commands (v2.6.5) + +### Meta-Prompting System + +**`/create-prompt [task]`** - Expert prompt engineer + +- Generates optimized, XML-structured prompts for complex tasks +- **Automatically references Tresor's CLAUDE.md** for project standards +- **Suggests appropriate Tresor agents** based on task type +- Follows Tresor's anti-overengineering and maintainability principles +- Creates prompts optimized for Tresor's 141-agent ecosystem + +**`/run-prompt [number(s)] [--parallel|--sequential]`** - Execute prompts + +- Runs generated prompts in fresh sub-task contexts +- Supports 
parallel and sequential execution +- **Integrates with Tresor agents** - prompts can invoke @agents +- Supports Tresor's subagent types (Explore, Plan, general-purpose) + +### Todo Management System + +**`/add-to-todos [description]`** - Capture ideas without breaking flow + +- Structured format: Problem, Files, Solution +- Preserves full conversation context +- Auto-detects Tresor components (agents, skills, commands) +- Integrates with Tresor's project structure + +**`/check-todos`** - Resume work with complete context + +- Lists all captured todos with dates and context +- **Detects and suggests Tresor's 141 agents** based on todo content and file paths +- Matches todos to domain patterns (engineering/, design/, skills/, etc.) +- Offers Tresor workflow integration (invoke agent, use skill, work directly) +- Loads complete context for selected todo + +### Context Handoff System + +**`/whats-next`** - Create comprehensive handoff document + +- Captures complete work history, decisions, and context +- **Complements Tresor's memory bank** (projectbrief, productContext, activeContext) +- Session-specific handoff vs long-term context +- Enables seamless work continuation in fresh contexts + +### TΓ‚CHES + Tresor Integration Examples + +**Meta-Prompting with Tresor Agents**: +```bash +/create-prompt Design scalable microservices architecture +# β†’ Generates prompt referencing CLAUDE.md +# β†’ Suggests @systems-architect for execution +# β†’ Includes Tresor's maintainability principles + +/run-prompt 001 +# β†’ Executes with fresh context +# β†’ Can invoke @systems-architect, @backend-architect, @security-auditor +``` + +**Todo Management with Agent Discovery**: +```bash +# During coding, spot issue +/add-to-todos Optimize N+1 queries in user API - src/api/users.ts:45-67 + +# Later +/check-todos +# β†’ Detects backend/database work +# β†’ Suggests @database-optimizer or @performance-tuner +# β†’ One-click agent invocation +``` + +**Context Handoff with Memory Bank**: 
+``` +Tresor Memory Bank (long-term): +- activeContext.md (updated regularly) +- productContext.md (architectural decisions) +- projectbrief.md (project vision) + +TΓ‚CHES Handoff (session-specific): +- whats-next.md (created via /whats-next command) +- Detailed task state, exact file positions +- Resume with zero information loss +``` + +## πŸ” Important Context + +### Production Focus +All utilities are designed for **production use** with emphasis on: +- Safety-first approach (especially configuration changes) +- Comprehensive error handling and validation +- Real-world outage prevention patterns +- Professional code quality standards + +### Extensibility +The `sources/` directory contains 200+ additional components: +- 80+ specialized agents for various domains +- Advanced slash commands for specific workflows +- Industry-specific prompts and templates + +### Community & Contributions +- MIT License allows commercial and personal use +- Contribution guidelines in `CONTRIBUTING.md` +- Professional support available for teams +- Active development with regular updates + +## πŸ“š Documentation + +Complete documentation available in `documentation/`: + +### Quick Links +- **[Master Index β†’](documentation/README.md)** - Complete documentation navigation +- **[Installation Guide β†’](documentation/guides/installation.md)** - Install Claude Code Tresor +- **[Getting Started β†’](documentation/guides/getting-started.md)** - First-time user walkthrough +- **[FAQ β†’](documentation/reference/faq.md)** - Frequently asked questions + +### Documentation Categories +- **[User Guides β†’](documentation/guides/)** - Installation, getting-started, configuration, troubleshooting, migration, contributing +- **[Technical Reference β†’](documentation/reference/)** - Skills, agents, commands, FAQ +- **[Workflows β†’](documentation/workflows/)** - Git workflow, GitHub automation, agent-skill integration + +--- + +## ⚠️ Safety Guidelines + +1. 
**Configuration Changes**: Always review configuration changes carefully +2. **Database Migrations**: Validate schema changes in staging first +3. **API Modifications**: Ensure backward compatibility +4. **Environment Variables**: Never commit secrets or keys +5. **Deployment Scripts**: Test deployment automation thoroughly + +## 📞 Support + +- **[FAQ →](documentation/reference/faq.md)** - Common questions answered +- **[Troubleshooting →](documentation/guides/troubleshooting.md)** - Fix common issues +- **[GitHub Issues →](https://github.com/alirezarezvani/claude-code-tresor/issues)** - Report bugs and feature requests +- **[GitHub Discussions →](https://github.com/alirezarezvani/claude-code-tresor/discussions)** - Ask questions and share ideas +- **Professional Support**: Available for custom development and training + +## COMMUNICATION STANDARDS & BEHAVIOR + +### Core Requirements: +- **Absolute Honesty**: Direct assessments without diplomatic cushioning +- **Zero Fluff**: Eliminate vague statements and buzzwords +- **Pragmatic Focus**: Every suggestion must be immediately actionable +- **Critical Analysis**: Challenge assumptions and identify flaws before responding +- **Always Ask for Clarification**: Never assume or fill gaps with generic advice + +### Solution Standards: +- **Strict Adherence**: Follow user instructions exactly as specified +- **File Economy**: Edit existing files instead of creating new ones when possible +- **Code Limits**: Maximum 300 lines per file - split larger files into logical modules +- **Maintainability First**: Prioritize readable, maintainable code over technical complexity +- **Anti-Overengineering**: Choose simple, direct solutions over elaborate architectures + +### Response Protocol: +1. **Pre-Response Check**: Verify answer is specific and actionable +2. **Critical Review**: Identify and address solution weaknesses +3. 
**Implementation Reality**: Confirm feasibility within stated constraints + +### Documentation Requirements: +- **Bug Fix Records**: Document each bug and its solution methodology +- **Solution Rationale**: Explain why specific approach was chosen +- **Maintenance Notes**: Include future modification considerations + +### Prohibited Responses: +- Generic praise without technical analysis +- Vague suggestions without clear reasoning +- Advice without implementation details +- Assumptions when requirements are unclear +- Over-engineered solutions for simple problems + +### Standard Structure: +1. Direct assessment following user specifications +2. Critical analysis with potential issues +3. Step-by-step recommendations (edit vs. create approach) +4. Resource requirements and code organization +5. Documentation and maintenance considerations + +--- + +**Remember**: This repository provides utilities TO users; it is not a development project itself. Focus on helping users implement, customize, and extend these utilities for their own projects. Provide brutally honest, technically sound guidance that prevents costly mistakes while maintaining code simplicity and readability. Avoid technical jargon that is too complicated for the user. Always use the current date, even when you create files or examples. \ No newline at end of file diff --git a/README.md b/README.md index 3db314d..d782b4a 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,21 @@ --- -## 🎉 What's New in v2.6.0 +## 🎉 What's New in v2.6.5 + +**TÂCHES Integration** - Advanced workflow management system! 
+ +- 🔄 **Meta-Prompting** - `/create-prompt` and `/run-prompt` for complex task orchestration +- ✅ **Todo Management** - `/add-to-todos` and `/check-todos` for capturing ideas without losing focus +- 📋 **Context Handoff** - `/whats-next` for seamless work continuation in fresh contexts +- 🔗 **Tresor Integration** - TÂCHES commands detect and suggest Tresor's 141 agents +- 💡 **Workflow Enhancement** - Meta-prompting combined with Tresor's agent ecosystem + +**Credit**: TÂCHES framework by [glittercowboy](https://github.com/glittercowboy/taches-cc-prompts) + +--- + +### Previous: v2.6.0 - Quality Excellence **Quality Excellence Release** - Achieved 9.7/10 exceptional quality rating! @@ -68,8 +82,9 @@ Claude Code Tresor is the ultimate collection of **professional-grade utilities* ## ✨ What's Included -### 🚀 Slash Commands (4 Essential) -Transform your development workflow with these powerful commands: +### 🚀 Slash Commands (9 Total) + +**Core Workflow Commands** (4): | Command | Purpose | Example Usage | |---------|---------|---------------| @@ -78,6 +93,16 @@ Transform your development workflow with these powerful commands: | **`/test-gen`** | Create comprehensive test suites automatically | `/test-gen --file utils.js --coverage 90` | | **`/docs-gen`** | Generate documentation from code and comments | `/docs-gen api --format openapi` | +**TÂCHES Workflow Commands** (5) - **NEW in v2.6.5!** + +| Command | Purpose | Example Usage | +|---------|---------|---------------| +| **`/create-prompt`** | Generate optimized prompts for complex tasks | `/create-prompt Build user authentication system` | +| **`/run-prompt`** | Execute generated prompts in sub-agents | `/run-prompt 001 --parallel` | +| **`/add-to-todos`** | Capture ideas mid-conversation | `/add-to-todos Fix performance issue in API` | +| **`/check-todos`** | Review and work on captured todos | `/check-todos` | +| **`/whats-next`** | Generate context handoff document | `/whats-next` | + ### 
πŸ€– Core Agents (8 Production-Ready) Expert-level assistance for complex development tasks: diff --git a/VALIDATION_EXECUTIVE_SUMMARY.md b/VALIDATION_EXECUTIVE_SUMMARY.md deleted file mode 100644 index aa523ea..0000000 --- a/VALIDATION_EXECUTIVE_SUMMARY.md +++ /dev/null @@ -1,238 +0,0 @@ -# Agent Validation Executive Summary -**Claude Code Tresor - Quality Assessment** -**Date**: November 15, 2025 - ---- - -## Bottom Line Up Front (BLUF) - -**Overall Repository Quality: 6.8/10 (MODERATE)** - With targeted improvements, can reach 8.0/10 within 2 months. - -### Key Findings - -| Metric | Current | Target | Gap | -|--------|---------|--------|-----| -| **Overall Quality** | 6.8/10 | 8.0/10 | -1.2 | -| **Core Agents** | 7.6/10 | 8.5/10 | -0.9 | -| **Specialized Agents** | 6.7/10 | 8.0/10 | -1.3 | -| **Structure Consistency** | 59% | 80%+ | -21% | -| **YAML Completeness** | 100% | 100% | βœ… Met | - ---- - -## What We Validated - -- **Sample Size**: 22 agents (17% of 133 total) -- **Coverage**: All 10 categories represented -- **Methodology**: Dual-format quality scoring (core vs specialized) -- **Confidence**: MEDIUM (representative sample) - ---- - -## Critical Issues - -### 1. Structural Inconsistency (40% of agents) -**Impact**: Users struggle to use agents effectively - -**Problem**: 9 specialized agents missing standard sections (Focus Areas, Approach, Output) - -**Affected Categories**: Marketing, Product, Research, AI & Automation - -**Fix**: Apply standardized template to all specialized agents -**Effort**: 1-2 weeks -**Impact**: +0.5 points overall - ---- - -### 2. Design Category Underperforming (4.0/10) -**Impact**: Poor user experience for design agents - -**Problem**: -- 2 agents too verbose (700+ words vs 100-300 target) -- 1 core agent missing core sections - -**Fix**: Restructure 3 design agents -**Effort**: 1 week -**Impact**: +0.3 points overall - ---- - -### 3. 
Core Agents Missing Best Practices (18% of core) -**Impact**: Users miss common mistakes and anti-patterns - -**Problem**: Security-auditor, config-safety-reviewer lack pitfalls/patterns sections - -**Fix**: Add "Common Pitfalls" sections with 3-5 examples each -**Effort**: 3-5 days -**Impact**: +0.4 points overall - ---- - -## Strengths to Maintain - -βœ… **100% YAML Frontmatter Completeness** - Every agent has complete metadata - -βœ… **Core Agents Comprehensive** - Average 774 words, 12.4 code examples - -βœ… **Engineering Category Excellence** - 8.4/10 average, top-performing category - -βœ… **Clear Format Distinction** - Two well-defined agent types (core vs specialized) - ---- - -## Quality Distribution - -``` -Current State: - Excellent (9-10): 18% β– β– β– β– β– β– β– β– β–  - Good (7-8.9): 32% β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  - Moderate (5-7): 36% β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  - Poor (<5): 14% β– β– β– β– β– β– β–  - -Target State (2 months): - Excellent (9-10): 30% β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  - Good (7-8.9): 50% β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  - Moderate (5-7): 18% β– β– β– β– β– β– β– β– β–  - Poor (<5): 2% β–  -``` - ---- - -## Recommended Actions - -### Priority 1: Quick Wins (1-2 weeks) -1. **Create specialized agent template** β†’ +0.5 points -2. **Fix design category (3 agents)** β†’ +0.3 points -3. **Add best practices to core agents** β†’ +0.4 points - -**Total Impact**: +1.2 points (6.8 β†’ 8.0) - -### Priority 2: Standardization (1 month) -4. Apply template to all specialized agents -5. Audit and fix missing sections -6. Document quality standards in CONTRIBUTING.md - -### Priority 3: Continuous Improvement (Ongoing) -7. Quarterly validation reports -8. PR quality gates (minimum 7.0/10) -9. 
User feedback integration - ---- - -## Category Breakdown - -| Category | Score | Status | Action Needed | -|----------|-------|--------|---------------| -| Engineering | 8.4/10 | 🟒 Excellent | Maintain standard | -| Core | 8.0/10 | 🟒 Good | Add best practices | -| AI & Automation | 6.5/10 | 🟑 Moderate | Add standard sections | -| Marketing | 6.0/10 | 🟑 Moderate | Add standard sections | -| Product | 6.0/10 | 🟑 Moderate | Add standard sections | -| Research | 6.0/10 | 🟑 Moderate | Add standard sections | -| Design | 4.0/10 | πŸ”΄ Needs Work | Restructure 3 agents | - ---- - -## Success Metrics - -### Immediate (1 month) -- [ ] All specialized agents have Focus/Approach/Output sections -- [ ] Design category reaches 7.0/10 -- [ ] Core agents all have best practices sections -- [ ] Quality template documented in CONTRIBUTING.md - -### Short-term (2 months) -- [ ] Overall repository score: 8.0/10 -- [ ] No category below 7.0/10 -- [ ] 80%+ structural consistency -- [ ] PR quality gates implemented - -### Long-term (6 months) -- [ ] Overall repository score: 8.5/10 -- [ ] Quarterly validation reports published -- [ ] User satisfaction metrics tracked -- [ ] Continuous improvement process established - ---- - -## Risk Assessment - -| Risk | Probability | Impact | Mitigation | -|------|-------------|--------|------------| -| Inconsistent application of template | Medium | High | Clear documentation, PR checks | -| Regression in quality | Low | Medium | Automated validation in CI | -| Category imbalance | Low | Low | Quarterly reviews | - ---- - -## ROI Analysis - -### Investment Required -- **Time**: 2-3 weeks developer effort -- **Cost**: Minimal (internal resources) - -### Expected Returns -- **Quality Improvement**: 6.8 β†’ 8.0 (+1.2 points, +18%) -- **User Experience**: Better discoverability, clearer usage -- **Maintainability**: Standardized structure, easier updates -- **Scalability**: Template-driven agent creation - -### Business Impact -- βœ… Higher user 
satisfaction -- βœ… Reduced support burden -- βœ… Faster agent development -- βœ… Professional quality standard - ---- - -## Comparison to Industry - -| Aspect | Claude Code Tresor | Industry Standard | -|--------|-------------------|-------------------| -| Documentation Completeness | 100% | 80-90% | -| Core Content Depth | 774 words | 500+ words | -| Code Examples | 12.4 per core | 5+ typical | -| Structural Consistency | 59% | 80%+ | -| **Overall Quality** | **6.8/10** | **7.0+ target** | - -**Assessment**: Close to industry standard, achievable with focused effort. - ---- - -## Decision Points - -### Option 1: Maintain Status Quo -- **Effort**: None -- **Risk**: Quality stagnation, user confusion -- **Outcome**: Repository remains at 6.8/10 - -### Option 2: Quick Fixes Only (Recommended) -- **Effort**: 2-3 weeks -- **Risk**: Low -- **Outcome**: Repository reaches 8.0/10, industry-leading - -### Option 3: Comprehensive Overhaul -- **Effort**: 2-3 months -- **Risk**: Medium (scope creep) -- **Outcome**: Repository reaches 9.0/10, but diminishing returns - -**Recommendation**: **Option 2** - Best ROI, achievable timeline, significant impact. - ---- - -## Next Steps - -1. **Review this report** with stakeholders -2. **Approve recommended actions** (Priority 1 items) -3. **Assign owner** for template creation -4. **Schedule** design category fixes -5. 
**Implement** quality gates for future PRs - -**Target Start Date**: November 18, 2025 -**Target Completion**: December 15, 2025 (4 weeks) - ---- - -**Report Contact**: Agent Validation Team -**Full Report**: See VALIDATION_REPORT.md for detailed analysis -**Data**: See VALIDATION_REPORT.json for raw metrics diff --git a/VALIDATION_REPORT.json b/VALIDATION_REPORT.json deleted file mode 100644 index f2969b5..0000000 --- a/VALIDATION_REPORT.json +++ /dev/null @@ -1,649 +0,0 @@ -{ - "summary": { - "total_agents": 22, - "overall_stats": { - "count": 22, - "avg_score": 7.1, - "min_score": 4.0, - "max_score": 9.0, - "avg_word_count": 492, - "avg_code_blocks": 5.4, - "yaml_complete": 22 - }, - "core_agents": { - "count": 9, - "avg_score": 7.6, - "min_score": 4.0, - "max_score": 8.8, - "avg_word_count": 774, - "avg_code_blocks": 12.4, - "yaml_complete": 9 - }, - "specialized_agents": { - "count": 13, - "avg_score": 6.7, - "min_score": 4.0, - "max_score": 9.0, - "avg_word_count": 296, - "avg_code_blocks": 0.5, - "yaml_complete": 13 - }, - "category_stats": { - "core": { - "formats": { - "core": 8, - "specialized": 0 - }, - "count": 8, - "avg_score": 8.0 - }, - "engineering": { - "formats": { - "core": 0, - "specialized": 5 - }, - "count": 5, - "avg_score": 8.4 - }, - "design": { - "formats": { - "core": 1, - "specialized": 2 - }, - "count": 3, - "avg_score": 4.0 - }, - "marketing": { - "formats": { - "core": 0, - "specialized": 1 - }, - "count": 1, - "avg_score": 6.0 - }, - "product": { - "formats": { - "core": 0, - "specialized": 1 - }, - "count": 1, - "avg_score": 6.0 - }, - "research": { - "formats": { - "core": 0, - "specialized": 2 - }, - "count": 2, - "avg_score": 6.0 - }, - "ai-automation": { - "formats": { - "core": 0, - "specialized": 2 - }, - "count": 2, - "avg_score": 6.5 - } - }, - "top_performers": [ - { - "agent_name": "backend-architect", - "category": "engineering", - "subcategory": "backend", - "file_path": "engineering/backend/backend-architect/agent.md", 
- "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 122, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "database-optimizer", - "category": "engineering", - "subcategory": "backend", - "file_path": "engineering/backend/database-optimizer/agent.md", - "format": "specialized", - "line_count": 46, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 115, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "cloud-architect", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "format": "specialized", - "line_count": 45, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 121, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "frontend-developer", - "category": "engineering", - "subcategory": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 128, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "performance-tuner", - "category": "core", - "subcategory": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "format": "core", - "line_count": 651, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 56, - "code_blocks": 15, - "word_count": 882, - "yaml_complete": true, - "yaml_missing": [] - } - ], - "needs_improvement": [ - { - "agent_name": "ui-designer", - "category": "design", - "subcategory": "ui", - "file_path": "design/ui/ui-designer/agent.md", - "format": "specialized", - "line_count": 164, - "quality_score": 4.0, - "quality_issues": [ - "Missing 
typical sections (Focus Areas, Approach, Output)", - "Too verbose (717 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 2, - "word_count": 717, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ux-researcher", - "category": "design", - "subcategory": "ux", - "file_path": "design/ux/ux-researcher/agent.md", - "format": "specialized", - "line_count": 197, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (722 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 3, - "word_count": 722, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "visual-storyteller", - "category": "design", - "subcategory": "visual", - "file_path": "design/visual/visual-storyteller/agent.md", - "format": "core", - "line_count": 258, - "quality_score": 4.0, - "quality_issues": [ - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 0, - "code_blocks": 5, - "word_count": 756, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "incident-responder", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/incident-responder/agent.md", - "format": "specialized", - "line_count": 87, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 8, - "code_blocks": 0, - "word_count": 219, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "growth-hacker", - "category": "marketing", - "subcategory": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "format": "specialized", - "line_count": 75, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 3, - "code_blocks": 0, - "word_count": 222, - "yaml_complete": 
true, - "yaml_missing": [] - } - ], - "all_issues": { - "Missing typical sections (Focus Areas, Approach, Output)": 9, - "Missing sections: Methodology, Examples": 4, - "Add best practices/pitfalls/patterns section": 2, - "Missing sections: Methodology, Approach": 1, - "Insufficient content (493 words, need 800+ for excellent)": 1, - "Missing sections: Approach, Examples": 1, - "Missing sections: Methodology": 1, - "Too verbose (717 words, should be 100-300)": 1, - "Too verbose (722 words, should be 100-300)": 1, - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills": 1 - } - }, - "detailed_results": [ - { - "agent_name": "performance-tuner", - "category": "core", - "subcategory": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "format": "core", - "line_count": 651, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 56, - "code_blocks": 15, - "word_count": 882, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "config-safety-reviewer", - "category": "core", - "subcategory": "config-safety-reviewer", - "file_path": "core/config-safety-reviewer/agent.md", - "format": "core", - "line_count": 226, - "quality_score": 6.8, - "quality_issues": [ - "Missing sections: Methodology, Approach" - ], - "sections_count": 31, - "code_blocks": 6, - "word_count": 642, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "root-cause-analyzer", - "category": "core", - "subcategory": "root-cause-analyzer", - "file_path": "core/root-cause-analyzer/agent.md", - "format": "core", - "line_count": 404, - "quality_score": 8.0, - "quality_issues": [ - "Insufficient content (493 words, need 800+ for excellent)" - ], - "sections_count": 47, - "code_blocks": 14, - "word_count": 493, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "refactor-expert", - "category": "core", - "subcategory": "refactor-expert", - 
"file_path": "core/refactor-expert/agent.md", - "format": "core", - "line_count": 976, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Approach, Examples" - ], - "sections_count": 40, - "code_blocks": 14, - "word_count": 1135, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "docs-writer", - "category": "core", - "subcategory": "docs-writer", - "file_path": "core/docs-writer/agent.md", - "format": "core", - "line_count": 483, - "quality_score": 8.4, - "quality_issues": [ - "Missing sections: Methodology" - ], - "sections_count": 74, - "code_blocks": 19, - "word_count": 799, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "systems-architect", - "category": "core", - "subcategory": "systems-architect", - "file_path": "core/systems-architect/agent.md", - "format": "core", - "line_count": 434, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 52, - "code_blocks": 13, - "word_count": 1051, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "security-auditor", - "category": "core", - "subcategory": "security-auditor", - "file_path": "core/security-auditor/agent.md", - "format": "core", - "line_count": 720, - "quality_score": 6.8, - "quality_issues": [ - "Missing sections: Methodology, Examples", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 33, - "code_blocks": 13, - "word_count": 520, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "test-engineer", - "category": "core", - "subcategory": "test-engineer", - "file_path": "core/test-engineer/agent.md", - "format": "core", - "line_count": 396, - "quality_score": 7.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 34, - "code_blocks": 13, - "word_count": 691, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "backend-architect", - "category": "engineering", - 
"subcategory": "backend", - "file_path": "engineering/backend/backend-architect/agent.md", - "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 122, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "database-optimizer", - "category": "engineering", - "subcategory": "backend", - "file_path": "engineering/backend/database-optimizer/agent.md", - "format": "specialized", - "line_count": 46, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 115, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "cloud-architect", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "format": "specialized", - "line_count": 45, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 121, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "incident-responder", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/incident-responder/agent.md", - "format": "specialized", - "line_count": 87, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 8, - "code_blocks": 0, - "word_count": 219, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "frontend-developer", - "category": "engineering", - "subcategory": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 128, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ui-designer", - "category": "design", - "subcategory": "ui", - "file_path": "design/ui/ui-designer/agent.md", - "format": 
"specialized", - "line_count": 164, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (717 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 2, - "word_count": 717, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ux-researcher", - "category": "design", - "subcategory": "ux", - "file_path": "design/ux/ux-researcher/agent.md", - "format": "specialized", - "line_count": 197, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (722 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 3, - "word_count": 722, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "visual-storyteller", - "category": "design", - "subcategory": "visual", - "file_path": "design/visual/visual-storyteller/agent.md", - "format": "core", - "line_count": 258, - "quality_score": 4.0, - "quality_issues": [ - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 0, - "code_blocks": 5, - "word_count": 756, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "growth-hacker", - "category": "marketing", - "subcategory": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "format": "specialized", - "line_count": 75, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 3, - "code_blocks": 0, - "word_count": 222, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "product-manager", - "category": "product", - "subcategory": "management", - "file_path": "product/management/product-manager/agent.md", - "format": "specialized", - "line_count": 61, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" 
- ], - "sections_count": 3, - "code_blocks": 0, - "word_count": 194, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "market-research-analyst", - "category": "research", - "subcategory": "market", - "file_path": "research/market/market-research-analyst/agent.md", - "format": "specialized", - "line_count": 53, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 0, - "code_blocks": 0, - "word_count": 299, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "deep-research-specialist", - "category": "research", - "subcategory": "data", - "file_path": "research/data/deep-research-specialist/agent.md", - "format": "specialized", - "line_count": 93, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 6, - "code_blocks": 0, - "word_count": 394, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ml-engineer", - "category": "ai-automation", - "subcategory": "ml-engineering", - "file_path": "ai-automation/ml-engineering/ml-engineer/agent.md", - "format": "specialized", - "line_count": 62, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 0, - "code_blocks": 0, - "word_count": 293, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "prompt-engineer", - "category": "ai-automation", - "subcategory": "prompts", - "file_path": "ai-automation/prompts/prompt-engineer/agent.md", - "format": "specialized", - "line_count": 107, - "quality_score": 7.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 11, - "code_blocks": 1, - "word_count": 299, - "yaml_complete": true, - "yaml_missing": [] - } - ], - "repository_estimate": { - "sample_score": 7.1, - "estimated_full_score": 6.8, - "quality_label": "MODERATE", - 
"total_repo_agents": 133 - } -} \ No newline at end of file diff --git a/VALIDATION_REPORT.md b/VALIDATION_REPORT.md deleted file mode 100644 index 84af7c9..0000000 --- a/VALIDATION_REPORT.md +++ /dev/null @@ -1,384 +0,0 @@ -# Agent Content Validation Report -**Claude Code Tresor - Comprehensive Quality Assessment** - -**Date**: November 15, 2025 -**Scope**: 22 representative agents (17% of total 133 agents) -**Validation Method**: Dual-format content structure and quality analysis - ---- - -## Executive Summary - -### Overall Quality Assessment: **MODERATE (6.8/10 estimated)** - -The repository contains **two distinct agent formats**: -- **Core Agents (8)**: Comprehensive 300-1000 line agents with extensive documentation → **7.6/10 (GOOD)** -- **Specialized Agents (125)**: Concise 40-100 line focused agents → **6.7/10 (NEEDS IMPROVEMENT)** - -### Key Findings - -| Metric | Value | -|--------|-------| -| **Sample Size** | 22 agents analyzed | -| **Overall Score** | 7.1/10 | -| **Estimated Repo Score** | 6.8/10 | -| **YAML Completeness** | 100% (all agents) | -| **Average Word Count** | Core: 774 words, Specialized: 296 words | -| **Code Examples** | Core: 12.4 blocks, Specialized: 0.5 blocks | - ---- - -## Format Analysis - -### 📚 Core Agents (Comprehensive Format) -**Count**: 8 agents (6% of repository) -**Quality Score**: 7.6/10 - -**Characteristics**: -- 300-1000 lines per agent -- Extensive code examples (avg 12.4 blocks) -- Comprehensive methodology sections -- Real-world use cases and workflows -- Integration with skills documented - -**Top Performers**: -1. performance-tuner: 8.8/10 -2. refactor-expert: 8.8/10 -3. systems-architect: 8.8/10 -4. docs-writer: 8.4/10 -5. 
root-cause-analyzer: 8.0/10 - -**Issues**: -- Some missing "Working with Skills" sections -- Need more best practices/pitfalls content -- A few agents lack sufficient code examples - ---- - -### ⚑ Specialized Agents (Concise Format) -**Count**: 125 agents (94% of repository) -**Quality Score**: 6.7/10 - -**Characteristics**: -- 40-100 lines per agent -- Focused capability statements -- Concise approach descriptions -- Minimal code examples -- Quick reference format - -**Top Performers**: -1. backend-architect: 9.0/10 -2. database-optimizer: 9.0/10 -3. cloud-architect: 9.0/10 -4. frontend-developer: 9.0/10 - -**Common Issues** (affecting 9 agents, 40.9%): -- Missing standard sections: "Focus Areas", "Approach", "Output" -- Some agents too verbose (200+ words when should be 100-300) -- Inconsistent structure across categories - ---- - -## Category Analysis - -### By Quality Score - -| Category | Agents | Avg Score | Assessment | Issues | -|----------|--------|-----------|------------|--------| -| **Engineering** | 5 | 8.4/10 | 🟒 EXCELLENT | Well-structured, clear approaches | -| **Core** | 8 | 8.0/10 | 🟒 GOOD | Comprehensive, minor gaps in some | -| **AI & Automation** | 2 | 6.5/10 | 🟑 MODERATE | Missing standard sections | -| **Marketing** | 1 | 6.0/10 | 🟑 MODERATE | Needs Focus/Approach/Output | -| **Product** | 1 | 6.0/10 | 🟑 MODERATE | Needs standard structure | -| **Research** | 2 | 6.0/10 | 🟑 MODERATE | Missing typical sections | -| **Design** | 3 | 4.0/10 | πŸ”΄ NEEDS WORK | Inconsistent format, too verbose | - ---- - -## Quality Distribution - -``` -Excellent (9-10): 4 agents (18.2%) β– β– β– β– β– β– β– β– β–  -Good (7-8.9): 7 agents (31.8%) β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  -Moderate (5-6.9): 8 agents (36.4%) β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β– β–  -Poor (<5): 3 agents (13.6%) β– β– β– β– β– β–  -``` - ---- - -## Common Issues & Impact - -### Issue Frequency - -1. 
**Missing standard sections** (9 agents, 40.9%) - - Specialized agents lack "Focus Areas", "Approach", "Output" structure - - Affects: marketing, product, research, ai-automation categories - - **Impact**: Users don't know how to use these agents effectively - -2. **Missing Methodology/Examples** (4 agents, 18.2%) - - Core agents missing comprehensive examples - - Affects: performance-tuner, systems-architect, security-auditor, test-engineer - - **Impact**: Reduced learning value for users - -3. **Inconsistent verbosity** (3 agents, 13.6%) - - Some specialized agents have 700+ words (should be 100-300) - - Affects: design category primarily - - **Impact**: Format inconsistency, harder to scan - -4. **Missing best practices sections** (2 agents, 9.1%) - - Core agents lack pitfalls/patterns content - - **Impact**: Users miss common mistakes and anti-patterns - ---- - -## Validation Metrics Detail - -### Content Structure Scoring (Core Agents) - -| Criterion | Weight | Description | -|-----------|--------|-------------| -| **Required Sections** | 30% | Expertise, Methodology, Approach, Examples, Working with Skills | -| **Content Depth** | 20% | 800+ words for excellent, 500+ for good | -| **Code Examples** | 30% | 10+ blocks for excellent, 5+ for good | -| **YAML Frontmatter** | 10% | Complete with all required fields | -| **Best Practices** | 10% | Pitfalls, patterns, common issues | - -### Content Structure Scoring (Specialized Agents) - -| Criterion | Weight | Description | -|-----------|--------|-------------| -| **YAML Completeness** | 40% | All required fields present | -| **Standard Sections** | 30% | Focus Areas, Approach, Output | -| **Content Length** | 20% | 100-300 words (sweet spot) | -| **Capabilities** | 10% | Clear, actionable capability list | - ---- - -## Recommendations - -### Priority 1: Critical Issues (Affect 40%+ of sample) - -1. 
**Standardize Specialized Agent Format** - - **Affected**: 9 agents (40.9%) - - **Action**: Add template with required sections: - ```markdown - ## Focus Areas - - [List core capabilities] - - ## Approach - - [Numbered methodology] - - ## Output - - [What user receives] - ``` - - **Expected Impact**: Raise specialized agent average from 6.7 to 8.0+ - -### Priority 2: High Impact Improvements - -2. **Fix Design Category** (avg 4.0/10) - - **Issue**: Inconsistent format, too verbose - - **Action**: - - Reduce ui-designer and ux-researcher from 700+ to 100-300 words - - Add standard sections to visual-storyteller - - **Expected Impact**: Raise category from 4.0 to 7.0+ - -3. **Enhance Core Agent Examples** - - **Affected**: config-safety-reviewer, security-auditor (6.8/10 each) - - **Action**: Add 3-5 more code examples per agent - - **Expected Impact**: Raise scores to 8.0+ - -### Priority 3: Quality Enhancements - -4. **Add Best Practices Sections** - - **Affected**: 2 core agents - - **Action**: Add "Common Pitfalls" or "Patterns" sections - - **Expected Impact**: Better user guidance, fewer mistakes - -5. 
**Expand Working with Skills Documentation** - - **Action**: Ensure all core agents document skill integration - - **Expected Impact**: Clearer skill vs agent boundaries - ---- - -## Extrapolated Repository Quality - -### Estimation Methodology - -Based on the 22-agent sample (17% of repository): -- **Core agents** (8 total): 7.6/10 avg -- **Specialized agents** (125 total): 6.7/10 avg (estimated) - -**Weighted calculation**: -``` -(7.6 × 8 + 6.7 × 125) / 133 = 6.8/10 -``` - -### Estimated Repository Breakdown - -| Quality Level | Estimated Count | Percentage | -|---------------|-----------------|------------| -| Excellent (9-10) | 24 agents | 18% | -| Good (7-8.9) | 42 agents | 32% | -| Moderate (5-6.9) | 48 agents | 36% | -| Poor (<5) | 19 agents | 14% | - -### Confidence Level: **MEDIUM** -- Sample size: 22/133 (17%) -- Representative across all categories ✓ -- Two distinct formats identified ✓ -- Validation methodology verified ✓ - ---- - -## Examples of Excellence - -### Excellent Core Agent: `refactor-expert` (8.8/10) - -**Strengths**: -- 976 lines of comprehensive content -- 15+ code examples covering SOLID principles -- Clear methodology with 8-step workflow -- Extensive design pattern examples -- Integration with code-reviewer and test-generator skills documented -- Before/after refactoring comparisons - -**Structure**: -```markdown -## Your Refactoring Philosophy (4 core principles) -## Your Refactoring Expertise (5 areas) -## Working with Skills (detailed coordination) -## Systematic Refactoring Methodology (6-step process) -## SOLID Principles Implementation (5 principles with code) -## Code Smell Detection & Remediation (comprehensive taxonomy) -## Design Pattern Applications (Strategy, Observer patterns) -## Core Refactoring Techniques Reference (8 techniques) -## Technical Debt Management (5 categories) -``` - -### Excellent Specialized Agent: `cloud-architect` (9.0/10) - -**Strengths**: -- Concise 45 lines -- All required sections present 
-- Clear focus areas (IaC, multi-cloud, cost optimization) -- 5-point approach methodology -- Specific output deliverables listed -- Perfect YAML frontmatter - -**Structure**: -```markdown -## Focus Areas (6 clear capabilities) -## Approach (5 numbered principles) -## Output (6 specific deliverables) -``` - ---- - -## Validation Methodology - -### Tools Used -- Custom Python validator (`agent_validator_v2.py`) -- Dual-format scoring system -- YAML frontmatter parser -- Markdown structure analyzer - -### Metrics Collected -- Line count and word count -- Section headers (H1-H3) -- Code block count and language tags -- YAML frontmatter completeness -- Header hierarchy validation - -### Scoring Algorithm -- Format-specific quality scoring (0-10 scale) -- Weighted criteria based on agent type -- Issue detection and categorization -- Category-level aggregation - ---- - -## Comparison to Industry Standards - -| Aspect | Claude Code Tresor | Industry Standard | Assessment | -|--------|-------------------|-------------------|------------| -| **YAML Frontmatter** | 100% complete | 80-90% typical | ✅ EXCELLENT | -| **Core Agent Depth** | 774 words avg | 500+ words | ✅ EXCEEDS | -| **Code Examples (Core)** | 12.4 blocks | 5+ blocks | ✅ EXCELLENT | -| **Specialized Conciseness** | 296 words | 100-300 words | ✅ GOOD | -| **Structure Consistency** | 59% (needs work) | 80%+ expected | ⚠️ NEEDS IMPROVEMENT | -| **Overall Quality** | 6.8/10 | 7.0/10 target | 🟡 CLOSE TO TARGET | - ---- - -## Action Plan - -### Immediate Actions (1-2 weeks) - -1. **Create Specialized Agent Template** - - Define required sections: Focus Areas, Approach, Output - - Document word count target: 100-300 words - - Add to CONTRIBUTING.md - -2. **Fix Design Category (3 agents)** - - Reduce ui-designer and ux-researcher to concise format - - Restructure visual-storyteller as core agent with full examples - -3. 
**Audit All Specialized Agents for Standard Sections** - - Use automated script to identify missing sections - - Create GitHub issues for each category - -### Short-term Actions (1 month) - -4. **Enhance Core Agents** - - Add best practices sections to all core agents - - Increase code examples in config-safety-reviewer and security-auditor - - Document skill integration in all core agents - -5. **Category-specific Improvements** - - Marketing, Product, Research categories: add standard sections - - Engineering category: maintain current excellent standard - - AI & Automation: ensure methodology clarity - -### Long-term Actions (2-3 months) - -6. **Establish Quality Gates** - - Run validation on all pull requests - - Require minimum 7.0/10 score for new agents - - Document quality standards in CONTRIBUTING.md - -7. **Continuous Improvement** - - Quarterly validation reports - - User feedback integration - - Example expansion based on common use cases - ---- - -## Conclusion - -The Claude Code Tresor agent repository demonstrates **moderate to good quality** overall (6.8/10 estimated), with good core agents (7.6/10) and specialized agents needing structural improvements (6.7/10). - -### Strengths -- ✅ **100% YAML frontmatter completeness** - excellent metadata -- ✅ **Core agents are comprehensive** - extensive examples and methodology -- ✅ **Engineering category is excellent** - 8.4/10 average -- ✅ **Repository size and variety** - 133 agents across 10 categories - -### Areas for Improvement -- ⚠️ **Structural consistency** - 40% of specialized agents missing standard sections -- ⚠️ **Design category** - significantly below average (4.0/10) -- ⚠️ **Best practices documentation** - some core agents lack pitfalls/patterns - -### Path Forward -With focused improvements on **specialized agent structure** and **design category content**, the repository can easily reach **7.5-8.0/10 overall quality** within 1-2 months. 
The foundation is solid; standardization and consistency are the primary needs. - -**Recommended Target**: 8.0/10 within 2 months -- Fix design category: +0.3 -- Standardize specialized agents: +0.5 -- Enhance core agents: +0.4 - ---- - -**Validation Report Generated**: November 15, 2025 -**Validator Version**: 2.0 -**Report Format**: Comprehensive Quality Assessment -**Next Review**: February 15, 2026 (quarterly) diff --git a/VALIDATION_SUMMARY_VISUAL.txt b/VALIDATION_SUMMARY_VISUAL.txt deleted file mode 100644 index 01225aa..0000000 --- a/VALIDATION_SUMMARY_VISUAL.txt +++ /dev/null @@ -1,157 +0,0 @@ -╔══════════════════════════════════════════════════════════════════════════════╗ -β•‘ AGENT VALIDATION REPORT - VISUAL SUMMARY β•‘ -β•‘ Claude Code Tresor (133 agents) β•‘ -β•‘ November 15, 2025 β•‘ -β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β• - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ OVERALL QUALITY SCORE β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ Sample (22 agents): 7.1/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β”‚ -β”‚ Estimated Repo: 6.8/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β”‚ -β”‚ Target: 8.0/10 
β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘ β”‚ -β”‚ β”‚ -β”‚ Status: MODERATE β†’ Need +1.2 points to reach GOOD β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ FORMAT BREAKDOWN β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ πŸ“š CORE AGENTS (8 total) 7.6/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ β€’ Comprehensive (300-1000 lines) β”‚ -β”‚ β€’ Avg 774 words, 12.4 code blocks β”‚ -β”‚ β€’ Status: GOOD - minor improvements needed β”‚ -β”‚ β”‚ -β”‚ ⚑ SPECIALIZED AGENTS (125 total) 6.7/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘ β”‚ -β”‚ β€’ Concise (40-100 lines) β”‚ -β”‚ β€’ Avg 296 words, 0.5 code blocks β”‚ -β”‚ β€’ Status: NEEDS IMPROVEMENT - structure standardization β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ CATEGORY PERFORMANCE β”‚ 
-β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ 🟒 Engineering 8.4/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ 🟒 Core 8.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ 🟑 AI & Automation 6.5/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘ β”‚ -β”‚ 🟑 Marketing 6.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘ β”‚ -β”‚ 🟑 Product 6.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘ β”‚ -β”‚ 🟑 Research 6.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘ β”‚ -β”‚ πŸ”΄ Design 4.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ QUALITY DISTRIBUTION β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ Excellent (9-10) 18% β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ Good (7-8.9) 32% β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ Moderate (5-6.9) 36% β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ Poor (<5) 14% β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ β”‚ 
-β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ TOP ISSUES (Priority) β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ 1. Missing standard sections (41% of sample) β”‚ -β”‚ β†’ Specialized agents lack Focus/Approach/Output β”‚ -β”‚ β†’ Impact: Users don't know how to use agents β”‚ -β”‚ β†’ Fix: Apply template to all specialized agents β”‚ -β”‚ β”‚ -β”‚ 2. Design category underperforming (4.0/10) β”‚ -β”‚ β†’ 2 agents too verbose, 1 missing core sections β”‚ -β”‚ β†’ Impact: Poor UX for design-related tasks β”‚ -β”‚ β†’ Fix: Restructure 3 agents β”‚ -β”‚ β”‚ -β”‚ 3. 
Missing best practices (18% of core agents) β”‚ -β”‚ β†’ No pitfalls/patterns documentation β”‚ -β”‚ β†’ Impact: Users miss common mistakes β”‚ -β”‚ β†’ Fix: Add "Common Pitfalls" sections β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ STRENGTHS (Keep Doing) β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ βœ… 100% YAML frontmatter completeness - Perfect metadata β”‚ -β”‚ βœ… Core agents comprehensive - Extensive examples & methodology β”‚ -β”‚ βœ… Engineering category excellent - 8.4/10 average score β”‚ -β”‚ βœ… Clear format distinction - Two well-defined agent types β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ IMPROVEMENT ROADMAP β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ 
-β”‚ β”‚ -β”‚ PRIORITY 1: Quick Wins (1-2 weeks) β†’ +1.2 points β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ β€’ Create specialized agent template [Impact: +0.5] β”‚ β”‚ -β”‚ β”‚ β€’ Fix design category (3 agents) [Impact: +0.3] β”‚ β”‚ -β”‚ β”‚ β€’ Add best practices to core agents [Impact: +0.4] β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ -β”‚ PRIORITY 2: Standardization (1 month) β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ β€’ Apply template to all specialized agents β”‚ β”‚ -β”‚ β”‚ β€’ Audit and fix missing sections β”‚ β”‚ -β”‚ β”‚ β€’ Document quality standards in CONTRIBUTING.md β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ -β”‚ PRIORITY 3: Continuous Improvement (Ongoing) β”‚ -β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ -β”‚ β”‚ β€’ Quarterly validation reports β”‚ β”‚ -β”‚ β”‚ β€’ PR quality gates (min 7.0/10) β”‚ β”‚ -β”‚ β”‚ β€’ User feedback integration β”‚ β”‚ -β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ -β”‚ β”‚ 
-β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ PROGRESS PROJECTION β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ Now (Nov 2025) 6.8/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‘β–‘β–‘β–‘β–‘β–‘β–‘β–‘ β”‚ -β”‚ After P1 (Dec 2025) 8.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ Target (Feb 2026) 8.5/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ Excellent 10.0/10 β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ β”‚ -β”‚ β”‚ -β”‚ Timeline: 8 weeks to reach industry-leading quality β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ VALIDATION METRICS β”‚ 
-β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ Sample Size: 22 agents (17% of 133 total) β”‚ -β”‚ Categories Covered: 10/10 (100%) β”‚ -β”‚ Confidence Level: MEDIUM (representative sample) β”‚ -β”‚ YAML Completeness: 22/22 (100%) βœ“ β”‚ -β”‚ Methodology: Dual-format scoring (core vs specialized) β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” -β”‚ FILES GENERATED β”‚ -β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ -β”‚ β”‚ -β”‚ πŸ“„ VALIDATION_REPORT.md - Full detailed analysis (12KB) β”‚ -β”‚ πŸ“„ VALIDATION_EXECUTIVE_SUMMARY.md - Executive summary for stakeholders β”‚ -β”‚ πŸ“„ VALIDATION_REPORT.json - Raw data and metrics (18KB) β”‚ -β”‚ πŸ“„ agent_validator_v2.py - Reusable validation script β”‚ -β”‚ β”‚ -β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ - -╔══════════════════════════════════════════════════════════════════════════════╗ -β•‘ RECOMMENDATION: Implement Priority 1 actions for +1.2 point improvement β•‘ -β•‘ Timeline: 2-3 weeks | Effort: 
Low | Impact: HIGH | ROI: Excellent β•‘ -β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β• diff --git a/__pycache__/agent_validator_v2.cpython-313.pyc b/__pycache__/agent_validator_v2.cpython-313.pyc deleted file mode 100644 index 7304a59..0000000 Binary files a/__pycache__/agent_validator_v2.cpython-313.pyc and /dev/null differ diff --git a/agent_validation_results.json b/agent_validation_results.json deleted file mode 100644 index 7c4f16b..0000000 --- a/agent_validation_results.json +++ /dev/null @@ -1,1286 +0,0 @@ -{ - "summary": { - "total_agents": 16, - "average_score": 3.5, - "min_score": 0.0, - "max_score": 5.7, - "score_distribution": { - "excellent (9-10)": 0, - "good (7-8.9)": 0, - "moderate (5-6.9)": 4, - "poor (<5)": 12 - }, - "average_word_count": 598, - "average_example_count": 14.2, - "average_section_count": 23.9, - "category_stats": { - "performance-tuner": { - "count": 1, - "avg_score": 4.7 - }, - "config-safety-reviewer": { - "count": 1, - "avg_score": 5.7 - }, - "root-cause-analyzer": { - "count": 1, - "avg_score": 5.3 - }, - "refactor-expert": { - "count": 1, - "avg_score": 4.7 - }, - "docs-writer": { - "count": 1, - "avg_score": 5.3 - }, - "systems-architect": { - "count": 1, - "avg_score": 5.0 - }, - "security-auditor": { - "count": 1, - "avg_score": 4.7 - }, - "test-engineer": { - "count": 1, - "avg_score": 4.7 - }, - "frontend": { - "count": 1, - "avg_score": 1.3 - }, - "devops": { - "count": 1, - "avg_score": 0.3 - }, - "ui": { - "count": 1, - "avg_score": 3.0 - }, - "ux": { - "count": 1, - "avg_score": 4.0 - }, - "visual": { - "count": 1, - "avg_score": 4.0 - }, - "growth": { - "count": 1, - "avg_score": 1.0 - }, - "market": { - "count": 1, - "avg_score": 0.0 - }, - "data": { - "count": 1, - "avg_score": 1.7 - } - }, - "common_issues": [ - [ - "Header 
hierarchy skip detected (H1 to H3)", - 8 - ], - [ - "Found 14 code blocks without language tags", - 3 - ], - [ - "Found 15 code blocks without language tags", - 2 - ], - [ - "Found 16 code blocks without language tags", - 1 - ], - [ - "Found 7 code blocks without language tags", - 1 - ] - ], - "top_performers": [ - { - "agent_name": "config-safety-reviewer", - "category": "config-safety-reviewer", - "file_path": "core/config-safety-reviewer/agent.md", - "quality_score": 5.7, - "sections_found": [ - "Your Role", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke Skills", - "At the START of your review:", - "Then proceed with YOUR deep expert analysis", - "Workflow Pattern", - "Example Coordination", - "You start your review:", - "Security Analysis", - "Review Process", - "Review Criteria", - "Code Quality (High Priority)", - "Security (Critical Priority)", - "Performance (High Priority)", - "Testing & Reliability", - "Technology Expertise", - "Frontend Technologies", - "Backend Technologies", - "Infrastructure & DevOps", - "Output Format", - "Executive Summary", - "Critical Issues", - "Code Quality Observations", - "Best Practices Recommendations", - "Action Plan", - "Review Examples", - "Security Review", - "Performance Review", - "Code Quality Review" - ], - "section_count": 31, - "code_blocks": 6, - "example_count": 13, - "word_count": 642, - "formatting_issues": [ - "Found 7 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 7 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology" - ] - }, - { - "agent_name": "root-cause-analyzer", - "category": "root-cause-analyzer", - "file_path": "core/root-cause-analyzer/agent.md", - "quality_score": 5.3, - "sections_found": [ - "Your Debugging 
Expertise", - "Working with Skills", - "Debugging Methodology", - "Debugging Process Framework", - "Scientific Method Approach", - "Issue Type Analysis", - "Performance Issues", - "System-level investigation", - "Application-level investigation", - "Memory profiling", - "or for Node.js", - "CPU profiling", - "Database query analysis", - "Memory Leaks", - "Concurrency Issues", - "Deadlock detection", - "Thread dump analysis (Java)", - "Race condition debugging", - "Critical section analysis", - "Network and Integration Issues", - "Network debugging", - "DNS resolution issues", - "SSL/TLS debugging", - "Load balancer issues", - "Debugging Tools & Techniques", - "Log Analysis", - "Real-time log monitoring", - "Pattern analysis", - "Performance correlation", - "JSON log parsing", - "Database Debugging", - "Application Debugging", - "Root Cause Analysis Examples", - "Case Study: API Response Timeouts", - "Case Study: Memory Leak in React App", - "Case Study: Intermittent Database Errors", - "Prevention Strategies", - "Defensive Programming", - "Monitoring and Alerting", - "Health check endpoints", - "Error rate monitoring", - "Performance monitoring", - "Testing for Edge Cases", - "Debugging Best Practices", - "Information Collection", - "Hypothesis Testing", - "Solution Implementation" - ], - "section_count": 47, - "code_blocks": 14, - "example_count": 23, - "word_count": 493, - "formatting_issues": [ - "Found 14 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Consider expanding content for better completeness (< 500 words)", - "Fix formatting issues: Found 14 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity" - ] - }, - { - "agent_name": "docs-writer", - "category": "docs-writer", - "file_path": "core/docs-writer/agent.md", - "quality_score": 5.3, - "sections_found": [ - 
"Your Documentation Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "At START of API documentation:", - "At START of README update:", - "Then YOUR comprehensive documentation work:", - "- User guides with examples", - "- Architecture documentation", - "- Tutorials and walkthroughs", - "- Troubleshooting guides", - "Workflow Pattern", - "Example Coordination", - "You start API documentation:", - "Initial Structure", - "Example Coordination", - "Documentation Approach", - "Documentation Types & Formats", - "API Documentation", - "OpenAPI Specification Example", - "User Guides & Tutorials", - "Getting Started with Payment Processing", - "Prerequisites", - "Quick Start", - "1. Install the SDK", - "or", - "2. Initialize the Client", - "3. Process Your First Payment", - "Common Use Cases", - "Subscription Billing", - "Refund Processing", - "Technical Reference Documentation", - "System Architecture", - "Overview", - "Components", - "API Gateway", - "User Service", - "Data Flow", - "Configuration", - "Environment Variables", - "Deployment", - "docker-compose.yml", - "README Files", - "Project Name", - "Features", - "Quick Start", - "Installation", - "Basic Usage", - "Documentation", - "Contributing", - "License", - "Content Quality Standards", - "Clarity and Accessibility", - "Practical Examples", - "Accuracy and Completeness", - "Specialized Documentation", - "Architecture Decision Records (ADRs)", - "ADR-001: Database Technology Selection", - "Status", - "Context", - "Decision", - "Consequences", - "Troubleshooting Guides", - "Troubleshooting Guide", - "Common Issues", - "\"Connection Refused\" Error", - "High Memory Usage", - "Integration Guides", - "Third-Party Integration Guide", - "Webhook Setup", - "Documentation Maintenance", - "Automated Updates", - "Analytics and Improvement" - ], - "section_count": 74, - "code_blocks": 19, - "example_count": 54, - "word_count": 799, - "formatting_issues": [ - "Found 21 
code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 21 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology" - ] - }, - { - "agent_name": "systems-architect", - "category": "systems-architect", - "file_path": "core/systems-architect/agent.md", - "quality_score": 5.0, - "sections_found": [ - "Identity & Operating Principles", - "Your Architectural Expertise", - "Working with Skills", - "Architectural Approach", - "Core Architectural Principles", - "Evidence-Based Architecture", - "Evidence-Based Decisions", - "Decision Framework", - "Priority Hierarchy", - "Trade-off Analysis", - "Communication Style", - "Architecture Patterns & Solutions", - "Microservices Architecture", - "Service decomposition approach", - "Event-Driven Architecture", - "Serverless Architecture", - "AWS Lambda-based architecture", - "Technology Stack Evaluation", - "Database Selection Framework", - "Relational Databases (PostgreSQL, MySQL)", - "NoSQL Document Stores (MongoDB, CouchDB)", - "Key-Value Stores (Redis, DynamoDB)", - "Graph Databases (Neo4j, Amazon Neptune)", - "Performance Architecture Patterns", - "Horizontal Scaling Strategies", - "Security Architecture", - "Defense in Depth", - "Architecture Decision Process", - "ADR Template", - "ADR-XXX: [Decision Title]", - "Status", - "Context", - "Decision", - "Consequences", - "Positive", - "Negative", - "Neutral", - "Implementation", - "Monitoring", - "Technology Evaluation Criteria", - "Implementation Strategies", - "Migration Patterns", - "Strangler Fig Pattern", - "Database Migration Strategy", - "Zero-Downtime Deployment", - "Monitoring and Observability", - "Three Pillars of Observability", - "Success Metrics for Architecture", - "Collaboration with Other Agents", - "Cost Optimization Strategies", - 
"Infrastructure Optimization", - "Architecture Optimization" - ], - "section_count": 52, - "code_blocks": 13, - "example_count": 16, - "word_count": 1051, - "formatting_issues": [ - "Found 14 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 14 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Methodology, Examples" - ] - }, - { - "agent_name": "performance-tuner", - "category": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "quality_score": 4.7, - "sections_found": [ - "Your Performance Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "At START of performance optimization:", - "Then YOUR performance engineering work:", - "- Profile with actual tools", - "- Measure bottlenecks", - "- Implement data-driven optimizations", - "Workflow Pattern", - "Example Coordination", - "You start optimization:", - "Initial Analysis", - "Performance Tuning Approach", - "Core Performance Principles", - "Performance Hierarchy", - "Key Performance Metrics", - "Systematic Bottleneck Categorization", - "Performance Analysis Tools", - "Profiling & APM", - "Load & Stress Testing", - "Frontend Analysis", - "Database Analysis", - "Network Analysis", - "Application Profiling", - "CPU Profiling", - "Node.js CPU profiling", - "Python profiling with cProfile", - "Java profiling with async-profiler", - "Go profiling", - "Memory Profiling", - "Database Performance", - "Frontend Performance Optimization", - "Core Web Vitals", - "React Performance Optimization", - "Backend Performance Optimization", - "API & Server Optimization", - "Database Performance", - "Backend Resource Management", - "Caching Strategies", - "Multi-Level Caching Architecture", - "CDN Level (Edge Caching)", - "Application Level", - 
"Database Level", - "Cache Implementation Patterns", - "Load Testing", - "k6 Load Testing Scripts", - "JMeter Performance Testing", - "Performance Monitoring", - "Application Performance Monitoring", - "Performance Alerting", - "Optimization Recommendations", - "Performance Budget Guidelines", - "Performance Budget Targets", - "Common Optimization Patterns" - ], - "section_count": 56, - "code_blocks": 15, - "example_count": 46, - "word_count": 882, - "formatting_issues": [ - "Found 16 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 16 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology, Examples" - ] - } - ], - "needs_improvement": [ - { - "agent_name": "market-research-analyst", - "category": "market", - "file_path": "research/market/market-research-analyst/agent.md", - "quality_score": 0.0, - "sections_found": [], - "section_count": 0, - "code_blocks": 0, - "example_count": 0, - "word_count": 299, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add more code examples and use cases", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "cloud-architect", - "category": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "quality_score": 0.3, - "sections_found": [ - "Focus Areas", - "Approach", - "Output" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 0, - "word_count": 121, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add more code examples and use cases", - "Add 
missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "growth-hacker", - "category": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "quality_score": 1.0, - "sections_found": [ - "Core Responsibilities", - "Best Practices & Frameworks", - "Key Metrics" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 0, - "word_count": 222, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add more code examples and use cases", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "frontend-developer", - "category": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "quality_score": 1.3, - "sections_found": [ - "Focus Areas", - "Approach", - "Output" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 2, - "word_count": 128, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "deep-research-specialist", - "category": "data", - "file_path": "research/data/deep-research-specialist/agent.md", - "quality_score": 1.7, - "sections_found": [ - "Identity & Operating Principles", - "Core Methodology", - "Research Strategy Framework", - "Source Evaluation & Quality Control", - "Output Structure", - "Quality Standards" - ], - "section_count": 6, - "code_blocks": 0, - "example_count": 1, - "word_count": 394, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Consider expanding content for better completeness (< 500 words)", - "Add more code examples and use cases", - "Add missing sections: 
Expertise, Examples" - ] - } - ] - }, - "detailed_results": [ - { - "agent_name": "performance-tuner", - "category": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "quality_score": 4.7, - "sections_found": [ - "Your Performance Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "At START of performance optimization:", - "Then YOUR performance engineering work:", - "- Profile with actual tools", - "- Measure bottlenecks", - "- Implement data-driven optimizations", - "Workflow Pattern", - "Example Coordination", - "You start optimization:", - "Initial Analysis", - "Performance Tuning Approach", - "Core Performance Principles", - "Performance Hierarchy", - "Key Performance Metrics", - "Systematic Bottleneck Categorization", - "Performance Analysis Tools", - "Profiling & APM", - "Load & Stress Testing", - "Frontend Analysis", - "Database Analysis", - "Network Analysis", - "Application Profiling", - "CPU Profiling", - "Node.js CPU profiling", - "Python profiling with cProfile", - "Java profiling with async-profiler", - "Go profiling", - "Memory Profiling", - "Database Performance", - "Frontend Performance Optimization", - "Core Web Vitals", - "React Performance Optimization", - "Backend Performance Optimization", - "API & Server Optimization", - "Database Performance", - "Backend Resource Management", - "Caching Strategies", - "Multi-Level Caching Architecture", - "CDN Level (Edge Caching)", - "Application Level", - "Database Level", - "Cache Implementation Patterns", - "Load Testing", - "k6 Load Testing Scripts", - "JMeter Performance Testing", - "Performance Monitoring", - "Application Performance Monitoring", - "Performance Alerting", - "Optimization Recommendations", - "Performance Budget Guidelines", - "Performance Budget Targets", - "Common Optimization Patterns" - ], - "section_count": 56, - "code_blocks": 15, - "example_count": 46, - "word_count": 882, - "formatting_issues": [ - 
"Found 16 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 16 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology, Examples" - ] - }, - { - "agent_name": "config-safety-reviewer", - "category": "config-safety-reviewer", - "file_path": "core/config-safety-reviewer/agent.md", - "quality_score": 5.7, - "sections_found": [ - "Your Role", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke Skills", - "At the START of your review:", - "Then proceed with YOUR deep expert analysis", - "Workflow Pattern", - "Example Coordination", - "You start your review:", - "Security Analysis", - "Review Process", - "Review Criteria", - "Code Quality (High Priority)", - "Security (Critical Priority)", - "Performance (High Priority)", - "Testing & Reliability", - "Technology Expertise", - "Frontend Technologies", - "Backend Technologies", - "Infrastructure & DevOps", - "Output Format", - "Executive Summary", - "Critical Issues", - "Code Quality Observations", - "Best Practices Recommendations", - "Action Plan", - "Review Examples", - "Security Review", - "Performance Review", - "Code Quality Review" - ], - "section_count": 31, - "code_blocks": 6, - "example_count": 13, - "word_count": 642, - "formatting_issues": [ - "Found 7 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 7 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology" - ] - }, - { - "agent_name": "root-cause-analyzer", - "category": "root-cause-analyzer", - "file_path": "core/root-cause-analyzer/agent.md", - 
"quality_score": 5.3, - "sections_found": [ - "Your Debugging Expertise", - "Working with Skills", - "Debugging Methodology", - "Debugging Process Framework", - "Scientific Method Approach", - "Issue Type Analysis", - "Performance Issues", - "System-level investigation", - "Application-level investigation", - "Memory profiling", - "or for Node.js", - "CPU profiling", - "Database query analysis", - "Memory Leaks", - "Concurrency Issues", - "Deadlock detection", - "Thread dump analysis (Java)", - "Race condition debugging", - "Critical section analysis", - "Network and Integration Issues", - "Network debugging", - "DNS resolution issues", - "SSL/TLS debugging", - "Load balancer issues", - "Debugging Tools & Techniques", - "Log Analysis", - "Real-time log monitoring", - "Pattern analysis", - "Performance correlation", - "JSON log parsing", - "Database Debugging", - "Application Debugging", - "Root Cause Analysis Examples", - "Case Study: API Response Timeouts", - "Case Study: Memory Leak in React App", - "Case Study: Intermittent Database Errors", - "Prevention Strategies", - "Defensive Programming", - "Monitoring and Alerting", - "Health check endpoints", - "Error rate monitoring", - "Performance monitoring", - "Testing for Edge Cases", - "Debugging Best Practices", - "Information Collection", - "Hypothesis Testing", - "Solution Implementation" - ], - "section_count": 47, - "code_blocks": 14, - "example_count": 23, - "word_count": 493, - "formatting_issues": [ - "Found 14 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Consider expanding content for better completeness (< 500 words)", - "Fix formatting issues: Found 14 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity" - ] - }, - { - "agent_name": "refactor-expert", - "category": "refactor-expert", - "file_path": 
"core/refactor-expert/agent.md", - "quality_score": 4.7, - "sections_found": [ - "Your Refactoring Philosophy", - "Your Refactoring Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "At START of refactoring:", - "CRITICAL: Ensure tests exist before refactoring!", - "Then YOUR refactoring expertise:", - "- Design refactoring strategy", - "- Apply SOLID principles", - "- Implement design patterns", - "- Execute safe transformation", - "Workflow Pattern", - "Example Coordination", - "You start refactoring:", - "Initial Assessment", - "CRITICAL: Test Coverage Before Refactoring", - "Systematic Refactoring Methodology", - "Quality Metrics to Track", - "SOLID Principles Implementation", - "Single Responsibility Principle (SRP)", - "Open/Closed Principle (OCP)", - "Liskov Substitution Principle (LSP)", - "Interface Segregation Principle (ISP)", - "Dependency Inversion Principle (DIP)", - "Code Smell Detection & Remediation", - "Code Smells Taxonomy", - "Long Method Refactoring", - "Large Class Decomposition", - "Design Pattern Applications", - "Strategy Pattern for Algorithm Selection", - "Observer Pattern for Event Handling", - "Core Refactoring Techniques Reference", - "Technical Debt Management", - "Communication & Reporting", - "Refactoring Safety Practices", - "Test-Driven Refactoring", - "Strangler Fig Pattern for Legacy Code", - "When Invoked: Your Complete Workflow" - ], - "section_count": 40, - "code_blocks": 14, - "example_count": 18, - "word_count": 1135, - "formatting_issues": [ - "Found 15 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 15 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Examples" - ] - }, - { - "agent_name": "docs-writer", - "category": "docs-writer", - 
"file_path": "core/docs-writer/agent.md", - "quality_score": 5.3, - "sections_found": [ - "Your Documentation Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "At START of API documentation:", - "At START of README update:", - "Then YOUR comprehensive documentation work:", - "- User guides with examples", - "- Architecture documentation", - "- Tutorials and walkthroughs", - "- Troubleshooting guides", - "Workflow Pattern", - "Example Coordination", - "You start API documentation:", - "Initial Structure", - "Example Coordination", - "Documentation Approach", - "Documentation Types & Formats", - "API Documentation", - "OpenAPI Specification Example", - "User Guides & Tutorials", - "Getting Started with Payment Processing", - "Prerequisites", - "Quick Start", - "1. Install the SDK", - "or", - "2. Initialize the Client", - "3. Process Your First Payment", - "Common Use Cases", - "Subscription Billing", - "Refund Processing", - "Technical Reference Documentation", - "System Architecture", - "Overview", - "Components", - "API Gateway", - "User Service", - "Data Flow", - "Configuration", - "Environment Variables", - "Deployment", - "docker-compose.yml", - "README Files", - "Project Name", - "Features", - "Quick Start", - "Installation", - "Basic Usage", - "Documentation", - "Contributing", - "License", - "Content Quality Standards", - "Clarity and Accessibility", - "Practical Examples", - "Accuracy and Completeness", - "Specialized Documentation", - "Architecture Decision Records (ADRs)", - "ADR-001: Database Technology Selection", - "Status", - "Context", - "Decision", - "Consequences", - "Troubleshooting Guides", - "Troubleshooting Guide", - "Common Issues", - "\"Connection Refused\" Error", - "High Memory Usage", - "Integration Guides", - "Third-Party Integration Guide", - "Webhook Setup", - "Documentation Maintenance", - "Automated Updates", - "Analytics and Improvement" - ], - "section_count": 74, - 
"code_blocks": 19, - "example_count": 54, - "word_count": 799, - "formatting_issues": [ - "Found 21 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 21 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology" - ] - }, - { - "agent_name": "systems-architect", - "category": "systems-architect", - "file_path": "core/systems-architect/agent.md", - "quality_score": 5.0, - "sections_found": [ - "Identity & Operating Principles", - "Your Architectural Expertise", - "Working with Skills", - "Architectural Approach", - "Core Architectural Principles", - "Evidence-Based Architecture", - "Evidence-Based Decisions", - "Decision Framework", - "Priority Hierarchy", - "Trade-off Analysis", - "Communication Style", - "Architecture Patterns & Solutions", - "Microservices Architecture", - "Service decomposition approach", - "Event-Driven Architecture", - "Serverless Architecture", - "AWS Lambda-based architecture", - "Technology Stack Evaluation", - "Database Selection Framework", - "Relational Databases (PostgreSQL, MySQL)", - "NoSQL Document Stores (MongoDB, CouchDB)", - "Key-Value Stores (Redis, DynamoDB)", - "Graph Databases (Neo4j, Amazon Neptune)", - "Performance Architecture Patterns", - "Horizontal Scaling Strategies", - "Security Architecture", - "Defense in Depth", - "Architecture Decision Process", - "ADR Template", - "ADR-XXX: [Decision Title]", - "Status", - "Context", - "Decision", - "Consequences", - "Positive", - "Negative", - "Neutral", - "Implementation", - "Monitoring", - "Technology Evaluation Criteria", - "Implementation Strategies", - "Migration Patterns", - "Strangler Fig Pattern", - "Database Migration Strategy", - "Zero-Downtime Deployment", - "Monitoring and Observability", - "Three Pillars of Observability", - "Success 
Metrics for Architecture", - "Collaboration with Other Agents", - "Cost Optimization Strategies", - "Infrastructure Optimization", - "Architecture Optimization" - ], - "section_count": 52, - "code_blocks": 13, - "example_count": 16, - "word_count": 1051, - "formatting_issues": [ - "Found 14 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "Moderate quality - consider adding more examples and documentation", - "Fix formatting issues: Found 14 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Methodology, Examples" - ] - }, - { - "agent_name": "security-auditor", - "category": "security-auditor", - "file_path": "core/security-auditor/agent.md", - "quality_score": 4.7, - "sections_found": [ - "Your Security Expertise", - "Working with Skills", - "Typical Workflow", - "When to Build on Skill Findings", - "Example Coordination", - "Security Audit Approach", - "Core Security Principles", - "Defense in Depth", - "Security by Design", - "OWASP Top 10 Security Analysis", - "A01: Broken Access Control", - "A02: Cryptographic Failures", - "A03: Injection Attacks", - "Authentication & Authorization", - "JWT Security Implementation", - "OAuth2 and OpenID Connect", - "Security Headers & CSP", - "Comprehensive Security Headers", - "CORS Security Configuration", - "Input Validation & Sanitization", - "Comprehensive Input Validation", - "Security Testing", - "Security Unit Tests", - "Penetration Testing Automation", - "Automated security testing script", - "OWASP ZAP automated security scan", - "SQLMap injection testing", - "Nmap port scanning", - "SSL/TLS testing with testssl.sh", - "Security Incident Response", - "Incident Response Playbook", - "Security Incident Response Plan", - "Security Monitoring" - ], - "section_count": 33, - "code_blocks": 13, - "example_count": 19, - "word_count": 520, - "formatting_issues": [ - "Found 14 code blocks without language tags", 
- "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 14 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology, Examples" - ] - }, - { - "agent_name": "test-engineer", - "category": "test-engineer", - "file_path": "core/test-engineer/agent.md", - "quality_score": 4.7, - "sections_found": [ - "Your Expertise", - "Working with Skills", - "Available Skills", - "When to Invoke Skills", - "How to Invoke", - "Quick validation before comprehensive test development:", - "Then create YOUR comprehensive test strategy", - "Workflow Pattern", - "Your Expertise (Manual Expert)", - "Typical Workflow", - "When to Build on Skill Findings", - "Example Coordination", - "Testing Approach", - "Testing Levels & Frameworks", - "Unit Testing (90%+ Coverage Target)", - "pytest patterns", - "Component Testing (React/Vue/Angular)", - "Integration Testing (80%+ Coverage Target)", - "End-to-End Testing (Critical Paths)", - "Performance Testing", - "Test Quality Standards", - "Comprehensive Coverage", - "Test Reliability", - "Maintainability", - "Mock and Stub Strategy", - "External Dependencies", - "Time and Randomness", - "Test Data Management", - "Fixtures and Factories", - "CI/CD Integration", - "Test Pipeline Configuration", - "GitHub Actions test workflow", - "Performance and Load Testing", - "Load Testing Strategy" - ], - "section_count": 34, - "code_blocks": 13, - "example_count": 20, - "word_count": 691, - "formatting_issues": [ - "Found 15 code blocks without language tags", - "Header hierarchy skip detected (H1 to H3)" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 15 code blocks without language tags; Header hierarchy skip detected (H1 to H3)", - "Add missing sections: Identity, Methodology, Examples" - 
] - }, - { - "agent_name": "frontend-developer", - "category": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "quality_score": 1.3, - "sections_found": [ - "Focus Areas", - "Approach", - "Output" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 2, - "word_count": 128, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "cloud-architect", - "category": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "quality_score": 0.3, - "sections_found": [ - "Focus Areas", - "Approach", - "Output" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 0, - "word_count": 121, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add more code examples and use cases", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "ui-designer", - "category": "ui", - "file_path": "design/ui/ui-designer/agent.md", - "quality_score": 3.0, - "sections_found": [], - "section_count": 0, - "code_blocks": 2, - "example_count": 3, - "word_count": 717, - "formatting_issues": [ - "Found 3 code blocks without language tags" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 3 code blocks without language tags", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "ux-researcher", - "category": "ux", - "file_path": "design/ux/ux-researcher/agent.md", - "quality_score": 4.0, - "sections_found": [], - "section_count": 0, - "code_blocks": 3, - "example_count": 7, - "word_count": 722, - 
"formatting_issues": [ - "Found 6 code blocks without language tags" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 6 code blocks without language tags", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "visual-storyteller", - "category": "visual", - "file_path": "design/visual/visual-storyteller/agent.md", - "quality_score": 4.0, - "sections_found": [], - "section_count": 0, - "code_blocks": 5, - "example_count": 5, - "word_count": 756, - "formatting_issues": [ - "Found 10 code blocks without language tags" - ], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Fix formatting issues: Found 10 code blocks without language tags", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "growth-hacker", - "category": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "quality_score": 1.0, - "sections_found": [ - "Core Responsibilities", - "Best Practices & Frameworks", - "Key Metrics" - ], - "section_count": 3, - "code_blocks": 0, - "example_count": 0, - "word_count": 222, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add more code examples and use cases", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "market-research-analyst", - "category": "market", - "file_path": "research/market/market-research-analyst/agent.md", - "quality_score": 0.0, - "sections_found": [], - "section_count": 0, - "code_blocks": 0, - "example_count": 0, - "word_count": 299, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Add more detailed explanations and context (< 300 words)", - "Add 
more code examples and use cases", - "Add missing sections: Identity, Expertise, Methodology, Examples" - ] - }, - { - "agent_name": "deep-research-specialist", - "category": "data", - "file_path": "research/data/deep-research-specialist/agent.md", - "quality_score": 1.7, - "sections_found": [ - "Identity & Operating Principles", - "Core Methodology", - "Research Strategy Framework", - "Source Evaluation & Quality Control", - "Output Structure", - "Quality Standards" - ], - "section_count": 6, - "code_blocks": 0, - "example_count": 1, - "word_count": 394, - "formatting_issues": [], - "recommendations": [ - "CRITICAL: Low quality score - needs substantial content improvement", - "Consider expanding content for better completeness (< 500 words)", - "Add more code examples and use cases", - "Add missing sections: Expertise, Examples" - ] - } - ] -} \ No newline at end of file diff --git a/agent_validation_v2_results.json b/agent_validation_v2_results.json deleted file mode 100644 index bd9d9c2..0000000 --- a/agent_validation_v2_results.json +++ /dev/null @@ -1,533 +0,0 @@ -{ - "summary": { - "total_agents": 16, - "overall_stats": { - "count": 16, - "avg_score": 7.0, - "min_score": 4.0, - "max_score": 9.0, - "avg_word_count": 598, - "avg_code_blocks": 7.3, - "yaml_complete": 16 - }, - "core_agents": { - "count": 9, - "avg_score": 7.6, - "min_score": 4.0, - "max_score": 8.8, - "avg_word_count": 774, - "avg_code_blocks": 12.4, - "yaml_complete": 9 - }, - "specialized_agents": { - "count": 7, - "avg_score": 6.3, - "min_score": 4.0, - "max_score": 9.0, - "avg_word_count": 372, - "avg_code_blocks": 0.7, - "yaml_complete": 7 - }, - "category_stats": { - "core": { - "formats": { - "core": 8, - "specialized": 0 - }, - "count": 8, - "avg_score": 8.0 - }, - "engineering": { - "formats": { - "core": 0, - "specialized": 2 - }, - "count": 2, - "avg_score": 9.0 - }, - "design": { - "formats": { - "core": 1, - "specialized": 2 - }, - "count": 3, - "avg_score": 4.0 - }, - "marketing": 
{ - "formats": { - "core": 0, - "specialized": 1 - }, - "count": 1, - "avg_score": 6.0 - }, - "research": { - "formats": { - "core": 0, - "specialized": 2 - }, - "count": 2, - "avg_score": 6.0 - } - }, - "top_performers": [ - { - "agent_name": "frontend-developer", - "category": "engineering", - "subcategory": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 128, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "cloud-architect", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "format": "specialized", - "line_count": 45, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 121, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "performance-tuner", - "category": "core", - "subcategory": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "format": "core", - "line_count": 651, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 56, - "code_blocks": 15, - "word_count": 882, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "refactor-expert", - "category": "core", - "subcategory": "refactor-expert", - "file_path": "core/refactor-expert/agent.md", - "format": "core", - "line_count": 976, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Approach, Examples" - ], - "sections_count": 40, - "code_blocks": 14, - "word_count": 1135, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "systems-architect", - "category": "core", - "subcategory": "systems-architect", - "file_path": "core/systems-architect/agent.md", - "format": "core", - "line_count": 434, - "quality_score": 8.8, - 
"quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 52, - "code_blocks": 13, - "word_count": 1051, - "yaml_complete": true, - "yaml_missing": [] - } - ], - "needs_improvement": [ - { - "agent_name": "ui-designer", - "category": "design", - "subcategory": "ui", - "file_path": "design/ui/ui-designer/agent.md", - "format": "specialized", - "line_count": 164, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (717 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 2, - "word_count": 717, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ux-researcher", - "category": "design", - "subcategory": "ux", - "file_path": "design/ux/ux-researcher/agent.md", - "format": "specialized", - "line_count": 197, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (722 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 3, - "word_count": 722, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "visual-storyteller", - "category": "design", - "subcategory": "visual", - "file_path": "design/visual/visual-storyteller/agent.md", - "format": "core", - "line_count": 258, - "quality_score": 4.0, - "quality_issues": [ - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 0, - "code_blocks": 5, - "word_count": 756, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "growth-hacker", - "category": "marketing", - "subcategory": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "format": "specialized", - "line_count": 75, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 3, - "code_blocks": 0, - "word_count": 222, - 
"yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "market-research-analyst", - "category": "research", - "subcategory": "market", - "file_path": "research/market/market-research-analyst/agent.md", - "format": "specialized", - "line_count": 53, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 0, - "code_blocks": 0, - "word_count": 299, - "yaml_complete": true, - "yaml_missing": [] - } - ], - "all_issues": { - "Missing typical sections (Focus Areas, Approach, Output)": 5, - "Missing sections: Methodology, Examples": 4, - "Add best practices/pitfalls/patterns section": 2, - "Missing sections: Methodology, Approach": 1, - "Insufficient content (493 words, need 800+ for excellent)": 1, - "Missing sections: Approach, Examples": 1, - "Missing sections: Methodology": 1, - "Too verbose (717 words, should be 100-300)": 1, - "Too verbose (722 words, should be 100-300)": 1, - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills": 1 - } - }, - "detailed_results": [ - { - "agent_name": "performance-tuner", - "category": "core", - "subcategory": "performance-tuner", - "file_path": "core/performance-tuner/agent.md", - "format": "core", - "line_count": 651, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 56, - "code_blocks": 15, - "word_count": 882, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "config-safety-reviewer", - "category": "core", - "subcategory": "config-safety-reviewer", - "file_path": "core/config-safety-reviewer/agent.md", - "format": "core", - "line_count": 226, - "quality_score": 6.8, - "quality_issues": [ - "Missing sections: Methodology, Approach" - ], - "sections_count": 31, - "code_blocks": 6, - "word_count": 642, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "root-cause-analyzer", - "category": "core", - 
"subcategory": "root-cause-analyzer", - "file_path": "core/root-cause-analyzer/agent.md", - "format": "core", - "line_count": 404, - "quality_score": 8.0, - "quality_issues": [ - "Insufficient content (493 words, need 800+ for excellent)" - ], - "sections_count": 47, - "code_blocks": 14, - "word_count": 493, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "refactor-expert", - "category": "core", - "subcategory": "refactor-expert", - "file_path": "core/refactor-expert/agent.md", - "format": "core", - "line_count": 976, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Approach, Examples" - ], - "sections_count": 40, - "code_blocks": 14, - "word_count": 1135, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "docs-writer", - "category": "core", - "subcategory": "docs-writer", - "file_path": "core/docs-writer/agent.md", - "format": "core", - "line_count": 483, - "quality_score": 8.4, - "quality_issues": [ - "Missing sections: Methodology" - ], - "sections_count": 74, - "code_blocks": 19, - "word_count": 799, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "systems-architect", - "category": "core", - "subcategory": "systems-architect", - "file_path": "core/systems-architect/agent.md", - "format": "core", - "line_count": 434, - "quality_score": 8.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 52, - "code_blocks": 13, - "word_count": 1051, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "security-auditor", - "category": "core", - "subcategory": "security-auditor", - "file_path": "core/security-auditor/agent.md", - "format": "core", - "line_count": 720, - "quality_score": 6.8, - "quality_issues": [ - "Missing sections: Methodology, Examples", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 33, - "code_blocks": 13, - "word_count": 520, - "yaml_complete": true, - "yaml_missing": [] - }, - { - 
"agent_name": "test-engineer", - "category": "core", - "subcategory": "test-engineer", - "file_path": "core/test-engineer/agent.md", - "format": "core", - "line_count": 396, - "quality_score": 7.8, - "quality_issues": [ - "Missing sections: Methodology, Examples" - ], - "sections_count": 34, - "code_blocks": 13, - "word_count": 691, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "frontend-developer", - "category": "engineering", - "subcategory": "frontend", - "file_path": "engineering/frontend/frontend-developer/agent.md", - "format": "specialized", - "line_count": 44, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 128, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "cloud-architect", - "category": "engineering", - "subcategory": "devops", - "file_path": "engineering/devops/cloud-architect/agent.md", - "format": "specialized", - "line_count": 45, - "quality_score": 9.0, - "quality_issues": [], - "sections_count": 3, - "code_blocks": 0, - "word_count": 121, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ui-designer", - "category": "design", - "subcategory": "ui", - "file_path": "design/ui/ui-designer/agent.md", - "format": "specialized", - "line_count": 164, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (717 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 2, - "word_count": 717, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "ux-researcher", - "category": "design", - "subcategory": "ux", - "file_path": "design/ux/ux-researcher/agent.md", - "format": "specialized", - "line_count": 197, - "quality_score": 4.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)", - "Too verbose (722 words, should be 100-300)" - ], - "sections_count": 0, - "code_blocks": 3, - "word_count": 722, - 
"yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "visual-storyteller", - "category": "design", - "subcategory": "visual", - "file_path": "design/visual/visual-storyteller/agent.md", - "format": "core", - "line_count": 258, - "quality_score": 4.0, - "quality_issues": [ - "Missing sections: Expertise, Methodology, Approach, Examples, Working with Skills", - "Add best practices/pitfalls/patterns section" - ], - "sections_count": 0, - "code_blocks": 5, - "word_count": 756, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "growth-hacker", - "category": "marketing", - "subcategory": "growth", - "file_path": "marketing/growth/growth-hacker/agent.md", - "format": "specialized", - "line_count": 75, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 3, - "code_blocks": 0, - "word_count": 222, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "market-research-analyst", - "category": "research", - "subcategory": "market", - "file_path": "research/market/market-research-analyst/agent.md", - "format": "specialized", - "line_count": 53, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 0, - "code_blocks": 0, - "word_count": 299, - "yaml_complete": true, - "yaml_missing": [] - }, - { - "agent_name": "deep-research-specialist", - "category": "research", - "subcategory": "data", - "file_path": "research/data/deep-research-specialist/agent.md", - "format": "specialized", - "line_count": 93, - "quality_score": 6.0, - "quality_issues": [ - "Missing typical sections (Focus Areas, Approach, Output)" - ], - "sections_count": 6, - "code_blocks": 0, - "word_count": 394, - "yaml_complete": true, - "yaml_missing": [] - } - ] -} \ No newline at end of file diff --git a/agent_validator.py b/agent_validator.py deleted file mode 100644 index 33f42b7..0000000 --- 
a/agent_validator.py +++ /dev/null @@ -1,354 +0,0 @@ -#!/usr/bin/env python3 -""" -Agent Content Structure and Quality Validator -Analyzes agent.md files for structure, quality, completeness, examples, and formatting. -""" - -import os -import re -from pathlib import Path -from typing import Dict, List, Tuple -import json - -class AgentValidator: - def __init__(self, base_path: str): - self.base_path = Path(base_path) - self.results = [] - - def find_agents(self, sample_paths: List[str]) -> List[Path]: - """Find agent.md files from provided paths.""" - agents = [] - for path in sample_paths: - agent_path = self.base_path / path / "agent.md" - if agent_path.exists(): - agents.append(agent_path) - return agents - - def extract_sections(self, content: str) -> List[str]: - """Extract all markdown headers from content.""" - headers = re.findall(r'^#{1,3}\s+(.+)$', content, re.MULTILINE) - return headers - - def count_code_blocks(self, content: str) -> int: - """Count number of code blocks.""" - return len(re.findall(r'```', content)) // 2 - - def count_examples(self, content: str) -> int: - """Count example sections and code blocks.""" - example_keywords = r'(example|use case|usage|demonstration|sample)' - examples = len(re.findall(example_keywords, content, re.IGNORECASE)) - return examples + self.count_code_blocks(content) - - def count_words(self, content: str) -> int: - """Count total words (excluding YAML frontmatter).""" - # Remove YAML frontmatter - content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL) - # Remove code blocks - content = re.sub(r'```.*?```', '', content, flags=re.DOTALL) - # Count words - words = re.findall(r'\b\w+\b', content) - return len(words) - - def check_formatting(self, content: str) -> List[str]: - """Check for formatting issues.""" - issues = [] - - # Check for code blocks without language tags - code_blocks = re.findall(r'```(\w*)\n', content) - if '' in code_blocks: - issues.append(f"Found {code_blocks.count('')} code 
blocks without language tags") - - # Check header hierarchy - headers = re.findall(r'^(#{1,6})', content, re.MULTILINE) - for i in range(len(headers) - 1): - current_level = len(headers[i]) - next_level = len(headers[i + 1]) - if next_level > current_level + 1: - issues.append(f"Header hierarchy skip detected (H{current_level} to H{next_level})") - break - - return issues - - def calculate_quality_score(self, metrics: Dict) -> float: - """Calculate quality score (0-10) based on metrics.""" - score = 0.0 - - # Required sections (max 2 points) - required_sections = ['Identity', 'Expertise', 'Methodology', 'Approach', 'Examples', 'Usage'] - sections_text = ' '.join(metrics['sections_found']) - matching_sections = sum(1 for s in required_sections if s.lower() in sections_text.lower()) - score += (matching_sections / len(required_sections)) * 2 - - # Content completeness (max 2 points) - if metrics['word_count'] >= 500: - score += 2 - elif metrics['word_count'] >= 300: - score += 1 - - # Example quality (max 2 points) - if metrics['example_count'] >= 5: - score += 2 - elif metrics['example_count'] >= 2: - score += 1 - - # Optional sections (max 4 points, 1 each) - optional_sections = { - 'integration': 'integration tips', - 'related': 'related agents', - 'best practices': 'best practices', - 'pitfalls': 'pitfalls' - } - - sections_lower = sections_text.lower() - for key, pattern in optional_sections.items(): - if pattern in sections_lower: - score += 1 - - return round(score, 1) - - def validate_agent(self, agent_path: Path) -> Dict: - """Validate a single agent file.""" - with open(agent_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Extract relative path - rel_path = agent_path.relative_to(self.base_path) - category = rel_path.parts[1] if len(rel_path.parts) > 1 else 'unknown' - - # Extract metrics - sections = self.extract_sections(content) - code_blocks = self.count_code_blocks(content) - examples = self.count_examples(content) - word_count = 
self.count_words(content) - formatting_issues = self.check_formatting(content) - - # Calculate quality score - metrics = { - 'sections_found': sections, - 'example_count': examples, - 'word_count': word_count - } - quality_score = self.calculate_quality_score(metrics) - - result = { - 'agent_name': agent_path.parent.name, - 'category': category, - 'file_path': str(rel_path), - 'quality_score': quality_score, - 'sections_found': sections, - 'section_count': len(sections), - 'code_blocks': code_blocks, - 'example_count': examples, - 'word_count': word_count, - 'formatting_issues': formatting_issues, - 'recommendations': self.generate_recommendations(quality_score, metrics, formatting_issues) - } - - return result - - def generate_recommendations(self, score: float, metrics: Dict, issues: List[str]) -> List[str]: - """Generate improvement recommendations.""" - recs = [] - - if score < 5: - recs.append("CRITICAL: Low quality score - needs substantial content improvement") - elif score < 7: - recs.append("Moderate quality - consider adding more examples and documentation") - - if metrics['word_count'] < 300: - recs.append("Add more detailed explanations and context (< 300 words)") - elif metrics['word_count'] < 500: - recs.append("Consider expanding content for better completeness (< 500 words)") - - if metrics['example_count'] < 2: - recs.append("Add more code examples and use cases") - - if issues: - recs.append(f"Fix formatting issues: {'; '.join(issues)}") - - required_sections = ['Identity', 'Expertise', 'Methodology', 'Examples'] - sections_text = ' '.join(metrics['sections_found']) - missing = [s for s in required_sections if s.lower() not in sections_text.lower()] - if missing: - recs.append(f"Add missing sections: {', '.join(missing)}") - - return recs if recs else ["No major issues - agent meets quality standards"] - - def analyze_sample(self, sample_paths: List[str]) -> Dict: - """Analyze a sample of agents and return summary statistics.""" - agents = 
self.find_agents(sample_paths) - - print(f"Analyzing {len(agents)} agents...") - - for agent_path in agents: - result = self.validate_agent(agent_path) - self.results.append(result) - print(f" βœ“ {result['agent_name']} (score: {result['quality_score']}/10)") - - return self.generate_summary() - - def generate_summary(self) -> Dict: - """Generate summary statistics.""" - if not self.results: - return {} - - scores = [r['quality_score'] for r in self.results] - word_counts = [r['word_count'] for r in self.results] - example_counts = [r['example_count'] for r in self.results] - section_counts = [r['section_count'] for r in self.results] - - # Category analysis - category_stats = {} - for result in self.results: - cat = result['category'] - if cat not in category_stats: - category_stats[cat] = {'count': 0, 'avg_score': 0, 'scores': []} - category_stats[cat]['count'] += 1 - category_stats[cat]['scores'].append(result['quality_score']) - - for cat, stats in category_stats.items(): - stats['avg_score'] = round(sum(stats['scores']) / len(stats['scores']), 1) - del stats['scores'] - - # Common issues - all_issues = [] - for result in self.results: - all_issues.extend(result['formatting_issues']) - - # Issue frequency - issue_freq = {} - for issue in all_issues: - issue_freq[issue] = issue_freq.get(issue, 0) + 1 - - summary = { - 'total_agents': len(self.results), - 'average_score': round(sum(scores) / len(scores), 1), - 'min_score': min(scores), - 'max_score': max(scores), - 'score_distribution': { - 'excellent (9-10)': sum(1 for s in scores if s >= 9), - 'good (7-8.9)': sum(1 for s in scores if 7 <= s < 9), - 'moderate (5-6.9)': sum(1 for s in scores if 5 <= s < 7), - 'poor (<5)': sum(1 for s in scores if s < 5) - }, - 'average_word_count': round(sum(word_counts) / len(word_counts)), - 'average_example_count': round(sum(example_counts) / len(example_counts), 1), - 'average_section_count': round(sum(section_counts) / len(section_counts), 1), - 'category_stats': 
category_stats, - 'common_issues': sorted(issue_freq.items(), key=lambda x: x[1], reverse=True)[:5], - 'top_performers': sorted(self.results, key=lambda x: x['quality_score'], reverse=True)[:5], - 'needs_improvement': sorted(self.results, key=lambda x: x['quality_score'])[:5] - } - - return summary - - -def main(): - """Main validation function.""" - base_path = "/Users/rezarezvani/projects/claude-code-tresor/subagents" - - # Define sample paths (30 agents across categories) - sample_paths = [ - # Core (8 agents - all) - "core/performance-tuner", - "core/config-safety-reviewer", - "core/root-cause-analyzer", - "core/refactor-expert", - "core/docs-writer", - "core/systems-architect", - "core/security-auditor", - "core/test-engineer", - - # Engineering (6 agents from different subcategories) - "engineering/languages/python/python-expert", - "engineering/languages/typescript/typescript-expert", - "engineering/backend/api/api-architect", - "engineering/backend/database/postgres-specialist", - "engineering/frontend/frontend-developer", - "engineering/devops/cloud-architect", - - # Design (3 agents) - "design/ui/ui-designer", - "design/ux/ux-researcher", - "design/visual/visual-storyteller", - - # Marketing (3 agents) - "marketing/content/content-strategist", - "marketing/growth/growth-hacker", - "marketing/analytics/marketing-analyst", - - # Product (2 agents) - "product/strategy/product-strategist", - "product/roadmap/roadmap-planner", - - # Leadership (2 agents) - "leadership/technical/tech-lead", - "leadership/team/engineering-manager", - - # Operations (2 agents) - "operations/support/customer-support-specialist", - "operations/qa/qa-specialist", - - # Research (2 agents) - "research/market/market-research-analyst", - "research/data/deep-research-specialist", - - # AI & Automation (2 agents) - "ai-automation/ml/ml-engineer", - "ai-automation/automation/automation-specialist", - ] - - validator = AgentValidator(base_path) - summary = 
validator.analyze_sample(sample_paths) - - # Print results - print("\n" + "="*80) - print("VALIDATION SUMMARY") - print("="*80) - - print(f"\nTotal Agents Analyzed: {summary['total_agents']}") - print(f"Average Quality Score: {summary['average_score']}/10") - print(f"Score Range: {summary['min_score']} - {summary['max_score']}") - - print("\nScore Distribution:") - for level, count in summary['score_distribution'].items(): - print(f" {level}: {count} agents") - - print("\nContent Metrics:") - print(f" Average Word Count: {summary['average_word_count']}") - print(f" Average Examples: {summary['average_example_count']}") - print(f" Average Sections: {summary['average_section_count']}") - - print("\nCategory Analysis:") - for cat, stats in summary['category_stats'].items(): - print(f" {cat}: {stats['count']} agents, avg score: {stats['avg_score']}") - - print("\nTop 5 Performers:") - for agent in summary['top_performers']: - print(f" β€’ {agent['agent_name']} ({agent['category']}): {agent['quality_score']}/10") - - print("\nNeed Improvement:") - for agent in summary['needs_improvement']: - print(f" β€’ {agent['agent_name']} ({agent['category']}): {agent['quality_score']}/10") - if agent['recommendations']: - for rec in agent['recommendations'][:2]: - print(f" - {rec}") - - if summary['common_issues']: - print("\nCommon Formatting Issues:") - for issue, count in summary['common_issues']: - print(f" β€’ {issue}: {count} occurrences") - - # Save detailed results - output_file = "/Users/rezarezvani/projects/claude-code-tresor/agent_validation_results.json" - with open(output_file, 'w') as f: - json.dump({ - 'summary': summary, - 'detailed_results': validator.results - }, f, indent=2) - - print(f"\nDetailed results saved to: {output_file}") - - -if __name__ == "__main__": - main() diff --git a/agent_validator_v2.py b/agent_validator_v2.py deleted file mode 100644 index 210dfbc..0000000 --- a/agent_validator_v2.py +++ /dev/null @@ -1,408 +0,0 @@ -#!/usr/bin/env python3 -""" 
-Agent Content Structure and Quality Validator V2 -Handles TWO agent formats: -1. Core agents (8): Comprehensive (300-1000 lines, extensive examples, deep content) -2. Specialized agents (125): Concise (40-60 lines, focused capability statements) -""" - -import os -import re -from pathlib import Path -from typing import Dict, List, Tuple -import json - -class AgentValidatorV2: - def __init__(self, base_path: str): - self.base_path = Path(base_path) - self.results = [] - - def find_agents(self, sample_paths: List[str]) -> List[Path]: - """Find agent.md files from provided paths.""" - agents = [] - for path in sample_paths: - agent_path = self.base_path / path / "agent.md" - if agent_path.exists(): - agents.append(agent_path) - return agents - - def determine_format(self, content: str, line_count: int) -> str: - """Determine if agent is Core (comprehensive) or Specialized (concise).""" - if line_count > 200 or content.count('```') > 10: - return "core" - return "specialized" - - def extract_sections(self, content: str) -> List[str]: - """Extract all markdown headers from content.""" - headers = re.findall(r'^#{1,3}\s+(.+)$', content, re.MULTILINE) - return headers - - def count_code_blocks(self, content: str) -> int: - """Count number of code blocks.""" - return len(re.findall(r'```', content)) // 2 - - def count_words(self, content: str) -> int: - """Count total words (excluding YAML frontmatter).""" - content = re.sub(r'^---\n.*?\n---\n', '', content, flags=re.DOTALL) - content = re.sub(r'```.*?```', '', content, flags=re.DOTALL) - words = re.findall(r'\b\w+\b', content) - return len(words) - - def check_yaml_frontmatter(self, content: str) -> Dict: - """Extract and validate YAML frontmatter.""" - match = re.match(r'^---\n(.*?)\n---', content, re.DOTALL) - if not match: - return {'valid': False, 'fields': []} - - yaml_content = match.group(1) - required_fields = ['name', 'description', 'category', 'tools', 'model', 'enabled', 'capabilities'] - found_fields = [] - - 
for field in required_fields: - if re.search(rf'^{field}:', yaml_content, re.MULTILINE): - found_fields.append(field) - - return { - 'valid': True, - 'required_count': len(required_fields), - 'found_count': len(found_fields), - 'missing': [f for f in required_fields if f not in found_fields], - 'complete': len(found_fields) == len(required_fields) - } - - def calculate_core_quality_score(self, metrics: Dict) -> Tuple[float, List[str]]: - """Calculate quality score for CORE agents (comprehensive format).""" - score = 0.0 - issues = [] - - # Required sections for core agents (max 3 points) - required_sections = ['Expertise', 'Methodology', 'Approach', 'Examples', 'Working with Skills'] - sections_text = ' '.join(metrics['sections_found']).lower() - matching = sum(1 for s in required_sections if s.lower() in sections_text) - score += (matching / len(required_sections)) * 3 - if matching < len(required_sections): - missing = [s for s in required_sections if s.lower() not in sections_text] - issues.append(f"Missing sections: {', '.join(missing)}") - - # Content depth (max 2 points) - if metrics['word_count'] >= 800: - score += 2 - elif metrics['word_count'] >= 500: - score += 1 - else: - issues.append(f"Insufficient content ({metrics['word_count']} words, need 800+ for excellent)") - - # Code examples (max 3 points) - if metrics['code_blocks'] >= 10: - score += 3 - elif metrics['code_blocks'] >= 5: - score += 2 - elif metrics['code_blocks'] >= 2: - score += 1 - else: - issues.append(f"Too few code examples ({metrics['code_blocks']}, need 10+ for excellent)") - - # YAML frontmatter (max 1 point) - if metrics['yaml']['complete']: - score += 1 - else: - issues.append(f"Incomplete YAML: missing {', '.join(metrics['yaml']['missing'])}") - - # Best practices sections (max 1 point) - optional = ['best practices', 'pitfalls', 'common issues', 'patterns'] - if any(opt in sections_text for opt in optional): - score += 1 - else: - issues.append("Add best 
practices/pitfalls/patterns section") - - return round(score, 1), issues - - def calculate_specialized_quality_score(self, metrics: Dict) -> Tuple[float, List[str]]: - """Calculate quality score for SPECIALIZED agents (concise format).""" - score = 0.0 - issues = [] - - # YAML frontmatter completeness (max 4 points) - if metrics['yaml']['complete']: - score += 4 - else: - missing_count = metrics['yaml']['required_count'] - metrics['yaml']['found_count'] - score += max(0, 4 - missing_count) - issues.append(f"Missing YAML fields: {', '.join(metrics['yaml']['missing'])}") - - # Has core sections (max 3 points) - sections_text = ' '.join(metrics['sections_found']).lower() - required = ['focus', 'approach', 'output'] # Typical for specialized agents - matching = sum(1 for s in required if s in sections_text) - score += (matching / len(required)) * 3 - if matching < len(required): - issues.append(f"Missing typical sections (Focus Areas, Approach, Output)") - - # Adequate content length (max 2 points) - if 100 <= metrics['word_count'] <= 300: # Sweet spot for specialized - score += 2 - elif 80 <= metrics['word_count'] < 100 or 300 < metrics['word_count'] <= 400: - score += 1 - else: - if metrics['word_count'] < 80: - issues.append(f"Too brief ({metrics['word_count']} words, need 100-300)") - else: - issues.append(f"Too verbose ({metrics['word_count']} words, should be 100-300)") - - # Clear capabilities list (max 1 point) - if metrics['yaml']['complete'] and 'capabilities' in sections_text.lower(): - score += 1 - - return round(score, 1), issues - - def validate_agent(self, agent_path: Path) -> Dict: - """Validate a single agent file.""" - with open(agent_path, 'r', encoding='utf-8') as f: - content = f.read() - - line_count = len(content.split('\n')) - rel_path = agent_path.relative_to(self.base_path) - - # Determine format - agent_format = self.determine_format(content, line_count) - - # Extract metrics - sections = self.extract_sections(content) - code_blocks = 
self.count_code_blocks(content) - word_count = self.count_words(content) - yaml_info = self.check_yaml_frontmatter(content) - - # Build metrics dict - metrics = { - 'sections_found': sections, - 'code_blocks': code_blocks, - 'word_count': word_count, - 'yaml': yaml_info - } - - # Calculate quality score based on format - if agent_format == "core": - quality_score, quality_issues = self.calculate_core_quality_score(metrics) - else: - quality_score, quality_issues = self.calculate_specialized_quality_score(metrics) - - # Determine category - parts = rel_path.parts - if len(parts) >= 2: - category = parts[0] # e.g., "core", "engineering", "design" - subcategory = parts[1] if len(parts) > 2 else None - else: - category = "unknown" - subcategory = None - - result = { - 'agent_name': agent_path.parent.name, - 'category': category, - 'subcategory': subcategory, - 'file_path': str(rel_path), - 'format': agent_format, - 'line_count': line_count, - 'quality_score': quality_score, - 'quality_issues': quality_issues, - 'sections_count': len(sections), - 'code_blocks': code_blocks, - 'word_count': word_count, - 'yaml_complete': yaml_info['complete'], - 'yaml_missing': yaml_info.get('missing', []) - } - - return result - - def analyze_sample(self, sample_paths: List[str]) -> Dict: - """Analyze a sample of agents and return summary statistics.""" - agents = self.find_agents(sample_paths) - - print(f"\nAnalyzing {len(agents)} agents...") - - for agent_path in agents: - result = self.validate_agent(agent_path) - self.results.append(result) - format_label = "CORE" if result['format'] == "core" else "SPEC" - print(f" [{format_label}] {result['agent_name']:<35} {result['quality_score']:>4.1f}/10 ({result['line_count']} lines)") - - return self.generate_summary() - - def generate_summary(self) -> Dict: - """Generate summary statistics.""" - if not self.results: - return {} - - # Separate by format - core_results = [r for r in self.results if r['format'] == 'core'] - spec_results = [r 
for r in self.results if r['format'] == 'specialized'] - - def get_stats(results): - if not results: - return None - scores = [r['quality_score'] for r in results] - return { - 'count': len(results), - 'avg_score': round(sum(scores) / len(scores), 1), - 'min_score': min(scores), - 'max_score': max(scores), - 'avg_word_count': round(sum(r['word_count'] for r in results) / len(results)), - 'avg_code_blocks': round(sum(r['code_blocks'] for r in results) / len(results), 1), - 'yaml_complete': sum(1 for r in results if r['yaml_complete']), - } - - # Category analysis - category_stats = {} - for result in self.results: - cat = result['category'] - if cat not in category_stats: - category_stats[cat] = {'scores': [], 'formats': {'core': 0, 'specialized': 0}} - category_stats[cat]['scores'].append(result['quality_score']) - category_stats[cat]['formats'][result['format']] += 1 - - for cat, stats in category_stats.items(): - scores = stats['scores'] - stats['count'] = len(scores) - stats['avg_score'] = round(sum(scores) / len(scores), 1) - del stats['scores'] - - summary = { - 'total_agents': len(self.results), - 'overall_stats': get_stats(self.results), - 'core_agents': get_stats(core_results), - 'specialized_agents': get_stats(spec_results), - 'category_stats': category_stats, - 'top_performers': sorted(self.results, key=lambda x: x['quality_score'], reverse=True)[:5], - 'needs_improvement': sorted(self.results, key=lambda x: x['quality_score'])[:5], - 'all_issues': self._collect_common_issues() - } - - return summary - - def _collect_common_issues(self) -> Dict: - """Collect and count common issues.""" - issue_counts = {} - for result in self.results: - for issue in result['quality_issues']: - issue_counts[issue] = issue_counts.get(issue, 0) + 1 - - return dict(sorted(issue_counts.items(), key=lambda x: x[1], reverse=True)[:10]) - - -def main(): - """Main validation function.""" - base_path = "/Users/rezarezvani/projects/claude-code-tresor/subagents" - - # Define sample 
paths (30 agents across categories) - sample_paths = [ - # Core (8 agents - all) - "core/performance-tuner", - "core/config-safety-reviewer", - "core/root-cause-analyzer", - "core/refactor-expert", - "core/docs-writer", - "core/systems-architect", - "core/security-auditor", - "core/test-engineer", - - # Engineering (6 from different subcategories) - "engineering/languages/python/python-expert", - "engineering/languages/typescript/typescript-expert", - "engineering/backend/api/api-architect", - "engineering/backend/database/postgres-specialist", - "engineering/frontend/frontend-developer", - "engineering/devops/cloud-architect", - - # Design (3) - "design/ui/ui-designer", - "design/ux/ux-researcher", - "design/visual/visual-storyteller", - - # Marketing (3) - "marketing/content/content-strategist", - "marketing/growth/growth-hacker", - "marketing/analytics/marketing-analyst", - - # Product (2) - "product/strategy/product-strategist", - "product/roadmap/roadmap-planner", - - # Leadership (2) - "leadership/technical/tech-lead", - "leadership/team/engineering-manager", - - # Operations (2) - "operations/support/customer-support-specialist", - "operations/qa/qa-specialist", - - # Research (2) - "research/market/market-research-analyst", - "research/data/deep-research-specialist", - - # AI & Automation (2) - "ai-automation/ml/ml-engineer", - "ai-automation/automation/automation-specialist", - ] - - validator = AgentValidatorV2(base_path) - summary = validator.analyze_sample(sample_paths) - - # Print results - print("\n" + "="*90) - print("AGENT VALIDATION SUMMARY - Dual Format Analysis") - print("="*90) - - print(f"\nπŸ“Š OVERALL STATISTICS") - print(f"Total Agents Analyzed: {summary['total_agents']}") - print(f"Average Quality Score: {summary['overall_stats']['avg_score']}/10") - - if summary['core_agents']: - print(f"\nπŸ“š CORE AGENTS (Comprehensive Format)") - print(f" Count: {summary['core_agents']['count']}") - print(f" Avg Score: 
{summary['core_agents']['avg_score']}/10") - print(f" Avg Word Count: {summary['core_agents']['avg_word_count']}") - print(f" Avg Code Blocks: {summary['core_agents']['avg_code_blocks']}") - print(f" YAML Complete: {summary['core_agents']['yaml_complete']}/{summary['core_agents']['count']}") - - if summary['specialized_agents']: - print(f"\n⚑ SPECIALIZED AGENTS (Concise Format)") - print(f" Count: {summary['specialized_agents']['count']}") - print(f" Avg Score: {summary['specialized_agents']['avg_score']}/10") - print(f" Avg Word Count: {summary['specialized_agents']['avg_word_count']}") - print(f" Avg Code Blocks: {summary['specialized_agents']['avg_code_blocks']}") - print(f" YAML Complete: {summary['specialized_agents']['yaml_complete']}/{summary['specialized_agents']['count']}") - - print(f"\nπŸ“ CATEGORY BREAKDOWN") - for cat, stats in sorted(summary['category_stats'].items()): - print(f" {cat:20} {stats['count']} agents, avg: {stats['avg_score']}/10 [Core: {stats['formats']['core']}, Spec: {stats['formats']['specialized']}]") - - print(f"\n⭐ TOP 5 PERFORMERS") - for agent in summary['top_performers']: - fmt = "CORE" if agent['format'] == 'core' else "SPEC" - print(f" [{fmt}] {agent['agent_name']:35} {agent['quality_score']}/10 ({agent['category']})") - - print(f"\n⚠️ NEEDS IMPROVEMENT (Bottom 5)") - for agent in summary['needs_improvement']: - fmt = "CORE" if agent['format'] == 'core' else "SPEC" - print(f" [{fmt}] {agent['agent_name']:35} {agent['quality_score']}/10 ({agent['category']})") - if agent['quality_issues']: - for issue in agent['quality_issues'][:2]: - print(f" β†’ {issue}") - - if summary['all_issues']: - print(f"\nπŸ” COMMON ISSUES (Top 10)") - for issue, count in list(summary['all_issues'].items())[:10]: - print(f" β€’ {issue}: {count} agents") - - # Save detailed results - output_file = "/Users/rezarezvani/projects/claude-code-tresor/agent_validation_v2_results.json" - with open(output_file, 'w') as f: - json.dump({ - 'summary': summary, - 
'detailed_results': validator.results - }, f, indent=2) - - print(f"\nπŸ’Ύ Detailed results saved to: {output_file}\n") - - -if __name__ == "__main__": - main() diff --git a/commands/workflow/add-to-todos/add-to-todos.md b/commands/workflow/add-to-todos/add-to-todos.md new file mode 100644 index 0000000..27adf16 --- /dev/null +++ b/commands/workflow/add-to-todos/add-to-todos.md @@ -0,0 +1,56 @@ +--- +name: add-to-todos +description: Add todo item to TO-DOS.md with context from conversation +argument-hint: (optional - infers from conversation if omitted) +allowed-tools: [Read, Edit, Write, Glob] +model: inherit +enabled: true +--- + +# Add Todo Item + +## Context + +- Current timestamp: !`date "+%Y-%m-%d %H:%M"` + +## Instructions + +1. Read TO-DOS.md in the working directory (create with Write tool if it doesn't exist) + +2. Check for duplicates: + - Extract key concept/action from the new todo + - Search existing todos for similar titles or overlapping scope + - If found, ask user: "A similar todo already exists: [title]. Would you like to:\n\n1. Skip adding (keep existing)\n2. Replace existing with new version\n3. Add anyway as separate item\n\nReply with the number of your choice." + - Wait for user response before proceeding + +3. Extract todo content: + - **With $ARGUMENTS**: Use as the focus/title for the todo and context heading + - **Without $ARGUMENTS**: Analyze recent conversation to extract: + - Specific problem or task discussed + - Relevant file paths that need attention + - Technical details (line numbers, error messages, conflicting specifications) + - Root cause if identified + +4. Append new section to bottom of file: + - **Heading**: `## Brief Context Title - YYYY-MM-DD HH:MM` (3-8 word title, current timestamp) + - **Todo format**: `- **[Action verb] [Component]** - [Brief description]. **Problem:** [What's wrong/why needed]. **Files:** [Comma-separated paths with line numbers]. 
**Solution:** [Approach hints or constraints, if applicable].` + - **Required fields**: Problem and Files (with line numbers like `path/to/file.ts:123-145`) + - **Optional field**: Solution + - Make each section self-contained for future Claude to understand weeks later + - Use simple list items (not checkboxes) - todos are removed when work begins + +5. Confirm and offer to continue with original work: + - Identify what the user was working on before `/add-to-todos` was called + - Confirm the todo was saved: "✓ Saved to todos." + - Ask if they want to continue with the original work: "Would you like to continue with [original task]?" + - Wait for user response + +## Format Example + +```markdown +## Add Todo Command Improvements - 2025-11-15 14:23 + +- **Add structured format to add-to-todos** - Standardize todo entries with Problem/Files/Solution pattern. **Problem:** Current todos lack consistent structure, making it hard for Claude to have enough context when revisiting tasks later. **Files:** `commands/add-to-todos.md:22-29`. **Solution:** Use inline bold labels with required Problem and Files fields, optional Solution field. + +- **Create check-todos command** - Build companion command to list and select todos. **Problem:** Need workflow to review outstanding todos and load context for selected item. **Files:** `commands/check-todos.md` (new), `TO-DOS.md` (reads from). **Solution:** Parse markdown list, display numbered list, accept selection to load full context and remove item. +``` diff --git a/commands/workflow/check-todos/check-todos.md b/commands/workflow/check-todos/check-todos.md new file mode 100644 index 0000000..d1ec4a1 --- /dev/null +++ b/commands/workflow/check-todos/check-todos.md @@ -0,0 +1,73 @@ +--- +name: check-todos +description: List outstanding todos and select one to work on +argument-hint: (no arguments) +allowed-tools: [Read, Edit, Glob] +model: inherit +enabled: true +--- + +# Check Todos + +## Instructions + +1. 
Read TO-DOS.md in the working directory (if doesn't exist, say "No outstanding todos" and exit) + +2. Parse and display todos: + - Extract all list items starting with `- **` (active todos) + - If none exist, say "No outstanding todos" and exit + - Display compact numbered list showing: + - Number (for selection) + - Bold title only (part between `**` markers) + - Date from h2 heading above it + - Prompt: "Reply with the number of the todo you'd like to work on." + - Wait for user to reply with a number + +3. Load full context for selected todo: + - Display complete line with all fields (Problem, Files, Solution) + - Display h2 heading (topic + date) for additional context + - Read and briefly summarize relevant files mentioned + +4. Check for established workflows and Tresor agents: + - Read CLAUDE.md (if exists) to understand project-specific workflows and rules + - **Detect Tresor's agent ecosystem**: + - Scan `subagents/` directory for 141 specialized agents + - Match todo file paths to Tresor categories: + * `subagents/engineering/` or `src/`, `api/`, `backend/` → Engineering agents + * `subagents/design/` or `ui/`, `components/`, `design/` → Design agents + * `skills/` → Skill development work + * `commands/` → Command development work + * `agents/` → Core agent work + - **Suggest specific Tresor agents** based on todo content and file paths: + * Database-related → @database-optimizer, @sql-pro + * API/Backend → @backend-architect, @api-documenter + * Frontend/UI → @frontend-developer, @ui-designer + * Performance → @performance-tuner + * Security → @security-auditor + * Testing → @test-engineer + * See `subagents/AGENT-INDEX.md` for all 141 agents + - Look for `.claude/skills/` directory for skill-based workflows + - Check CLAUDE.md for explicit workflow requirements + +5. Present action options to user: + - **If Tresor agent match found**: "This looks like [domain] work. Would you like to:\n\n1. Invoke @{tresor-agent} and start\n2. 
Invoke {skill-name} skill (if applicable)\n3. Work on it directly\n4. Brainstorm approach first\n5. Put it back and browse other todos\n\nReply with the number of your choice." + - **If matching skill/workflow found (but no Tresor agent)**: "This looks like [domain] work. Would you like to:\n\n1. Invoke [skill-name] skill and start\n2. Work on it directly\n3. Brainstorm approach first\n4. Put it back and browse other todos\n\nReply with the number of your choice." + - **If no workflow match**: "Would you like to:\n\n1. Start working on it\n2. Brainstorm approach first\n3. Put it back and browse other todos\n\nReply with the number of your choice." + - Wait for user response + +6. Handle user choice: + - **Option "Invoke Tresor agent" or "Invoke skill" or "Start working"**: Remove todo from TO-DOS.md (and h2 heading if section becomes empty), then begin work (invoke Tresor agent or skill if applicable, or proceed directly) + - **Option "Brainstorm approach"**: Keep todo in file, could invoke @systems-architect or @{relevant-agent} for planning assistance + - **Option "Put it back"**: Keep todo in file, return to step 2 to display the full list again + +## Display Format + +``` +Outstanding Todos: + +1. Add structured format to add-to-todos (2025-11-15 14:23) +2. Create check-todos command (2025-11-15 14:23) +3. Fix cookie-extractor MCP workflow (2025-11-14 09:15) + +Reply with the number of the todo you'd like to work on. 
+``` diff --git a/commands/workflow/create-prompt/create-prompt.md b/commands/workflow/create-prompt/create-prompt.md new file mode 100644 index 0000000..bf80fb4 --- /dev/null +++ b/commands/workflow/create-prompt/create-prompt.md @@ -0,0 +1,413 @@ +--- +name: create-prompt +description: Expert prompt engineer that creates optimized, XML-structured prompts with intelligent depth selection +argument-hint: [task description] +allowed-tools: Task, Read, Write, Bash, Glob +model: inherit +enabled: true +--- + +# Prompt Engineer + +You are an expert senior prompt engineer for Claude Code with years of experience creating world-class prompting techniques, specialized in crafting optimal prompts using XML tag structuring and guidelines and best practices. Your goal is to create highly effective prompts that get things done accurately and efficiently. + +## Claude Code Tresor Integration + +This command is part of the TÂCHES workflow framework, integrated with Claude Code Tresor's ecosystem of 141 agents, skills, and standards. 
+ +**Always Reference Tresor Project Standards**: +- Read `CLAUDE.md` (if exists) for project-specific communication standards and workflows +- Read `productContext.md` (if exists) for tech stack and architectural decisions +- Read `projectbrief.md` (if exists) for project vision and conventions +- Follow Tresor's principles: Absolute honesty, Zero fluff, Maintainability first, Anti-overengineering + +**Suggest Tresor Agents in Generated Prompts**: + +When generating prompts, recommend appropriate Tresor agents based on task type: +- **Architecture/Design**: @systems-architect, @backend-architect, @cloud-architect +- **Security Review**: @security-auditor, @security-threat-analyst +- **Configuration**: @config-safety-reviewer +- **Testing**: @test-engineer, @qa-test-engineer +- **Performance**: @performance-tuner, @database-optimizer +- **Refactoring**: @refactor-expert +- **Documentation**: @docs-writer +- **Language-Specific**: @python-pro, @typescript-pro, @java-pro, etc. +- **See** `subagents/AGENT-INDEX.md` for complete list of 141 agents + +**Tresor-Specific Constraints to Include**: +- Code limits: Maximum 300 lines per file (split larger files) +- File economy: Edit existing files instead of creating new ones when possible +- Maintainability: Simple, direct solutions over elaborate architectures +- Documentation: Bug fixes and solution methodology should be documented + +## User Request + +The user wants you to create a prompt for: $ARGUMENTS + +## Core Process + + +Analyze the user's request to determine: +1. **Clarity check (Golden Rule)**: Would a colleague with minimal context understand what's being asked? + - Are there ambiguous terms that could mean multiple things? + - Would examples help clarify the desired outcome? + - Are there missing details about constraints or requirements? + - Is the context clear (what it's for, who it's for, why it matters)? + +2. 
**Task complexity**: Is this simple (single file, clear goal) or complex (multi-file, research needed, multiple steps)? + +3. **Single vs Multiple Prompts**: Should this be one prompt or broken into multiple? + + - Single prompt: Task has clear dependencies, single cohesive goal, sequential steps + - Multiple prompts: Task has independent sub-tasks that could be parallelized or done separately + - Consider: Can parts be done simultaneously? Are there natural boundaries between sub-tasks? + +4. **Execution Strategy** (if multiple prompts): + + - **Parallel**: Sub-tasks are independent, no shared file modifications, can run simultaneously + - **Sequential**: Sub-tasks have dependencies, one must finish before next starts + - Look for: Shared files (sequential), independent modules (parallel), data flow between tasks (sequential) + +5. **Reasoning depth needed**: + + - Simple/straightforward → Standard prompt + - Complex reasoning, multiple constraints, or optimization → Include extended thinking triggers (phrases like "thoroughly analyze", "consider multiple approaches", "deeply consider") + +6. **Project context needs**: Do I need to examine the codebase structure, dependencies, or existing patterns? + +7. **Optimal prompt depth**: Should this be concise or comprehensive based on the task? + +8. **Required tools**: What file references, bash commands, or MCP servers might be needed? + +9. **Verification needs**: Does this task warrant built-in error checking or validation steps? + +10. **Prompt quality needs**: + +- Does this need explicit "go beyond basics" encouragement for ambitious/creative work? +- Should generated prompts explain WHY constraints matter, not just what they are? +- Do examples need to demonstrate desired behavior while avoiding undesired patterns? 
+ + +## Interaction Flow + +### Step 1: Clarification (if needed) + +If the request is ambiguous or could benefit from more detail, ask targeted questions: + +"I'll create an optimized prompt for that. First, let me clarify a few things: + +1. [Specific question about ambiguous aspect] +2. [Question about constraints or requirements] +3. What is this for? What will the output be used for? +4. Who is the intended audience/user? +5. Can you provide an example of [specific aspect]? + +Please answer any that apply, or just say 'continue' if I have enough information." + +### Step 2: Confirmation + +Once you have enough information, confirm your understanding: + +"I'll create a prompt for: [brief summary of task] + +This will be a [simple/moderate/complex] prompt that [key approach]. + +Should I proceed, or would you like to adjust anything?" + +### Step 3: Generate and Save + +Create the prompt(s) and save to the prompts folder. + +**For single prompts:** + +- Generate one prompt file following the patterns below +- Save as `./prompts/[number]-[name].md` + +**For multiple prompts:** + +- Determine how many prompts are needed (typically 2-4) +- Generate each prompt with clear, focused objectives +- Save sequentially: `./prompts/[N]-[name].md`, `./prompts/[N+1]-[name].md`, etc. 
+- Each prompt should be self-contained and executable independently + +## Prompt Construction Rules + +### Always Include + +- XML tag structure with clear, semantic tags like `<objective>`, `<context>`, `<requirements>`, `<constraints>`, `<output>` +- **Contextual information**: Why this task matters, what it's for, who will use it, end goal +- **Explicit, specific instructions**: Tell Claude exactly what to do with clear, unambiguous language +- **Sequential steps**: Use numbered lists for clarity +- File output instructions using relative paths: `./filename` or `./subfolder/filename` +- Reference to reading the CLAUDE.md for project conventions +- Explicit success criteria within `<success_criteria>` or `<verification>` tags + +### Conditionally Include (based on analysis) + +- **Extended thinking triggers** for complex reasoning: + - Phrases like: "thoroughly analyze", "consider multiple approaches", "deeply consider", "explore multiple solutions" + - Don't use for simple, straightforward tasks +- **"Go beyond basics" language** for creative/ambitious tasks: + - Example: "Include as many relevant features as possible. Go beyond the basics to create a fully-featured implementation." +- **WHY explanations** for constraints and requirements: + - In generated prompts, explain WHY constraints matter, not just what they are + - Example: Instead of "Never use ellipses", write "Your response will be read aloud, so never use ellipses since text-to-speech can't pronounce them" +- **Parallel tool calling** for agentic/multi-step workflows: + - "For maximum efficiency, whenever you need to perform multiple independent operations, invoke all relevant tools simultaneously rather than sequentially." +- **Reflection after tool use** for complex agentic tasks: + - "After receiving tool results, carefully reflect on their quality and determine optimal next steps before proceeding." 
+- `` tags when codebase exploration is needed +- `` tags for tasks requiring verification +- `` tags for complex or ambiguous requirements - ensure examples demonstrate desired behavior and avoid undesired patterns +- Bash command execution with "!" prefix when system state matters +- MCP server references when specifically requested or obviously beneficial + +### Output Format + +1. Generate prompt content with XML structure +2. Save to: `./prompts/[number]-[descriptive-name].md` + - Number format: 001, 002, 003, etc. (check existing files in ./prompts/ to determine next number) + - Name format: lowercase, hyphen-separated, max 5 words describing the task + - Example: `./prompts/001-implement-user-authentication.md` +3. File should contain ONLY the prompt, no explanations or metadata + +## Prompt Patterns + +### For Coding Tasks + +```xml + +[Clear statement of what needs to be built/fixed/refactored] +Explain the end goal and why this matters. + + + +[Project type, tech stack, relevant constraints] +[Who will use this, what it's for] +@[relevant files to examine] + + + +[Specific functional requirements] +[Performance or quality requirements] +Be explicit about what Claude should do. 
+ + + +[Any specific approaches or patterns to follow] +[What to avoid and WHY - explain the reasoning behind constraints] + + + +Create/modify files with relative paths: +- `./path/to/file.ext` - [what this file should contain] + + + +Before declaring complete, verify your work: +- [Specific test or check to perform] +- [How to confirm the solution works] + + + +[Clear, measurable criteria for success] + +``` + +### For Analysis Tasks + +```xml + +[What needs to be analyzed and why] +[What the analysis will be used for] + + + +@[files or data to analyze] +![relevant commands to gather data] + + + +[Specific metrics or patterns to identify] +[Depth of analysis needed - use "thoroughly analyze" for complex tasks] +[Any comparisons or benchmarks] + + + +[How results should be structured] +Save analysis to: `./analyses/[descriptive-name].md` + + + +[How to validate the analysis is complete and accurate] + +``` + +### For Research Tasks + +```xml + +[What information needs to be gathered] +[Intended use of the research] +For complex research, include: "Thoroughly explore multiple sources and consider various perspectives" + + + +[Boundaries of the research] +[Sources to prioritize or avoid] +[Time period or version constraints] + + + +[Format of research output] +[Level of detail needed] +Save findings to: `./research/[topic].md` + + + +[How to assess quality/relevance of sources] +[Key questions that must be answered] + + + +Before completing, verify: +- [All key questions are answered] +- [Sources are credible and relevant] + +``` + +## Intelligence Rules + +1. **Clarity First (Golden Rule)**: If anything is unclear, ask before proceeding. A few clarifying questions save time. Test: Would a colleague with minimal context understand this prompt? + +2. **Context is Critical**: Always include WHY the task matters, WHO it's for, and WHAT it will be used for in generated prompts. + +3. **Be Explicit**: Generate prompts with explicit, specific instructions. 
For ambitious results, include "go beyond the basics." For specific formats, state exactly what format is needed. + +4. **Scope Assessment**: Simple tasks get concise prompts. Complex tasks get comprehensive structure with extended thinking triggers. + +5. **Context Loading**: Only request file reading when the task explicitly requires understanding existing code. Use patterns like: + + - "Examine @package.json for dependencies" (when adding new packages) + - "Review @src/database/\* for schema" (when modifying data layer) + - Skip file reading for greenfield features + +6. **Precision vs Brevity**: Default to precision. A longer, clear prompt beats a short, ambiguous one. + +7. **Tool Integration**: + + - Include MCP servers only when explicitly mentioned or obviously needed + - Use bash commands for environment checking when state matters + - File references should be specific, not broad wildcards + - For multi-step agentic tasks, include parallel tool calling guidance + +8. **Output Clarity**: Every prompt must specify exactly where to save outputs using relative paths + +9. **Verification Always**: Every prompt should include clear success criteria and verification steps + + +After saving the prompt(s), present this decision tree to the user: + +--- + +**Prompt(s) created successfully!** + + +If you created ONE prompt (e.g., `./prompts/005-implement-feature.md`): + + +βœ“ Saved prompt to ./prompts/005-implement-feature.md + +What's next? + +1. Run prompt now +2. Review/edit prompt first +3. Save for later +4. Other + +Choose (1-4): \_ + + + +If user chooses #1, invoke via SlashCommand tool: `/run-prompt 005` + + + + +If you created MULTIPLE prompts that CAN run in parallel (e.g., independent modules, no shared files): + + +βœ“ Saved prompts: + - ./prompts/005-implement-auth.md + - ./prompts/006-implement-api.md + - ./prompts/007-implement-ui.md + +Execution strategy: These prompts can run in PARALLEL (independent tasks, no shared files) + +What's next? + +1. 
Run all prompts in parallel now (launches 3 sub-agents simultaneously) +2. Run prompts sequentially instead +3. Review/edit prompts first +4. Other + +Choose (1-4): \_ + + + +If user chooses #1, invoke via SlashCommand tool: `/run-prompt 005 006 007 --parallel` +If user chooses #2, invoke via SlashCommand tool: `/run-prompt 005 006 007 --sequential` + + + + +If you created MULTIPLE prompts that MUST run sequentially (e.g., dependencies, shared files): + + +βœ“ Saved prompts: + - ./prompts/005-setup-database.md + - ./prompts/006-create-migrations.md + - ./prompts/007-seed-data.md + +Execution strategy: These prompts must run SEQUENTIALLY (dependencies: 005 β†’ 006 β†’ 007) + +What's next? + +1. Run prompts sequentially now (one completes before next starts) +2. Run first prompt only (005-setup-database.md) +3. Review/edit prompts first +4. Other + +Choose (1-4): \_ + + + +If user chooses #1, invoke via SlashCommand tool: `/run-prompt 005 006 007 --sequential` +If user chooses #2, invoke via SlashCommand tool: `/run-prompt 005` + + + +--- + + + +## Meta Instructions + +- First, check if clarification is needed before generating the prompt +- Read `!ls ./prompts/ 2>/dev/null | sort -V | tail -1` to determine the next number in sequence +- If ./prompts/ doesn't exist, create it with `!mkdir -p ./prompts/` before saving +- Keep prompt filenames descriptive but concise +- Adapt the XML structure to fit the task - not every tag is needed every time +- Consider the user's working directory as the root for all relative paths +- Each prompt file should contain ONLY the prompt content, no preamble or explanation +- After saving, present the appropriate decision tree based on what was created +- Use the SlashCommand tool to invoke /run-prompt when user makes their choice + +## Examples of When to Ask for Clarification + +- "Build a dashboard" β†’ Ask: "What kind of dashboard? Admin, analytics, user-facing? What data should it display? Who will use it?" 
+- "Fix the bug" β†’ Ask: "Can you describe the bug? What's the expected vs actual behavior? Where does it occur?" +- "Add authentication" β†’ Ask: "What type? JWT, OAuth, session-based? Which providers? What's the security context?" +- "Optimize performance" β†’ Ask: "What specific performance issues? Load time, memory, database queries? What are the current metrics?" +- "Create a report" β†’ Ask: "Who is this report for? What will they do with it? What format do they need?" diff --git a/commands/workflow/run-prompt/run-prompt.md b/commands/workflow/run-prompt/run-prompt.md new file mode 100644 index 0000000..cff20cd --- /dev/null +++ b/commands/workflow/run-prompt/run-prompt.md @@ -0,0 +1,152 @@ +--- +name: run-prompt +description: Delegate one or more prompts to fresh sub-task contexts with parallel or sequential execution +argument-hint: [--parallel|--sequential] +allowed-tools: Task, Read, Bash, Glob +model: inherit +enabled: true +--- + + +Execute one or more prompts from `./prompts/` as delegated sub-tasks with fresh context. Supports single prompt execution, parallel execution of multiple independent prompts, and sequential execution of dependent prompts. + +**Tresor Integration**: Can invoke prompts that use Tresor's 141 agents (@systems-architect, @security-auditor, etc.) and support Tresor's subagent types (Explore, Plan, general-purpose). 
+ + +The user will specify which prompt(s) to run via $ARGUMENTS, which can be: + +**Single prompt:** + +- Empty (no arguments): Run the most recently created prompt (default behavior) +- A prompt number (e.g., "001", "5", "42") +- A partial filename (e.g., "user-auth", "dashboard") + +**Multiple prompts:** + +- Multiple numbers (e.g., "005 006 007") +- With execution flag: "005 006 007 --parallel" or "005 006 007 --sequential" +- If no flag specified with multiple prompts, default to --sequential for safety + + + + +Parse $ARGUMENTS to extract: +- Prompt numbers/names (all arguments that are not flags) +- Execution strategy flag (--parallel or --sequential) + + +- "005" → Single prompt: 005 +- "005 006 007" → Multiple prompts: [005, 006, 007], strategy: sequential (default) +- "005 006 007 --parallel" → Multiple prompts: [005, 006, 007], strategy: parallel +- "005 006 007 --sequential" → Multiple prompts: [005, 006, 007], strategy: sequential + + + + +For each prompt number/name: + +- If empty or "last": Find with `!ls -t ./prompts/*.md | head -1` +- If a number: Find file matching that zero-padded number (e.g., "5" matches "005-*.md", "42" matches "042-*.md") +- If text: Find files containing that string in the filename + + + +- If exactly one match found: Use that file +- If multiple matches found: List them and ask user to choose +- If no matches found: Report error and list available prompts + + + + + + +1. Read the complete contents of the prompt file +2. Delegate as sub-task using Task tool with subagent_type="general-purpose" +3. Wait for completion +4. Archive prompt to `./prompts/completed/` with metadata +5. Return results + + + + +1. Read all prompt files +2. **Spawn all Task tools in a SINGLE MESSAGE** (this is critical for parallel execution): + + Use Task tool for prompt 005 + Use Task tool for prompt 006 + Use Task tool for prompt 007 + (All in one message with multiple tool calls) + +3. Wait for ALL to complete +4. 
Archive all prompts with metadata +5. Return consolidated results + + + + +1. Read first prompt file +2. Spawn Task tool for first prompt +3. Wait for completion +4. Archive first prompt +5. Read second prompt file +6. Spawn Task tool for second prompt +7. Wait for completion +8. Archive second prompt +9. Repeat for remaining prompts +10. Return consolidated results + + + + + +By delegating to a sub-task, the actual implementation work happens in fresh context while the main conversation stays lean for orchestration and iteration. + + + + +βœ“ Executed: ./prompts/005-implement-feature.md +βœ“ Archived to: ./prompts/completed/005-implement-feature.md + + +[Summary of what the sub-task accomplished] + + + + +βœ“ Executed in PARALLEL: + +- ./prompts/005-implement-auth.md +- ./prompts/006-implement-api.md +- ./prompts/007-implement-ui.md + +βœ“ All archived to ./prompts/completed/ + + +[Consolidated summary of all sub-task results] + + + + +βœ“ Executed SEQUENTIALLY: + +1. ./prompts/005-setup-database.md β†’ Success +2. ./prompts/006-create-migrations.md β†’ Success +3. 
./prompts/007-seed-data.md β†’ Success + +βœ“ All archived to ./prompts/completed/ + + +[Consolidated summary showing progression through each step] + + + + + + +- For parallel execution: ALL Task tool calls MUST be in a single message +- For sequential execution: Wait for each Task to complete before starting next +- Archive prompts only after successful completion +- If any prompt fails, stop sequential execution and report error +- Provide clear, consolidated results for multiple prompt execution + diff --git a/commands/workflow/whats-next/whats-next.md b/commands/workflow/whats-next/whats-next.md new file mode 100644 index 0000000..906db0e --- /dev/null +++ b/commands/workflow/whats-next/whats-next.md @@ -0,0 +1,123 @@ +--- +name: whats-next +description: Analyze the current conversation and create a handoff document for continuing this work in a fresh context +argument-hint: (no arguments) +allowed-tools: [Read, Write, Bash, WebSearch, WebFetch, Glob] +model: inherit +enabled: true +--- + +Create a comprehensive, detailed handoff document that captures all context from the current conversation. This allows continuing the work in a fresh context with complete precision. 
+ +## Claude Code Tresor Integration + +This command complements Tresor's memory bank system for comprehensive context management: + +**Tresor Memory Bank** (Long-term project context): +- `projectbrief.md` - Project vision, goals, component taxonomy +- `productContext.md` - Tech stack, architectural decisions, conventions +- `activeContext.md` - Current state, priorities, next steps (updated regularly) + +**TΓ‚CHES Context Handoff** (Session-specific): +- `whats-next.md` - Detailed session handoff (created by this command) + +**Usage Pattern**: +- Use Tresor's `activeContext.md` for ongoing daily/weekly work tracking +- Use `/whats-next` for complex task pauses or when context is full +- Reference both in next session for complete continuity + +## Instructions + +**PRIORITY: Comprehensive detail and precision over brevity.** The goal is to enable someone (or a fresh Claude instance) to pick up exactly where you left off with zero information loss. + +Adapt the level of detail to the task type (coding, research, analysis, writing, configuration, etc.) but maintain comprehensive coverage: + +1. **Original Task**: Identify what was initially requested (not new scope or side tasks) + +2. **Work Completed**: Document everything accomplished in detail + - All artifacts created, modified, or analyzed (files, documents, research findings, etc.) + - Specific changes made (code with line numbers, content written, data analyzed, etc.) + - Actions taken (commands run, APIs called, searches performed, tools used, etc.) + - Findings discovered (insights, patterns, answers, data points, etc.) + - Decisions made and the reasoning behind them + +3. **Work Remaining**: Specify exactly what still needs to be done + - Break down remaining work into specific, actionable steps + - Include precise locations, references, or targets (file paths, URLs, data sources, etc.) 
+ - Note dependencies, prerequisites, or ordering requirements + - Specify validation or verification steps needed + +4. **Attempted Approaches**: Capture everything tried, including failures + - Approaches that didn't work and why they failed + - Errors encountered, blockers hit, or limitations discovered + - Dead ends to avoid repeating + - Alternative approaches considered but not pursued + +5. **Critical Context**: Preserve all essential knowledge + - Key decisions and trade-offs considered + - Constraints, requirements, or boundaries + - Important discoveries, gotchas, edge cases, or non-obvious behaviors + - Relevant environment, configuration, or setup details + - Assumptions made that need validation + - References to documentation, sources, or resources consulted + +6. **Current State**: Document the exact current state + - Status of deliverables (complete, in-progress, not started) + - What's committed, saved, or finalized vs. what's temporary or draft + - Any temporary changes, workarounds, or open questions + - Current position in the workflow or process + +Write to `whats-next.md` in the current working directory using the format below. + +## Output Format + +```xml + +[The specific task that was initially requested - be precise about scope] + + + +[Comprehensive detail of everything accomplished: +- Artifacts created/modified/analyzed (with specific references) +- Specific changes, additions, or findings (with details and locations) +- Actions taken (commands, searches, API calls, tool usage, etc.) 
+- Key discoveries or insights +- Decisions made and reasoning +- Side tasks completed] + + + +[Detailed breakdown of what needs to be done: +- Specific tasks with precise locations or references +- Exact targets to create, modify, or analyze +- Dependencies and ordering +- Validation or verification steps needed] + + + +[Everything tried, including failures: +- Approaches that didn't work and why +- Errors, blockers, or limitations encountered +- Dead ends to avoid +- Alternative approaches considered but not pursued] + + + +[All essential knowledge for continuing: +- Key decisions and trade-offs +- Constraints, requirements, or boundaries +- Important discoveries, gotchas, or edge cases +- Environment, configuration, or setup details +- Assumptions requiring validation +- References to documentation, sources, or resources] + + + +[Exact state of the work: +- Status of deliverables (complete/in-progress/not started) +- What's finalized vs. what's temporary or draft +- Temporary changes or workarounds in place +- Current position in workflow or process +- Any open questions or pending decisions] + +``` diff --git a/docs/SLASH-COMMANDS-ENHANCEMENT-PLAN.md b/docs/SLASH-COMMANDS-ENHANCEMENT-PLAN.md new file mode 100644 index 0000000..18f53b8 --- /dev/null +++ b/docs/SLASH-COMMANDS-ENHANCEMENT-PLAN.md @@ -0,0 +1,875 @@ +# Slash Commands Enhancement Plan + +> **Intelligent Agent Orchestration for Claude Code Tresor Commands** +> +> **Version**: 3.0 Vision | **Created**: November 18, 2025 +> **Status**: Ready for Implementation + +--- + +## Table of Contents + +1. [Executive Summary](#executive-summary) +2. [Current State Analysis](#current-state-analysis) +3. [Enhancement Strategy](#enhancement-strategy) +4. [Existing Command Enhancements](#existing-command-enhancements) +5. [New Command Proposals](#new-command-proposals) +6. [Intelligent Agent Selection](#intelligent-agent-selection) +7. 
[Implementation Roadmap](#implementation-roadmap) + +--- + +## Executive Summary + +### Vision + +Transform slash commands from simple orchestrators into **intelligent workflow engines** that automatically select and coordinate the optimal agents from our 141-agent ecosystem based on context, file types, and project structure. + +### Current State + +**Commands**: 9 total (4 core + 5 TÂCHES) +- Core: /scaffold, /review, /test-gen, /docs-gen +- TÂCHES: /create-prompt, /run-prompt, /add-to-todos, /check-todos, /whats-next + +**Agent Integration**: Basic (manual agent mention) +- /review uses 4 agents explicitly +- Other commands mention 1-2 agents +- No intelligent selection + +### Target State + +**Commands**: 15-20 comprehensive commands +**Intelligence**: Context-aware agent selection +**Orchestration**: Multi-phase parallel/sequential workflows +**Quality Gates**: Automated validation and blocking +**Integration**: Full ecosystem (141 agents, 8+ skills) + +### Impact + +- **50-70% faster** development workflows +- **Automated quality** enforcement +- **Intelligent guidance** (right agent every time) +- **Production safety** (validation gates) + +--- + +## Current State Analysis + +### Existing Commands + +#### 1. /scaffold (Development) + +**Current Capability**: +- Generates project boilerplate +- Mentions @systems-architect + +**Current Limitations**: +- No intelligent framework detection +- Single agent coordination +- No tech stack awareness + +**Agent Ecosystem Underutilized**: +- 16 language specialists available (only mentions 1) +- 54 engineering agents available (uses 1) +- No design/architecture integration + +--- + +#### 2. 
/review (Workflow) + +**Current Capability**: +- Code review automation +- Uses 4 agents: config-safety-reviewer, security-auditor, systems-architect, performance-tuner + +**Current Limitations**: +- Static agent selection (always same 4) +- No file-type awareness +- No parallel execution +- No quality gates + +**Improvement Potential**: HIGHEST +- Could auto-select from 141 agents based on files +- Could run specialized reviews (Python files β†’ @python-pro) +- Could enforce quality gates +- Could execute agents in parallel (3x faster) + +--- + +#### 3. /test-gen (Testing) + +**Current Capability**: +- Generates test suites +- Mentions @test-engineer + +**Current Limitations**: +- No coverage gap analysis +- No framework auto-detection +- Single agent (7 testing agents available) + +**Improvement Potential**: +- Auto-detect Jest/pytest/JUnit from project +- Use @qa-test-engineer for adversarial tests +- Use @api-tester for API tests +- Use @performance-benchmarker for load tests + +--- + +#### 4. /docs-gen (Documentation) + +**Current Capability**: +- Generates documentation +- Mentions @docs-writer + +**Current Limitations**: +- No documentation drift detection +- No audience-specific docs +- Single agent (3 documentation agents available) + +**Improvement Potential**: +- Use @tutorial-engineer for tutorials +- Use @api-documenter for API specs +- Use @docs-architect for structure +- Auto-detect what docs are needed + +--- + +## Enhancement Strategy + +### Core Principles + +1. **Intelligent Over Manual**: Commands auto-select agents, not users +2. **Context-Aware**: File types, paths, content determine agents +3. **Parallel When Possible**: Independent agents run simultaneously +4. **Quality Gates**: Enforce standards automatically +5. 
**Actionable Output**: Clear, prioritized recommendations + +### Enhancement Levels + +**Level 1 - Intelligent Selection** (All commands) +- Auto-detect file types β†’ Select language specialists +- Scan file paths β†’ Select domain specialists +- Analyze content β†’ Select task-specific agents + +**Level 2 - Multi-Phase Orchestration** (Complex commands) +- Phase 1: Quick checks (parallel) +- Phase 2: Deep analysis (parallel where possible) +- Phase 3: Synthesis and recommendations + +**Level 3 - Quality Gates** (Validation commands) +- Blocking gates (critical security, config safety) +- Warning gates (performance, coverage) +- Info gates (style, docs) + +**Level 4 - Adaptive Workflows** (Advanced commands) +- Conditional agent selection based on findings +- Iterative refinement +- Dynamic prioritization + +--- + +## Existing Command Enhancements + +### 1. /review Enhancement (CRITICAL PRIORITY) + +#### Current Flow +``` +/review β†’ Invoke 4 fixed agents β†’ Generate report +``` + +#### Enhanced Flow +``` +/review β†’ + Step 1: Analyze context + - git diff β†’ Identify changed files + - Detect file types (.py, .ts, .sql, etc.) 
+ - Detect paths (api/, ui/, config/) + - Detect content (keywords: security, performance) + + Step 2: Intelligent agent selection + - Core agents (always): config-safety-reviewer, security-auditor + - Language specialists (auto): @python-pro if .py files + - Domain specialists (auto): @backend-architect if api/ changes + - Quality specialists (conditional): @refactor-expert if code smells + + Step 3: Parallel execution + - Phase 1 (quick): Skills + language specialists + - Phase 2 (deep): Core reviewers + domain specialists + - Phase 3 (architecture): @systems-architect if major changes + + Step 4: Quality gates + - Blocking: Critical security, unsafe configs + - Warning: Performance regression, coverage drop + - Info: Style issues, doc gaps + + Step 5: Consolidated report + - Grouped by severity + - Agent attribution + - Actionable fixes +``` + +#### New Arguments + +```bash +/review [options] + +--auto-agents (default: true) + # Automatically select agents based on changed files + +--quality-gates [blocking|warning|all|none] + # Which quality gates to enforce + +--parallel (default: true) + # Run independent agents in parallel + +--agent-override "agent1,agent2,..." + # Override auto-selection + +--fail-on [critical|high|medium|low] + # Exit code based on severity + +--interactive + # Show agent selection, allow customization + +--explain + # Show WHY each agent was selected +``` + +#### Example Output + +```bash +/review --explain + +🔍 Analyzing changes... 
+Found: 8 files changed (3 .py, 2 .ts, 1 .sql, 2 config) + +🤖 Agent Selection: + +ALWAYS (Core Review): +✓ @config-safety-reviewer - Config changes detected (CRITICAL) +✓ @security-auditor - Security validation + +AUTO-SELECTED (File-Based): +✓ @python-pro - 3 Python files changed +✓ @typescript-pro - 2 TypeScript files changed +✓ @sql-pro - 1 SQL migration file + +AUTO-SELECTED (Path-Based): +✓ @backend-architect - Changes in api/ directory +✓ @database-optimizer - Changes in database/migrations/ + +Total: 7 agents in 2 parallel phases (est. 30-40s) + +Proceed? (y/n/customize): +``` + +**Implementation Tasks**: +- [ ] Add file type detection logic +- [ ] Add path pattern matching +- [ ] Implement parallel agent execution +- [ ] Add quality gates system +- [ ] Create consolidated report generator +- [ ] Add --explain mode + +**Estimated Effort**: 8-10 hours +**Impact**: 3x faster reviews, 50% fewer bugs + +--- + +### 2. /scaffold Enhancement (HIGH PRIORITY) + +#### Current Flow +``` +/scaffold → Generate boilerplate +``` + +#### Enhanced Flow +``` +/scaffold [options] → + Step 1: Project context detection + - Scan package.json/requirements.txt + - Detect: React, Next.js, Express, Django, etc. 
+ - Determine: Frontend, backend, fullstack + + Step 2: Multi-agent planning + - @systems-architect: Overall structure + - @[language]-pro: Language-specific patterns + - @frontend-developer OR @backend-architect: Specialized patterns + + Step 3: Generation with best practices + - Framework-specific boilerplate + - Testing infrastructure + - Documentation stubs + - CI/CD configuration + + Step 4: Skill activation confirmation + - code-reviewer will monitor + - test-generator will suggest tests + - api-documenter will document endpoints +``` + +#### New Arguments + +```bash +/scaffold [options] + +--intelligence-level [basic|smart|expert] + basic: Generate only (no agent consultation) + smart: Consult specialist agents (default) + expert: Full multi-agent architecture review + +--optimize-for [speed|maintainability|scalability] + Changes pattern recommendations + +--with-tests + Generate comprehensive test infrastructure + +--with-docker + Add Docker containerization + +--with-ci + Add CI/CD configuration + +--framework-detect + Auto-detect and match existing project patterns +``` + +#### Example + +```bash +/scaffold api users-service --intelligence-level expert --with-tests --with-docker + +Output: +βœ“ Detecting project context... +Detected: Node.js + TypeScript + Express + PostgreSQL + +βœ“ Consulting @systems-architect... +Recommended: Microservice pattern with clean architecture + +βœ“ Consulting @typescript-pro... +Patterns: Decorators, dependency injection, async/await + +βœ“ Consulting @backend-architect... 
+API Design: RESTful with OpenAPI spec + +Generating: +📁 services/users/ +├── src/ +│ ├── controllers/ # Request handlers +│ ├── services/ # Business logic +│ ├── repositories/ # Data access +│ ├── models/ # Data models +│ └── middleware/ # Express middleware +├── tests/ +│ ├── unit/ +│ ├── integration/ +│ └── e2e/ +├── Dockerfile +├── docker-compose.yml +├── .env.example +└── README.md + +✓ Generating tests with @test-engineer... +Created: 45 test cases (unit, integration, E2E) + +✓ Activating monitoring skills... +→ code-reviewer will validate generated code +→ test-generator will suggest additional tests +→ api-documenter will document endpoints + +Ready! Next steps: +1. Review generated code +2. Customize as needed +3. Run tests: npm test +``` + +**Implementation Tasks**: +- [ ] Add project detection logic +- [ ] Implement multi-agent planning workflow +- [ ] Add framework templates +- [ ] Create test infrastructure generation +- [ ] Add Docker/CI templates + +**Estimated Effort**: 6-8 hours +**Impact**: Professional project setup in minutes + +--- + +### 3. 
/test-gen Enhancement (HIGH PRIORITY) + +#### Enhanced Flow + +``` +/test-gen β†’ + Step 1: Code analysis + - Detect: Functions, methods, components + - Identify: Existing test coverage + - Calculate: Coverage gaps + + Step 2: Framework detection + - package.json β†’ Jest/Vitest + - requirements.txt β†’ pytest + - pom.xml β†’ JUnit + + Step 3: Multi-agent testing + - @test-engineer: Comprehensive test strategy + - @[language]-pro: Language-specific patterns + - @qa-test-engineer: Adversarial/edge cases + - @api-tester (if API): API-specific tests + + Step 4: Specialized tests + - @performance-benchmarker: Load tests + - @frontend-developer: Visual regression (if UI) +``` + +#### New Arguments + +```bash +/test-gen [options] + +--analyze-coverage + # Scan existing tests, identify gaps + +--test-types [unit|integration|e2e|all] + # Specific test types + +--adversarial + # Invoke @qa-test-engineer for edge cases + +--property-based + # Generate property-based tests + +--visual-regression + # Generate visual regression tests (UI components) + +--load-tests + # Generate load tests with @performance-benchmarker + +--coverage-target + # Target coverage (default: 90%) +``` + +**Implementation Tasks**: +- [ ] Add coverage gap analysis +- [ ] Add framework auto-detection +- [ ] Integrate multiple testing agents +- [ ] Add specialized test types + +**Estimated Effort**: 4-6 hours +**Impact**: 90%+ coverage automatically + +--- + +### 4. 
/docs-gen Enhancement (MEDIUM PRIORITY) + +#### Enhanced Flow + +``` +/docs-gen β†’ + Step 1: Documentation needs analysis + - Scan project structure + - Identify: API routes, CLI commands, components + - Detect: Missing/outdated docs + + Step 2: Multi-agent documentation + - @docs-writer: User guides + - @api-documenter: API specifications + - @tutorial-engineer: Tutorials + - @docs-architect: Documentation structure + + Step 3: Audience-specific generation + - Developers: Technical, API reference + - Users: Guides, tutorials + - Contributors: Setup, workflows +``` + +#### New Arguments + +```bash +/docs-gen [type] [options] + +--detect-drift + # Compare code vs docs, identify outdated sections + +--audience [developer|user|contributor|enterprise] + # Audience-specific documentation + +--diagrams + # Auto-generate architecture diagrams + +--interactive + # Create interactive docs (Docusaurus) + +--compliance [gdpr|hipaa|soc2] + # Add compliance docs (@compliance-officer-fs) +``` + +**Implementation Tasks**: +- [ ] Add documentation drift detection +- [ ] Add audience-specific templates +- [ ] Integrate architecture diagram generation +- [ ] Add compliance documentation + +**Estimated Effort**: 4-6 hours +**Impact**: Always-current documentation + +--- + +## New Command Proposals + +### Priority 1: Critical Commands (Week 1-2) + +#### /diagnose - Intelligent Debugging + +**Purpose**: Systematic root cause analysis with multi-agent debugging + +**Orchestration**: +1. @root-cause-analyzer (triage) +2. Conditional specialists (database/performance/security) +3. @test-engineer (regression tests) + +**Arguments**: +```bash +/diagnose [--logs path] [--production] [--trace] +``` + +**Use Case**: Production incidents, complex bugs + +**Estimated Effort**: 6-8 hours + +--- + +#### /secure - Security Audit + +**Purpose**: Comprehensive security audit and compliance validation + +**Orchestration**: +1. Security skills (parallel quick scan) +2. 
@security-auditor (deep audit) +3. @compliance-officer-fs (if compliance flag) + +**Arguments**: +```bash +/secure [api|auth|infrastructure|all] [--compliance gdpr|hipaa] +``` + +**Use Case**: Pre-deployment security, compliance audits + +**Estimated Effort**: 6-8 hours + +--- + +#### /pr-ready - Pre-Submission Validation + +**Purpose**: Ensure PR meets all standards before submission + +**Orchestration**: +1. Quick checks (skills) +2. /review with quality gates +3. PR materials generation +4. Changelog update + +**Arguments**: +```bash +/pr-ready [--auto-fix] [--strict] [--generate-tests] +``` + +**Use Case**: Before creating PR, ensure quality + +**Estimated Effort**: 4-6 hours + +--- + +### Priority 2: High Value (Week 3-4) + +#### /optimize - Performance Optimization + +**Orchestration**: +1. @performance-tuner (profiling) +2. Specialized optimizers (database/frontend/backend) +3. @performance-benchmarker (validation) + +**Arguments**: +```bash +/optimize [api|database|frontend|bundle|memory] [--profile] [--budget] +``` + +**Estimated Effort**: 6-8 hours + +--- + +#### /refactor - Intelligent Refactoring + +**Orchestration**: +1. @refactor-expert (code smell detection) +2. @systems-architect (patterns) +3. @test-engineer (test validation) + +**Arguments**: +```bash +/refactor [file|function|module] [--pattern solid|dry] [--safe-mode] +``` + +**Estimated Effort**: 6-8 hours + +--- + +#### /deploy-check - Deployment Validation + +**Orchestration**: +1. @config-safety-reviewer (config) +2. @security-auditor (security) +3. 
@deployment-engineer (deployment plan) + +**Arguments**: +```bash +/deploy-check [--breaking-changes] [--rollback-plan] +``` + +**Estimated Effort**: 5-7 hours + +--- + +### Priority 3: Medium Value (Week 5-6) + +#### /analyze - Codebase Analysis + +**Purpose**: Deep insights into architecture, quality, tech debt + +**Arguments**: +```bash +/analyze [architecture|quality|tech-debt] [--visualize] [--depth] +``` + +**Estimated Effort**: 6-8 hours + +--- + +#### /migrate - Technology Migration + +**Purpose**: Database/framework/language migration assistance + +**Arguments**: +```bash +/migrate [--plan-only] [--zero-downtime] +``` + +**Estimated Effort**: 8-10 hours + +--- + +#### /feature-plan - Feature Planning + +**Purpose**: End-to-end feature planning with multi-agent design + +**Arguments**: +```bash +/feature-plan [--fullstack] [--generate-prompts] +``` + +**Estimated Effort**: 6-8 hours + +--- + +## Intelligent Agent Selection + +### Selection Algorithm + +**Input**: Command context (files, description, project type) +**Output**: Ranked list of agents with confidence scores + +#### Phase 1: File-Based Selection + +```yaml +File Extension Mapping: +.py → @python-pro (confidence: 0.95) +.ts/.tsx → @typescript-pro (confidence: 0.95) +.java → @java-pro (confidence: 0.95) +.go → @golang-pro (confidence: 0.95) +.rs → @rust-pro (confidence: 0.95) +.sql → @sql-pro + @database-optimizer (confidence: 0.90) +.tf → @terraform-specialist + @cloud-architect (confidence: 0.85) +Dockerfile → @deployment-engineer (confidence: 0.90) +``` + +#### Phase 2: Path Pattern Matching + +```yaml +Path Patterns: +api/, backend/, server/ → @backend-architect (confidence: 0.85) +components/, ui/, frontend/ → @frontend-developer (confidence: 0.85) +auth/, security/ → @security-auditor (confidence: 0.90) +database/, migrations/ → @database-optimizer (confidence: 0.85) +tests/, __tests__/ → @test-engineer (confidence: 0.80) +docs/, documentation/ → @docs-writer 
(confidence: 0.80) +deploy/, infra/ → @deployment-engineer (confidence: 0.85) +``` + +#### Phase 3: Content Analysis + +```yaml +Keywords: +"performance", "optimize", "slow" → @performance-tuner +"security", "vulnerability", "auth" → @security-auditor +"refactor", "code smell", "clean" → @refactor-expert +"test", "coverage", "qa" → @test-engineer +"debug", "error", "bug" → @root-cause-analyzer +"architecture", "design", "system" → @systems-architect +``` + +#### Phase 4: Project Type Detection + +```yaml +package.json: + react → @frontend-developer + @typescript-pro + @ui-designer + next → @frontend-developer + @backend-architect + express → @backend-architect + @javascript-pro + +requirements.txt: + django → @backend-architect + @python-pro + fastapi → @backend-architect + @python-pro + @api-documenter + +go.mod → @golang-pro + @backend-architect +Cargo.toml → @rust-pro +pom.xml → @java-pro +``` + +#### Confidence Scoring + +```javascript +function calculateConfidence(agent, context) { + let score = 0; + + // File type match + if (context.files.some(f => agentHandlesFileType(agent, f))) { + score += 0.4; + } + + // Path pattern match + if (context.files.some(f => agentHandlesPath(agent, f))) { + score += 0.3; + } + + // Content keyword match + if (agentHandlesKeywords(agent, context.description)) { + score += 0.2; + } + + // Project type match + if (agentHandlesProjectType(agent, context.projectType)) { + score += 0.1; + } + + return Math.min(score, 1.0); +} +``` + +--- + +## Implementation Roadmap + +### Phase 1: Critical Enhancements (Week 1-2, 20-25 hours) + +**Week 1**: +- [x] TÂCHES integration (completed) +- [ ] Enhance /review with intelligent selection (8 hours) +- [ ] Add quality gates system (4 hours) +- [ ] Create /diagnose command (6 hours) + +**Week 2**: +- [ ] Create /secure command (6 hours) +- [ ] Create /pr-ready command (5 hours) +- [ ] Add parallel execution framework (6 hours) + +**Deliverables**: 3 enhanced 
commands, 3 new critical commands + +--- + +### Phase 2: High-Value Commands (Week 3-4, 20-25 hours) + +**Week 3**: +- [ ] Enhance /scaffold with multi-agent (6 hours) +- [ ] Create /optimize command (7 hours) +- [ ] Create /refactor command (7 hours) + +**Week 4**: +- [ ] Create /deploy-check command (6 hours) +- [ ] Enhance /test-gen with coverage analysis (5 hours) +- [ ] Add command discovery system (4 hours) + +**Deliverables**: 2 enhanced commands, 4 new high-value commands + +--- + +### Phase 3: Additional Commands (Week 5-6, 20-25 hours) + +**Week 5**: +- [ ] Create /analyze command (7 hours) +- [ ] Create /migrate command (8 hours) +- [ ] Enhance /docs-gen with drift detection (5 hours) + +**Week 6**: +- [ ] Create /feature-plan command (6 hours) +- [ ] Create /tech-debt command (6 hours) +- [ ] Create /onboard command (4 hours) +- [ ] Polish and documentation (4 hours) + +**Deliverables**: 1 enhanced command, 5 new commands + +--- + +### Total Implementation + +**Timeline**: 6 weeks +**Effort**: 60-75 hours +**Commands**: 9 → 18 (100% increase) +**Enhancement**: All commands with intelligent orchestration + +--- + +## Success Metrics + +### Adoption +- 80% of developers use enhanced commands daily +- 50% reduction in "which agent?" questions + +### Performance +- 50% faster code reviews (parallel execution) +- 70% faster debugging (intelligent diagnosis) +- 60% faster scaffolding (multi-agent planning) + +### Quality +- 40% fewer production bugs (quality gates) +- 90%+ test coverage (enhanced test-gen) +- 95% security compliance (automated audits) + +### Productivity +- 50-70% overall productivity improvement +- 30% reduction in manual review time +- 80% automation of repetitive workflows + +--- + +## Next Immediate Actions + +**This Week** (Start with highest ROI): + +1. **Enhance /review** (8 hours) - CRITICAL + - Intelligent agent selection + - Parallel execution + - Quality gates + +2. 
**Create /diagnose** (6 hours) - CRITICAL + - Production debugging workflow + - Multi-agent RCA + +3. **Create /secure** (6 hours) - CRITICAL + - Automated security audits + - Compliance validation + +**Total**: 20 hours for major command ecosystem boost + +--- + +**Status**: Ready for implementation +**Owner**: Alireza Rezvani +**Created**: November 18, 2025 diff --git a/run_final_validation.py b/run_final_validation.py deleted file mode 100644 index c3ee6ab..0000000 --- a/run_final_validation.py +++ /dev/null @@ -1,222 +0,0 @@ -#!/usr/bin/env python3 -""" -Final comprehensive validation of 30 representative agents -""" - -import sys -sys.path.append('/Users/rezarezvani/projects/claude-code-tresor') - -from agent_validator_v2 import AgentValidatorV2 - -def main(): - base_path = "/Users/rezarezvani/projects/claude-code-tresor/subagents" - - # 30 representative agents across all categories - sample_paths = [ - # CORE (8 agents - ALL) - "core/performance-tuner", - "core/config-safety-reviewer", - "core/root-cause-analyzer", - "core/refactor-expert", - "core/docs-writer", - "core/systems-architect", - "core/security-auditor", - "core/test-engineer", - - # ENGINEERING (6 agents from various subcategories) - "engineering/backend/backend-architect", - "engineering/backend/database-optimizer", - "engineering/devops/cloud-architect", - "engineering/devops/incident-responder", - "engineering/frontend/frontend-developer", - "engineering/languages/python-developer", - - # DESIGN (3 agents) - "design/ui/ui-designer", - "design/ux/ux-researcher", - "design/visual/visual-storyteller", - - # MARKETING (3 agents) - "marketing/content/copywriter", - "marketing/growth/growth-hacker", - "marketing/seo/seo-specialist", - - # PRODUCT (2 agents) - "product/management/product-manager", - "product/strategy/feature-prioritization", - - # LEADERSHIP (2 agents) - "leadership/management/people-manager", - "leadership/technical/architect-leader", - - # OPERATIONS (2 agents) - 
"operations/qa/qa-engineer", - "operations/process/process-optimizer", - - # RESEARCH (2 agents) - "research/market/market-research-analyst", - "research/data/deep-research-specialist", - - # AI & AUTOMATION (2 agents) - "ai-automation/ml-engineering/ml-engineer", - "ai-automation/prompts/prompt-engineer", - ] - - validator = AgentValidatorV2(base_path) - summary = validator.analyze_sample(sample_paths) - - # Generate comprehensive report - print("\n" + "="*90) - print(" COMPREHENSIVE AGENT VALIDATION REPORT") - print(" Claude Code Tresor - 30 Agent Sample Analysis") - print("="*90) - - print(f"\nπŸ“Š EXECUTIVE SUMMARY") - print(f"─" * 90) - print(f"Total Agents Analyzed: {summary['total_agents']}") - print(f"Overall Quality Score: {summary['overall_stats']['avg_score']}/10") - print(f"Score Range: {summary['overall_stats']['min_score']} - {summary['overall_stats']['max_score']}") - - if summary['core_agents']: - print(f"\nπŸ“š CORE AGENTS (Comprehensive Format) - n={summary['core_agents']['count']}") - print(f"─" * 90) - print(f" Average Quality Score: {summary['core_agents']['avg_score']}/10") - print(f" Average Word Count: {summary['core_agents']['avg_word_count']} words") - print(f" Average Code Examples: {summary['core_agents']['avg_code_blocks']} blocks") - print(f" YAML Completeness: {summary['core_agents']['yaml_complete']}/{summary['core_agents']['count']} (100%)") - print(f"\n πŸ’‘ Assessment: {'EXCELLENT' if summary['core_agents']['avg_score'] >= 8 else 'GOOD' if summary['core_agents']['avg_score'] >= 7 else 'NEEDS IMPROVEMENT'}") - - if summary['specialized_agents']: - print(f"\n⚑ SPECIALIZED AGENTS (Concise Format) - n={summary['specialized_agents']['count']}") - print(f"─" * 90) - print(f" Average Quality Score: {summary['specialized_agents']['avg_score']}/10") - print(f" Average Word Count: {summary['specialized_agents']['avg_word_count']} words") - print(f" Average Code Examples: {summary['specialized_agents']['avg_code_blocks']} blocks") - print(f" 
YAML Completeness: {summary['specialized_agents']['yaml_complete']}/{summary['specialized_agents']['count']}") - print(f"\n πŸ’‘ Assessment: {'EXCELLENT' if summary['specialized_agents']['avg_score'] >= 8 else 'GOOD' if summary['specialized_agents']['avg_score'] >= 7 else 'NEEDS IMPROVEMENT'}") - - print(f"\nπŸ“ CATEGORY ANALYSIS") - print(f"─" * 90) - for cat, stats in sorted(summary['category_stats'].items(), key=lambda x: x[1]['avg_score'], reverse=True): - score_emoji = "🟒" if stats['avg_score'] >= 7 else "🟑" if stats['avg_score'] >= 5 else "πŸ”΄" - print(f" {score_emoji} {cat:20} {stats['count']:2} agents | Avg: {stats['avg_score']:>4.1f}/10 | [Core: {stats['formats']['core']}, Specialized: {stats['formats']['specialized']}]") - - print(f"\n⭐ TOP 10 PERFORMERS") - print(f"─" * 90) - for i, agent in enumerate(summary['top_performers'][:10], 1): - fmt = "πŸ“š" if agent['format'] == 'core' else "⚑" - print(f" {i:2}. {fmt} {agent['agent_name']:40} {agent['quality_score']:>4.1f}/10 ({agent['category']})") - - print(f"\n⚠️ BOTTOM 10 - NEEDS IMPROVEMENT") - print(f"─" * 90) - bottom_agents = sorted(validator.results, key=lambda x: x['quality_score'])[:10] - for i, agent in enumerate(bottom_agents, 1): - fmt = "πŸ“š" if agent['format'] == 'core' else "⚑" - print(f" {i:2}. {fmt} {agent['agent_name']:40} {agent['quality_score']:>4.1f}/10 ({agent['category']})") - if agent['quality_issues']: - for issue in agent['quality_issues'][:2]: - print(f" πŸ”§ {issue}") - - if summary['all_issues']: - print(f"\nπŸ” MOST COMMON ISSUES (Top 10)") - print(f"─" * 90) - for i, (issue, count) in enumerate(list(summary['all_issues'].items())[:10], 1): - pct = (count / summary['total_agents']) * 100 - print(f" {i:2}. 
[{count:2} agents, {pct:5.1f}%] {issue}") - - print(f"\nπŸ“Š QUALITY DISTRIBUTION") - print(f"─" * 90) - score_buckets = { - 'Excellent (9-10)': [r for r in validator.results if r['quality_score'] >= 9], - 'Good (7-8.9)': [r for r in validator.results if 7 <= r['quality_score'] < 9], - 'Moderate (5-6.9)': [r for r in validator.results if 5 <= r['quality_score'] < 7], - 'Poor (<5)': [r for r in validator.results if r['quality_score'] < 5] - } - - for label, agents in score_buckets.items(): - count = len(agents) - pct = (count / summary['total_agents']) * 100 - bar = 'β–ˆ' * int(pct / 2) - print(f" {label:20} {count:2} agents ({pct:5.1f}%) {bar}") - - print(f"\nπŸ’‘ KEY RECOMMENDATIONS") - print(f"─" * 90) - - # Generate recommendations based on analysis - recs = [] - - if summary['core_agents']['avg_score'] < 7: - recs.append("CORE AGENTS: Add more code examples and expand methodology sections") - - if summary['specialized_agents']['avg_score'] < 7: - recs.append("SPECIALIZED AGENTS: Ensure all have Focus/Approach/Output sections") - - # Check category-specific issues - low_categories = [cat for cat, stats in summary['category_stats'].items() if stats['avg_score'] < 6] - if low_categories: - recs.append(f"CATEGORIES NEEDING ATTENTION: {', '.join(low_categories)}") - - if summary['all_issues']: - top_issue = list(summary['all_issues'].items())[0] - if top_issue[1] > 5: - recs.append(f"FIX WIDESPREAD ISSUE: {top_issue[0]} ({top_issue[1]} agents affected)") - - if not recs: - recs.append("βœ… Overall quality is good - maintain current standards") - recs.append("βœ… Continue adding code examples to core agents") - recs.append("βœ… Keep YAML frontmatter complete and consistent") - - for i, rec in enumerate(recs, 1): - print(f" {i}. 
{rec}") - - print(f"\nπŸ“ˆ ESTIMATED REPOSITORY QUALITY") - print(f"─" * 90) - - # Extrapolate to full repository - total_repo_agents = 133 - core_in_repo = 8 - spec_in_repo = total_repo_agents - core_in_repo - - # Weighted average - if summary['core_agents'] and summary['specialized_agents']: - estimated_score = ( - (summary['core_agents']['avg_score'] * core_in_repo + - summary['specialized_agents']['avg_score'] * spec_in_repo) / - total_repo_agents - ) - else: - estimated_score = summary['overall_stats']['avg_score'] - - print(f" Sample Quality Score: {summary['overall_stats']['avg_score']}/10") - print(f" Estimated Repository Score: {estimated_score:.1f}/10") - print(f" Confidence: {'HIGH (representative sample)' if summary['total_agents'] >= 25 else 'MEDIUM'}") - - quality_label = ( - "EXCELLENT" if estimated_score >= 8 else - "GOOD" if estimated_score >= 7 else - "MODERATE" if estimated_score >= 5 else - "NEEDS IMPROVEMENT" - ) - print(f" Overall Assessment: {quality_label}") - - # Save JSON report - output_file = "/Users/rezarezvani/projects/claude-code-tresor/VALIDATION_REPORT.json" - import json - with open(output_file, 'w') as f: - json.dump({ - 'summary': summary, - 'detailed_results': validator.results, - 'repository_estimate': { - 'sample_score': summary['overall_stats']['avg_score'], - 'estimated_full_score': round(estimated_score, 1), - 'quality_label': quality_label, - 'total_repo_agents': total_repo_agents - } - }, f, indent=2) - - print(f"\nπŸ’Ύ Full JSON report saved to: {output_file}") - print("="*90 + "\n") - - -if __name__ == "__main__": - main() diff --git a/scripts/install.sh b/scripts/install.sh index a8ed33b..0fb3a47 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -170,9 +170,9 @@ install_subagents() { local agent_count=$(find "$subagents_dest" -name "agent.md" -type f | wc -l) log "Installed $agent_count subagents across 10 categories" - log "Subagents installed successfully" + log "Claude Code Tresor Subagents installed 
successfully" else - warn "Subagents directory not found in repository" + warn "Claude Code Tresor Subagents directory not found in repository" fi } @@ -199,7 +199,7 @@ install_skills() { fi done - log "Skills installed successfully" + log "Claude Code Tresor Skills installed successfully" else warn "Skills directory not found in repository" fi @@ -306,12 +306,19 @@ print_summary() { echo "πŸ“ Installation Location:" echo " $CLAUDE_CODE_DIR" echo - echo "πŸš€ Available Commands:" + echo "πŸš€ Core Workflow Commands (4):" echo " /scaffold - Generate project structures and components" echo " /review - Automated code review with best practices" echo " /test-gen - Generate comprehensive test suites" echo " /docs-gen - Create documentation from code" echo + echo "πŸ”„ TΓ‚CHES Workflow Commands (5) - NEW in v2.6.5:" + echo " /create-prompt - Generate optimized prompts for complex tasks" + echo " /run-prompt - Execute prompts in sub-agents (parallel/sequential)" + echo " /add-to-todos - Capture ideas with full context" + echo " /check-todos - Resume work on todos (suggests Tresor agents)" + echo " /whats-next - Create comprehensive context handoff document" + echo echo "πŸ€– Core Agents (8):" echo " @systems-architect - System design and architecture" echo " @config-safety-reviewer - Configuration safety specialist" @@ -459,7 +466,7 @@ main() { install_commands install_agents install_resources - log "Update completed successfully" + log "Claude Code Tresor Update completed successfully" else # Full installation install_skills