diff --git a/.github/markdown-link-check.json b/.github/markdown-link-check.json index c22827d..00d74f1 100644 --- a/.github/markdown-link-check.json +++ b/.github/markdown-link-check.json @@ -1,7 +1,6 @@ { "ignorePatterns": [ { "pattern": "^https://t.me/" }, - { "pattern": "^https://example.com/" }, { "pattern": "^https://install.hermes.nous.ai" }, { "pattern": "^https://langfuse.yourdomain.com" }, { "pattern": "^https://hermes.yourdomain.com" }, diff --git a/CHANGELOG.md b/CHANGELOG.md index 211225b..b0d59cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,27 @@ Dated list of meaningful guide updates. Roughly [Keep a Changelog](https://keepachangelog.com) flavored. +## 2026-04-30 — Hermes v0.11/v0.12 Refresh + +### Added +- **Part 22 — Latest Power Moves** covering Curator, TUI steering habits, context-file hygiene, plugins, auxiliary models, cron chaining, and the v0.12 upgrade checklist +- Curator guidance in Part 5, including dry-run, scheduling, pin/archive behavior, and how it differs from skills/memory/context files +- v0.12 platform coverage for QQBot, Tencent Yuanbao, and Microsoft Teams as a plugin-shipped gateway +- AWS Bedrock, Azure AI Foundry, LM Studio, GMI Cloud, Tencent TokenHub, MiniMax OAuth, Gemini OAuth, and remote model catalog notes in Part 9 +- Vercel Sandbox coverage in Part 21 + +### Changed +- README "What's New" now reflects landed v0.11.0 and v0.12.0 releases instead of speculative post-v0.10 PR tracking +- Part 12 updated for dashboard Chat, Models tab, plugins, Curator controls, and `web,pty` install requirements +- Part 14 updated for `/steer`, `/queue`, `/background`, `/busy`, and current Fast Mode language +- Part 18 updated for orchestrator-role subagents and file coordination +- Part 19 updated with MCP/plugin/dashboard threat surfaces and v0.12 hardline block guidance +- Part 20 updated to prefer the bundled Langfuse observability plugin and auxiliary routing + +### Removed +- Stale "Cooking on main" framing and 
example.com disclosure placeholder +- Old Gemini CLI install requirement for Gemini OAuth + ## 2026-04-17 — Wizard + Reference Architectures + CI ### Added diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 71d092c..07bb889 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,7 +37,7 @@ This guide is built in public. PRs welcome. ├── ECOSYSTEM.md ├── ROADMAP.md ├── LICENSE -├── part1-setup.md … part21-remote-sandboxes.md +├── part1-setup.md … part22-latest-power-moves.md ├── diagrams/architecture.md ├── skills/ │ ├── README.md diff --git a/ECOSYSTEM.md b/ECOSYSTEM.md index 50d92fe..dbcbc9c 100644 --- a/ECOSYSTEM.md +++ b/ECOSYSTEM.md @@ -6,31 +6,31 @@ The canonical "where do I find X for Hermes" directory. Maintained alongside the ## MCP Servers Worth Installing -### Official (Anthropic-maintained) -- [`@modelcontextprotocol/server-github`](https://github.com/modelcontextprotocol/servers/tree/main/src/github) — PRs, issues, code search, Actions +### Official / reference +- [`@modelcontextprotocol/server-github`](https://www.npmjs.com/package/@modelcontextprotocol/server-github) — PRs, issues, code search, Actions - [`@modelcontextprotocol/server-filesystem`](https://github.com/modelcontextprotocol/servers/tree/main/src/filesystem) — read/write to scoped directories -- [`@modelcontextprotocol/server-postgres`](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres) — read-only SQL -- [`@modelcontextprotocol/server-sqlite`](https://github.com/modelcontextprotocol/servers/tree/main/src/sqlite) — local SQLite -- [`@modelcontextprotocol/server-puppeteer`](https://github.com/modelcontextprotocol/servers/tree/main/src/puppeteer) — headless browser automation +- [`@modelcontextprotocol/server-postgres`](https://www.npmjs.com/package/@modelcontextprotocol/server-postgres) — read-only SQL +- [`@modelcontextprotocol/server-sqlite`](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/sqlite) — local SQLite +- 
[`@modelcontextprotocol/server-puppeteer`](https://www.npmjs.com/package/@modelcontextprotocol/server-puppeteer) — headless browser automation - [`@modelcontextprotocol/server-memory`](https://github.com/modelcontextprotocol/servers/tree/main/src/memory) — lightweight KV memory -- [`@modelcontextprotocol/server-google-drive`](https://github.com/modelcontextprotocol/servers/tree/main/src/gdrive) — Drive read +- [`@modelcontextprotocol/server-gdrive`](https://www.npmjs.com/package/@modelcontextprotocol/server-gdrive) — Drive read ### First-party vendor MCPs - [`@cloudflare/mcp-server-cloudflare`](https://github.com/cloudflare/mcp-server-cloudflare) — Workers, KV, D1, R2 -- [`@supabase/mcp-server-supabase`](https://github.com/supabase/mcp-server-supabase) — Postgres + storage + auth -- [`@stripe/mcp-server-stripe`](https://github.com/stripe/agent-sdk) — payments read + restricted writes -- [`@linear/mcp-server-linear`](https://github.com/linear/linear-mcp-server) — issue tracking -- [`@notion/mcp-server-notion`](https://github.com/notionhq/notion-mcp-server) — page read/write +- [`@supabase/mcp-server-supabase`](https://github.com/supabase-community/supabase-mcp/tree/main/packages/mcp-server-supabase) — Postgres + storage + auth +- [`@stripe/mcp-server-stripe`](https://github.com/stripe/ai/tree/main/tools/modelcontextprotocol) — payments read + restricted writes +- [`Linear remote MCP`](https://linear.app/docs/mcp) — issue tracking +- [`@notionhq/notion-mcp-server`](https://github.com/makenotion/notion-mcp-server) — page read/write - [`@browserbase/mcp-server`](https://github.com/browserbase/mcp-server-browserbase) — managed headless browser - [`@chromadb/mcp-server-chroma`](https://github.com/chroma-core/chroma-mcp) — vector search ### Community -- [`mem0/mcp-server-mem0`](https://github.com/mem0ai/mem0/tree/main/mcp) — persistent cross-device memory +- [`Mem0 remote MCP`](https://docs.mem0.ai/platform/mem0-mcp) — persistent cross-device memory - 
[`arxiv-mcp-server`](https://github.com/blazickjp/arxiv-mcp-server) — arxiv search + PDF extraction - [`mcp-server-atlassian`](https://github.com/sooperset/mcp-atlassian) — Jira + Confluence -- [`mcp-server-slack`](https://github.com/modelcontextprotocol/servers/tree/main/src/slack) — message, search, profile +- [`@modelcontextprotocol/server-slack`](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/slack) — message, search, profile - [`dbt-mcp`](https://github.com/dbt-labs/dbt-mcp) — dbt Cloud -- [`mcp-server-e2b`](https://github.com/e2b-dev/e2b-mcp) — disposable Python sandboxes +- [`e2b-dev/mcp-server`](https://github.com/e2b-dev/mcp-server) — disposable Python sandboxes - [`mcp-obsidian`](https://github.com/MarkusPfundstein/mcp-obsidian) — your Obsidian vault See [Part 17](./part17-mcp-servers.md) for install patterns and trust model guidance. @@ -74,7 +74,7 @@ See [Part 20](./part20-observability.md). ## Security research / CVEs of note (2026) -- **Comment and Control (2026-04-15)** — cross-vendor prompt-injection via GitHub PR titles hitting Claude Code, Gemini CLI, GitHub Copilot Agent. [Disclosure thread](https://example.com/disclosure). +- **Comment and Control (2026-04-15)** — cross-vendor prompt-injection via GitHub PR titles hitting Claude Code, Gemini CLI, GitHub Copilot Agent. See the defensive write-up referenced in [Part 19](./part19-security-playbook.md). - **MCP stdio poisoning** — untrusted npm packages that proxy stdio MCP traffic. Mitigated by pinning versions + Socket.dev/Semgrep audits. - **Webhook replay attacks** — a reminder that HMAC + TTL together, not HMAC alone, prevents replay. 
diff --git a/README-ja.md b/README-ja.md index 3741cd7..34c8475 100644 --- a/README-ja.md +++ b/README-ja.md @@ -2,7 +2,7 @@ > [英語版はこちら](./README.md) · このページは入口の要約。本文の章は英語のまま。 -[NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(v0.10.0+)向けの実戦ガイド + インストール可能な成果物(Skills・設定テンプレ・インフラスクリプト)。 +[NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(v0.12.0 まで反映)向けの実戦ガイド + インストール可能な成果物(Skills・設定テンプレ・インフラスクリプト)。 ## ワンコマンドで起動 @@ -15,7 +15,7 @@ curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/m ## 主なコンテンツ -- **21 章の本文**(`part1`〜`part21`) — LightRAG、Telegram、MCP、セキュリティ、可観測性、リモートサンドボックス +- **23 章の本文**(README 内の章 + `part6`〜`part22`) — Curator、TUI、プラグイン、LightRAG、Telegram、MCP、セキュリティ、可観測性、リモートサンドボックス - **13 個のインストール可能 Skill**(`skills/`) — 監査、バックアップ、依存スキャン、コストレポート、Telegram トリアージ、PR レビュー、受信トレイ整理、Hermes 週報、スパムフィルタ、会議準備 など - **5 つのプロダクション設定テンプレ**(`templates/config/`) — minimum / telegram-bot / production / cost-optimized / security-hardened - **インフラ一式**(`templates/compose/`, `templates/caddy/`, `templates/systemd/`, `scripts/`) — Langfuse セルフホスト、Caddy リバースプロキシ、systemd 強化、VPS ブートストラップ diff --git a/README-zh.md b/README-zh.md index eb013fe..caefc9f 100644 --- a/README-zh.md +++ b/README-zh.md @@ -2,7 +2,7 @@ > [English 完整版](./README.md) · 本页是入口摘要,章节正文仍为英文。 -实用指南 + 可安装制品(Skills、配置模板、基础设施脚本),针对 [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(v0.10.0+)。 +实用指南 + 可安装制品(Skills、配置模板、基础设施脚本),针对 [NousResearch/hermes-agent](https://github.com/NousResearch/hermes-agent)(当前覆盖到 v0.12.0)。 ## 一键起步 @@ -15,7 +15,7 @@ curl -sSL https://raw.githubusercontent.com/OnlyTerp/hermes-optimization-guide/m ## 内容一览 -- **21 章中文正文**(见 `part1` 到 `part21`) — LightRAG、Telegram、MCP、安全、可观测性、远程沙箱 +- **23 章正文**(README 内章节 + `part6` 到 `part22`) — Curator、TUI、插件、LightRAG、Telegram、MCP、安全、可观测性、远程沙箱 - **13 个可安装 Skill**(`skills/`) — 审计、备份、依赖扫描、成本报告、Telegram 分类、PR 审查、收件箱分类、Hermes 周报、垃圾过滤、会议准备 等 - **5 套生产配置模板**(`templates/config/`) — 
minimum / telegram-bot / production / cost-optimized / security-hardened - **基础设施**(`templates/compose/`, `templates/caddy/`, `templates/systemd/`, `scripts/`) — Langfuse 自托管、Caddy 反代、systemd 硬化、VPS 引导脚本 diff --git a/README.md b/README.md index 2caacc2..4235752 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ # Hermes Optimization Guide [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](./LICENSE) -[![Hermes](https://img.shields.io/badge/Hermes-v0.10.0%20%28main%29-9146FF)](https://github.com/NousResearch/hermes-agent) -[![Last updated](https://img.shields.io/badge/Last%20updated-2026--04--17-brightgreen)](./CHANGELOG.md) -[![Parts](https://img.shields.io/badge/parts-21-blue)](#table-of-contents) +[![Hermes](https://img.shields.io/badge/Hermes-v0.12.0%20%282026.4.30%29-9146FF)](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.30) +[![Last updated](https://img.shields.io/badge/Last%20updated-2026--04--30-brightgreen)](./CHANGELOG.md) +[![Parts](https://img.shields.io/badge/parts-23-blue)](#table-of-contents) [![Skills](https://img.shields.io/badge/installable%20skills-13-blue)](./skills/) [![Configs](https://img.shields.io/badge/config%20templates-5-blue)](./templates/config/) [![CI](https://github.com/OnlyTerp/hermes-optimization-guide/actions/workflows/ci.yml/badge.svg)](./.github/workflows/ci.yml) [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](./CONTRIBUTING.md) -> **Tested on Hermes Agent v0.10.0 (v2026.4.16)** with post-release tracking for `main` · **21 parts, 13 installable skills, 5 opinionated configs, 4 reference architectures, one-command VPS bootstrap** · Battle-tested on a live production deployment +> **Current through Hermes Agent v0.12.0 (v2026.4.30)** · **23 parts, 13 installable guide skills, 5 opinionated configs, 4 reference architectures, one-command VPS bootstrap** · Updated for Curator, the Ink TUI, plugins, Teams/Yuanbao/QQBot, Bedrock/Azure/LM Studio, remote model 
catalogs, dashboard chat, and the latest skill-hub workflows > > Other languages: [中文](./README-zh.md) · [日本語](./README-ja.md) ### The End-to-End Hermes Guide — docs + runnable artifacts -Every part you need to go from fresh install to a production Hermes deployment that talks on 16 platforms, orchestrates Claude Code / Codex / Gemini CLI, plugs into any MCP server, traces every call in Langfuse, and runs heavy work on disposable Modal/Daytona sandboxes — without burning $100/day on Opus tokens. +Every part you need to go from fresh install to a production Hermes deployment that talks on 18+ built-in/plugin platforms, orchestrates Claude Code / Codex / Gemini CLI, plugs into any MCP server, traces every call in Langfuse, curates its own skills, and runs heavy work on disposable Modal/Daytona/Vercel sandboxes — without burning $100/day on frontier tokens. Unlike most guides, the prescriptions come with **working files**: [`skills/`](./skills) you can `ln -s` into `~/.hermes/skills/`, [`templates/config/`](./templates/config) you `cp` to `~/.hermes/config.yaml`, [`scripts/vps-bootstrap.sh`](./scripts/vps-bootstrap.sh) that takes a fresh VPS to production in one command. -*By Terp — [Terp AI Labs](https://x.com/OnlyTerp)* · Last updated **April 17, 2026** · [CHANGELOG](./CHANGELOG.md) · [ROADMAP](./ROADMAP.md) · [ECOSYSTEM](./ECOSYSTEM.md) +*By Terp — [Terp AI Labs](https://x.com/OnlyTerp)* · Last updated **April 30, 2026** · [CHANGELOG](./CHANGELOG.md) · [ROADMAP](./ROADMAP.md) · [ECOSYSTEM](./ECOSYSTEM.md) --- @@ -55,7 +55,7 @@ Prefer a 5-minute local-only setup? → **[docs/quickstart.md](./docs/quickstart | [`docs/quickstart.md`](./docs/quickstart.md) | 5-minute zero-to-Telegram-bot. | | [`ECOSYSTEM.md`](./ECOSYSTEM.md) | Curated directory of MCP servers, coding agents, dashboard plugins. | | [`ROADMAP.md`](./ROADMAP.md) · [`CHANGELOG.md`](./CHANGELOG.md) · [`CONTRIBUTING.md`](./CONTRIBUTING.md) | The usual suspects. 
| -| `part1-*.md` … `part21-*.md` | The guide itself. | +| README + `part1-*.md` … `part22-*.md` | The 23-part guide itself. | --- @@ -63,7 +63,7 @@ Prefer a 5-minute local-only setup? → **[docs/quickstart.md](./docs/quickstart ```mermaid flowchart LR - Inputs[16 platforms<br/>
Telegram · Discord · Slack<br/>
iMessage · WeChat · Email<br/>
SMS · Webhooks · Cron · Voice · CLI] --> Gateway + Inputs[18+ platforms<br/>
Telegram · Discord · Slack<br/>
QQBot · Yuanbao · Teams<br/>
iMessage · WeChat · Email<br/>
SMS · Webhooks · Cron · Voice · CLI] --> Gateway Gateway --> Router[Model Router<br/>
cost + context + capability] Router --> Providers[Anthropic · OpenAI<br/>
Google · Cerebras · Moonshot<br/>
z.ai · xAI · Local] Gateway --> Approval[Approval Layer<br/>
denylist · allowlist · quarantine] @@ -78,7 +78,7 @@ Full set of diagrams: [`diagrams/architecture.md`](./diagrams/architecture.md). ## Pick Your Path -This guide grew to 21 parts because *Hermes grew*. You don't have to read them all. Pick the shortest path to what you need: +This guide grew to 23 parts because *Hermes grew*. Six sections (Parts 1–5 plus SOUL.md) live in this README; Parts 6–22 live as separate files. You don't have to read them all — pick the shortest path to what you need: ### 🎯 "I just want it working in 10 minutes" [Part 1: Setup](#part-1-setup-stop-fumbling-with-installation) → [Part 12: Web Dashboard](./part12-web-dashboard.md) → done. Use the dashboard to point-and-click the rest. @@ -87,10 +87,10 @@ This guide grew to 21 parts because *Hermes grew*. You don't have to read them a [Part 1](#part-1-setup-stop-fumbling-with-installation) → [Part 4: Telegram](./part4-telegram-setup.md) → [Part 5: On-the-fly Skills](./part5-creating-skills.md) → [Part 7: Memory](./part7-memory-system.md). ### 🤖 "I want to drive Claude Code / Codex / Gemini from my phone" -[Part 18: Coding Agents](./part18-coding-agents.md) → [Part 17: MCP Servers](./part17-mcp-servers.md) → [Part 21: Remote Sandboxes](./part21-remote-sandboxes.md). +[Part 18: Coding Agents](./part18-coding-agents.md) → [Part 22: Latest Power Moves](./part22-latest-power-moves.md) → [Part 21: Remote Sandboxes](./part21-remote-sandboxes.md). ### 💼 "I'm running this in production" -[Part 19: Security Playbook](./part19-security-playbook.md) → [Part 20: Observability & Cost](./part20-observability.md) → [Part 16: Backup & Debug](./part16-backup-debug.md) → [Part 11: Gateway Recovery](./part11-gateway-recovery.md). +[Part 19: Security Playbook](./part19-security-playbook.md) → [Part 20: Observability & Cost](./part20-observability.md) → [Part 16: Backup & Debug](./part16-backup-debug.md) → [Part 22: Curator + Plugins](./part22-latest-power-moves.md). 
### 🧠 "I want the most capable agent possible, cost be damned" [Part 17: MCP Servers](./part17-mcp-servers.md) → [Part 18: Coding Agents](./part18-coding-agents.md) → [Part 3: LightRAG](./part3-lightrag-setup.md) → [Part 14: Fast Mode](./part14-fast-mode-watchers.md) → [Part 20: Observability](./part20-observability.md). @@ -103,61 +103,39 @@ This guide grew to 21 parts because *Hermes grew*. You don't have to read them a --- -## What's New (April 10–17, 2026) +## What's New (April 2026) -Two major Hermes releases dropped this week, **plus a stream of landmark PRs on `main`** that are targeted for v0.11. This guide is current as of both releases *and* the most impactful post-v0.10 merges. +Hermes moved fast after this repo's v0.10 refresh. The current stable target is **[v0.12.0 — 2026.4.30 — "Curator"](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.30)**, following **[v0.11.0 — 2026.4.23 — "Interface"](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.23)**. This update removes speculative "cooking on main" notes and folds the landed features into the guide. -### [v0.9.0 — 2026.4.13 — "Everywhere"](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.13) +### v0.12.0 — "Curator" -- **Local web dashboard** (`hermes dashboard`) — full browser UI for config, API keys, sessions, logs, analytics, cron, and skills. See [Part 12](./part12-web-dashboard.md). -- **Fast Mode** (`/fast`) — priority-tier inference on OpenAI & Anthropic, available on every gateway platform, not just the CLI. See [Part 14](./part14-fast-mode-watchers.md). -- **Three new messaging adapters** — iMessage (via BlueBubbles), WeChat/Weixin, and WeCom (Enterprise WeChat). Brings the total to **16 platforms**. See [Part 15](./part15-new-platforms.md). -- **Android / Termux** tested install path — run the full Hermes CLI on your phone. See [Part 15](./part15-new-platforms.md#android--termux-running-hermes-on-your-phone). 
-- **Background process monitoring** (`watch_patterns`) — real-time regex event hooks on long-running processes, no more polling. See [Part 14](./part14-fast-mode-watchers.md#background-process-monitoring-watch_patterns). -- **Pluggable context engine** — swap in a custom engine to filter memory, inject domain context, or pre-summarize tool output. See [Part 14](./part14-fast-mode-watchers.md#pluggable-context-engine). -- **`hermes backup` / `hermes import`** — first-class portable backups with interactive conflict resolution. See [Part 16](./part16-backup-debug.md). -- **Dashboard plugins** — third-party tabs that extend the web UI. See [Part 12](./part12-web-dashboard.md#dashboard-plugins-extend-the-ui). -- **Proxy support, webhook secret validation, SSRF protection, log redaction** — hardening pass across every adapter. See [Part 16](./part16-backup-debug.md#security-hardening-v09--v010-notes). +- **Autonomous Curator** — `hermes curator` grades, consolidates, pins, archives, and restores agent-created skills on a default 7-day cadence. See [Part 22](./part22-latest-power-moves.md#1-turn-on-curator-before-your-skill-library-becomes-noise). +- **Self-improvement loop upgraded** — the review fork is rubric-based, active-skill-biased, restricted to memory + skills tools, and correctly inherits the parent provider/model/credentials. See [Part 5](./part5-creating-skills.md#curator-v012-keep-the-skill-library-from-rotting). +- **Provider expansion** — LM Studio became a first-class provider; GMI Cloud, Azure AI Foundry, MiniMax OAuth, Tencent TokenHub, AWS Bedrock, NVIDIA NIM, Vercel AI Gateway, Step Plan, Gemini OAuth, and Codex OAuth are now part of the realistic routing menu. See [Part 9](./part9-custom-models.md). +- **Plugin-first gateway** — gateway platforms can ship as plugins; Microsoft Teams is the first plugin-shipped platform, and Tencent Yuanbao is the 18th native platform. See [Part 15](./part15-new-platforms.md#2026-update-qqbot-yuanbao-and-teams). 
+- **Bundled plugins worth enabling** — Spotify tools, Google Meet transcription/duplex audio, Langfuse observability, achievements, extra image providers, and dashboard skins. See [Part 22](./part22-latest-power-moves.md#4-use-plugins-for-integrations-not-one-off-scripts). +- **Dashboard caught up** — Models tab, auxiliary-model configuration, dashboard Chat backed by the real `hermes --tui`, plugin slots, themes, update/restart controls, and better session analytics. See [Part 12](./part12-web-dashboard.md). +- **TUI is now the primary interface** — `hermes --tui` adds sticky composer, slash autocomplete, live tool cards, `/steer`, `/queue`, `/background`, `/busy`, `/indicator`, voice parity, LaTeX, and better resume/delete flows. See [Part 22](./part22-latest-power-moves.md#2-use-the-tui-as-your-daily-driver). +- **Remote model catalog** — OpenRouter and Nous Portal picker lists update from a hosted manifest, so users see new models without waiting for a Hermes release. See [Part 9](./part9-custom-models.md#remote-model-catalog-stop-hardcoding-this-weeks-winner). +- **Cron got serious** — per-job `workdir`, per-job toolsets, `context_from` chaining, and zero-LLM direct webhook delivery make scheduled automations cheaper and more predictable. +- **Tool/runtime hardening** — hardline command blocklists, Docker host-user bind mounts, Vercel Sandbox backend, SSH permission fixes, local Chromium for localhost/LAN browser tasks, and richer approval hooks. -### [v0.10.0 — 2026.4.16 — "Tool Gateway"](https://github.com/NousResearch/hermes-agent/releases/tag/v2026.4.16) +### v0.11.0 — "Interface" -- **Nous Tool Gateway** — paid [Nous Portal](https://portal.nousresearch.com) subscribers get web search (Firecrawl), image generation (FAL FLUX 2 Pro), TTS (OpenAI), and browser automation (Browser Use) through their subscription — **no extra API keys**. Per-tool `use_gateway: true` with clean precedence over direct keys. See [Part 13](./part13-tool-gateway.md). 
-- **Native xAI and Xiaomi MiMo providers** — both now have first-class adapters with provider-specific features (Grok's live-X search, MiMo's reasoning modes), not just OpenRouter pass-through. See [Part 9](./part9-custom-models.md). -- **`/compress <topic>`** — guided compression that preserves detail relevant to a topic. See [Part 14](./part14-fast-mode-watchers.md#compress-topic--guided-compression). -- **`/debug` + `hermes debug share`** — single-command diagnostic bundler with upload endpoint for bug reports. See [Part 16](./part16-backup-debug.md#debug-and-hermes-debug-share). -- **Approval bypass inheritance for subagents** — subagents inherit the parent session's approval posture; override per delegation. See [Part 16](./part16-backup-debug.md#approval-bypass-for-trusted-subagents). -- `HERMES_ENABLE_NOUS_MANAGED_TOOLS` env flag **removed** — replaced by clean subscription detection + per-tool `use_gateway`. `hermes upgrade` migrates automatically. +- **Ink TUI rewrite** — `hermes --tui` is a React/Ink interface over a Python JSON-RPC backend with streaming, status bars, pickers, and subagent observability. +- **Transport layer rewrite** — Anthropic, Chat Completions, OpenAI Responses, and Bedrock transports are separate, making native providers more reliable than generic OpenAI-compatible shims. +- **AWS Bedrock native provider** — IAM credentials, Converse API, cross-region inference profiles, and Bedrock Guardrails. See [Part 9](./part9-custom-models.md#aws-bedrock-and-azure-ai-foundry-enterprise-routing-without-proxy-glue). +- **Auxiliary model UI** — choose separate models for compression, vision, session search, title generation, and curator instead of silently burning your main model on side tasks. +- **Smarter delegation** — orchestrator-role subagents, configurable spawn depth, and file coordination between sibling workers reduce multi-agent clobbering. See [Part 18](./part18-coding-agents.md). 
+- **Plugin and hook surface expanded** — plugins can register slash commands, dispatch tools, block tool execution, rewrite tool results, transform terminal output, add image backends, and add dashboard tabs. +- **Webhook direct delivery** — push alerts to a platform chat without waking the LLM, ideal for uptime checks and event streams. -### 🔥 Cooking on `main` (past 72 hours, targeting v0.11) +### Still important from v0.9/v0.10 -The stream of merges between April 14–17 is unusually large — this is why we added five new parts to this guide: - -- **Gemini CLI OAuth inference provider** — OAuth login + 1500 req/day free tier. [#11270](https://github.com/NousResearch/hermes-agent/pull/11270). See [Part 9](./part9-custom-models.md#gemini-cli-oauth--free-1500-reqday). -- **Gemini TTS (7th voice provider)** — `gemini-2.5-flash-preview-tts`, Kore voice, native WAV. [#10922](https://github.com/NousResearch/hermes-agent/issues/10922). See [Part 9](./part9-custom-models.md#gemini-tts--7th-voice-provider). -- **Multi-model FAL image gen picker** — `hermes tools` now lets you pick FLUX / Imagen / SDXL variants without editing YAML. [#11265](https://github.com/NousResearch/hermes-agent/pull/11265). -- **Bulk file sync with sync-back on teardown** — SSH/Modal/Daytona remote sandboxes now download only diffed files on shutdown, with SIGINT-safe rollback and flock serialization. [#8018](https://github.com/NousResearch/hermes-agent/pull/8018). See **[Part 21 — Remote Sandboxes](./part21-remote-sandboxes.md)**. -- **TCP keepalives for provider connections** — dead connections detected in 60s instead of silently hanging. [#11277](https://github.com/NousResearch/hermes-agent/pull/11277). -- **GLM 5.1 in OpenCode Go catalogs** — fastest open-weights tool-use model now routable through OpenCode. [#11269](https://github.com/NousResearch/hermes-agent/pull/11269). -- **Azure OpenAI GPT-5.x via `/chat/completions`** — previously locked to `/responses`. 
[#10086](https://github.com/NousResearch/hermes-agent/pull/10086). -- **`concept-diagrams` skill** — auto-renders Mermaid diagrams for explanations. Merged April 17. -- **Feishu CARD-type WebSocket** — interactive cards finally work in enterprise Feishu deployments. -- **OCAS skill sync** (feature proposal, April 15) — sync your local skills to a central repo. Watch [#11363](https://github.com/NousResearch/hermes-agent/issues/11363). - -### 🆕 Brand-new parts in this guide (April 17) - -- **[Part 17 — MCP Servers](./part17-mcp-servers.md)** — the viral tool-integration standard. Finally documented for Hermes. GitHub, Postgres, Supabase, Cloudflare, mem0, writing your own, and the `sampling/createMessage` killer feature. -- **[Part 18 — Delegating to Coding Agents](./part18-coding-agents.md)** — Claude Code, Codex, Gemini CLI, OpenCode, and Aider. Print-mode delegation, thread-bound runtimes (the OpenClaw pattern), ACP as both client and server, cost routing, git isolation. -- **[Part 19 — Security Playbook](./part19-security-playbook.md)** — defending against the April 15 "Comment and Control" prompt-injection attack, plus the full Hermes hardening posture: provenance labels, approval layers, secrets isolation, webhook sig validation, SSRF guards, MCP trust levels, quarantine mode. -- **[Part 20 — Observability & Cost Control](./part20-observability.md)** — Langfuse, Helicone, OpenTelemetry → Phoenix. The cost-routing playbook that drops typical feature-implementation spend by 90%. Eval-driven regression protection. -- **[Part 21 — Remote Sandboxes & Bulk File Sync](./part21-remote-sandboxes.md)** — SSH, Modal, Daytona, Fly Machines, E2B. "Phone drives, beefy remote does the work." Sync-back on teardown with SIGINT safety. - -### Viral model / provider developments (past 72h, now in the guide) - -- **GPT-5.4** and **GPT-5.4-Cyber** (OpenAI, Apr 15) — reasoning flagship and the first LLM-as-a-security-analyst. 
See [Part 9 model cheat sheet](./part9-custom-models.md#flagship-model-cheat-sheet-april-17-2026). -- **Claude Mythos** (Anthropic, cyber-focused, invite-only). -- **Gemini 3 Flash Preview** (Google) — 1M context, low latency, $0.50/$3 per MTok. -- **Kimi K2.5** (Moonshot) — arguably the best price/quality ratio in coding models. -- **GLM 5.1** (z.ai) — the strongest open-weights tool-use model as of this week. +- **Local web dashboard** (`hermes dashboard`) — config, API keys, sessions, logs, analytics, cron, skills, models, plugins, and optional browser Chat. See [Part 12](./part12-web-dashboard.md). +- **Nous Tool Gateway** — Nous Portal subscribers can route web search, image generation, TTS, and browser automation through the subscription instead of juggling separate API keys. See [Part 13](./part13-tool-gateway.md). +- **Fast Mode** (`/fast`) and **guided compression** (`/compress <topic>`) still matter, but they are no longer the whole story; pair them with auxiliary model routing and `/steer`. See [Part 14](./part14-fast-mode-watchers.md). +- **MCP + coding-agent delegation + remote sandboxes** remain the high-leverage developer stack. See [Part 17](./part17-mcp-servers.md), [Part 18](./part18-coding-agents.md), and [Part 21](./part21-remote-sandboxes.md). --- @@ -172,19 +150,20 @@ The stream of merges between April 14–17 is unusually large — this is why we 7. [Context Compression](./part6-context-compression.md) — Fix the silent context loss bug, configure compression thresholds, survive long sessions 8. [Memory System](./part7-memory-system.md) — The three-tier memory architecture: persistent facts, conversation recall, procedural memory 9. [Subagent Patterns](./part8-subagent-patterns.md) — Orchestrator/worker delegation, ACP subagents, parallel task execution -10. [Custom Model Providers](./part9-custom-models.md) — Cerebras, Fireworks, Ollama, native xAI/MiMo/z.ai/Kimi/MiniMax/Arcee, Nous Portal, model aliases, fallback chains +10. 
[Custom Model Providers](./part9-custom-models.md) — Bedrock, Azure AI Foundry, LM Studio, Gemini OAuth, Codex OAuth, OpenRouter routing, model aliases, fallback chains 11. [SOUL.md Anti-Patterns](./part10-soul-antipatterns.md) — What makes an agent annoying vs useful, the formula that works 12. [Gateway Recovery](./part11-gateway-recovery.md) — Crash detection, auto-recovery, common failure modes, health checks -13. [Web Dashboard](./part12-web-dashboard.md) — **New.** `hermes dashboard`, the full browser UI — config, keys, sessions, logs, analytics, cron, skills, REST API, plugins -14. [Nous Tool Gateway](./part13-tool-gateway.md) — **New.** Web search, image gen, TTS, and browser automation through a single Nous Portal subscription -15. [Fast Mode & Background Watchers](./part14-fast-mode-watchers.md) — **New.** `/fast` priority tier, `watch_patterns` real-time process monitoring, pluggable context engine, `/compress <topic>` -16. [New Platforms (iMessage, WeChat, Android)](./part15-new-platforms.md) — **New.** BlueBubbles/iMessage, Weixin/WeCom, Android via Termux — the full 16-platform lineup -17. [Backup, Import & `/debug`](./part16-backup-debug.md) — **New.** Portable `hermes backup`/`import`, `/debug` bundler, `hermes debug share`, security hardening -18. [MCP Servers](./part17-mcp-servers.md) — **NEW (April 17).** The viral tool-protocol standard. stdio + HTTP transports, sampling, the 14 MCP servers worth installing today, writing your own -19. [Delegating to Coding Agents](./part18-coding-agents.md) — **NEW (April 17).** Claude Code, Codex, Gemini CLI, OpenCode, Aider. Print-mode, thread-bound sessions (OpenClaw pattern), ACP, git isolation, cost routing -20. [Security Playbook](./part19-security-playbook.md) — **NEW (April 17).** Defending against "Comment and Control" prompt injection. Provenance labels, approval layers, secrets redaction, MCP trust model, quarantine mode -21. 
[Observability & Cost Control](./part20-observability.md) — **NEW (April 17).** Langfuse, Helicone, OpenTelemetry → Phoenix. The cost-routing playbook that drops spend 90%. Eval-driven regression -22. [Remote Sandboxes & Bulk File Sync](./part21-remote-sandboxes.md) — **NEW (April 17).** SSH, Modal, Daytona, Fly Machines, E2B. "Phone drives, beefy remote does the work." Diff-based sync-back on teardown +13. [Web Dashboard](./part12-web-dashboard.md) — `hermes dashboard`, browser Chat via real TUI, models/plugins tabs, config, keys, sessions, logs, analytics, cron +14. [Nous Tool Gateway](./part13-tool-gateway.md) — Web search, image gen, TTS, and browser automation through a single Nous Portal subscription +15. [Fast Mode & Background Watchers](./part14-fast-mode-watchers.md) — `/fast`, `/steer`, `/queue`, `watch_patterns`, pluggable context engine, `/compress <topic>` +16. [New Platforms (iMessage, WeChat, Android)](./part15-new-platforms.md) — BlueBubbles/iMessage, Weixin/WeCom, QQBot, Yuanbao, Teams plugin, Android via Termux +17. [Backup, Import & `/debug`](./part16-backup-debug.md) — Portable `hermes backup`/`import`, `/debug` bundler, `hermes debug share`, security hardening +18. [MCP Servers](./part17-mcp-servers.md) — The tool-protocol standard. stdio + HTTP transports, sampling, trust boundaries, server shortlist, writing your own +19. [Delegating to Coding Agents](./part18-coding-agents.md) — Claude Code, Codex, Gemini CLI, OpenCode, Aider. Print-mode, orchestrator subagents, ACP, git isolation, cost routing +20. [Security Playbook](./part19-security-playbook.md) — Prompt-injection defense, provenance labels, approval layers, secrets redaction, MCP trust model, hardline blocks +21. [Observability & Cost Control](./part20-observability.md) — Langfuse plugin, Helicone, OpenTelemetry → Phoenix, auxiliary routing, eval-driven regressions +22. [Remote Sandboxes & Bulk File Sync](./part21-remote-sandboxes.md) — SSH, Modal, Daytona, Vercel Sandbox, Fly Machines, E2B. 
Diff-based sync-back on teardown +23. [Latest Power Moves](./part22-latest-power-moves.md) — Curator, TUI habits, context-file hygiene, plugins, dashboard Chat, cron chaining, and the 2026 upgrade checklist --- @@ -328,12 +307,12 @@ Supported providers and recommended models: |----------|-----------|----------|-------------| | **Nous Portal** | Hermes 5, Hermes 4 405B | Built-in [Tool Gateway](./part13-tool-gateway.md) — web search/image/TTS/browser with no extra keys | Auth via `hermes model` | | **Anthropic** | Opus 4.6, Sonnet 4 | Best reasoning, complex tasks, coding, `/fast` priority tier | `ANTHROPIC_API_KEY` | -| **OpenAI** | GPT-5.4 Pro, o3, GPT-4.1 | Strong tool use, fast inference, huge context, `/fast` priority tier | `OPENAI_API_KEY` | +| **OpenAI** | GPT-5-class, o-series, GPT-4.1 | Strong tool use, fast inference, huge context, `/fast` priority tier | `OPENAI_API_KEY` | | **Xiaomi MiMo** | MiMo V2 Pro *(native adapter)* | Fast, cheap, native reasoning modes, great for orchestration | `XIAOMI_API_KEY` | | **xAI** | Grok 3, Grok 3 Mini *(native adapter)* | Fast, good reasoning, native live-X search | `XAI_API_KEY` | | **Kimi / Moonshot** | Kimi 2.5 | Big context, excellent for entity extraction / LightRAG ingestion | `MOONSHOT_API_KEY` | | **z.ai / GLM** | GLM-5, GLM-5 Air | Strongest open-weights model, great for translation + tools | `ZAI_API_KEY` | -| **Google** | Gemini 3.1 Pro | Massive context (2M tokens), multimodal, cheap | `GEMINI_API_KEY` | +| **Google** | Gemini Pro/Flash | Massive context, multimodal, cheap; OAuth supported via `hermes model` | `GEMINI_API_KEY` or OAuth | | **MiniMax** | M2.7 | Good balance of speed and quality | `MINIMAX_API_KEY` | | **Cerebras** | Llama 4 Scout, Qwen 3 32B | Blazing fast inference (2000+ tok/s), cheap | `CEREBRAS_API_KEY` | | **Groq** | Llama 4, Qwen 3 | Very fast inference, limited context | `GROQ_API_KEY` | @@ -353,7 +332,7 @@ Run models on your own hardware for free. 
Recommended local models: | Nemotron 30B | 30B | Fine-tunable, good general purpose | 16GB | | nomic-embed-text | 274M | Free embeddings for memory search | 2GB | -> **Recommendation:** Use a cloud frontier model (Opus 4.6, GPT-5.4 Pro, Gemini 3.1 Pro) as your primary and a local Ollama model (Qwen 3.5, Gemma 4) for embeddings, fallback, and simple tasks. Best of both worlds. +> **Recommendation:** Use a cloud frontier model (Anthropic/OpenAI/Gemini) as your primary and a local Ollama or LM Studio model for embeddings, fallback, and simple tasks. Best of both worlds. You can configure **multiple providers** with automatic fallback. If one goes down, Hermes switches to the next. @@ -1770,6 +1749,7 @@ You've now got the full picture: - **[Part 3: LightRAG](#part-3-lightrag--graph-rag-that-actually-works)** — Graph-based knowledge - **[Part 4: Telegram](#part-4-telegram-setup-chat-from-anywhere)** — Mobile access - **[Part 5: On-the-Fly Skills](#part-5-on-the-fly-skills-let-hermes-build-its-own-playbook)** — Self-improving workflows +- **[Part 22: Latest Power Moves](./part22-latest-power-moves.md)** — Curator, TUI habits, plugins, and the current upgrade checklist Start with setup, add what you need, and let Hermes build the rest. diff --git a/ROADMAP.md b/ROADMAP.md index e652c5b..724bb0e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -24,6 +24,7 @@ What's landing next. PRs welcome. 
## Done (recent) +- ✅ 2026-04-30 — v0.11/v0.12 refresh: Curator, TUI, plugins, Bedrock/Azure/LM Studio, Teams/Yuanbao/QQBot, Vercel Sandbox, Part 22 - ✅ 2026-04-17 — Interactive config wizard (`docs/wizard/`) - ✅ 2026-04-17 — 4 reference architectures (homelab / solo-dev / small-agency / road-warrior) - ✅ 2026-04-17 — CI (markdown-link-check + yamllint + skill frontmatter validator) diff --git a/benchmarks/README.md b/benchmarks/README.md index 15be5ca..78b4815 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -2,7 +2,7 @@ Real, reproducible cost + latency benchmarks across flagship models, run on standardized tasks. This folder contains the **methodology**, the **task set**, and the **raw results**. -> ⚠ Benchmark numbers drift as providers re-price and models update. The committed data is dated. Re-run with `benchmarks/run.sh` (stub below) to refresh. +> ⚠ Benchmark numbers drift as providers re-price and models update. The committed data is a dated April 2026 snapshot. Re-run with `benchmarks/run.sh` (stub below) to refresh. --- @@ -28,7 +28,7 @@ Real, reproducible cost + latency benchmarks across flagship models, run on stan --- -## Current snapshot — 2026-04-17 +## Dated snapshot — 2026-04-17 Retail list prices; some providers may offer committed-use discounts. diff --git a/docs/outreach/blog-post-long.md b/docs/outreach/blog-post-long.md index 1c5764f..3b40d79 100644 --- a/docs/outreach/blog-post-long.md +++ b/docs/outreach/blog-post-long.md @@ -21,7 +21,7 @@ So I wrote the opposite. ## What "ships code" means -The [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) has 21 chapters of documentation. That's the part that looks like every other guide. +The [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) has 23 parts of documentation. That's the part that looks like every other guide. 
But it also has, in the same repo: @@ -52,14 +52,14 @@ Most guides stop before that because their authors never did the work, or did it This one gets its own section because it's the part readers care most about. -The default advice on cost is "use cheaper models". But you can't just set `default: gpt-5.4-mini` — for certain tasks (nuanced reasoning, long-context analysis, hard coding) it will silently hurt quality and you'll blame the framework. +The default advice on cost is "use cheaper models". But you can't just set the cheapest default model and forget it — for certain tasks (nuanced reasoning, long-context analysis, hard coding) it will silently hurt quality and you'll blame the framework. Here's what actually works, derived from our benchmarks: 1. **Triage** (~60% of traffic for a personal bot): Gemini 2.5 Flash. Cheap, fast, 1M context. Routes to the right skill or punts to the right model. 2. **Classification** (tagging, routing, spam-trap): Cerebras Llama 70B on a free tier. Effectively zero cost. -3. **Default coding:** Kimi K2.5. Cheapest competent coder, good for 80% of changes. -4. **Hard coding / architecture:** Anthropic Sonnet 4.5. Opt-in (say "use sonnet" or mark the skill with `model: anthropic/claude-sonnet-4-5`). +3. **Default coding:** Kimi/Moonshot. Cheap competent coder, good for routine changes. +4. **Hard coding / architecture:** Anthropic Sonnet. Opt-in (say "use sonnet" or mark the skill with `model: anthropic/claude-sonnet`). 5. **Long-context research:** Gemini 2.5 Pro. 1M context + reasoning. With prompt caching on (Anthropic, OpenAI), `prefer_cached: true` as a default, and Fast Mode *off* unless you explicitly need it — the typical user month drops from $150 to $20–40. diff --git a/docs/outreach/hacker-news-post.md b/docs/outreach/hacker-news-post.md index 0e05f56..52fbbf9 100644 --- a/docs/outreach/hacker-news-post.md +++ b/docs/outreach/hacker-news-post.md @@ -14,7 +14,7 @@ Author here. 
Context on what this is and why: Hermes (Nous Research, ~94K GH stars) is the agent framework I've been using for a year. Most of the existing community guides explain the architecture but don't give you anything to run — you read 15 parts, still have to write your own `config.yaml`, your own cron skills, your own systemd hardening. -This guide is the other direction: 21 parts of actual documentation *plus* +This guide is the other direction: 23 parts of actual documentation *plus* - **13 installable `SKILL.md` files** (audit-mcp, rotate-secrets, audit-approval-bypass, nightly-backup, weekly-dep-audit, cost-report, telegram-triage, pr-review, release-notes, daily-inbox-triage, hermes-weekly, spam-trap, meeting-prep) — drop them into `~/.hermes/skills/` or symlink them in - **5 opinionated configs** for the 5 real personas (minimum / telegram-bot / production / cost-optimized / security-hardened) — every non-obvious field commented @@ -24,7 +24,7 @@ This guide is the other direction: 21 parts of actual documentation *plus* - **Reproducible cost benchmarks** — 12 flagship models × 5 canonical tasks (triage / summarize / codefix / deepreason / bulk-extract), methodology included, rerun-able with `hermes evals run` - **ECOSYSTEM.md** — 40+ curated MCP servers / coding agents / dashboard plugins -The part I wanted to share specifically for HN: the **cost routing playbook** (Part 20) — five rules that drop typical agent spend ~90% (Gemini Flash for triage, Cerebras Llama for classification, Kimi K2.5 as default coder, Sonnet only when you explicitly opt in, Gemini 2.5 Pro for long-context). The benchmarks folder lets you verify yourself on your own workload. +The part I wanted to share specifically for HN: the **cost routing playbook** (Part 20) — five rules that drop typical agent spend ~90% (Gemini Flash for triage, Cerebras Llama for classification, Kimi/Moonshot as default coder, Sonnet only when you explicitly opt in, Gemini Pro for long-context). 
The benchmarks folder lets you verify yourself on your own workload. And the **defensive security playbook** (Part 19) — written after the Apr 15 "Comment and Control" cross-vendor prompt-injection disclosure that hit Claude Code + Gemini CLI + Copilot Agent. Seven layers: provenance labels, approval, secret isolation, webhook signatures, SSRF, MCP trust levels, quarantine profiles. If your coding agent reads arbitrary PR bodies or emails, this is the hardening posture I wish I'd had 6 months ago. diff --git a/docs/outreach/launch-tweet-thread.md b/docs/outreach/launch-tweet-thread.md index e4025fe..84adc54 100644 --- a/docs/outreach/launch-tweet-thread.md +++ b/docs/outreach/launch-tweet-thread.md @@ -7,7 +7,7 @@ **1/8** I got tired of Hermes guides that explain the architecture but don't give you anything to run, so I shipped the opposite: -21 parts of documentation **plus** 13 installable skills, 5 production configs, 4 reference architectures, a VPS bootstrap script, hardened systemd units, a reproducible cost benchmark, and an in-browser config wizard. +23 parts of documentation **plus** 13 installable skills, 5 production configs, 4 reference architectures, a VPS bootstrap script, hardened systemd units, a reproducible cost benchmark, and an in-browser config wizard. 
github.com/OnlyTerp/hermes-optimization-guide @@ -60,7 +60,7 @@ Part 19 is the defensive playbook: 7 layers (provenance, approval, secret isolat Cost routing playbook (Part 20) drops a typical workload by ~90%: - Triage → Gemini Flash or Cerebras - Classification → Cerebras Llama (~free) -- Default coding → Kimi K2.5 +- Default coding → Kimi/Moonshot - Hard coding → Sonnet (explicit opt-in) - Long context → Gemini 2.5 Pro diff --git a/docs/outreach/nous-upstream-pr-body.md b/docs/outreach/nous-upstream-pr-body.md index cc59c0a..ec6b2e7 100644 --- a/docs/outreach/nous-upstream-pr-body.md +++ b/docs/outreach/nous-upstream-pr-body.md @@ -19,7 +19,7 @@ Add a new section to `README.md` (just below "Documentation" or "Quick Start"): Independent guides written by Hermes users. These are not official, but have been vetted by maintainers for accuracy. -- [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) — 21-part guide covering LightRAG, Telegram deployment, MCP, security hardening, cost routing, observability, and remote sandboxes. Ships installable skills, 5 production configs, a VPS bootstrap script, and reproducible cost benchmarks. +- [Hermes Optimization Guide](https://github.com/OnlyTerp/hermes-optimization-guide) — 23-part guide covering LightRAG, Telegram deployment, MCP, security hardening, cost routing, observability, and remote sandboxes. Ships installable skills, 5 production configs, a VPS bootstrap script, and reproducible cost benchmarks. _Maintain your own? Open a PR adding it here._ ```` @@ -30,7 +30,7 @@ _Maintain your own? Open a PR adding it here._ > > I've been writing a community optimization guide since v0.9.0 shipped, and have gotten enough "where should I link this so people can find it?" messages that I wanted to propose an upstream spot: a small **Community Guides** section in the README. 
> -> The guide itself is at https://github.com/OnlyTerp/hermes-optimization-guide — 21 parts of documentation, 13 installable `SKILL.md` files, 5 production configs, 4 reference architectures, a VPS bootstrap script, an in-browser config wizard, and a reproducible cost benchmark. MIT license. CHANGELOG + ROADMAP are real. I cross-check every release note on `main` and update within 72h. +> The guide itself is at https://github.com/OnlyTerp/hermes-optimization-guide — 23 parts of documentation, 13 installable `SKILL.md` files, 5 production configs, 4 reference architectures, a VPS bootstrap script, an in-browser config wizard, and a reproducible cost benchmark. MIT license. CHANGELOG + ROADMAP are real. I cross-check every release note on `main` and update within 72h. > > Totally understand if you'd rather maintain a separate page, or curate more carefully before pointing at third-party content. Happy to iterate on the section copy, add more guides as they show up, or even move the list to `docs/community.md` if that fits better. > diff --git a/docs/quickstart.md b/docs/quickstart.md index 117cf86..42cbce8 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -6,8 +6,9 @@ From zero to working Telegram bot. - A Linux, macOS, or WSL machine (anything with bash) - A Telegram account -- An Anthropic API key — [console.anthropic.com](https://console.anthropic.com/settings/keys) -- (Optional) A Google API key — [aistudio.google.com](https://aistudio.google.com/apikey) for free-tier routing +- An Anthropic API key for the default model +- A Google API key — [aistudio.google.com](https://aistudio.google.com/apikey) for Gemini Flash classification + LightRAG LLM in the Telegram template +- An OpenAI API key — [platform.openai.com/api-keys](https://platform.openai.com/api-keys) for LightRAG embeddings in the Telegram template ## Step 1 — Install Hermes @@ -41,7 +42,8 @@ Create `~/.hermes/.env`: ```bash cat > ~/.hermes/.env <<'EOF' ANTHROPIC_API_KEY=sk-ant-... 
-GOOGLE_API_KEY=AIza... +OPENAI_API_KEY=sk-... # required by telegram-bot.yaml for LightRAG embeddings +GOOGLE_API_KEY=AIza... # required by telegram-bot.yaml for Gemini Flash classification + LightRAG LLM TELEGRAM_ADMIN_BOT_TOKEN=1234567890:ABC... TELEGRAM_OWNER_ID=1234567 # your numeric ID from @userinfobot EOF @@ -76,6 +78,7 @@ Now try: ## Step 7 — Level up - **More platforms:** [Part 4 (Telegram deep-dive)](../part4-telegram-setup.md), [Part 15 (iMessage/WeChat/Android)](../part15-new-platforms.md) +- **Latest features:** [Part 22 (Curator, TUI, plugins)](../part22-latest-power-moves.md) - **Memory that reasons:** [Part 3 (LightRAG)](../part3-lightrag-setup.md) - **Tools:** [Part 17 (MCP servers)](../part17-mcp-servers.md) - **Coding agent driver:** [Part 18 (Claude Code, Codex, Gemini CLI)](../part18-coding-agents.md) diff --git a/docs/reference-architectures/homelab.md b/docs/reference-architectures/homelab.md index 4f04127..e2768c9 100644 --- a/docs/reference-architectures/homelab.md +++ b/docs/reference-architectures/homelab.md @@ -83,9 +83,9 @@ models: routing: - when: task == "reasoning" - use: anthropic/claude-sonnet-4-5 + use: anthropic/claude-sonnet - when: task == "coding" && complexity == "high" - use: anthropic/claude-sonnet-4-5 + use: anthropic/claude-sonnet gateways: cli: @@ -129,7 +129,7 @@ hermes /reload ## Honest tradeoffs - **Latency.** Local 70B Q4 ≈ 20–40 tok/s on a 3090. Flagship Sonnet ≈ 60–90 tok/s. Most "work" queries you won't notice; coding/deep reasoning you will. -- **Quality.** Current open models (Qwen 2.5 Coder, Llama 3.1 70B, Kimi K2.5 local) are *close* on many tasks, *behind* on long-context + nuanced reasoning. Routing lets you hand the hard stuff to Sonnet. +- **Quality.** Current open/local models (Qwen Coder, Llama, Kimi-class local models) are *close* on many tasks, *behind* on long-context + nuanced reasoning. Routing lets you hand the hard stuff to Sonnet. - **Patching.** You maintain the box. 
Enable unattended-upgrades (the bootstrap script does) and schedule monthly reboots. - **Reachability.** Tailscale is solid but means "no Tailscale = no Hermes". Keep a cellphone backup admin bot, or run a tiny cloud relay. - **Backups.** Set [`nightly-backup`](../../skills/ops/nightly-backup/SKILL.md) to write encrypted archives to a second physical disk — not the same RAID array. diff --git a/docs/reference-architectures/small-agency.md b/docs/reference-architectures/small-agency.md index fe14374..e3a9a2c 100644 --- a/docs/reference-architectures/small-agency.md +++ b/docs/reference-architectures/small-agency.md @@ -90,10 +90,10 @@ systemctl enable --now hermes@alice.service Use [`templates/config/production.yaml`](../../templates/config/production.yaml) as the base. Key rules: - **Triage** (most traffic): Cerebras Llama 70B — free-ish tier -- **Default coding:** Kimi K2.5 (cheapest competent coder) +- **Default coding:** Kimi/Moonshot (cheap competent coder) - **"Hard" coding / architecture:** Anthropic Sonnet — explicit opt-in - **Long-context research:** Gemini 2.5 Pro -- **Deep reasoning:** GPT-5.4 (opt-in) +- **Deep reasoning:** OpenAI reasoning model (opt-in) With weekly `cost-report` → Discord ops channel, cost anomalies surface before the invoice. diff --git a/docs/wizard/index.html b/docs/wizard/index.html index 4950f33..4dc90ed 100644 --- a/docs/wizard/index.html +++ b/docs/wizard/index.html @@ -75,13 +75,13 @@

Hermes Config Wizard

2. Default model

Can be overridden per-skill.

@@ -187,7 +187,7 @@

Hermes Config Wizard

rest of the form is pre-filled so Question 1 actually drives output. */ const PERSONA_PRESETS = { 'minimum': { - default_model: 'anthropic/claude-sonnet-4-5', + default_model: 'anthropic/claude-sonnet', memory: 'vector', gateways: { cli: true, telegram: false, discord: false, slack: false, email: false, webhook: false }, mcps: { github: false, postgres: false, cloudflare: false, linear: false, filesystem: false, mem0: false }, @@ -196,7 +196,7 @@

Hermes Config Wizard

crons: { backup: false, deps: false, cost: false, mcp: false }, }, 'telegram-bot': { - default_model: 'anthropic/claude-sonnet-4-5', + default_model: 'anthropic/claude-sonnet', memory: 'lightrag', gateways: { cli: true, telegram: true, discord: false, slack: false, email: false, webhook: false }, mcps: { github: true, postgres: false, cloudflare: false, linear: false, filesystem: false, mem0: false }, @@ -205,7 +205,7 @@

Hermes Config Wizard

crons: { backup: true, deps: false, cost: true, mcp: false }, }, 'production': { - default_model: 'anthropic/claude-sonnet-4-5', + default_model: 'anthropic/claude-sonnet', memory: 'lightrag', gateways: { cli: true, telegram: true, discord: true, slack: true, email: true, webhook: true }, mcps: { github: true, postgres: true, cloudflare: true, linear: true, filesystem: true, mem0: false }, @@ -223,7 +223,7 @@

Hermes Config Wizard

crons: { backup: true, deps: false, cost: true, mcp: false }, }, 'security-hardened': { - default_model: 'anthropic/claude-sonnet-4-5', + default_model: 'anthropic/claude-sonnet', memory: 'vector', gateways: { cli: true, telegram: true, discord: false, slack: false, email: false, webhook: true }, mcps: { github: true, postgres: false, cloudflare: false, linear: false, filesystem: false, mem0: false }, @@ -425,10 +425,10 @@

Hermes Config Wizard

} if (mcps.linear) { lines.push(` linear:`); - lines.push(` command: npx`); - lines.push(` args: [-y, "@linear/mcp-server-linear"]`); - lines.push(` env:`); - lines.push(` LINEAR_API_KEY: "$\{LINEAR_API_KEY\}"`); + lines.push(` url: https://mcp.linear.app/mcp`); + lines.push(` # OAuth is completed by the MCP client on first connection.`); + lines.push(` trust: trusted`); + lines.push(` allow_sampling: false`); } if (mcps.filesystem) { lines.push(` filesystem:`); @@ -507,8 +507,8 @@

Hermes Config Wizard

lines.push(` rules:`); lines.push(` - { when: "task.type == 'classify'", use: "cerebras/llama-3.1-70b" }`); lines.push(` - { when: "context.tokens > 200000", use: "google/gemini-2.5-pro" }`); - lines.push(` - { when: "task.type == 'code'", use: "moonshot/kimi-k2.5" }`); - lines.push(` - { when: "task.explicit_opt_in == 'sonnet'", use: "anthropic/claude-sonnet-4-5" }`); + lines.push(` - { when: "task.type == 'code'", use: "moonshot/kimi" }`); + lines.push(` - { when: "task.explicit_opt_in == 'sonnet'", use: "anthropic/claude-sonnet" }`); lines.push(` - { else: true, use: "google/gemini-2.5-flash" }`); lines.push(``); } diff --git a/part12-web-dashboard.md b/part12-web-dashboard.md index 37b6385..a269d92 100644 --- a/part12-web-dashboard.md +++ b/part12-web-dashboard.md @@ -1,6 +1,6 @@ # Part 12: The Local Web Dashboard (Stop Editing YAML) -*New in Hermes v0.9.0 (2026.4.13). The easiest way to run Hermes — a full browser-based control panel for everything you used to do in the terminal.* +*Introduced in v0.9 and substantially upgraded through v0.12. The dashboard is now a browser-based control panel plus an embedded real Hermes TUI, not just a YAML editor.* --- @@ -8,16 +8,18 @@ Before v0.9, managing Hermes meant: edit `config.yaml`, export env vars, grep through logs, and use the CLI to inspect sessions. Great for power users. Terrible for anyone new. 
-The new **web dashboard** (`hermes dashboard`) replaces all of that with a single browser UI: +The **web dashboard** (`hermes dashboard`) replaces most of that with a single browser UI: -- Live status of the gateway and all 16 platform adapters +- Live status of the gateway and all built-in/plugin platform adapters +- Browser Chat backed by the real `hermes --tui` - Form-based editor for every config field (all 150+ of them, auto-discovered from `DEFAULT_CONFIG`) +- Models tab for main + auxiliary model configuration - API key manager for providers, tools, and platforms - Full-text search across past sessions (FTS5) - Log tailer with level/component filters - Usage and cost analytics (daily token + cost breakdown, per-model) - Cron job management -- Skills and toolsets browser with enable/disable toggles +- Skills, Curator, plugins, and toolsets browser with enable/disable toggles Everything runs on `127.0.0.1` — no data leaves your machine. @@ -33,13 +35,13 @@ That's it. It starts a local server and opens `http://127.0.0.1:9119` in your de ### Install the Dependencies (One Time) -The dashboard uses FastAPI + Uvicorn + a React frontend: +The dashboard uses FastAPI + Uvicorn + a React frontend. The Chat tab also needs PTY support: ```bash -pip install hermes-agent[web] +pip install 'hermes-agent[web,pty]' ``` -If you installed with `hermes-agent[all]`, you're already done. The frontend auto-builds on first launch if `npm` is available. +If you installed with `hermes-agent[all]`, you're already done. The `web` extra brings FastAPI/Uvicorn; `pty` lets the Chat tab spawn `hermes --tui` behind a pseudo-terminal on Linux/macOS/WSL. The frontend auto-builds on first launch if `npm` is available. ### Options @@ -48,6 +50,8 @@ If you installed with `hermes-agent[all]`, you're already done. 
The frontend aut | `--port` | `9119` | Port to serve on | | `--host` | `127.0.0.1` | Bind address | | `--no-open` | — | Don't auto-open the browser | +| `--insecure` | off | Permit non-localhost binding; dangerous without a proxy/auth | +| `--tui` | off | Enable the in-browser Chat tab; also available via `HERMES_DASHBOARD_TUI=1` | ```bash # Custom port @@ -77,6 +81,18 @@ Live overview that auto-refreshes every 5 seconds: This is the page you leave open on a second monitor. +### Chat + +The Chat tab embeds the actual `hermes --tui` process through xterm.js. That matters: slash commands, approval prompts, clarify/sudo/secret prompts, skins, markdown streaming, tool-call cards, `/resume`, `/steer`, `/queue`, and TUI fixes appear here automatically because the dashboard is not maintaining a second chat implementation. + +Requirements: + +- Node.js for the Ink TUI bundle +- `ptyprocess` via `pip install 'hermes-agent[pty]'` +- POSIX PTY support: Linux, macOS, or WSL; native Windows Python is not supported for the embedded PTY + +Tip: launch from the Sessions page with the play icon to resume a past session directly into `/chat?resume=<session_id>`. + ### Config Form-based editor for `config.yaml`. Fields are auto-discovered from `DEFAULT_CONFIG` and grouped into tabs: @@ -88,6 +104,8 @@ Form-based editor for `config.yaml`. Fields are auto-discovered from `DEFAULT_CO - **delegation** — subagent limits, reasoning effort - **memory** — provider, context injection settings - **approvals** — dangerous command mode (`ask` / `yolo` / `deny`) +- **plugins** — enabled/disabled plugin allowlists +- **curator** — schedule, pruning thresholds, pinned/archived behavior Dropdowns for known-value fields (terminal backend, skin, approval mode). Toggles for booleans. Text inputs for everything else. @@ -145,6 +163,17 @@ Usage and cost, computed from session history. Pick a time window (7 / 30 / 90 d If you're on the Nous Portal Tool Gateway (Part 13), gateway tool usage shows up here too. 
+### Models + +Use this page before you edit routing YAML by hand. It exposes: + +- Main model/provider selection +- Auxiliary models for compression, vision, title generation, session search, and curator +- Remote OpenRouter/Nous picker data when available +- Per-model usage analytics so "cheap default, expensive opt-in" stays honest + +This is the fastest way to stop wasting your best model on background summaries. + ### Cron Create and manage scheduled agent prompts. @@ -166,6 +195,23 @@ Browse, search, and toggle every skill and toolset. - **Toggle** — enable/disable individual skills per session - **Toolsets** — separate section showing built-in toolsets (file, web, browser), with active/inactive state, setup requirements, and the list of tools each one provides +### Plugins + +Plugins ship disabled. Use the dashboard to review what was discovered from bundled, user, project, pip, and Nix sources before enabling anything with hooks/tools. + +Good first enables: + +- `observability/langfuse` — trace LLM/tool calls to Langfuse +- `spotify` — native playback/queue/search tools +- `google_meet` — join, transcribe, speak, and follow up on Meet calls +- `hermes-achievements` — dashboard achievements from real session history + +Project-local plugins under `.hermes/plugins/` should stay disabled unless you trust the repository. + +### Curator + +v0.12 adds Curator controls for skill-library hygiene: run dry-runs, inspect proposed archives/merges, pin important skills, and review archived skills before restoring or deleting. See [Part 5](./part5-creating-skills.md#curator-v012-keep-the-skill-library-from-rotting) and [Part 22](./part22-latest-power-moves.md#1-turn-on-curator-before-your-skill-library-becomes-noise). 
+ --- ## `/reload` — Pick Up `.env` Changes Live diff --git a/part13-tool-gateway.md b/part13-tool-gateway.md index d24ec1f..b4dbab2 100644 --- a/part13-tool-gateway.md +++ b/part13-tool-gateway.md @@ -1,6 +1,6 @@ # Part 13: The Nous Tool Gateway (One Subscription, Four Tools, Zero Extra Keys) -*New in Hermes v0.10.0 (2026.4.16). If you have a paid Nous Portal subscription, you already have web search, image generation, text-to-speech, and browser automation — you just haven't turned them on yet.* +*If you have a paid Nous Portal subscription, you already have web search, image generation, text-to-speech, and browser automation — you just haven't turned them on yet.* --- diff --git a/part14-fast-mode-watchers.md b/part14-fast-mode-watchers.md index 909b098..f69fd0c 100644 --- a/part14-fast-mode-watchers.md +++ b/part14-fast-mode-watchers.md @@ -1,6 +1,6 @@ # Part 14: Fast Mode & Background Watchers -*New in Hermes v0.9.0 (2026.4.13). Two small features with outsized impact: priority-tier inference on OpenAI and Anthropic, and real-time pattern matching on background process output.* +*Priority-tier inference, live background-process events, and the newer TUI controls that keep long sessions steerable instead of stuck.* --- @@ -10,7 +10,7 @@ Both OpenAI and Anthropic run **priority processing queues** for latency-sensitive traffic. Higher cost per token, but dramatically lower p50 and p99 latency — especially under load on reasoning models. -`/fast` toggles that priority tier per session. On supported models (GPT-5.4, Codex, Claude Opus 4.6, Claude Sonnet 4), flipping it on injects `service_tier: "priority"` into every outgoing request. +`/fast` toggles that priority tier per session. On supported OpenAI/Codex and Anthropic models, flipping it on injects `service_tier: "priority"` into outgoing requests. ### When to Use It @@ -65,6 +65,29 @@ Priority tier is more expensive per token. 
Watch the **Analytics** tab in the da --- +## `/steer`, `/queue`, and Background Turns + +The newer TUI makes long-running work much easier to control: + +| Command | Use it when | Pattern | +|---------|-------------|---------| +| `/steer <prompt>` | The agent is mid-run but drifting | "Continue, but don't edit generated files" | +| `/queue <prompt>` | You want the next task to start after the current one | "After tests pass, summarize the risk" | +| `/background <prompt>` | Fire off work without blocking the main chat | "Research alternatives while I keep coding" | +| `/busy` | You want to inspect what Hermes is doing | Check active runs/subagents | +| `/indicator` | The spinner/activity feed is too loud or too quiet | Toggle busy indicator style | + +Best practice: + +1. Use `/steer` for **constraints**, not brand-new goals. +2. Use `/queue` for dependent follow-ups. +3. Use `/background` for independent research or monitoring. +4. If the run touches files, keep follow-up prompts specific enough that Hermes can avoid clobbering its own edits. + +This is the practical replacement for repeatedly interrupting and restating the whole task. + +--- + ## Background Process Monitoring (`watch_patterns`) ### The Problem This Fixes diff --git a/part15-new-platforms.md b/part15-new-platforms.md index 18d96c6..634c6a8 100644 --- a/part15-new-platforms.md +++ b/part15-new-platforms.md @@ -1,12 +1,12 @@ -# Part 15: New Messaging Platforms (iMessage, WeChat, Android) +# Part 15: Messaging Platforms (iMessage, WeChat, QQBot, Yuanbao, Teams, Android) -*Hermes v0.9.0 (2026.4.13) — the "everywhere" release. Three new surfaces that dramatically expand where Hermes can run and who can talk to it.* +*Hermes' gateway is now a plugin host. 
v0.9 made Hermes "everywhere"; v0.11/v0.12 added QQBot, Tencent Yuanbao, and Microsoft Teams as the first plugin-shipped platform.* --- -## The 16-Platform Lineup +## The 18+ Platform Lineup -As of v0.9, the gateway ships adapters for: +As of v0.12, the gateway ships built-in adapters plus plugin-shipped platforms: | Platform | Mode | Notes | |----------|------|-------| @@ -17,6 +17,9 @@ As of v0.9, the gateway ships adapters for: | **iMessage (BlueBubbles)** | Webhook | **New in v0.9** | | **Weixin (WeChat personal)** | Long-poll | **New in v0.9** | | **WeCom (Enterprise WeChat)** | Webhook | **New in v0.9** | +| **QQBot** | WebSocket/Webhook | Added after the original v0.9 platform sweep | +| **Tencent Yuanbao** | Native gateway | **New in v0.12**, text + media delivery | +| **Microsoft Teams** | Plugin | **New in v0.12**, first plugin-shipped gateway platform | | Signal | REST via signal-cli | Self-hosted bridge | | DingTalk | Webhook | Corporate IM, China/APAC | | Feishu / Lark | Webhook | Corporate IM, ByteDance | @@ -33,8 +36,31 @@ All of them respect: - Tool Gateway routing (Part 13) - Cron delivery targets - The shared session database (Part 7) +- Pre-dispatch plugin hooks -This part covers the three brand-new adapters plus **Android / Termux** — running the agent itself on a phone. +This part covers the v0.9 adapters, the newer v0.12 surfaces, and **Android / Termux** — running the agent itself on a phone. + +## 2026 Update: QQBot, Yuanbao, and Teams + +### QQBot + +Use QQBot when your community already lives in QQ and you want the same approval/session model as Telegram or Discord. Treat QQ groups as untrusted input by default: keep allowlists tight, require approval for filesystem/network tools, and use [Part 19](./part19-security-playbook.md) for prompt-injection hardening. + +### Tencent Yuanbao + +Yuanbao is now a native gateway adapter with text and media delivery. 
It belongs in the same bucket as Weixin/WeCom: powerful in China/APAC workflows, but operationally different from Western SaaS bots. Verify media size limits and identity mapping before using it for production approvals. + +### Microsoft Teams Plugin + +Teams proves the v0.12 gateway-plugin architecture: new platforms no longer need to land inside `gateway/platforms/` to be usable. Enable only trusted platform plugins: + +```bash +hermes plugins list +hermes plugins enable teams +hermes gateway setup +``` + +Keep project-local plugins disabled unless the repository is trusted (`HERMES_ENABLE_PROJECT_PLUGINS=true` is intentionally opt-in). --- diff --git a/part16-backup-debug.md b/part16-backup-debug.md index dd6692f..b0db9e2 100644 --- a/part16-backup-debug.md +++ b/part16-backup-debug.md @@ -1,6 +1,6 @@ # Part 16: Backup, Import, and `/debug` — Your Recovery Kit -*New in Hermes v0.9.0 and v0.10.0. Two long-missing features finally shipped: first-class backup/import of your whole Hermes install, and a built-in diagnostic bundler you can share in bug reports.* +*First-class backup/import, debug bundles, update preflights, and the hardening details you need before you let Hermes run unattended.* --- @@ -138,12 +138,12 @@ sessions.db ### The New Diagnostic Flow -When something goes weird, the old flow was: grep through `~/.hermes/logs/`, paste 800 lines into a GitHub issue, hope you got the right ones. The v0.10 flow is: +When something goes weird, the old flow was: grep through `~/.hermes/logs/`, paste 800 lines into a GitHub issue, hope you got the right ones. 
The modern flow is: ```text You → /debug Collecting diagnostics… - ✓ Agent version: v0.10.0 (v2026.4.16) + ✓ Agent version: v0.12.0 (v2026.4.30) ✓ Platform: Linux 6.8.0 / Python 3.12.3 ✓ Gateway: running (3 adapters connected) ✓ Last 200 lines of agent.log @@ -225,10 +225,39 @@ Preserves detail relevant to the topic and aggressively compresses everything el --- -## Security Hardening (v0.9 + v0.10 Notes) +## Security Hardening Notes A handful of hardening changes landed in the "everywhere" + "gateway" releases worth calling out explicitly: +### v0.12 hardline blocklist + +Hermes now has a hardline blocklist for commands that should not be recoverable through casual approval prompts. Keep your own denylist too, but do not rely on "the model will know this is dangerous" for commands that delete homes, scrape credentials, or hit metadata services. + +Useful custom denylist additions: + +```yaml +security: + approval: + denylist: + - 'rm\s+-rf\s+(/|~|\$HOME)' + - 'curl\s+.+\|\s*(sh|bash)' + - '169\.254\.169\.254' + - 'cat\s+~?/?\.?ssh/' + - 'aws\s+s3\s+sync\s+.+\s+s3://' + - 'ssh-keyscan' +``` + +### `hermes update --check` before upgrades + +Before a major upgrade: + +```bash +hermes update --check +hermes backup +``` + +The preflight catches obvious incompatibilities and the backup gives you a rollback point for `HERMES_HOME`. + ### Webhook secrets validated on startup Every webhook-based adapter (Telegram, BlueBubbles, WeCom, Feishu, WeChat, generic Webhook) now validates its signing secret at gateway startup. A missing/empty/weak secret produces a startup error instead of silently accepting forged requests. 
@@ -280,4 +309,4 @@ You've now seen the full April 2026 feature surface: - [Part 14 — Fast Mode & Background Watchers](./part14-fast-mode-watchers.md) - [Part 15 — New Platforms (iMessage, WeChat, Android)](./part15-new-platforms.md) -If you installed fresh on v0.10.0 and walked through [Part 1](./part1-setup.md) and this series, you're running the most capable Hermes configuration to date. +If you installed fresh on v0.12.0 and walked through [Part 1](./part1-setup.md) and this series, you're running the most capable Hermes configuration to date. diff --git a/part17-mcp-servers.md b/part17-mcp-servers.md index 3e9d289..9501fee 100644 --- a/part17-mcp-servers.md +++ b/part17-mcp-servers.md @@ -107,13 +107,15 @@ Without a `tools_allowlist`, every tool the server exposes is available. These are the ones that pay for themselves within a day: +> **2026 reality check:** MCP is also a supply-chain boundary. Prefer official servers, pin package versions, restrict filesystem roots, and keep `allow_sampling: false` unless the server genuinely needs to call an LLM. + | Server | What it adds | Why you want it | |--------|--------------|-----------------| | **@modelcontextprotocol/server-github** | Issues, PRs, repo search, branch diffs | Hermes becomes a code-aware teammate | | **@modelcontextprotocol/server-filesystem** | Scoped file reads/writes/search | Safer than giving terminal access | | **@modelcontextprotocol/server-postgres** | Read-only SQL | Answer "what's in the db?" 
without exposing DSN | | **@modelcontextprotocol/server-sqlite** | Local SQLite analysis | Great for log files, analytics snapshots | -| **@modelcontextprotocol/server-puppeteer** | Browser automation | Complement to the Tool Gateway's Browser Use | +| **@modelcontextprotocol/server-puppeteer** | Browser automation | Complement to the Tool Gateway's Browser Use; sandbox it tightly | | **@modelcontextprotocol/server-memory** | Knowledge-graph memory | Pairs with [Part 3 LightRAG](./part3-lightrag-setup.md) for redundancy | | **mcp.mem0.ai** | Hosted long-term memory | Cross-device memory across Hermes + Claude Code | | **Cloudflare Observability MCP** | Query your Worker logs/analytics | If you run anything on Cloudflare | @@ -124,7 +126,7 @@ These are the ones that pay for themselves within a day: | **@browserbase/mcp** | Headless browser-as-a-service | Scraping sites Firecrawl can't handle | | **@chroma-core/chroma-mcp** | ChromaDB vectors | Works alongside LightRAG | -For the full catalog, see [modelcontextprotocol.io/servers](https://modelcontextprotocol.io/servers) and the `awesome-mcp-servers` list on GitHub. +For the full catalog, see the [MCP Registry](https://registry.modelcontextprotocol.io/) and the `awesome-mcp-servers` list on GitHub. --- diff --git a/part18-coding-agents.md b/part18-coding-agents.md index b7ad961..812a97d 100644 --- a/part18-coding-agents.md +++ b/part18-coding-agents.md @@ -12,7 +12,7 @@ Hermes is excellent at reasoning, memory, conversation, and workflow. 
It is *not |-------|-----------|------------| | **Claude Code** | Strongest at large refactors, test writing, PR reviews | Pro/Max OAuth or `ANTHROPIC_API_KEY` | | **Codex** (OpenAI) | Fast feedback loop, great at bug hunts, small edits | OAuth via `openai` CLI or `OPENAI_API_KEY` | -| **Gemini CLI** | 1M context — unbeatable for "read the whole repo" tasks | OAuth via `gemini auth` (free tier generous) | +| **Gemini CLI** | 1M context — unbeatable for "read the whole repo" tasks | OAuth via `gemini auth`; Hermes' own Gemini OAuth covers normal model-provider use | | **OpenCode** (anomalyco) | Open-source, routes to GLM/Kimi/MiMo cheaply | Bring any provider key | | **Aider** | Surgical git-based edits, smallest token footprint | Bring any provider key | @@ -33,7 +33,7 @@ codex auth login # Gemini CLI npm install -g @google/gemini-cli -gemini auth # Free tier: 1500 req/day +gemini auth # Only needed when delegating to Gemini CLI itself # OpenCode (Go variant preferred for Hermes) curl -fsSL https://opencode.ai/install.sh | bash @@ -101,7 +101,7 @@ Each specialist has a sweet spot. Let Hermes route: | Bug reproduction + fix in a single file | Codex | Fast turnaround, cheaper per task | | "Explain this codebase" | Gemini CLI | 1M context eats any repo whole | | Bulk surgical edits with deterministic diffs | Aider | Smallest token footprint, git-native | -| Anything on a budget | OpenCode + GLM 4.6 / Kimi K2 | One-tenth the cost of Claude for ~80% quality | +| Anything on a budget | OpenCode + GLM / Kimi | Much cheaper than frontier models for routine edits | A sensible `~/.hermes/config.yaml`: @@ -119,7 +119,7 @@ delegation: agent: gemini-cli - match: { budget: low } agent: opencode - model: glm-5.1 + model: zai/glm ``` --- @@ -128,7 +128,7 @@ delegation: What you actually want on your phone: a Telegram topic named "Claude Code" where every message lands in a persistent Claude Code session. No re-explaining context. No re-spawning. 
Just chat with the coding agent directly, with Hermes handling the transport, memory, and voice-to-text. -This is the feature request tracked in [#5394](https://github.com/NousResearch/hermes-agent/issues/5394) and already landing in bits across v0.9/v0.10. As of v0.10.0 the workflow is: +This pattern is now practical because v0.11 added orchestrator-role subagents, spawn-depth controls, and file-coordination between sibling workers. The workflow: ```bash # In Telegram, create a topic, then from the CLI or dashboard: @@ -138,6 +138,8 @@ hermes bind-thread --runtime claude-code --cwd ~/projects/myapp From that point: - Every message in the topic goes to a persistent Claude Code session - File edits happen in `~/projects/myapp` on the Hermes host +- Orchestrator subagents can spawn their own workers if `max_spawn_depth` allows it +- Concurrent workers coordinate file state instead of blindly overwriting siblings - `/unbind` in the topic detaches and reverts to normal Hermes chat - `/runtime gemini-cli` swaps the runtime without losing the thread diff --git a/part19-security-playbook.md b/part19-security-playbook.md index 9c211a6..45533df 100644 --- a/part19-security-playbook.md +++ b/part19-security-playbook.md @@ -17,6 +17,8 @@ Hermes is uniquely exposed because it takes input from **many** surfaces and has | GitHub MCP | PR titles, issue bodies, comments | Comment-and-Control pattern | | Web-scraped content | Page HTML the agent reads | "Read then act" injections | | Voice transcript | Whisper transcription | "Say the magic phrase" attacks | +| MCP/plugin package | Tool schema, stdout, hook behavior | Supply-chain prompt injection / token burn | +| Dashboard plugin | Browser UI + backend endpoints | Local secret/config exposure | The goal isn't to eliminate these channels — Hermes is *for* reading them. The goal is to make sure untrusted text can't cross a trust boundary into secrets, writes, or shell. 
@@ -82,6 +84,8 @@ security: - "chmod -R 777 /" - "curl * | sudo bash" - ".*/etc/shadow" + - "169.254.169.254" + - "ssh-keyscan" approval_channels: # Where the prompt shows up - telegram_private # Your personal DM, not the group - cli @@ -102,6 +106,10 @@ security: # DO NOT ADD: any subagent that reads Telegram, email, webhooks, or scraped web ``` +### v0.12 Hardline Blocks + +Hermes now has hardline command blocking for unrecoverable patterns. Treat it as the seatbelt, not the whole car: keep your own denylist, preserve private approval channels, and never route approvals back into the same untrusted group/chat that triggered the action. + --- ## Layer 3: Secrets Isolation diff --git a/part20-observability.md b/part20-observability.md index 6e0e4cf..94ee5d0 100644 --- a/part20-observability.md +++ b/part20-observability.md @@ -1,4 +1,4 @@ -# Part 20: Observability & Cost Control — Langfuse, Helicone, /usage, Routing Playbooks +# Part 20: Observability & Cost Control — Langfuse Plugin, Helicone, /usage, Routing Playbooks *You can't optimize what you can't see. Hermes tracks tokens, latency, and errors natively, but once you're running across CLI + Telegram + Discord + cron + coding-agent delegations, you want a real tracing stack. This part sets up Langfuse, Helicone, or OpenTelemetry → Phoenix with one config block, then gives you the cost-routing playbook that dropped our test deployment from $34 to $3 per feature implementation.* @@ -70,7 +70,11 @@ hermes logs export --since 30d --format jsonl \ ## Level 3 — Langfuse (Recommended Default) -Langfuse is the "everything in one place" option: tracing, prompt management, evals, self-hostable. If you're not sure where to start, start here. +Langfuse is the "everything in one place" option: tracing, prompt management, evals, self-hostable. If you're not sure where to start, start here. In v0.12, Langfuse also ships as a bundled observability plugin, so prefer enabling that over hand-rolled hooks. 
+ +```bash +hermes plugins enable observability/langfuse +``` ### Setup (Hosted Cloud) @@ -176,36 +180,36 @@ Hermes emits `gen_ai.*` spans following the [OpenInference](https://github.com/A ### Rule 1: Route by Task Complexity, Not Default -Most Hermes cost bloat comes from using Claude Opus / GPT-5 for tasks Kimi / GLM / MiniMax would handle identically. Set up a **task-aware default**: +Most Hermes cost bloat comes from using your most expensive frontier model for tasks Gemini Flash, Kimi/Moonshot, GLM, MiniMax, Cerebras, or a local model would handle identically. Set up a **task-aware default**: ```yaml model_routing: default: - model: claude-sonnet-4-20250514 + model: claude-sonnet provider: anthropic routes: - match: { intent: [classification, extraction, triage, sum_under_500_tokens] } model: gemini-2.5-flash - provider: openrouter + provider: google - match: { intent: long_context, tokens_gte: 150000 } model: gemini-2.5-pro provider: openrouter - match: { intent: [write_code, refactor, debug], complexity: medium } - model: glm-5.1 + model: glm provider: zai - match: { intent: [write_code, refactor, debug], complexity: high } - model: claude-sonnet-4-20250514 + model: claude-sonnet provider: anthropic - match: { intent: [reasoning, math], complexity: high } - model: gpt-5.4 + model: reasoning provider: openai ``` Hermes classifies intent via a tiny prompt (~100 tokens) and routes accordingly. 
Empirically: -| Scenario | Naive default (Sonnet 4.5) | Routed | Savings | +| Scenario | Naive frontier default | Routed | Savings | |----------|----------------------------|--------|---------| -| Feature implementation (100 calls) | ~$34 | ~$3 (mostly Kimi) | 91% | +| Feature implementation (100 calls) | ~$34 | ~$3 (mostly Kimi/GLM) | 91% | | Long-doc summarization (10 calls, 200K each) | ~$42 | ~$4 (Gemini 2.5 Pro) | 90% | | Daily classification triage | ~$18/day | ~$1/day (Flash) | 94% | @@ -294,8 +298,8 @@ hermes evals dataset create telegram-support-flows hermes evals dataset add telegram-support-flows ~/.hermes/traces/support/*.json # Run on every release -hermes evals run telegram-support-flows --model claude-sonnet-4-20250514 -hermes evals run telegram-support-flows --model glm-5.1 # Check if cheaper model still passes +hermes evals run telegram-support-flows --model anthropic/claude-sonnet +hermes evals run telegram-support-flows --model zai/glm # Check if cheaper model still passes hermes evals compare ``` diff --git a/part21-remote-sandboxes.md b/part21-remote-sandboxes.md index a7988c2..3687247 100644 --- a/part21-remote-sandboxes.md +++ b/part21-remote-sandboxes.md @@ -1,6 +1,6 @@ -# Part 21: Remote Sandboxes & Bulk File Sync — SSH, Modal, Daytona +# Part 21: Remote Sandboxes & Bulk File Sync — SSH, Modal, Daytona, Vercel -*Running Hermes on a $5 VPS is great for chat. Running heavy coding work there is not. This part sets up the "phone drives, beefy remote does the work" pattern: Hermes lives on your small VPS, delegates execution to a disposable sandbox on SSH/Modal/Daytona, syncs files both ways, and tears it down when idle. Ships in v0.9+ with the [bulk file sync](https://github.com/NousResearch/hermes-agent/pull/8018) hardening that landed April 17, 2026.* +*Running Hermes on a $5 VPS is great for chat. Running heavy coding work there is not. 
This part sets up the "phone drives, beefy remote does the work" pattern: Hermes lives on your small VPS, delegates execution to a disposable sandbox on SSH/Modal/Daytona/Vercel, syncs files both ways, and tears it down when idle.* --- @@ -30,11 +30,12 @@ Hermes uploads your workspace on task start, delegates work, then downloads only | **SSH** | Your infra | Whatever your host costs | Homelab / always-on dev box | | **Modal** | Per-second compute | $0 (hibernate) | Bursty coding tasks, GPU work | | **Daytona** | Per-second workspace | $0 (hibernate) | Long-lived dev workspaces | +| **Vercel Sandbox** | Per-run / platform billing | $0 when unused | Webapp builds and isolated `execute_code` tasks | | **Fly Machines** | Per-second | $0 (stop) | Regional sandboxes near your users | | **E2B** | Per-second | $0 | Quick throwaway Python sandboxes | | **Local Docker** | Your hardware | N/A | Testing / development | -Hermes ships native support for SSH, Modal, and Daytona as of v0.9+. Fly Machines and E2B work via a thin `remote_exec` plugin. +Hermes ships native support for SSH, Modal, Daytona, and Vercel Sandbox. Fly Machines and E2B work via thin plugins. --- @@ -89,7 +90,7 @@ Under the hood on teardown: 5. Applies only changed files back to `~/.hermes`, with `fcntl.flock` serialization if another sandbox runs concurrently 6. SIGINT-safe — pressing Ctrl-C during sync rolls back cleanly -This is what PR [#8018](https://github.com/NousResearch/hermes-agent/pull/8018) (merged April 17) formalized. Before it, you either rsynced everything every time (slow) or lost remote-made edits on teardown. +This is the hardening that made remote sandboxes safe enough for real coding work. Before diff-based sync-back, you either rsynced everything every time (slow) or lost remote-made edits on teardown. 
--- @@ -164,7 +165,32 @@ sandboxes: pull_on_command: "/sync-home" # Manual sync when you want it ``` -Pair with the [Gemini CLI OAuth provider](./part9-custom-models.md) (merged PR [#11270](https://github.com/NousResearch/hermes-agent/pull/11270), April 16) for free-tier Gemini use inside the sandbox — the 1500 req/day free tier covers most exploratory work. +Pair with the [Gemini OAuth provider](./part9-custom-models.md#gemini-oauth--free-tier-friendly) for free-tier-friendly long-context reads inside the sandbox. + +--- + +## Vercel Sandbox (Web Builds / Isolated Code Execution) + +Vercel Sandbox is now a native backend for `execute_code` and terminal-style runs. Use it when the task is webapp-shaped: install dependencies, run a build, inspect generated output, and throw the environment away. + +```yaml +sandboxes: + vercel-web: + backend: vercel + project: my-webapp + timeout: 1800 + sync: + push: ~/projects/my-webapp + pull_on_teardown: true + pull_paths: + - . + ignore: + - node_modules + - .next + - dist +``` + +It is not a replacement for Daytona if you want a persistent dev workspace. Treat it as a clean execution target for builds, tests, and short isolated scripts. --- diff --git a/part22-latest-power-moves.md b/part22-latest-power-moves.md new file mode 100644 index 0000000..d9c6210 --- /dev/null +++ b/part22-latest-power-moves.md @@ -0,0 +1,175 @@ +# Part 22: Latest Power Moves — Curator, TUI, Plugins, Context Files + +*If you already know Hermes but missed the v0.11/v0.12 wave, read this part first. These are the changes that most improve daily usage.* + +--- + +## 1. Turn On Curator Before Your Skill Library Becomes Noise + +Agent-created skills are valuable until the library fills with duplicates, stale CLI flags, and one-off task notes. Curator is the v0.12 maintenance loop for that. 
+ +```bash +hermes curator run --dry-run +hermes curator run +hermes curator enable +``` + +Use it like this: + +- Pin production runbooks and skills you personally rely on. +- Let Curator archive weak/duplicate agent-created skills. +- Run a dry-run after upgrades or big workflow changes. +- Restore archived skills instead of recreating them from memory. + +Curator should prune skills, not decide project policy. Put durable project rules in context files. + +--- + +## 2. Use the TUI as Your Daily Driver + +`hermes --tui` is now the primary power-user interface. It is not just prettier output; it changes how you steer long runs. + +```bash +hermes --tui +``` + +Habits that pay off: + +- Use `/steer <prompt>` when the agent is mid-run but drifting. +- Use `/queue <prompt>` for dependent follow-ups. +- Use `/background <prompt>` for independent research or monitoring. +- Use `/resume`, then delete stale sessions from the picker with `d`. +- Use `/reload` after editing `.env`; avoid restarting the session just to pick up keys. +- Toggle `/mouse` if your terminal/ConPTY injects phantom mouse events. + +If the dashboard Chat tab is enabled, it embeds the same TUI through a PTY, so improving your TUI workflow also improves the browser workflow. + +--- + +## 3. Clean Up Context Files + +Hermes now reads common agent instruction files, including `.hermes.md`, `AGENTS.md`, `CLAUDE.md`, `SOUL.md`, and `.cursorrules`. + +Use them for different jobs: + +| File | Put this there | Avoid | +|------|----------------|-------| +| `.hermes.md` | Hermes-specific repo workflow, commands, approval expectations | Generic company policy | +| `AGENTS.md` | Cross-agent coding instructions | Personal style/personality | +| `SOUL.md` | Tone, boundaries, durable preferences | Build commands and API docs | +| `.cursorrules` | Editor/Cursor compatibility | Secrets or credentials | + +Best pattern: + +1. Keep root instructions short. +2. Add subdirectory-specific files only where behavior changes. +3.
Store secrets in `.env` or provider auth stores, never context files. +4. Use skills for procedures, memory for facts, and context files for policy. + +--- + +## 4. Use Plugins for Integrations, Not One-Off Scripts + +v0.12 made plugins the right abstraction for tools, hooks, slash commands, dashboard tabs, and gateway platforms. + +```bash +hermes plugins list +hermes plugins enable observability/langfuse +hermes plugins enable spotify +``` + +Bundled plugins worth reviewing: + +| Plugin | Why enable it | +|--------|---------------| +| `observability/langfuse` | Trace LLM/tool calls without writing custom hooks | +| `spotify` | Native playback, queue, search, playlists, devices | +| `google_meet` | Join calls, transcribe, speak, and generate follow-ups | +| `hermes-achievements` | Dashboard achievements from session history | +| image-gen backends | Extra OpenAI/Codex/xAI image routes | + +Security posture: + +- Plugins are disabled by default; leave every plugin you do not actively use disabled. +- Enable only trusted bundled/user plugins. +- Enable project-local plugins only for trusted repos. +- Treat hooks as code execution, not "just configuration." + +--- + +## 5. Split Main and Auxiliary Models + +The dashboard and `hermes model` now expose auxiliary model configuration. Use it. + +| Job | Good default | +|-----|--------------| +| Main agent | Your preferred coding/reasoning model | +| Compression | Cheap fast model | +| Vision | A model with actual image capability | +| Session search | Cheap summarizer/search-capable model | +| Title generation | Cheapest reliable model | +| Curator | Cheap model with enough context for skill review | + +This avoids spending premium tokens on titles, compression, and housekeeping. + +--- + +## 6. Chain Cron Jobs Instead of Repeating Context + +Cron is no longer just "run this prompt every morning." Use: + +- Per-job `workdir` for project-aware jobs. +- Per-job `enabled_toolsets` to shrink tool/context overhead.
+- `context_from` to feed one job's output into the next. +- Webhook direct delivery for zero-LLM notifications. + +Example pattern: + +```yaml +cron: + jobs: + collect-build-status: + schedule: "*/30 * * * *" + workdir: ~/projects/app + enabled_toolsets: [terminal] + prompt: "Run the build status check and summarize failures only." + notify-build-status: + schedule: "*/30 * * * *" + context_from: collect-build-status + deliver: telegram_private + prompt: "Notify only if the upstream job found failures." +``` + +--- + +## 7. Upgrade Checklist for Existing Installs + +Before moving an older v0.9/v0.10 setup to v0.12: + +```bash +hermes update --check +hermes backup +hermes --version +hermes doctor +``` + +Then: + +1. Open `hermes dashboard`. +2. Configure main + auxiliary models. +3. Enable only the plugins you actually need. +4. Run `hermes curator run --dry-run`. +5. Test one gateway message, one tool call, one skill, and one cron job. +6. Review [Part 19](./part19-security-playbook.md) before enabling broad platform access. + +--- + +## What to Ignore + +Some old advice is no longer worth optimizing around: + +- Do not install external Gemini CLI just for Gemini auth; Hermes can do OAuth itself. +- Do not fork the dashboard for a custom tab; write a dashboard plugin. +- Do not keep a giant SOUL.md full of procedures; use skills and Curator. +- Do not use one expensive default model for every auxiliary task. +- Do not expose the dashboard publicly without a real reverse proxy and auth layer. diff --git a/part4-telegram-setup.md b/part4-telegram-setup.md index da8e49c..d6bb580 100644 --- a/part4-telegram-setup.md +++ b/part4-telegram-setup.md @@ -1,24 +1,24 @@ # Part 4: Telegram Setup (Chat From Anywhere) -*Connect Hermes to Telegram for mobile access, voice memos, group chats, and scheduled task delivery. 
This is the most battle-tested of the 16 messaging adapters — start here, branch out to the others as needed.* +*Connect Hermes to Telegram for mobile access, voice memos, group chats, and scheduled task delivery. This is the most battle-tested of the 18+ messaging adapters — start here, branch out to the others as needed.* --- -## The 16-Platform Gateway +## The 18+ Platform Gateway -As of v0.9.0 (April 2026), the Hermes gateway ships adapters for **16 platforms**. They all share the same session DB, the same `/fast` toggle, the same Tool Gateway plumbing, and the same cron delivery mechanism: +As of v0.12.0 (April 2026), the Hermes gateway ships adapters/plugins for **18+ platforms**. They all share the same session DB, the same `/fast` toggle, the same Tool Gateway plumbing, and the same cron delivery mechanism: | Flagship | New in v0.9 | Enterprise / regional | Self-hosted / generic | |----------|-------------|-----------------------|-----------------------| | Telegram (this part) | iMessage (BlueBubbles) | DingTalk | Signal | | Discord | WeChat / Weixin | Feishu / Lark | Matrix | | Slack | WeCom | Mattermost | SMS (Twilio) | -| WhatsApp | | | Email (IMAP+SMTP) | -| | | | Home Assistant | +| WhatsApp | QQBot | Microsoft Teams | Email (IMAP+SMTP) | +| | Tencent Yuanbao | | Home Assistant | | | | | Webhook (generic) | - For **iMessage, WeChat, and Android/Termux**, see [Part 15](./part15-new-platforms.md). -- For **gateway crash recovery** and health checks across all 16, see [Part 11](./part11-gateway-recovery.md). +- For **gateway crash recovery** and health checks across all platforms, see [Part 11](./part11-gateway-recovery.md). - For the browser UI that manages every platform's state, see [Part 12](./part12-web-dashboard.md). 
--- diff --git a/part5-creating-skills.md b/part5-creating-skills.md index b3df145..ac16f04 100644 --- a/part5-creating-skills.md +++ b/part5-creating-skills.md @@ -114,6 +114,43 @@ Hermes patches the skill with new information using `skill_manage(action='patch' --- +## Curator (v0.12): Keep the Skill Library From Rotting + +The old skill failure mode was predictable: after a month of "save that as a skill," `~/.hermes/skills/` filled with duplicates, stale commands, and one-off notes that should have been memory. Hermes v0.12 adds **Curator** to clean that up. + +Run it manually: + +```bash +hermes curator run --dry-run +hermes curator run +``` + +Or enable the default weekly schedule: + +```bash +hermes curator enable +hermes curator status +``` + +What Curator does: + +- **Scores skills** for freshness, usage, clarity, overlap, and safety. +- **Merges duplicates** instead of letting near-identical workflows compete. +- **Archives dead skills** without deleting them; restore if it was too aggressive. +- **Pins important skills** so core workflows survive pruning. +- **Focuses on agent-created skills** first, not bundled/vendor skills. + +Good operating pattern: + +1. Pin your production runbooks and irreplaceable workflows. +2. Run `hermes curator run --dry-run` after major upgrades. +3. Let it archive one-off skills, not memory facts or project instructions. +4. Ask Hermes to update a skill immediately after a failed run; don't wait for Curator to infer the fix later. + +Curator is a librarian, not a teammate. It keeps the shelves useful; you still decide what knowledge is important. 
+ +--- + ## Skill Structure Every skill is a directory with a `SKILL.md` file: diff --git a/part9-custom-models.md b/part9-custom-models.md index 8ca0996..d043631 100644 --- a/part9-custom-models.md +++ b/part9-custom-models.md @@ -1,33 +1,38 @@ # Part 9: Custom Model Providers (Use Any Model You Want) -*Hermes supports any OpenAI-compatible API, plus first-class native adapters for Nous Portal, xAI, Xiaomi MiMo, Kimi/Moonshot, z.ai/GLM, MiniMax, Arcee, Hugging Face, Cerebras, Groq, Fireworks, and Ollama. OAuth providers landing post-v0.10 add Gemini CLI (free tier: 1500 req/day), Qwen, and Claude Code Pro/Max. This is the up-to-date (April 17, 2026) cheat sheet.* +*Hermes supports any OpenAI-compatible API, plus first-class native adapters for Nous Portal, Anthropic, OpenAI/Codex, OpenRouter, AWS Bedrock, Azure AI Foundry, Google Gemini, Gemini OAuth, LM Studio, xAI, Xiaomi MiMo, Kimi/Moonshot, z.ai/GLM, MiniMax, Arcee, GMI Cloud, Tencent TokenHub, Hugging Face, Cerebras, Groq, Fireworks, and Ollama. This is the April 30, 2026 cheat sheet.* -> **What's new since v0.10.0** — [Gemini CLI OAuth inference provider](https://github.com/NousResearch/hermes-agent/pull/11270) (#11270), [Gemini TTS provider](https://github.com/NousResearch/hermes-agent/pull/10922), [multi-model FAL image gen](https://github.com/NousResearch/hermes-agent/pull/11265), [GLM 5.1 in OpenCode Go catalogs](https://github.com/NousResearch/hermes-agent/pull/11269), [Azure OpenAI GPT-5.x on chat/completions](https://github.com/NousResearch/hermes-agent/pull/10086), plus [TCP keepalives](https://github.com/NousResearch/hermes-agent/pull/11277) that detect dead provider connections before you notice the hang. All shipping on `main`, targeted for v0.11. 
+> **What's new since the v0.10 guide refresh** — Gemini OAuth is now built into `hermes model` (no separate CLI install), AWS Bedrock uses the native Converse API, Azure AI Foundry auto-detects OpenAI vs Anthropic transports, LM Studio has `hermes doctor` checks and live `/models`, MiniMax OAuth uses PKCE, and OpenRouter/Nous model pickers update from a remote manifest instead of a hardcoded release snapshot. --- ## Native Adapters vs Generic OpenAI-Compatible -As of v0.10.0 (April 2026), Hermes ships **native adapters** for a growing list of providers. Native adapters know about provider-specific features that a generic OpenAI-compatible wrapper can't: +As of v0.12.0 (April 2026), Hermes ships **native adapters** for a large provider set. Native adapters know about provider-specific features that a generic OpenAI-compatible wrapper can't: | Provider | Native adapter? | Notable feature | |----------|-----------------|-----------------| | **Nous Portal** | Yes | Auth via `hermes model` (no bare API key). Unlocks the [Tool Gateway](./part13-tool-gateway.md). 
| | **Anthropic** | Yes | Native prompt caching, extended thinking, `/fast` priority tier | | **OpenAI** | Yes | Native responses API, reasoning effort levels, `/fast` priority tier | -| **xAI (Grok)** | **Yes, new in v0.10** | Native **live X/Twitter search** as a built-in tool | -| **Xiaomi MiMo** | **Yes, new in v0.10** | Native reasoning modes (`low`/`medium`/`high`) exposed as config | -| **Kimi / Moonshot** | Yes | 200K+ context, great for LightRAG entity extraction (see [Part 3](#part-3-lightrag--graph-rag-that-actually-works)) | -| **z.ai / GLM** | Yes | **GLM 5.1** (added to OpenCode Go catalogs [#11269](https://github.com/NousResearch/hermes-agent/pull/11269)) — currently strongest open-weights model for tool use | +| **OpenAI Codex OAuth** | Yes | ChatGPT/Codex login through `hermes model`, no API key | +| **AWS Bedrock** | Yes | Converse API, IAM credentials, cross-region inference profiles, Bedrock Guardrails | +| **Azure AI Foundry** | Yes | Auto-detects OpenAI-style vs Anthropic-style deployments and context length | +| **LM Studio** | Yes | Local `/models` discovery, optional auth, reasoning transport, `hermes doctor` checks | +| **xAI (Grok)** | Yes | Native live X search and xAI image/STT/TTS integrations | +| **Xiaomi MiMo** | Yes | Native reasoning modes (`low`/`medium`/`high`) exposed as config | +| **Kimi / Moonshot** | Yes | 200K+ context, great for LightRAG entity extraction (see [Part 3](./README.md#part-3-lightrag--graph-rag-that-actually-works)) | +| **z.ai / GLM** | Yes | Strong open-weight tool-use models; good cheap fallback for planning/exploration | | **Google Gemini (direct)** | Yes | 1M context; native prompt caching on Gemini 2.5 Pro | -| **Google Gemini CLI (OAuth)** | **Yes, new post-v0.10** | OAuth via `gemini auth` — **1500 requests/day free tier**. 
[#11270](https://github.com/NousResearch/hermes-agent/pull/11270) | -| **MiniMax** | Yes | M2.7 — balanced speed/quality; native streaming | +| **Google Gemini (OAuth)** | Yes | Browser PKCE login via `hermes model`; free tier supported; no external `gemini` install | +| **MiniMax** | Yes | API key or OAuth; native streaming and TTS | +| **GMI Cloud** | Yes | Hosted open models behind a native provider | +| **Tencent TokenHub** | Yes | Tencent model routing through TokenHub aliases | | **Arcee** | Yes | AFM-4.5 function-calling specialist, cheap | | **Cerebras** | Yes | 2000+ tok/s inference | | **Groq** | Yes | Fast hosted Llama / Qwen | -| **Qwen (OAuth)** | Yes | OAuth via portal-request flow, free-tier available | | **Fireworks** | Yes | Qwen3-Embedding-8B (recommended for LightRAG) | -| **Azure OpenAI** | Yes | GPT-5.x now via `/chat/completions` (was `/responses` only) [#10086](https://github.com/NousResearch/hermes-agent/pull/10086) | +| **Vercel AI Gateway** | Yes | Dynamic model discovery, pricing metadata, attribution | | **Hugging Face** | Yes | Any TGI / TEI endpoint (self-hosted or Inference Endpoints) | | **OpenRouter** | Yes | Pass-through to 200+ models; respects native adapter quirks when downstream is one | | **Ollama** (local) | Generic | OpenAI-compatible, zero auth | @@ -35,30 +40,22 @@ As of v0.10.0 (April 2026), Hermes ships **native adapters** for a growing list Pick the native adapter when one exists — you get the provider-specific features for free. Fall back to the generic OpenAI-compatible path only for endpoints that don't have a native adapter yet. -### Flagship Model Cheat Sheet (April 17, 2026) - -For the "which model should I pick right now?" 
question, this is the current state of the world: - -| Model | Provider | Input / Output ($/MTok) | Context | Best for | -|-------|----------|------------------------|---------|----------| -| **Claude Sonnet 4.5** | Anthropic | $3 / $15 | 200K | Default for coding, refactor, multi-step reasoning | -| **Claude Opus 4** | Anthropic | $15 / $75 | 200K | The hardest reasoning only; $15/MTok stings fast | -| **Claude Mythos** (Cyber) | Anthropic | Invite-only | 200K | Security research — vulnerability discovery, malware triage | -| **GPT-5.4** | OpenAI | $5 / $20 | 256K | Reasoning heavy-lift, agentic long chains | -| **GPT-5.4-Cyber** | OpenAI | Trusted Access only | 256K | Defensive cybersec workflows, reverse engineering | -| **GPT-5.4 Mini** | OpenAI | $0.60 / $4.80 | 256K | Cheap reasoning fallback | -| **Gemini 2.5 Pro** | Google / OpenRouter | $1.25 / $10 | 1M | Long-context, whole-repo reads, research synthesis | -| **Gemini 3 Flash Preview** | Google / OpenRouter | $0.50 / $3 | 1M | Fast agentic reasoning with 1M window | -| **Gemini 2.5 Flash** | Google / OpenRouter | $0.30 / $2.50 | 1M | Classification, triage, bulk extraction | -| **Kimi K2.5** | Moonshot | ~$0.15 / $2.50 | 200K | Best price/quality for coding in 2026 | -| **GLM 5.1** | z.ai | ~$0.20 / $2 | 128K | Strongest open-weights tool use | -| **xAI Grok 4** | xAI | $3 / $15 | 256K | Native live-X search; current-events questions | -| **Xiaomi MiMo** | Xiaomi | $0.50 / $3 | 200K | Three-mode reasoning toggle (low/med/high) | -| **MiniMax M2.7** | MiniMax | $10/mo flat | 256K | Flat-rate users doing bulk work | -| **Cerebras Llama 3.3 70B** | Cerebras | $0.60 / $0.60 | 128K | 3000+ tok/s — interactive chat, fast classification | -| **Local Nemotron 30B** | Ollama | Free | 128K | Privacy, offline, embedding, session search | - -> Prices are current per-provider retail as of April 17, 2026. 
Batch and prompt-caching discounts are not included — stack them via [Part 20](./part20-observability.md#rule-2-prompt-caching-is-free-money). +### Provider Cheat Sheet (April 30, 2026) + +The exact "best model" moves weekly, so treat this as a routing posture rather than a leaderboard. Use `hermes model` for live picker data, then pin only what you need reproducible. + +| Need | Start here | Why | +|------|------------|-----| +| Default coding / refactors | Anthropic Sonnet or Codex OAuth | Best reliability for patch-heavy work; Codex OAuth avoids API-key churn | +| Deep reasoning / high stakes | OpenAI reasoning or Anthropic Opus-class | Use explicitly; do not make it the default for cron/bulk tasks | +| Long-context repo or document reads | Gemini Pro/Flash or OpenRouter equivalent | Huge window, cheap enough for map/reduce and summarization | +| Cheap daily driver | Gemini OAuth + Kimi/Moonshot + z.ai/GLM | Good quality/cost mix, especially with auxiliary routing | +| Enterprise / VPC / compliance | AWS Bedrock or Azure AI Foundry | IAM/Azure auth, guardrails, private deployments, audit controls | +| Local/privacy/offline | LM Studio or Ollama | No cloud egress; great for extraction, embeddings, and drafts | +| Ultra-fast interactive turns | Cerebras or Groq | Very high tokens/sec; useful for classification and short-form chat | +| Current-events search | xAI Grok or tool-backed web search | Grok has native live-X search; Tool Gateway can cover broader web | + +> Pricing and context windows change too quickly to hardcode. Hermes now pulls OpenRouter and Nous Portal picker lists from a remote manifest, while provider APIs supply pricing/context metadata where available. --- @@ -73,22 +70,51 @@ hermes model If you're on a paid subscription, the setup also offers to enable the [Tool Gateway](./part13-tool-gateway.md) — web search, image gen, TTS, and browser automation through your subscription, no extra keys needed. 
-### Gemini CLI OAuth — Free 1500 req/day +### Gemini OAuth — Free-Tier Friendly + +If you have a Google account, skip the API key entirely and sign in from Hermes: + +```bash +hermes model +# Pick "Google Gemini (OAuth)" → complete the browser PKCE flow +``` + +Tokens are stored under `~/.hermes/auth/google_oauth.json` with 0600 permissions and automatic refresh. On headless SSH boxes, Hermes falls back to paste-mode auth. + +### AWS Bedrock and Azure AI Foundry — Enterprise Routing Without Proxy Glue + +Bedrock uses the native Converse API and the normal boto3 credential chain: + +```bash +pip install 'hermes-agent[bedrock]' +hermes model +# Choose "AWS Bedrock" → region → model/profile +``` + +Use this when you want IAM roles, Bedrock Guardrails, and cross-region inference profiles instead of direct vendor API keys. -If you have a Google account, skip the API key entirely and sign in with OAuth: +Azure AI Foundry handles both endpoint styles: ```bash -npm install -g @google/gemini-cli -gemini auth hermes model -# Pick "Gemini CLI (OAuth)" — Hermes detects the logged-in session +# Choose "Azure Foundry" → paste endpoint + key ``` -Hermes drives Gemini via the local CLI. You get 1500 requests/day on the free tier — plenty for exploration, classification, and Gemini's killer long-context reads. Merged in [#11270](https://github.com/NousResearch/hermes-agent/pull/11270) (April 16, 2026). +Hermes probes the endpoint, detects OpenAI-style `/chat/completions` vs Anthropic-style `/messages`, discovers deployments when possible, and stores the right `api_mode` in `config.yaml`. + +### Remote Model Catalog: Stop Hardcoding This Week's Winner + +OpenRouter and Nous Portal model pickers now fetch: + +```text +https://hermes-agent.nousresearch.com/docs/api/model-catalog.json +``` -### Gemini TTS — 7th Voice Provider +The cache lives at `~/.hermes/cache/model_catalog.json`. 
If the manifest is down, Hermes falls back to the disk cache or the bundled snapshot, so model selection still works offline. -As of [#10922](https://github.com/NousResearch/hermes-agent/issues/10922) (merged April 16), Gemini joins Edge, ElevenLabs, OpenAI, MiniMax, Mistral, and NeuTTS as a TTS backend: +### Gemini TTS + +Gemini is now one of the practical voice backends alongside Edge, ElevenLabs, OpenAI, MiniMax, Mistral, NeuTTS, and xAI: ```yaml tts: @@ -109,7 +135,7 @@ Models are configured in `~/.hermes/config.yaml`: ```yaml # Default model -model: claude-sonnet-4-20250514 +model: claude-sonnet provider: anthropic # Provider configurations @@ -120,11 +146,23 @@ providers: openai: api_key: ${OPENAI_API_KEY} - xai: # Native adapter (v0.10+) + bedrock: + region: us-east-2 # Auth via AWS_PROFILE, env vars, or instance role + + azure-foundry: + api_key: ${AZURE_FOUNDRY_API_KEY} + base_url: ${AZURE_FOUNDRY_ENDPOINT} + api_mode: chat_completions # Or anthropic_messages; wizard auto-detects + + lmstudio: + base_url: http://127.0.0.1:1234/v1 + api_key: ${LM_API_KEY} # Optional if your LM Studio server requires auth + + xai: api_key: ${XAI_API_KEY} live_search: true # Grok's live X/Twitter search - xiaomi: # Native adapter (v0.10+) + xiaomi: api_key: ${XIAOMI_API_KEY} reasoning_mode: high # low / medium / high @@ -137,6 +175,12 @@ providers: minimax: api_key: ${MINIMAX_API_KEY} + gmi: + api_key: ${GMI_API_KEY} + + tencent-tokenhub: + api_key: ${TOKENHUB_API_KEY} + arcee: api_key: ${ARCEE_API_KEY} @@ -220,12 +264,12 @@ Use these as opinionated defaults, then tune with [Part 20's cost-routing playbo | Task | First choice | Fallback (cheaper) | Fallback (fastest) | |------|--------------|--------------------|--------------------| -| Daily conversation | Claude Sonnet 4.5 | GLM 5.1 | Cerebras Llama 70B | -| Coding delegation | Claude Code via Sonnet 4.5 | OpenCode + Kimi K2.5 | OpenCode + Cerebras | +| Daily conversation | Anthropic Sonnet | Gemini OAuth or z.ai/GLM | 
Cerebras Llama/Qwen | +| Coding delegation | Claude Code / Codex OAuth | OpenCode + Kimi/Moonshot | OpenCode + Cerebras | | Long-context reads (>200K) | Gemini 2.5 Pro | Gemini 2.5 Flash | — | | Classification / triage | Gemini 2.5 Flash | Cerebras Qwen3 32B | Arcee AFM-4.5 | -| Reasoning (math, planning) | GPT-5.4 | Claude Opus 4 | GLM 5.1 | -| Current events / live search | xAI Grok 4 | Gemini with grounding | — | +| Reasoning (math, planning) | OpenAI reasoning model | Anthropic Opus-class | z.ai/GLM | +| Current events / live search | xAI Grok | Gemini with grounding | Tool Gateway web search | | Embeddings (LightRAG) | Qwen3-Embedding-8B (Fireworks) | nomic-embed-text (Ollama) | OpenAI `text-embedding-3-small` | | TTS (Telegram voice) | OpenAI TTS via Tool Gateway | Gemini 2.5 Flash TTS | Edge TTS (free) | | Vision | Gemini 2.5 Flash | GPT-4o | Claude Sonnet 4.5 | diff --git a/skills/dev/release-notes/SKILL.md b/skills/dev/release-notes/SKILL.md index 76bb61f..1393bee 100644 --- a/skills/dev/release-notes/SKILL.md +++ b/skills/dev/release-notes/SKILL.md @@ -58,7 +58,7 @@ Produce a release-notes document following the "What's New / Improvements / Fixe ## 🚀 What's New - HTTP MCP servers now reconnect automatically with exponential backoff. ([#1234](…)) -- Gemini CLI OAuth is now a first-class provider. ([#1270](…)) +- Gemini OAuth is now a first-class provider. ([#1270](…)) ## ⚡ Improvements - 40% faster skill load via async frontmatter parsing. ([#1205](…)) diff --git a/skills/ops/hermes-weekly/SKILL.md b/skills/ops/hermes-weekly/SKILL.md index ef56730..a8a0f9b 100644 --- a/skills/ops/hermes-weekly/SKILL.md +++ b/skills/ops/hermes-weekly/SKILL.md @@ -27,7 +27,7 @@ model_hint: google/gemini-2.5-flash # hermes-weekly — Weekly Digest -Automates the "Cooking on main" section of the guide — but for anyone running Hermes who wants a once-a-week summary of what landed upstream. 
+Automates a weekly upstream-change digest for anyone running Hermes who wants a concise summary of what landed. ## Procedure @@ -68,7 +68,7 @@ Automates the "Cooking on main" section of the guide — but for anyone running ## Why this skill -- The "Cooking on main" section in the guide's README is curated manually. This lets any Hermes user run it themselves with their own focus. +- The guide no longer tracks speculative "cooking on main" notes. This skill lets Hermes users make their own upgrade digest from merged upstream work. - Useful for users who are on a pinned version and want a checklist before upgrading. - Can be piped into Discord / Telegram channel / newsletter via `notify:` in the cron config. @@ -86,4 +86,4 @@ cron: - [release-notes](../../dev/release-notes/SKILL.md) — same pattern but for your own repo - [weekly-dep-audit](../weekly-dep-audit/SKILL.md) — upgrade-safety check -- README "[Cooking on main](../../../README.md)" — the manually-curated version +- [CHANGELOG](../../../CHANGELOG.md) — the manually curated guide history diff --git a/templates/config/cost-optimized.yaml b/templates/config/cost-optimized.yaml index 32ad1e1..0f759e7 100644 --- a/templates/config/cost-optimized.yaml +++ b/templates/config/cost-optimized.yaml @@ -3,9 +3,9 @@ # ------------------------------------------------------------ # Target: <$5/mo for personal daily-driver usage. 
# - Gemini 2.5 Flash / Pro for 90% of calls -# - Kimi K2.5 for bulk / background +# - Kimi/Moonshot for bulk / background # - Cerebras Llama 70B (free-ish tier) for classification -# - Gemini CLI OAuth (1500 req/day FREE) +# - Gemini OAuth free tier # - Anthropic Sonnet only when `intent: coding` on complex files # ------------------------------------------------------------ @@ -15,16 +15,18 @@ models: default: google/gemini-2.5-flash classification: cerebras/llama-3.1-70b long_context: google/gemini-2.5-pro - coding: moonshot/kimi-k2.5 # Fallback to Claude only for hard coding - coding_complex: anthropic/claude-sonnet-4-5 - reasoning: zai/glm-5.1 + coding: moonshot/kimi # Fallback to Claude only for hard coding + coding_complex: anthropic/claude-sonnet + reasoning: zai/glm providers: google: - oauth_enabled: true # <-- this is the free 1500/day tier + oauth_enabled: true # Hermes-managed Gemini OAuth free tier api_key: ${GOOGLE_API_KEY} # Used only when OAuth is unavailable anthropic: api_key: ${ANTHROPIC_API_KEY} prompt_caching: true # 90% discount on repeat context + openai: + api_key: ${OPENAI_API_KEY} # Required for LightRAG embeddings below moonshot: api_key: ${MOONSHOT_API_KEY} cerebras: @@ -38,13 +40,13 @@ routing: model: cerebras/llama-3.1-70b - intent: coding when: { complexity: high } - model: anthropic/claude-sonnet-4-5 + model: anthropic/claude-sonnet - intent: coding - model: moonshot/kimi-k2.5 + model: moonshot/kimi - intent: long_context model: google/gemini-2.5-pro - intent: reasoning - model: zai/glm-5.1 + model: zai/glm prefer_cached: true # Reroute if prompt is >80% cache-hit context: diff --git a/templates/config/minimum.yaml b/templates/config/minimum.yaml index 5e629c9..246d06f 100644 --- a/templates/config/minimum.yaml +++ b/templates/config/minimum.yaml @@ -12,7 +12,7 @@ version: 1 models: - default: anthropic/claude-sonnet-4-5 + default: anthropic/claude-sonnet providers: anthropic: api_key: ${ANTHROPIC_API_KEY} diff --git 
a/templates/config/production.yaml b/templates/config/production.yaml index 159d3d1..7d3ab79 100644 --- a/templates/config/production.yaml +++ b/templates/config/production.yaml @@ -14,12 +14,12 @@ version: 1 models: - default: anthropic/claude-sonnet-4-5 + default: anthropic/claude-sonnet classification: google/gemini-2.5-flash long_context: google/gemini-2.5-pro - coding: anthropic/claude-sonnet-4-5 - reasoning: openai/gpt-5.4 - cheap: moonshot/kimi-k2.5 + coding: anthropic/claude-sonnet + reasoning: openai/reasoning + cheap: moonshot/kimi providers: anthropic: api_key: ${ANTHROPIC_API_KEY} @@ -28,7 +28,7 @@ models: api_key: ${OPENAI_API_KEY} google: api_key: ${GOOGLE_API_KEY} - oauth_enabled: true # Use Gemini CLI OAuth when available + oauth_enabled: true # Use Gemini OAuth when available moonshot: api_key: ${MOONSHOT_API_KEY} zai: @@ -42,15 +42,15 @@ routing: - intent: classification model: google/gemini-2.5-flash - intent: coding - model: anthropic/claude-sonnet-4-5 + model: anthropic/claude-sonnet - intent: long_context when: { tokens_in: { gt: 200000 } } model: google/gemini-2.5-pro - intent: reasoning when: { needs_deep_reasoning: true } - model: openai/gpt-5.4 + model: openai/reasoning - intent: bulk_data - model: moonshot/kimi-k2.5 + model: moonshot/kimi gateways: cli: { enabled: true } diff --git a/templates/config/security-hardened.yaml b/templates/config/security-hardened.yaml index 3ac37c9..e6a98ce 100644 --- a/templates/config/security-hardened.yaml +++ b/templates/config/security-hardened.yaml @@ -29,10 +29,10 @@ profiles: - { tool: "*", actions: [exec, write, send, create, update, delete] } trusted: description: Admin-only. Full capability. 
- models: { default: anthropic/claude-sonnet-4-5 } + models: { default: anthropic/claude-sonnet } models: - default: anthropic/claude-sonnet-4-5 + default: anthropic/claude-sonnet providers: anthropic: api_key: "${ANTHROPIC_API_KEY}" diff --git a/templates/config/telegram-bot.yaml b/templates/config/telegram-bot.yaml index 7268b5c..9f19cb3 100644 --- a/templates/config/telegram-bot.yaml +++ b/templates/config/telegram-bot.yaml @@ -3,6 +3,7 @@ # ------------------------------------------------------------ # Opinionated setup for a personal Telegram assistant: # - Anthropic primary + Gemini Flash for classification +# - OpenAI embeddings for LightRAG memory # - Telegram gateway with a private admin DM + (optional) public bot # - LightRAG memory backend # - Sensible approval defaults @@ -12,7 +13,7 @@ version: 1 models: - default: anthropic/claude-sonnet-4-5 + default: anthropic/claude-sonnet classification: google/gemini-2.5-flash providers: anthropic: @@ -20,6 +21,8 @@ models: prompt_caching: true google: api_key: ${GOOGLE_API_KEY} + openai: + api_key: ${OPENAI_API_KEY} gateways: cli: