diff --git a/CLI.md b/CLI.md index 3d02c5bee..b25a66377 100644 --- a/CLI.md +++ b/CLI.md @@ -60,6 +60,7 @@ All prefixed with `BPSA_`: | `BPSA_MAX_TOKENS` | No | `64000` | Max tokens for model responses | | `BPSA_VERBOSE` | No | `0` | Verbose output (`0` or `1`) | | `BPSA_INJECT_FOLDER` | No | `true` | Inject directory tree (`false`, `true` = cwd, or a path) | +| `BPSA_MCP` | No | `''` | Newline-separated list of MCP servers (URLs or stdio commands). Merged with `--mcp` CLI flags. | ### Context Compression Variables @@ -186,6 +187,34 @@ Use `prompt_toolkit` for: | `/verbose` | Toggle verbose output | | `/dictation [on\|off]` | Toggle dictation (requires `BPSA_DICTATION_TRANSCRIBER`) | +## MCP Server Integration + +The `--mcp` flag connects [Model Context Protocol](https://modelcontextprotocol.io) servers as additional tool sources. Tools exposed by MCP servers are automatically available to the agent alongside the built-in tools. + +```bash +# HTTP-based MCP server (Streamable HTTP transport) +bpsa --mcp http://localhost:8000/mcp + +# stdio-based MCP server (shell command) +bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /' + +# Multiple MCP servers (flag can be repeated) +bpsa --mcp http://server1/mcp --mcp http://server2/mcp +``` + +The flag can be repeated to connect multiple servers simultaneously. Each server's tools are merged into the agent's tool list. MCP connections are automatically closed when the session ends. + +You can also set MCP servers persistently via the `BPSA_MCP` environment variable (one entry per line): + +```bash +export BPSA_MCP="http://localhost:8000/mcp" +# or multiple servers: +export BPSA_MCP="http://server1/mcp +npx -y @modelcontextprotocol/server-filesystem /" +``` + +CLI `--mcp` entries and `BPSA_MCP` entries are merged, so both can be used simultaneously. 
+ ## Configuration Layering Priority (highest to lowest): diff --git a/README.md b/README.md index 7f0f7a30b..20bdcbf96 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,7 @@ limitations under the License. * 🗜️ **Context compression**: Biologically inspired [automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks. * 🌐 **Browser integration:** Control a headed Chromium browser from agent code blocks via Playwright (`--browser` flag). * 🖥️ **GUI interaction:** Launch, screenshot, click, type, and send keys to native GUI applications on X11 via xdotool/ImageMagick (`--gui-x11` flag). +* 🔌 **MCP server integration:** Connect any [Model Context Protocol](https://modelcontextprotocol.io) server as a tool source via the `--mcp` CLI flag. Supports both HTTP (Streamable HTTP) and stdio-based servers. * 👁️ **Image loading:** Agents can load and visually inspect image files (plots, screenshots, diagrams) via the built-in `load_image` tool — always available, no flags needed. * 🎨 **Image tools:** Visual image diffing (`diff_images`), OCR text extraction from images (`screen_ocr`), and a canvas for drawing shapes, text, and annotations (`canvas_create`, `canvas_draw`) — always available. * 🎤 **Dictation input:** Dictate prompts via microphone using Whisper or ElevenLabs transcription (`/dictation` command, requires `BPSA_DICTATION_TRANSCRIBER` env var). @@ -89,6 +90,8 @@ $ echo "task" | bpsa # Piped input $ bpsa --load-instructions # Load CLAUDE.md, AGENTS.md, etc. at startup $ bpsa --browser # Enable Playwright browser integration $ bpsa --gui-x11 # Enable native GUI interaction (xdotool/ImageMagick) +$ bpsa --mcp http://localhost:8000/mcp # Connect an HTTP MCP server +$ bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /' # Connect a stdio MCP server ``` The REPL supports command history, tab completion for slash commands, and multi-line input via Alt+Enter. 
Use `/session-save <file>` and `/session-load <file>` to persist and restore sessions across restarts. You can also launch `ad-infinitum` from within the REPL via `!ad-infinitum ...`. Type `/help` to see all available commands. diff --git a/pyproject.toml b/pyproject.toml index 8dbd31d34..708108570 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "bpsa" -version = "1.23.11" +version = "1.23.12" description = "Beyond Python SmolAgents (BPSA) — a multi-language, multi-agent framework forked from HuggingFace smolagents." authors = [ { name="Joao Paulo Schwarz Schuler" }, diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py index 4e0d07dcc..75056f93a 100644 --- a/src/smolagents/__init__.py +++ b/src/smolagents/__init__.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.23.11" +__version__ = "1.23.12" from .agent_types import * # noqa: I001 from .agents import * # Above noqa avoids a circular dependency due to cli.py diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index 5c4052df6..3efe7d726 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -870,20 +870,29 @@ def write_memory_to_messages( that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help the LLM. If the agent has accumulated knowledge, it is injected just before the last message. 
""" - messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode) + # Collect all memory step messages + step_messages = [] for memory_step in self.memory.steps: - messages.extend(memory_step.to_messages(summary_mode=summary_mode)) - - # Inject knowledge near the end of context (just before the last message) - if self.memory.knowledge and self.memory.knowledge.strip(): - knowledge_msg = ChatMessage( - role=MessageRole.USER, - content=[{"type": "text", "text": f"\n{self.memory.knowledge}\n"}], - ) - if len(messages) > 1: - messages.insert(len(messages) - 1, knowledge_msg) - else: - messages.append(knowledge_msg) + step_messages.extend(memory_step.to_messages(summary_mode=summary_mode)) + + # New order: memory steps, system prompt, knowledge, last message + if step_messages: + last_message = step_messages[-1] + messages = step_messages[:-1] + messages.extend(self.memory.system_prompt.to_messages(summary_mode=summary_mode)) + if self.memory.knowledge and self.memory.knowledge.strip(): + messages.append(ChatMessage( + role=MessageRole.SYSTEM, + content=[{"type": "text", "text": f"\n{self.memory.knowledge}\n"}], + )) + messages.append(last_message) + else: + messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode) + if self.memory.knowledge and self.memory.knowledge.strip(): + messages.append(ChatMessage( + role=MessageRole.SYSTEM, + content=[{"type": "text", "text": f"\n{self.memory.knowledge}\n"}], + )) return messages diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py index 3982d2f96..9dc6de59e 100644 --- a/src/smolagents/bp_cli.py +++ b/src/smolagents/bp_cli.py @@ -2407,7 +2407,9 @@ def main(): from smolagents.bp_utils import get_env_bool browser_enabled = args.browser or get_env_bool("BPSA_BROWSER") gui_enabled = args.gui_x11 or get_env_bool("BPSA_GUI") - mcp_servers = _parse_mcp_servers(args.mcp or []) or None + env_mcp = get_env("BPSA_MCP", "") + env_mcp_list = [s.strip() for s in env_mcp.splitlines() if s.strip()] 
+ mcp_servers = _parse_mcp_servers((args.mcp or []) + env_mcp_list) or None # Piped input detection if not sys.stdin.isatty() and args.command is None: diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py index 983a2fffd..f723f6b28 100644 --- a/src/smolagents/bp_compression.py +++ b/src/smolagents/bp_compression.py @@ -418,7 +418,8 @@ def create_compression_prompt( return f"""Hello super-intelligence! This task is involved in your context compression. -To your own benefit, please summarize the following agent execution history into a concise summary. +Please summarize the following agent execution history into a concise summary. +Note: after compression, the original steps will be permanently removed from context. Write as if the reader will never see the originals. {COMMON_COMPRESSION_INSTRUCTIONS} {history_section}{knowledge_section}{post_steps_section}{output_instruction} This is the execution history to summarize: @@ -461,7 +462,14 @@ def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str: {summaries_text} -CONSOLIDATED SUMMARY:""" +Output format: + +Your consolidated summary of all events and changes... + + +...tagged updates if any... + +""" @@ -621,6 +629,7 @@ def create_knowledge_extraction_prompt( return f"""Hello super-intelligence! This task is involved in your context compression. +Note: after compression, the original summaries will be permanently removed from context. Write as if the reader will never see the originals. Please extract key knowledge from the following {len(compressed_steps)} summaries covering {total_steps} total steps of agent execution. These summaries are about to be removed from the context. Therefore, updating the knowledge