joaopauloschuler · joaopauloschuler · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/CLI.md b/CLI.md
@@ -60,6 +60,7 @@ All prefixed with `BPSA_`:
 | `BPSA_MAX_TOKENS` | No | `64000` | Max tokens for model responses |
 | `BPSA_VERBOSE` | No | `0` | Verbose output (`0` or `1`) |
 | `BPSA_INJECT_FOLDER` | No | `true` | Inject directory tree (`false`, `true` = cwd, or a path) |
+| `BPSA_MCP` | No | `''` | Newline-separated list of MCP servers (URLs or stdio commands). Merged with `--mcp` CLI flags. |
 
 ### Context Compression Variables
 
@@ -186,6 +187,34 @@ Use `prompt_toolkit` for:
 | `/verbose` | Toggle verbose output |
 | `/dictation [on\|off]` | Toggle dictation (requires `BPSA_DICTATION_TRANSCRIBER`) |
 
+## MCP Server Integration
+
+The `--mcp` flag connects [Model Context Protocol](https://modelcontextprotocol.io) servers as additional tool sources. Tools exposed by MCP servers are automatically available to the agent alongside the built-in tools.
+
+```bash
+# HTTP-based MCP server (Streamable HTTP transport)
+bpsa --mcp http://localhost:8000/mcp
+
+# stdio-based MCP server (shell command)
+bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /'
+
+# Multiple MCP servers (flag can be repeated)
+bpsa --mcp http://server1/mcp --mcp http://server2/mcp
+```
+
+The flag can be repeated to connect multiple servers simultaneously. Each server's tools are merged into the agent's tool list. MCP connections are automatically closed when the session ends.
+
+You can also configure MCP servers via the `BPSA_MCP` environment variable (one entry per line); set it in your shell profile to make the configuration persistent:
+
+```bash
+export BPSA_MCP="http://localhost:8000/mcp"
+# or multiple servers:
+export BPSA_MCP="http://server1/mcp
+npx -y @modelcontextprotocol/server-filesystem /"
+```
+
+CLI `--mcp` entries and `BPSA_MCP` entries are merged, so both can be used simultaneously.
+
 ## Configuration Layering
 
 Priority (highest to lowest):

diff --git a/README.md b/README.md
@@ -21,6 +21,7 @@ limitations under the License.
 * 🗜️ **Context compression**: Biologically inspired [automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks.
 * 🌐 **Browser integration:** Control a headed Chromium browser from agent code blocks via Playwright (`--browser` flag).
 * 🖥️ **GUI interaction:** Launch, screenshot, click, type, and send keys to native GUI applications on X11 via xdotool/ImageMagick (`--gui-x11` flag).
+* 🔌 **MCP server integration:** Connect any [Model Context Protocol](https://modelcontextprotocol.io) server as a tool source via the `--mcp` CLI flag or the `BPSA_MCP` environment variable. Supports both HTTP (Streamable HTTP) and stdio-based servers.
 * 👁️ **Image loading:** Agents can load and visually inspect image files (plots, screenshots, diagrams) via the built-in `load_image` tool — always available, no flags needed.
 * 🎨 **Image tools:** Visual image diffing (`diff_images`), OCR text extraction from images (`screen_ocr`), and a canvas for drawing shapes, text, and annotations (`canvas_create`, `canvas_draw`) — always available.
 * 🎤 **Dictation input:** Dictate prompts via microphone using Whisper or ElevenLabs transcription (`/dictation` command, requires `BPSA_DICTATION_TRANSCRIBER` env var).
@@ -89,6 +90,8 @@ $ echo "task" | bpsa                # Piped input
 $ bpsa --load-instructions          # Load CLAUDE.md, AGENTS.md, etc. at startup
 $ bpsa --browser                    # Enable Playwright browser integration
 $ bpsa --gui-x11                     # Enable native GUI interaction (xdotool/ImageMagick)
+$ bpsa --mcp http://localhost:8000/mcp  # Connect an HTTP MCP server
+$ bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /'  # Connect a stdio MCP server
 ```
 
 The REPL supports command history, tab completion for slash commands, and multi-line input via Alt+Enter. Use `/session-save <file>` and `/session-load <file>` to persist and restore sessions across restarts. You can also launch `ad-infinitum` from within the REPL via `!ad-infinitum ...`. Type `/help` to see all available commands.

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "bpsa"
-version = "1.23.11"
+version = "1.23.12"
 description = "Beyond Python SmolAgents (BPSA) — a multi-language, multi-agent framework forked from HuggingFace smolagents."
 authors = [
   { name="Joao Paulo Schwarz Schuler" },

diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.23.11"
+__version__ = "1.23.12"
 
 from .agent_types import *  # noqa: I001
 from .agents import *  # Above noqa avoids a circular dependency due to cli.py

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
@@ -870,20 +870,29 @@ def write_memory_to_messages(
         that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help
         the LLM. If the agent has accumulated knowledge, it is injected just before the last message.
         """
-        messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
+        # Collect all memory step messages
+        step_messages = []
         for memory_step in self.memory.steps:
-            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
-
-        # Inject knowledge near the end of context (just before the last message)
-        if self.memory.knowledge and self.memory.knowledge.strip():
-            knowledge_msg = ChatMessage(
-                role=MessageRole.USER,
-                content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
-            )
-            if len(messages) > 1:
-                messages.insert(len(messages) - 1, knowledge_msg)
-            else:
-                messages.append(knowledge_msg)
+            step_messages.extend(memory_step.to_messages(summary_mode=summary_mode))
+
+        # Message order: all step messages except the last, then the system prompt, then knowledge, then the last step message
+        if step_messages:
+            last_message = step_messages[-1]
+            messages = step_messages[:-1]
+            messages.extend(self.memory.system_prompt.to_messages(summary_mode=summary_mode))
+            if self.memory.knowledge and self.memory.knowledge.strip():
+                messages.append(ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
+                ))
+            messages.append(last_message)
+        else:
+            messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
+            if self.memory.knowledge and self.memory.knowledge.strip():
+                messages.append(ChatMessage(
+                    role=MessageRole.SYSTEM,
+                    content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
+                ))
 
         return messages
 

diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
@@ -2407,7 +2407,9 @@ def main():
     from smolagents.bp_utils import get_env_bool
     browser_enabled = args.browser or get_env_bool("BPSA_BROWSER")
     gui_enabled = args.gui_x11 or get_env_bool("BPSA_GUI")
-    mcp_servers = _parse_mcp_servers(args.mcp or []) or None
+    env_mcp = get_env("BPSA_MCP", "")
+    env_mcp_list = [s.strip() for s in env_mcp.splitlines() if s.strip()]
+    mcp_servers = _parse_mcp_servers((args.mcp or []) + env_mcp_list) or None
 
     # Piped input detection
     if not sys.stdin.isatty() and args.command is None:

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
@@ -418,7 +418,8 @@ def create_compression_prompt(
 
     return f"""Hello super-intelligence!
 This task is involved in your context compression.
-To your own benefit, please summarize the following agent execution history into a concise summary.
+Please summarize the following agent execution history into a concise summary.
+Note: after compression, the original steps will be permanently removed from context. Write as if the reader will never see the originals.
 {COMMON_COMPRESSION_INSTRUCTIONS}
 {history_section}{knowledge_section}{post_steps_section}{output_instruction}
 This is the execution history to summarize:
@@ -461,7 +462,14 @@ def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str:
 {summaries_text}
 </SUMMARIES_TO_MERGE>
 
-CONSOLIDATED SUMMARY:"""
+Output format:
+<summary>
+Your consolidated summary of all events and changes...
+</summary>
+<knowledge_updates>
+...tagged updates if any...
+</knowledge_updates>
+"""
 
 
 
@@ -621,6 +629,7 @@ def create_knowledge_extraction_prompt(
 
     return f"""Hello super-intelligence!
 This task is involved in your context compression.
+Note: after compression, the original summaries will be permanently removed from context. Write as if the reader will never see the originals.
 Please extract key knowledge from the following {len(compressed_steps)} summaries
 covering {total_steps} total steps of agent execution.
 These summaries are about to be removed from the context. Therefore, updating the knowledge