From b57b1063136e2fdd09f26af3b326be62a719c3c7 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:13:39 -0300
Subject: [PATCH 1/9] Inject knowledge as SYSTEM message after system prompt
 instead of before last message

Previously, knowledge was inserted as a USER message just before the last message,
which disrupted the natural conversation flow. Now it is injected as a SYSTEM message
immediately after the system prompt, treating it as foundational background context.

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 5c4052df6..8c598b31b 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -871,19 +871,17 @@ def write_memory_to_messages(
         the LLM. If the agent has accumulated knowledge, it is injected just before the last message.
         """
         messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
-        for memory_step in self.memory.steps:
-            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
 
-        # Inject knowledge near the end of context (just before the last message)
+        # Inject knowledge as a SYSTEM message right after the system prompt
         if self.memory.knowledge and self.memory.knowledge.strip():
             knowledge_msg = ChatMessage(
-                role=MessageRole.USER,
+                role=MessageRole.SYSTEM,
                 content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
             )
-            if len(messages) > 1:
-                messages.insert(len(messages) - 1, knowledge_msg)
-            else:
-                messages.append(knowledge_msg)
+            messages.append(knowledge_msg)
+
+        for memory_step in self.memory.steps:
+            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
 
         return messages
 

From 7adb565e1aacbdb2310ca32dc78db8aa7204c0f5 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:17:55 -0300
Subject: [PATCH 2/9] Revert "Inject knowledge as SYSTEM message after system
 prompt instead of before last message"

This reverts commit b57b1063136e2fdd09f26af3b326be62a719c3c7.
---
 src/smolagents/agents.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 8c598b31b..5c4052df6 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -871,17 +871,19 @@ def write_memory_to_messages(
         the LLM. If the agent has accumulated knowledge, it is injected just before the last message.
         """
         messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
+        for memory_step in self.memory.steps:
+            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
 
-        # Inject knowledge as a SYSTEM message right after the system prompt
+        # Inject knowledge near the end of context (just before the last message)
         if self.memory.knowledge and self.memory.knowledge.strip():
             knowledge_msg = ChatMessage(
-                role=MessageRole.SYSTEM,
+                role=MessageRole.USER,
                 content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
             )
-            messages.append(knowledge_msg)
-
-        for memory_step in self.memory.steps:
-            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
+            if len(messages) > 1:
+                messages.insert(len(messages) - 1, knowledge_msg)
+            else:
+                messages.append(knowledge_msg)
 
         return messages
 

From 99caf8d82b9e9e70c140e4146ce5e6657b431359 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:18:10 -0300
Subject: [PATCH 3/9] Change knowledge injection role from USER to SYSTEM
 message

The knowledge block is still injected just before the last message,
but now uses SYSTEM role instead of USER role, better signalling
it as background context rather than a conversational turn.

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 5c4052df6..5be2598fd 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -877,7 +877,7 @@ def write_memory_to_messages(
         # Inject knowledge near the end of context (just before the last message)
         if self.memory.knowledge and self.memory.knowledge.strip():
             knowledge_msg = ChatMessage(
-                role=MessageRole.USER,
+                role=MessageRole.SYSTEM,
                 content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
             )
             if len(messages) > 1:

From b3363fb7ca1c3f030f317136026338ddc1e6c4b5 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:25:18 -0300
Subject: [PATCH 4/9] Reorder context: memory steps, system prompt, knowledge,
 last message

New context order:
1. All memory steps (TaskStep, PlanningStep, ActionStep)
2. System prompt (SYSTEM role)
3. Knowledge (USER message, just before last message)
4. Last message

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py | 35 ++++++++++++++++++++++-------------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 5be2598fd..15c17d38c 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -870,20 +870,29 @@ def write_memory_to_messages(
         that can be used as input to the LLM. Adds a number of keywords (such as PLAN, error, etc) to help
         the LLM. If the agent has accumulated knowledge, it is injected just before the last message.
         """
-        messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
+        # Collect all memory step messages
+        step_messages = []
         for memory_step in self.memory.steps:
-            messages.extend(memory_step.to_messages(summary_mode=summary_mode))
-
-        # Inject knowledge near the end of context (just before the last message)
-        if self.memory.knowledge and self.memory.knowledge.strip():
-            knowledge_msg = ChatMessage(
-                role=MessageRole.SYSTEM,
-                content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
-            )
-            if len(messages) > 1:
-                messages.insert(len(messages) - 1, knowledge_msg)
-            else:
-                messages.append(knowledge_msg)
+            step_messages.extend(memory_step.to_messages(summary_mode=summary_mode))
+
+        # New order: memory steps, system prompt, knowledge, last message
+        if step_messages:
+            last_message = step_messages[-1]
+            messages = step_messages[:-1]
+            messages.extend(self.memory.system_prompt.to_messages(summary_mode=summary_mode))
+            if self.memory.knowledge and self.memory.knowledge.strip():
+                messages.append(ChatMessage(
+                    role=MessageRole.USER,
+                    content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
+                ))
+            messages.append(last_message)
+        else:
+            messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
+            if self.memory.knowledge and self.memory.knowledge.strip():
+                messages.append(ChatMessage(
+                    role=MessageRole.USER,
+                    content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
+                ))
 
         return messages
 

From 06a92eefb7f18e0bd73f38708b2e024dfb7da1ea Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:27:29 -0300
Subject: [PATCH 5/9] Fix: restore knowledge role to SYSTEM (accidentally
 reverted to USER)

The knowledge role was changed to SYSTEM in a previous commit but was
inadvertently reverted to USER during the context reordering. Now restored.

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/agents.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py
index 15c17d38c..3efe7d726 100644
--- a/src/smolagents/agents.py
+++ b/src/smolagents/agents.py
@@ -882,7 +882,7 @@ def write_memory_to_messages(
             messages.extend(self.memory.system_prompt.to_messages(summary_mode=summary_mode))
             if self.memory.knowledge and self.memory.knowledge.strip():
                 messages.append(ChatMessage(
-                    role=MessageRole.USER,
+                    role=MessageRole.SYSTEM,
                     content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
                 ))
             messages.append(last_message)
@@ -890,7 +890,7 @@ def write_memory_to_messages(
             messages = self.memory.system_prompt.to_messages(summary_mode=summary_mode)
             if self.memory.knowledge and self.memory.knowledge.strip():
                 messages.append(ChatMessage(
-                    role=MessageRole.USER,
+                    role=MessageRole.SYSTEM,
                     content=[{"type": "text", "text": f"<knowledge>\n{self.memory.knowledge}\n</knowledge>"}],
                 ))
 

From bc5b91b1c270427bdc0b1da6b3f9dcae0582cc65 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Tue, 3 Mar 2026 23:50:36 -0300
Subject: [PATCH 6/9] Improve compression prompts: remove 'To your own
 benefit', add permanence note, fix merge output format

- Remove 'To your own benefit' phrase (unnecessary justification)
- Add explicit note to both the compression prompt and the knowledge
  extraction prompt that the originals (steps or summaries) are permanently
  removed after compression, so the compressor writes more carefully
- Add structured <summary>/<knowledge_updates> output format to merge
  prompt, consistent with the main compression prompt

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 src/smolagents/bp_compression.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/smolagents/bp_compression.py b/src/smolagents/bp_compression.py
index 983a2fffd..f723f6b28 100644
--- a/src/smolagents/bp_compression.py
+++ b/src/smolagents/bp_compression.py
@@ -418,7 +418,8 @@ def create_compression_prompt(
 
     return f"""Hello super-intelligence!
 This task is involved in your context compression.
-To your own benefit, please summarize the following agent execution history into a concise summary.
+Please summarize the following agent execution history into a concise summary.
+Note: after compression, the original steps will be permanently removed from context. Write as if the reader will never see the originals.
 {COMMON_COMPRESSION_INSTRUCTIONS}
 {history_section}{knowledge_section}{post_steps_section}{output_instruction}
 This is the execution history to summarize:
@@ -461,7 +462,14 @@ def create_merge_prompt(compressed_steps: list[CompressedHistoryStep]) -> str:
 {summaries_text}
 </SUMMARIES_TO_MERGE>
 
-CONSOLIDATED SUMMARY:"""
+Output format:
+<summary>
+Your consolidated summary of all events and changes...
+</summary>
+<knowledge_updates>
+...tagged updates if any...
+</knowledge_updates>
+"""
 
 
 
@@ -621,6 +629,7 @@ def create_knowledge_extraction_prompt(
 
     return f"""Hello super-intelligence!
 This task is involved in your context compression.
+Note: after compression, the original summaries will be permanently removed from context. Write as if the reader will never see the originals.
 Please extract key knowledge from the following {len(compressed_steps)} summaries
 covering {total_steps} total steps of agent execution.
 These summaries are about to be removed from the context. Therefore, updating the knowledge

From 238a7eae9a0798c4da69282e5a79720514223483 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Wed, 4 Mar 2026 00:04:58 -0300
Subject: [PATCH 7/9] Add --mcp flag documentation to README.md and CLI.md

- Add MCP server integration feature bullet to README.md features list
- Add --mcp usage examples to README.md CLI usage block
- Add new MCP Server Integration section to CLI.md
- Add --mcp usage examples to CLI.md quick-start block

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 CLI.md    | 17 +++++++++++++++++
 README.md |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/CLI.md b/CLI.md
index 3d02c5bee..e90fce35e 100644
--- a/CLI.md
+++ b/CLI.md
@@ -186,6 +186,23 @@ Use `prompt_toolkit` for:
 | `/verbose` | Toggle verbose output |
 | `/dictation [on\|off]` | Toggle dictation (requires `BPSA_DICTATION_TRANSCRIBER`) |
 
+## MCP Server Integration
+
+The `--mcp` flag connects [Model Context Protocol](https://modelcontextprotocol.io) servers as additional tool sources. Tools exposed by MCP servers are automatically available to the agent alongside the built-in tools.
+
+```bash
+# HTTP-based MCP server (Streamable HTTP transport)
+bpsa --mcp http://localhost:8000/mcp
+
+# stdio-based MCP server (shell command)
+bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /'
+
+# Multiple MCP servers (flag can be repeated)
+bpsa --mcp http://server1/mcp --mcp http://server2/mcp
+```
+
+The flag can be repeated to connect multiple servers simultaneously. Each server's tools are merged into the agent's tool list. MCP connections are automatically closed when the session ends.
+
 ## Configuration Layering
 
 Priority (highest to lowest):
diff --git a/README.md b/README.md
index 7f0f7a30b..20bdcbf96 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ limitations under the License.
 * 🗜️ **Context compression**: Biologically inspired [automatic LLM-based summarization](docs/compression.md) of older memory steps to manage context window size during long-running tasks.
 * 🌐 **Browser integration:** Control a headed Chromium browser from agent code blocks via Playwright (`--browser` flag).
 * 🖥️ **GUI interaction:** Launch, screenshot, click, type, and send keys to native GUI applications on X11 via xdotool/ImageMagick (`--gui-x11` flag).
+* 🔌 **MCP server integration:** Connect any [Model Context Protocol](https://modelcontextprotocol.io) server as a tool source via the `--mcp` CLI flag. Supports both HTTP (Streamable HTTP) and stdio-based servers.
 * 👁️ **Image loading:** Agents can load and visually inspect image files (plots, screenshots, diagrams) via the built-in `load_image` tool — always available, no flags needed.
 * 🎨 **Image tools:** Visual image diffing (`diff_images`), OCR text extraction from images (`screen_ocr`), and a canvas for drawing shapes, text, and annotations (`canvas_create`, `canvas_draw`) — always available.
 * 🎤 **Dictation input:** Dictate prompts via microphone using Whisper or ElevenLabs transcription (`/dictation` command, requires `BPSA_DICTATION_TRANSCRIBER` env var).
@@ -89,6 +90,8 @@ $ echo "task" | bpsa                # Piped input
 $ bpsa --load-instructions          # Load CLAUDE.md, AGENTS.md, etc. at startup
 $ bpsa --browser                    # Enable Playwright browser integration
 $ bpsa --gui-x11                     # Enable native GUI interaction (xdotool/ImageMagick)
+$ bpsa --mcp http://localhost:8000/mcp  # Connect an HTTP MCP server
+$ bpsa --mcp 'npx -y @modelcontextprotocol/server-filesystem /'  # Connect a stdio MCP server
 ```
 
 The REPL supports command history, tab completion for slash commands, and multi-line input via Alt+Enter. Use `/session-save <file>` and `/session-load <file>` to persist and restore sessions across restarts. You can also launch `ad-infinitum` from within the REPL via `!ad-infinitum ...`. Type `/help` to see all available commands.

From 7aae1004cdb5cf9c13fbc47fd6e6c2cf62893320 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Wed, 4 Mar 2026 00:09:37 -0300
Subject: [PATCH 8/9] Add BPSA_MCP env var support and document it in CLI.md

- Read BPSA_MCP env var (newline-separated) and merge with --mcp CLI flags
- Document BPSA_MCP in CLI.md env vars table
- Document BPSA_MCP usage in MCP Server Integration section

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 CLI.md                   | 12 ++++++++++++
 src/smolagents/bp_cli.py |  4 +++-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/CLI.md b/CLI.md
index e90fce35e..b25a66377 100644
--- a/CLI.md
+++ b/CLI.md
@@ -60,6 +60,7 @@ All prefixed with `BPSA_`:
 | `BPSA_MAX_TOKENS` | No | `64000` | Max tokens for model responses |
 | `BPSA_VERBOSE` | No | `0` | Verbose output (`0` or `1`) |
 | `BPSA_INJECT_FOLDER` | No | `true` | Inject directory tree (`false`, `true` = cwd, or a path) |
+| `BPSA_MCP` | No | `''` | Newline-separated list of MCP servers (URLs or stdio commands). Merged with `--mcp` CLI flags. |
 
 ### Context Compression Variables
 
@@ -203,6 +204,17 @@ bpsa --mcp http://server1/mcp --mcp http://server2/mcp
 
 The flag can be repeated to connect multiple servers simultaneously. Each server's tools are merged into the agent's tool list. MCP connections are automatically closed when the session ends.
 
+You can also configure MCP servers via the `BPSA_MCP` environment variable, one entry per line (surrounding whitespace is stripped and blank lines are ignored):
+
+```bash
+export BPSA_MCP="http://localhost:8000/mcp"
+# or multiple servers:
+export BPSA_MCP="http://server1/mcp
+npx -y @modelcontextprotocol/server-filesystem /"
+```
+
+CLI `--mcp` entries and `BPSA_MCP` entries are merged (CLI entries first, followed by the `BPSA_MCP` entries), so both can be used simultaneously.
+
 ## Configuration Layering
 
 Priority (highest to lowest):
diff --git a/src/smolagents/bp_cli.py b/src/smolagents/bp_cli.py
index 3982d2f96..9dc6de59e 100644
--- a/src/smolagents/bp_cli.py
+++ b/src/smolagents/bp_cli.py
@@ -2407,7 +2407,9 @@ def main():
     from smolagents.bp_utils import get_env_bool
     browser_enabled = args.browser or get_env_bool("BPSA_BROWSER")
     gui_enabled = args.gui_x11 or get_env_bool("BPSA_GUI")
-    mcp_servers = _parse_mcp_servers(args.mcp or []) or None
+    env_mcp = get_env("BPSA_MCP", "")
+    env_mcp_list = [s.strip() for s in env_mcp.splitlines() if s.strip()]
+    mcp_servers = _parse_mcp_servers((args.mcp or []) + env_mcp_list) or None
 
     # Piped input detection
     if not sys.stdin.isatty() and args.command is None:

From 59b7dc95545016b882b94bf66fdb49856442a018 Mon Sep 17 00:00:00 2001
From: jp <jp@schulers.com>
Date: Wed, 4 Mar 2026 00:17:30 -0300
Subject: [PATCH 9/9] Bump version from 1.23.11 to 1.23.12

Co-Authored-By: bpsa2 <241537330+bpsa2@users.noreply.github.com>
---
 pyproject.toml             | 2 +-
 src/smolagents/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 8dbd31d34..708108570 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "bpsa"
-version = "1.23.11"
+version = "1.23.12"
 description = "Beyond Python SmolAgents (BPSA) — a multi-language, multi-agent framework forked from HuggingFace smolagents."
 authors = [
   { name="Joao Paulo Schwarz Schuler" },
diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py
index 4e0d07dcc..75056f93a 100644
--- a/src/smolagents/__init__.py
+++ b/src/smolagents/__init__.py
@@ -14,7 +14,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = "1.23.11"
+__version__ = "1.23.12"
 
 from .agent_types import *  # noqa: I001
 from .agents import *  # Above noqa avoids a circular dependency due to cli.py